DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH v1] ixgbe: remove vector pmd burst size restriction
@ 2015-07-31  8:17 Cunming Liang
  2015-07-31  9:21 ` Ananyev, Konstantin
                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Cunming Liang @ 2015-07-31  8:17 UTC (permalink / raw)
  To: dev

The patch removes the restriction of burst size on a constant 32.

On receive side, the burst size floor now aligns to RTE_IXGBE_DESCS_PER_LOOP power of 2.
According to this rule, the burst size less than 4 still won't receive anything.
(Before this change, the burst size less than 32 can't receive anything.)

On transmit side, the max burst size is no longer bound to a constant; however, it is still
required to check for crossing the tx_rs_thresh boundary.

There's no obvious performance drop found on both recv_pkts_vec
and recv_scattered_pkts_vec on burst size 32.

Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
 drivers/net/ixgbe/ixgbe_rxtx.c     |  3 ++-
 drivers/net/ixgbe/ixgbe_rxtx.h     |  4 +---
 drivers/net/ixgbe/ixgbe_rxtx_vec.c | 35 ++++++++++++++++-------------------
 3 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 3f808b3..dbdb761 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -4008,7 +4008,8 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
 	 */
 	} else if (adapter->rx_vec_allowed) {
 		PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
-				   "burst size no less than 32.");
+				    "burst size no less than 4 (port=%d).",
+			     dev->data->port_id);
 
 		dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
 	} else if (adapter->rx_bulk_alloc_allowed) {
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
index 113682a..eb931fe 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx.h
@@ -47,9 +47,7 @@
 	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
 
 #ifdef RTE_IXGBE_INC_VECTOR
-#define RTE_IXGBE_VPMD_RX_BURST         32
-#define RTE_IXGBE_VPMD_TX_BURST         32
-#define RTE_IXGBE_RXQ_REARM_THRESH      RTE_IXGBE_VPMD_RX_BURST
+#define RTE_IXGBE_RXQ_REARM_THRESH      32
 #define RTE_IXGBE_TX_MAX_FREE_BUF_SZ    64
 #endif
 
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
index 1c16dec..b72f817 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
@@ -199,13 +199,11 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
 #endif
 
 /*
- * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
- * in one loop
+ * vPMD raw receive routine
  *
  * Notice:
- * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
- * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
- *   numbers of DD bit
+ * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
+ * - 'nb_pkts < 4' causes 0 packet receiving
  * - don't support ol_flags for rss and csum err
  */
 static inline uint16_t
@@ -240,8 +238,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	__m128i dd_check, eop_check;
 #endif /* RTE_NEXT_ABI */
 
-	if (unlikely(nb_pkts < RTE_IXGBE_VPMD_RX_BURST))
-		return 0;
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
 
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles */
@@ -310,7 +307,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	 * D. fill info. from desc to mbuf
 	 */
 #endif /* RTE_NEXT_ABI */
-	for (pos = 0, nb_pkts_recd = 0; pos < RTE_IXGBE_VPMD_RX_BURST;
+	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
 			pos += RTE_IXGBE_DESCS_PER_LOOP,
 			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
 		__m128i descs[RTE_IXGBE_DESCS_PER_LOOP];
@@ -450,13 +447,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 }
 
 /*
- * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
- * in one loop
+ * vPMD receive routine
  *
  * Notice:
- * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
- * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
- *   numbers of DD bit
+ * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
+ * - 'nb_pkts < 4' causes 0 packet receiving
  * - don't support ol_flags for rss and csum err
  */
 uint16_t
@@ -470,12 +465,11 @@ static inline uint16_t
 reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
 		uint16_t nb_bufs, uint8_t *split_flags)
 {
-	struct rte_mbuf *pkts[RTE_IXGBE_VPMD_RX_BURST]; /*finished pkts*/
+	struct rte_mbuf *pkts[nb_bufs]; /*finished pkts*/
 	struct rte_mbuf *start = rxq->pkt_first_seg;
 	struct rte_mbuf *end =  rxq->pkt_last_seg;
 	unsigned pkt_idx, buf_idx;
 
-
 	for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
 		if (end != NULL) {
 			/* processing a split packet */
@@ -535,14 +529,17 @@ reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
  *
  * Notice:
  * - don't support ol_flags for rss and csum err
- * - now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
+ * - will floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
+ * - 'nb_pkts < 4' causes 0 packet receiving
  */
 uint16_t
 ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts)
 {
 	struct ixgbe_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[RTE_IXGBE_VPMD_RX_BURST] = {0};
+	uint8_t split_flags[nb_pkts];
+
+	memset(split_flags, 0, nb_pkts);
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
@@ -667,8 +664,8 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;
 	int i;
 
-	if (unlikely(nb_pkts > RTE_IXGBE_VPMD_TX_BURST))
-		nb_pkts = RTE_IXGBE_VPMD_TX_BURST;
+	/* cross rx_thresh boundary is not allowed */
+	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
 
 	if (txq->nb_tx_free < txq->tx_free_thresh)
 		ixgbe_tx_free_bufs(txq);
-- 
2.4.3

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v1] ixgbe: remove vector pmd burst size restriction
  2015-07-31  8:17 [dpdk-dev] [PATCH v1] ixgbe: remove vector pmd burst size restriction Cunming Liang
@ 2015-07-31  9:21 ` Ananyev, Konstantin
  2015-07-31 10:03 ` Zoltan Kiss
  2015-08-04  7:32 ` [dpdk-dev] [PATCH v2] " Cunming Liang
  2 siblings, 0 replies; 18+ messages in thread
From: Ananyev, Konstantin @ 2015-07-31  9:21 UTC (permalink / raw)
  To: Liang, Cunming, dev



> -----Original Message-----
> From: Liang, Cunming
> Sent: Friday, July 31, 2015 9:18 AM
> To: dev@dpdk.org
> Cc: Ananyev, Konstantin; Liang, Cunming
> Subject: [PATCH v1] ixgbe: remove vector pmd burst size restriction
> 
> The patch removes the restriction of burst size on a constant 32.
> 
> On receive side, the burst size floor now aligns to RTE_IXGBE_DESCS_PER_LOOP power of 2.
> According to this rule, the burst size less than 4 still won't receive anything.
> (Before this change, the burst size less than 32 can't receive anything.)
> 
> On transmit side, the max burst size no longer bind with a constant, however it still
> require to check the cross tx_rs_thresh violation.
> 
> There's no obvious performance drop found on both recv_pkts_vec
> and recv_scattered_pkts_vec on burst size 32.
> 
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> ---

Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v1] ixgbe: remove vector pmd burst size restriction
  2015-07-31  8:17 [dpdk-dev] [PATCH v1] ixgbe: remove vector pmd burst size restriction Cunming Liang
  2015-07-31  9:21 ` Ananyev, Konstantin
@ 2015-07-31 10:03 ` Zoltan Kiss
  2015-07-31 10:21   ` Ananyev, Konstantin
  2015-08-03  2:40   ` Liang, Cunming
  2015-08-04  7:32 ` [dpdk-dev] [PATCH v2] " Cunming Liang
  2 siblings, 2 replies; 18+ messages in thread
From: Zoltan Kiss @ 2015-07-31 10:03 UTC (permalink / raw)
  To: Cunming Liang, dev



On 31/07/15 09:17, Cunming Liang wrote:
> The patch removes the restriction of burst size on a constant 32.
>
> On receive side, the burst size floor now aligns to RTE_IXGBE_DESCS_PER_LOOP power of 2.
> According to this rule, the burst size less than 4 still won't receive anything.
> (Before this change, the burst size less than 32 can't receive anything.)
>
> On transmit side, the max burst size no longer bind with a constant, however it still
> require to check the cross tx_rs_thresh violation.
>
> There's no obvious performance drop found on both recv_pkts_vec
> and recv_scattered_pkts_vec on burst size 32.
>
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> ---
>   drivers/net/ixgbe/ixgbe_rxtx.c     |  3 ++-
>   drivers/net/ixgbe/ixgbe_rxtx.h     |  4 +---
>   drivers/net/ixgbe/ixgbe_rxtx_vec.c | 35 ++++++++++++++++-------------------
>   3 files changed, 19 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> index 3f808b3..dbdb761 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -4008,7 +4008,8 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
>   	 */
>   	} else if (adapter->rx_vec_allowed) {
>   		PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
> -				   "burst size no less than 32.");
> +				    "burst size no less than 4 (port=%d).",
> +			     dev->data->port_id);

I think it would be better to use RTE_IXGBE_DESCS_PER_LOOP instead of a 
constant 4.
>
>   		dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
>   	} else if (adapter->rx_bulk_alloc_allowed) {
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
> index 113682a..eb931fe 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> @@ -47,9 +47,7 @@
>   	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
>
>   #ifdef RTE_IXGBE_INC_VECTOR
> -#define RTE_IXGBE_VPMD_RX_BURST         32
> -#define RTE_IXGBE_VPMD_TX_BURST         32
> -#define RTE_IXGBE_RXQ_REARM_THRESH      RTE_IXGBE_VPMD_RX_BURST
> +#define RTE_IXGBE_RXQ_REARM_THRESH      32
>   #define RTE_IXGBE_TX_MAX_FREE_BUF_SZ    64
>   #endif
>
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> index 1c16dec..b72f817 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> @@ -199,13 +199,11 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
>   #endif
>
>   /*
> - * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
> - * in one loop
> + * vPMD raw receive routine
I would keep some warning there, like "(if nb_pkts < 
RTE_IXGBE_DESCS_PER_LOOP, won't receive anything)"

>    *
>    * Notice:
> - * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
> - * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
> - *   numbers of DD bit
> + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
> + * - 'nb_pkts < 4' causes 0 packet receiving
Again, RTE_IXGBE_DESCS_PER_LOOP would be better than 4

>    * - don't support ol_flags for rss and csum err
>    */
>   static inline uint16_t
> @@ -240,8 +238,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>   	__m128i dd_check, eop_check;
>   #endif /* RTE_NEXT_ABI */
>
> -	if (unlikely(nb_pkts < RTE_IXGBE_VPMD_RX_BURST))
> -		return 0;
> +	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
>
>   	/* Just the act of getting into the function from the application is
>   	 * going to cost about 7 cycles */
> @@ -310,7 +307,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>   	 * D. fill info. from desc to mbuf
>   	 */
>   #endif /* RTE_NEXT_ABI */
> -	for (pos = 0, nb_pkts_recd = 0; pos < RTE_IXGBE_VPMD_RX_BURST;
> +	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
>   			pos += RTE_IXGBE_DESCS_PER_LOOP,
>   			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
>   		__m128i descs[RTE_IXGBE_DESCS_PER_LOOP];
> @@ -450,13 +447,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>   }
>
>   /*
> - * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
> - * in one loop
> + * vPMD receive routine
Same note here as above

>    *
>    * Notice:
> - * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
> - * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
> - *   numbers of DD bit
> + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
> + * - 'nb_pkts < 4' causes 0 packet receiving
>    * - don't support ol_flags for rss and csum err
>    */
>   uint16_t
> @@ -470,12 +465,11 @@ static inline uint16_t
>   reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
>   		uint16_t nb_bufs, uint8_t *split_flags)
>   {
> -	struct rte_mbuf *pkts[RTE_IXGBE_VPMD_RX_BURST]; /*finished pkts*/
> +	struct rte_mbuf *pkts[nb_bufs]; /*finished pkts*/
>   	struct rte_mbuf *start = rxq->pkt_first_seg;
>   	struct rte_mbuf *end =  rxq->pkt_last_seg;
>   	unsigned pkt_idx, buf_idx;
>
> -
>   	for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
>   		if (end != NULL) {
>   			/* processing a split packet */
> @@ -535,14 +529,17 @@ reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
>    *
>    * Notice:
>    * - don't support ol_flags for rss and csum err
> - * - now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
> + * - will floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
> + * - 'nb_pkts < 4' causes 0 packet receiving
>    */
>   uint16_t
>   ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>   		uint16_t nb_pkts)
>   {
>   	struct ixgbe_rx_queue *rxq = rx_queue;
> -	uint8_t split_flags[RTE_IXGBE_VPMD_RX_BURST] = {0};
> +	uint8_t split_flags[nb_pkts];
> +
> +	memset(split_flags, 0, nb_pkts);
>
>   	/* get some new buffers */
>   	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,

After this _recv_raw_pkts_vec it checks 32 bytes in split_flags (4x8 
bytes), that can overrun or miss some flags.
Btw. Bruce just fixed that part in "ixgbe: fix check for split packets"


> @@ -667,8 +664,8 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>   	uint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;
>   	int i;
>
> -	if (unlikely(nb_pkts > RTE_IXGBE_VPMD_TX_BURST))
> -		nb_pkts = RTE_IXGBE_VPMD_TX_BURST;
> +	/* cross rx_thresh boundary is not allowed */
> +	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
>
>   	if (txq->nb_tx_free < txq->tx_free_thresh)
>   		ixgbe_tx_free_bufs(txq);
>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v1] ixgbe: remove vector pmd burst size restriction
  2015-07-31 10:03 ` Zoltan Kiss
@ 2015-07-31 10:21   ` Ananyev, Konstantin
  2015-07-31 11:57     ` Zoltan Kiss
  2015-08-03  7:41     ` Liang, Cunming
  2015-08-03  2:40   ` Liang, Cunming
  1 sibling, 2 replies; 18+ messages in thread
From: Ananyev, Konstantin @ 2015-07-31 10:21 UTC (permalink / raw)
  To: Zoltan Kiss, Liang, Cunming, dev



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Zoltan Kiss
> Sent: Friday, July 31, 2015 11:04 AM
> To: Liang, Cunming; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v1] ixgbe: remove vector pmd burst size restriction
> 
> 
> 
> On 31/07/15 09:17, Cunming Liang wrote:
> > The patch removes the restriction of burst size on a constant 32.
> >
> > On receive side, the burst size floor now aligns to RTE_IXGBE_DESCS_PER_LOOP power of 2.
> > According to this rule, the burst size less than 4 still won't receive anything.
> > (Before this change, the burst size less than 32 can't receive anything.)
> >
> > On transmit side, the max burst size no longer bind with a constant, however it still
> > require to check the cross tx_rs_thresh violation.
> >
> > There's no obvious performance drop found on both recv_pkts_vec
> > and recv_scattered_pkts_vec on burst size 32.
> >
> > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > ---
> >   drivers/net/ixgbe/ixgbe_rxtx.c     |  3 ++-
> >   drivers/net/ixgbe/ixgbe_rxtx.h     |  4 +---
> >   drivers/net/ixgbe/ixgbe_rxtx_vec.c | 35 ++++++++++++++++-------------------
> >   3 files changed, 19 insertions(+), 23 deletions(-)
> >
> > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> > index 3f808b3..dbdb761 100644
> > --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> > +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> > @@ -4008,7 +4008,8 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
> >   	 */
> >   	} else if (adapter->rx_vec_allowed) {
> >   		PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
> > -				   "burst size no less than 32.");
> > +				    "burst size no less than 4 (port=%d).",
> > +			     dev->data->port_id);
> 
> I think it would be better to use RTE_IXGBE_DESCS_PER_LOOP instead of a
> constant 4.
> >
> >   		dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
> >   	} else if (adapter->rx_bulk_alloc_allowed) {
> > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
> > index 113682a..eb931fe 100644
> > --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> > +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> > @@ -47,9 +47,7 @@
> >   	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
> >
> >   #ifdef RTE_IXGBE_INC_VECTOR
> > -#define RTE_IXGBE_VPMD_RX_BURST         32
> > -#define RTE_IXGBE_VPMD_TX_BURST         32
> > -#define RTE_IXGBE_RXQ_REARM_THRESH      RTE_IXGBE_VPMD_RX_BURST
> > +#define RTE_IXGBE_RXQ_REARM_THRESH      32
> >   #define RTE_IXGBE_TX_MAX_FREE_BUF_SZ    64
> >   #endif
> >
> > diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> > index 1c16dec..b72f817 100644
> > --- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> > +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> > @@ -199,13 +199,11 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
> >   #endif
> >
> >   /*
> > - * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
> > - * in one loop
> > + * vPMD raw receive routine
> I would keep some warning there, like "(if nb_pkts <
> RTE_IXGBE_DESCS_PER_LOOP, won't receive anything)"
> 
> >    *
> >    * Notice:
> > - * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
> > - * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
> > - *   numbers of DD bit
> > + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
> > + * - 'nb_pkts < 4' causes 0 packet receiving
> Again, RTE_IXGBE_DESCS_PER_LOOP would be better than 4
> 
> >    * - don't support ol_flags for rss and csum err
> >    */
> >   static inline uint16_t
> > @@ -240,8 +238,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
> >   	__m128i dd_check, eop_check;
> >   #endif /* RTE_NEXT_ABI */
> >
> > -	if (unlikely(nb_pkts < RTE_IXGBE_VPMD_RX_BURST))
> > -		return 0;
> > +	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
> >
> >   	/* Just the act of getting into the function from the application is
> >   	 * going to cost about 7 cycles */
> > @@ -310,7 +307,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
> >   	 * D. fill info. from desc to mbuf
> >   	 */
> >   #endif /* RTE_NEXT_ABI */
> > -	for (pos = 0, nb_pkts_recd = 0; pos < RTE_IXGBE_VPMD_RX_BURST;
> > +	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
> >   			pos += RTE_IXGBE_DESCS_PER_LOOP,
> >   			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
> >   		__m128i descs[RTE_IXGBE_DESCS_PER_LOOP];
> > @@ -450,13 +447,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
> >   }
> >
> >   /*
> > - * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
> > - * in one loop
> > + * vPMD receive routine
> Same note here as above
> 
> >    *
> >    * Notice:
> > - * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
> > - * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
> > - *   numbers of DD bit
> > + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
> > + * - 'nb_pkts < 4' causes 0 packet receiving
> >    * - don't support ol_flags for rss and csum err
> >    */
> >   uint16_t
> > @@ -470,12 +465,11 @@ static inline uint16_t
> >   reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
> >   		uint16_t nb_bufs, uint8_t *split_flags)
> >   {
> > -	struct rte_mbuf *pkts[RTE_IXGBE_VPMD_RX_BURST]; /*finished pkts*/
> > +	struct rte_mbuf *pkts[nb_bufs]; /*finished pkts*/
> >   	struct rte_mbuf *start = rxq->pkt_first_seg;
> >   	struct rte_mbuf *end =  rxq->pkt_last_seg;
> >   	unsigned pkt_idx, buf_idx;
> >
> > -
> >   	for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
> >   		if (end != NULL) {
> >   			/* processing a split packet */
> > @@ -535,14 +529,17 @@ reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
> >    *
> >    * Notice:
> >    * - don't support ol_flags for rss and csum err
> > - * - now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
> > + * - will floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
> > + * - 'nb_pkts < 4' causes 0 packet receiving
> >    */
> >   uint16_t
> >   ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> >   		uint16_t nb_pkts)
> >   {
> >   	struct ixgbe_rx_queue *rxq = rx_queue;
> > -	uint8_t split_flags[RTE_IXGBE_VPMD_RX_BURST] = {0};
> > +	uint8_t split_flags[nb_pkts];
> > +
> > +	memset(split_flags, 0, nb_pkts);
> >
> >   	/* get some new buffers */
> >   	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
> 
> After this _recv_raw_pkts_vec it checks 32 bytes in split_flags (4x8
> bytes), that can overrun or miss some flags.
> Btw. Bruce just fixed that part in "ixgbe: fix check for split packets"

Ah yes, missed that when reviewing, that code would be broken if nb_bufs > 32:

        const uint64_t *split_fl64 = (uint64_t *)split_flags;
        if (rxq->pkt_first_seg == NULL &&
                        split_fl64[0] == 0 && split_fl64[1] == 0 &&
                        split_fl64[2] == 0 && split_fl64[3] == 0)
                return nb_bufs; 

right?

Another thing, that I just thought about:
Right now we invoke ixgbe_rxq_rearm() only at the start of _recv_raw_pkts_vec().
Before it was ok, as _recv_raw_pkts_vec() would never try to read more then 32 RXDs.
But what would happen if nb_pkts > rxq->nb_desc and rxq->rxrearm_nb == 0?
I suppose _recv_raw_pkts_vec() can wrap around the RXD ring and 'receive' the same packet twice?
So we probably better still limit nb_pkts <= 32 at _recv_raw_pkts_vec().

Konstantin

> 
> 
> > @@ -667,8 +664,8 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
> >   	uint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;
> >   	int i;
> >
> > -	if (unlikely(nb_pkts > RTE_IXGBE_VPMD_TX_BURST))
> > -		nb_pkts = RTE_IXGBE_VPMD_TX_BURST;
> > +	/* cross rx_thresh boundary is not allowed */
> > +	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
> >
> >   	if (txq->nb_tx_free < txq->tx_free_thresh)
> >   		ixgbe_tx_free_bufs(txq);
> >

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v1] ixgbe: remove vector pmd burst size restriction
  2015-07-31 10:21   ` Ananyev, Konstantin
@ 2015-07-31 11:57     ` Zoltan Kiss
  2015-07-31 14:49       ` Zoltan Kiss
  2015-08-03  7:41     ` Liang, Cunming
  1 sibling, 1 reply; 18+ messages in thread
From: Zoltan Kiss @ 2015-07-31 11:57 UTC (permalink / raw)
  To: Ananyev, Konstantin, Liang, Cunming, dev



On 31/07/15 11:21, Ananyev, Konstantin wrote:
>
>
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Zoltan Kiss
>> Sent: Friday, July 31, 2015 11:04 AM
>> To: Liang, Cunming; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v1] ixgbe: remove vector pmd burst size restriction
>>
>>
>>
>> On 31/07/15 09:17, Cunming Liang wrote:
>>> The patch removes the restriction of burst size on a constant 32.
>>>
>>> On receive side, the burst size floor now aligns to RTE_IXGBE_DESCS_PER_LOOP power of 2.
>>> According to this rule, the burst size less than 4 still won't receive anything.
>>> (Before this change, the burst size less than 32 can't receive anything.)
>>>
>>> On transmit side, the max burst size no longer bind with a constant, however it still
>>> require to check the cross tx_rs_thresh violation.
>>>
>>> There's no obvious performance drop found on both recv_pkts_vec
>>> and recv_scattered_pkts_vec on burst size 32.
>>>
>>> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
>>> ---
>>>    drivers/net/ixgbe/ixgbe_rxtx.c     |  3 ++-
>>>    drivers/net/ixgbe/ixgbe_rxtx.h     |  4 +---
>>>    drivers/net/ixgbe/ixgbe_rxtx_vec.c | 35 ++++++++++++++++-------------------
>>>    3 files changed, 19 insertions(+), 23 deletions(-)
>>>
>>> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
>>> index 3f808b3..dbdb761 100644
>>> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
>>> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
>>> @@ -4008,7 +4008,8 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
>>>    	 */
>>>    	} else if (adapter->rx_vec_allowed) {
>>>    		PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
>>> -				   "burst size no less than 32.");
>>> +				    "burst size no less than 4 (port=%d).",
>>> +			     dev->data->port_id);
>>
>> I think it would be better to use RTE_IXGBE_DESCS_PER_LOOP instead of a
>> constant 4.
>>>
>>>    		dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
>>>    	} else if (adapter->rx_bulk_alloc_allowed) {
>>> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
>>> index 113682a..eb931fe 100644
>>> --- a/drivers/net/ixgbe/ixgbe_rxtx.h
>>> +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
>>> @@ -47,9 +47,7 @@
>>>    	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
>>>
>>>    #ifdef RTE_IXGBE_INC_VECTOR
>>> -#define RTE_IXGBE_VPMD_RX_BURST         32
>>> -#define RTE_IXGBE_VPMD_TX_BURST         32
>>> -#define RTE_IXGBE_RXQ_REARM_THRESH      RTE_IXGBE_VPMD_RX_BURST
>>> +#define RTE_IXGBE_RXQ_REARM_THRESH      32
>>>    #define RTE_IXGBE_TX_MAX_FREE_BUF_SZ    64
>>>    #endif
>>>
>>> diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
>>> index 1c16dec..b72f817 100644
>>> --- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
>>> +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
>>> @@ -199,13 +199,11 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
>>>    #endif
>>>
>>>    /*
>>> - * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
>>> - * in one loop
>>> + * vPMD raw receive routine
>> I would keep some warning there, like "(if nb_pkts <
>> RTE_IXGBE_DESCS_PER_LOOP, won't receive anything)"
>>
>>>     *
>>>     * Notice:
>>> - * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
>>> - * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
>>> - *   numbers of DD bit
>>> + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
>>> + * - 'nb_pkts < 4' causes 0 packet receiving
>> Again, RTE_IXGBE_DESCS_PER_LOOP would be better than 4
>>
>>>     * - don't support ol_flags for rss and csum err
>>>     */
>>>    static inline uint16_t
>>> @@ -240,8 +238,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>>>    	__m128i dd_check, eop_check;
>>>    #endif /* RTE_NEXT_ABI */
>>>
>>> -	if (unlikely(nb_pkts < RTE_IXGBE_VPMD_RX_BURST))
>>> -		return 0;
>>> +	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
>>>
>>>    	/* Just the act of getting into the function from the application is
>>>    	 * going to cost about 7 cycles */
>>> @@ -310,7 +307,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>>>    	 * D. fill info. from desc to mbuf
>>>    	 */
>>>    #endif /* RTE_NEXT_ABI */
>>> -	for (pos = 0, nb_pkts_recd = 0; pos < RTE_IXGBE_VPMD_RX_BURST;
>>> +	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
>>>    			pos += RTE_IXGBE_DESCS_PER_LOOP,
>>>    			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
>>>    		__m128i descs[RTE_IXGBE_DESCS_PER_LOOP];
>>> @@ -450,13 +447,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>>>    }
>>>
>>>    /*
>>> - * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
>>> - * in one loop
>>> + * vPMD receive routine
>> Same note here as above
>>
>>>     *
>>>     * Notice:
>>> - * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
>>> - * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
>>> - *   numbers of DD bit
>>> + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
>>> + * - 'nb_pkts < 4' causes 0 packet receiving
>>>     * - don't support ol_flags for rss and csum err
>>>     */
>>>    uint16_t
>>> @@ -470,12 +465,11 @@ static inline uint16_t
>>>    reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
>>>    		uint16_t nb_bufs, uint8_t *split_flags)
>>>    {
>>> -	struct rte_mbuf *pkts[RTE_IXGBE_VPMD_RX_BURST]; /*finished pkts*/
>>> +	struct rte_mbuf *pkts[nb_bufs]; /*finished pkts*/
>>>    	struct rte_mbuf *start = rxq->pkt_first_seg;
>>>    	struct rte_mbuf *end =  rxq->pkt_last_seg;
>>>    	unsigned pkt_idx, buf_idx;
>>>
>>> -
>>>    	for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
>>>    		if (end != NULL) {
>>>    			/* processing a split packet */
>>> @@ -535,14 +529,17 @@ reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
>>>     *
>>>     * Notice:
>>>     * - don't support ol_flags for rss and csum err
>>> - * - now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
>>> + * - will floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
>>> + * - 'nb_pkts < 4' causes 0 packet receiving
>>>     */
>>>    uint16_t
>>>    ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>>>    		uint16_t nb_pkts)
>>>    {
>>>    	struct ixgbe_rx_queue *rxq = rx_queue;
>>> -	uint8_t split_flags[RTE_IXGBE_VPMD_RX_BURST] = {0};
>>> +	uint8_t split_flags[nb_pkts];
>>> +
>>> +	memset(split_flags, 0, nb_pkts);
>>>
>>>    	/* get some new buffers */
>>>    	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
>>
>> After this _recv_raw_pkts_vec it checks 32 bytes in split_flags (4x8
>> bytes), that can overrun or miss some flags.
>> Btw. Bruce just fixed that part in "ixgbe: fix check for split packets"
>
> Ah yes, missed that when reviewing, that code would be broken if nb_bufs > 32:
>
>          const uint64_t *split_fl64 = (uint64_t *)split_flags;
>          if (rxq->pkt_first_seg == NULL &&
>                          split_fl64[0] == 0 && split_fl64[1] == 0 &&
>                          split_fl64[2] == 0 && split_fl64[3] == 0)
>                  return nb_bufs;
>
> right?

Yes. And if nb_bufs < (32 -RTE_IXGBE_DESCS_PER_LOOP), it would address 
into memory outside the array.
>
> Another thing, that I just thought about:
> Right now we invoke ixgbe_rxq_rearm() only at the start of _recv_raw_pkts_vec().
> Before it was ok, as _recv_raw_pkts_vec() would never try to read more then 32 RXDs.
> But what would happen if nb_pkts > rxq->nb_desc and rxq->rxrearm_nb == 0?
Yes, that call would deplete the RX ring, the card wouldn't be able to 
receive more, so the receive function wouldn't be called again to rearm 
the ring.

> I suppose,  _recv_raw_pkts_vec() can wrpa around RXD ring and 'receive' same packet twice?
> So we probably better still limit nb_pkts <= 32 at _recv_raw_pkts_vec().
I agree we should limit nb_pkts. To avoid the above problem it would be 
enough to limit it to (rxq->nb_desc - 1), but I don't know if there is 
another factor here we should consider.
>
> Konstantin
>
>>
>>
>>> @@ -667,8 +664,8 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>>>    	uint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;
>>>    	int i;
>>>
>>> -	if (unlikely(nb_pkts > RTE_IXGBE_VPMD_TX_BURST))
>>> -		nb_pkts = RTE_IXGBE_VPMD_TX_BURST;
>>> +	/* cross rx_thresh boundary is not allowed */
>>> +	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
>>>
>>>    	if (txq->nb_tx_free < txq->tx_free_thresh)
>>>    		ixgbe_tx_free_bufs(txq);
>>>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v1] ixgbe: remove vector pmd burst size restriction
  2015-07-31 11:57     ` Zoltan Kiss
@ 2015-07-31 14:49       ` Zoltan Kiss
  0 siblings, 0 replies; 18+ messages in thread
From: Zoltan Kiss @ 2015-07-31 14:49 UTC (permalink / raw)
  To: Ananyev, Konstantin, Liang, Cunming, dev



On 31/07/15 12:57, Zoltan Kiss wrote:
>>
>> Another thing, that I just thought about:
>> Right now we invoke ixgbe_rxq_rearm() only at the start of
>> _recv_raw_pkts_vec().
>> Before it was ok, as _recv_raw_pkts_vec() would never try to read more
>> then 32 RXDs.
>> But what would happen if nb_pkts > rxq->nb_desc and rxq->rxrearm_nb == 0?
> Yes, that call would deplete the RX ring, the card wouldn't be able to
> receive more, so the receive function wouldn't be called again to rearm
> the ring.
>
Actually not, the problem is that the recv function would probably 
overrun the descriptor ring. But anyway, we should limit nb_pkts indeed.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v1] ixgbe: remove vector pmd burst size restriction
  2015-07-31 10:03 ` Zoltan Kiss
  2015-07-31 10:21   ` Ananyev, Konstantin
@ 2015-08-03  2:40   ` Liang, Cunming
  1 sibling, 0 replies; 18+ messages in thread
From: Liang, Cunming @ 2015-08-03  2:40 UTC (permalink / raw)
  To: Zoltan Kiss, dev

Hi, 

[...]
> > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> > index 3f808b3..dbdb761 100644
> > --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> > +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> > @@ -4008,7 +4008,8 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
> >   	 */
> >   	} else if (adapter->rx_vec_allowed) {
> >   		PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure
> RX "
> > -				   "burst size no less than 32.");
> > +				    "burst size no less than 4 (port=%d).",
> > +			     dev->data->port_id);
> 
> I think it would be better to use RTE_IXGBE_DESCS_PER_LOOP instead of a
> constant 4.
> >
[...]
> >
> >   /*
> > - * vPMD receive routine, now only accept (nb_pkts ==
> RTE_IXGBE_VPMD_RX_BURST)
> > - * in one loop
> > + * vPMD raw receive routine
> I would keep some warning there, like "(if nb_pkts <
> RTE_IXGBE_DESCS_PER_LOOP, won't receive anything)"
> 
> >    *
> >    * Notice:
> > - * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
> > - * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan
> RTE_IXGBE_VPMD_RX_BURST
> > - *   numbers of DD bit
> > + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
> > + * - 'nb_pkts < 4' causes 0 packet receiving
> Again, RTE_IXGBE_DESCS_PER_LOOP would be better than 4
> 
[...]
> >   uint16_t
> >   ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> >   		uint16_t nb_pkts)
> >   {
> >   	struct ixgbe_rx_queue *rxq = rx_queue;
> > -	uint8_t split_flags[RTE_IXGBE_VPMD_RX_BURST] = {0};
> > +	uint8_t split_flags[nb_pkts];
> > +
> > +	memset(split_flags, 0, nb_pkts);
> >
> >   	/* get some new buffers */
> >   	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
> 
> After this _recv_raw_pkts_vec it checks 32 bytes in split_flags (4x8
> bytes), that can overrun or miss some flags.
> Btw. Bruce just fixed that part in "ixgbe: fix check for split packets"
> 
> 
Thanks for all these valuable comments, will keep the max burst size 32.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v1] ixgbe: remove vector pmd burst size restriction
  2015-07-31 10:21   ` Ananyev, Konstantin
  2015-07-31 11:57     ` Zoltan Kiss
@ 2015-08-03  7:41     ` Liang, Cunming
  2015-08-03  9:59       ` Liang, Cunming
  1 sibling, 1 reply; 18+ messages in thread
From: Liang, Cunming @ 2015-08-03  7:41 UTC (permalink / raw)
  To: Ananyev, Konstantin, Zoltan Kiss, dev

Hi, 

[...]
> > >   uint16_t
> > >   ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> > >   		uint16_t nb_pkts)
> > >   {
> > >   	struct ixgbe_rx_queue *rxq = rx_queue;
> > > -	uint8_t split_flags[RTE_IXGBE_VPMD_RX_BURST] = {0};
> > > +	uint8_t split_flags[nb_pkts];
> > > +
> > > +	memset(split_flags, 0, nb_pkts);
> > >
> > >   	/* get some new buffers */
> > >   	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
> >
> > After this _recv_raw_pkts_vec it checks 32 bytes in split_flags (4x8
> > bytes), that can overrun or miss some flags.
> > Btw. Bruce just fixed that part in "ixgbe: fix check for split packets"
> 
> Ah yes, missed that when reviewing, that code would be broken if nb_bufs > 32:
> 
>         const uint64_t *split_fl64 = (uint64_t *)split_flags;
>         if (rxq->pkt_first_seg == NULL &&
>                         split_fl64[0] == 0 && split_fl64[1] == 0 &&
>                         split_fl64[2] == 0 && split_fl64[3] == 0)
>                 return nb_bufs;
> 
> right?

We can either rollback and only allow 'nb_pkts<=32', or do some broken fix as below diff.
By the result of performance test (4*10GE 64B burst_size(32) iofwd by scattered_pkts_vec), there's no drop.
But I'm not sure it is important or not to allow burst size larger than 32. Your comments will be important.

diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
index e94c68b..8f34236 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
@@ -537,26 +537,35 @@ uint16_t
 ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                uint16_t nb_pkts)
 {
+#define NB_SPLIT_ELEM                 (8)
        struct ixgbe_rx_queue *rxq = rx_queue;
        uint8_t split_flags[nb_pkts];
+       uint32_t i, nb_scan;
+       uint16_t nb_bufs;
+       uint64_t *split_fl64 = (uint64_t *)split_flags;

        memset(split_flags, 0, nb_pkts);

        /* get some new buffers */
-       uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
-                       split_flags);
+       nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
+                                    split_flags);
        if (nb_bufs == 0)
                return 0;

        /* happy day case, full burst + no packets to be joined */
-       const uint64_t *split_fl64 = (uint64_t *)split_flags;
-       if (rxq->pkt_first_seg == NULL &&
-                       split_fl64[0] == 0 && split_fl64[1] == 0 &&
-                       split_fl64[2] == 0 && split_fl64[3] == 0)
+       nb_scan = RTE_ALIGN(nb_bufs, NB_SPLIT_ELEM);
+       if (rxq->pkt_first_seg == NULL) {
+               for (i = 0; i < nb_scan;
+                    i += NB_SPLIT_ELEM, split_fl64++) {
+                       if (*split_fl64 != 0)
+                               goto reassemble;
+               }
                return nb_bufs;
+       }

+reassemble:
        /* reassemble any packets that need reassembly*/
-       unsigned i = 0;
+       i = 0;
        if (rxq->pkt_first_seg == NULL) {
                /* find the first split flag, and only reassemble then*/
                while (i < nb_bufs && !split_flags[i])

/Steve
> 
> Another thing, that I just thought about:
> Right now we invoke ixgbe_rxq_rearm() only at the start of
> _recv_raw_pkts_vec().
> Before it was ok, as _recv_raw_pkts_vec() would never try to read more then 32
> RXDs.
> But what would happen if nb_pkts > rxq->nb_desc and rxq->rxrearm_nb == 0?
> I suppose,  _recv_raw_pkts_vec() can wrap around the RXD ring and 'receive' same
> packet twice?
> So we probably better still limit nb_pkts <= 32 at _recv_raw_pkts_vec().

The _recv_raw_pkts_vec() won't wrap around RXD ring. When it reaches the last one, the DD bit of padding desc. always 0.
So in the case nb_pkts > rxq->nb_desc, the '_recv_raw_pkts_vec()' can only get no more than 'rxq->nb_desc' packets.


> 
> Konstantin
> 
> >

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v1] ixgbe: remove vector pmd burst size restriction
  2015-08-03  7:41     ` Liang, Cunming
@ 2015-08-03  9:59       ` Liang, Cunming
  0 siblings, 0 replies; 18+ messages in thread
From: Liang, Cunming @ 2015-08-03  9:59 UTC (permalink / raw)
  To: Liang, Cunming, Ananyev, Konstantin, Zoltan Kiss, dev

Hi,

[...]
> > Another thing, that I just thought about:
> > Right now we invoke ixgbe_rxq_rearm() only at the start of
> > _recv_raw_pkts_vec().
> > Before it was ok, as _recv_raw_pkts_vec() would never try to read more then
> 32
> > RXDs.
> > But what would happen if nb_pkts > rxq->nb_desc and rxq->rxrearm_nb == 0?
> > I suppose,  _recv_raw_pkts_vec() can wrpa around RXD ring and 'receive'
> same
> > packet twice?
> > So we probably better still limit nb_pkts <= 32 at _recv_raw_pkts_vec().
> 
> The _recv_raw_pkts_vec() won't wrap around RXD ring. When it reaches the last
> one, the DD bit of padding desc. always 0.
> So in the case nb_pkts > rxq->nb_desc, the '_recv_raw_pkts_vec()' can only get
> no more than 'rxq->nb_desc' packets.
> 
I think the violation is true when rx_id in some middle position of desc_ring, and nb_pkts > rxq->nb_desc.
The DD checking may exceed the boundary (access the entry whose DD is set and waiting for rearm).
So I agree to keep the max burst size as 32.

/Steve

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [dpdk-dev] [PATCH v2] ixgbe: remove vector pmd burst size restriction
  2015-07-31  8:17 [dpdk-dev] [PATCH v1] ixgbe: remove vector pmd burst size restriction Cunming Liang
  2015-07-31  9:21 ` Ananyev, Konstantin
  2015-07-31 10:03 ` Zoltan Kiss
@ 2015-08-04  7:32 ` Cunming Liang
  2015-08-04  9:02   ` Zoltan Kiss
  2015-08-04 11:47   ` [dpdk-dev] [PATCH v3] " Cunming Liang
  2 siblings, 2 replies; 18+ messages in thread
From: Cunming Liang @ 2015-08-04  7:32 UTC (permalink / raw)
  To: dev

On receive side, the burst size now floor aligns to RTE_IXGBE_DESCS_PER_LOOP power of 2.
According to this rule, the burst size less than 4 still won't receive anything.
(Before this change, the burst size less than 32 can't receive anything.)
_recv_*_pkts_vec returns no more than 32(RTE_IXGBE_RXQ_REARM_THRESH) packets.

On transmit side, the max burst size no longer bind with a constant, however it still
require to check the cross tx_rs_thresh violation.

There's no obvious performance drop found on both recv_pkts_vec
and recv_scattered_pkts_vec on burst size 32.

Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
 v2 change:
  - keep max rx burst size in 32
  - reword some comments
 
 drivers/net/ixgbe/ixgbe_rxtx.c     |  4 +++-
 drivers/net/ixgbe/ixgbe_rxtx.h     |  5 ++---
 drivers/net/ixgbe/ixgbe_rxtx_vec.c | 39 +++++++++++++++++++++-----------------
 3 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 91023b9..d06aaae 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -4008,7 +4008,9 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
 	 */
 	} else if (adapter->rx_vec_allowed) {
 		PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
-				   "burst size no less than 32.");
+				    "burst size no less than "
+				    "RTE_IXGBE_DESCS_PER_LOOP(=4) (port=%d).",
+			     dev->data->port_id);
 
 		dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
 	} else if (adapter->rx_bulk_alloc_allowed) {
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
index 113682a..b9eca67 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx.h
@@ -47,9 +47,8 @@
 	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
 
 #ifdef RTE_IXGBE_INC_VECTOR
-#define RTE_IXGBE_VPMD_RX_BURST         32
-#define RTE_IXGBE_VPMD_TX_BURST         32
-#define RTE_IXGBE_RXQ_REARM_THRESH      RTE_IXGBE_VPMD_RX_BURST
+#define RTE_IXGBE_RXQ_REARM_THRESH      32
+#define RTE_IXGBE_MAX_RX_BURST          RTE_IXGBE_RXQ_REARM_THRESH
 #define RTE_IXGBE_TX_MAX_FREE_BUF_SZ    64
 #endif
 
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
index cf25a53..2ca0e4c 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
@@ -245,13 +245,13 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
 #endif
 
 /*
- * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
- * in one loop
+ * vPMD raw receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP)
  *
  * Notice:
- * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
- * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
+ * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
  *   numbers of DD bit
+ * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
  * - don't support ol_flags for rss and csum err
  */
 static inline uint16_t
@@ -286,8 +286,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	__m128i dd_check, eop_check;
 #endif /* RTE_NEXT_ABI */
 
-	if (unlikely(nb_pkts < RTE_IXGBE_VPMD_RX_BURST))
-		return 0;
+	/* nb_pkts shall be less equal than RTE_IXGBE_MAX_RX_BURST */
+	nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_MAX_RX_BURST);
+
+	/* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
 
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles */
@@ -356,7 +359,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	 * D. fill info. from desc to mbuf
 	 */
 #endif /* RTE_NEXT_ABI */
-	for (pos = 0, nb_pkts_recd = 0; pos < RTE_IXGBE_VPMD_RX_BURST;
+	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
 			pos += RTE_IXGBE_DESCS_PER_LOOP,
 			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
 #ifdef RTE_NEXT_ABI
@@ -518,13 +521,13 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 }
 
 /*
- * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
- * in one loop
+ * vPMD receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP)
  *
  * Notice:
- * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
- * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
+ * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
  *   numbers of DD bit
+ * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
  * - don't support ol_flags for rss and csum err
  */
 uint16_t
@@ -538,12 +541,11 @@ static inline uint16_t
 reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
 		uint16_t nb_bufs, uint8_t *split_flags)
 {
-	struct rte_mbuf *pkts[RTE_IXGBE_VPMD_RX_BURST]; /*finished pkts*/
+	struct rte_mbuf *pkts[nb_bufs]; /*finished pkts*/
 	struct rte_mbuf *start = rxq->pkt_first_seg;
 	struct rte_mbuf *end =  rxq->pkt_last_seg;
 	unsigned pkt_idx, buf_idx;
 
-
 	for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
 		if (end != NULL) {
 			/* processing a split packet */
@@ -603,14 +605,17 @@ reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
  *
  * Notice:
  * - don't support ol_flags for rss and csum err
- * - now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
+ * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
+ *   numbers of DD bit
+ * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
  */
 uint16_t
 ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts)
 {
 	struct ixgbe_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[RTE_IXGBE_VPMD_RX_BURST] = {0};
+	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
@@ -735,8 +740,8 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;
 	int i;
 
-	if (unlikely(nb_pkts > RTE_IXGBE_VPMD_TX_BURST))
-		nb_pkts = RTE_IXGBE_VPMD_TX_BURST;
+	/* cross rx_thresh boundary is not allowed */
+	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
 
 	if (txq->nb_tx_free < txq->tx_free_thresh)
 		ixgbe_tx_free_bufs(txq);
-- 
2.4.3

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v2] ixgbe: remove vector pmd burst size restriction
  2015-08-04  7:32 ` [dpdk-dev] [PATCH v2] " Cunming Liang
@ 2015-08-04  9:02   ` Zoltan Kiss
  2015-08-04 11:15     ` Liang, Cunming
  2015-08-04 11:47   ` [dpdk-dev] [PATCH v3] " Cunming Liang
  1 sibling, 1 reply; 18+ messages in thread
From: Zoltan Kiss @ 2015-08-04  9:02 UTC (permalink / raw)
  To: Cunming Liang, dev



On 04/08/15 08:32, Cunming Liang wrote:
> On receive side, the burst size now floor aligns to RTE_IXGBE_DESCS_PER_LOOP power of 2.
> According to this rule, the burst size less than 4 still won't receive anything.
> (Before this change, the burst size less than 32 can't receive anything.)
> _recv_*_pkts_vec returns no more than 32(RTE_IXGBE_RXQ_REARM_THRESH) packets.
>
> On transmit side, the max burst size no longer bind with a constant, however it still
> require to check the cross tx_rs_thresh violation.
>
> There's no obvious performance drop found on both recv_pkts_vec
> and recv_scattered_pkts_vec on burst size 32.
>
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> ---
>   v2 change:
>    - keep max rx burst size in 32
>    - reword some comments
>
>   drivers/net/ixgbe/ixgbe_rxtx.c     |  4 +++-
>   drivers/net/ixgbe/ixgbe_rxtx.h     |  5 ++---
>   drivers/net/ixgbe/ixgbe_rxtx_vec.c | 39 +++++++++++++++++++++-----------------
>   3 files changed, 27 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> index 91023b9..d06aaae 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -4008,7 +4008,9 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
>   	 */
>   	} else if (adapter->rx_vec_allowed) {
>   		PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
> -				   "burst size no less than 32.");
> +				    "burst size no less than "
> +				    "RTE_IXGBE_DESCS_PER_LOOP(=4) (port=%d).",

I think you should write in this line:

"%d (port=%d)", RTE_IXGBE_DESCS_PER_LOOP,
> +			     dev->data->port_id);
>
>   		dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
>   	} else if (adapter->rx_bulk_alloc_allowed) {
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
> index 113682a..b9eca67 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> @@ -47,9 +47,8 @@
>   	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
>
>   #ifdef RTE_IXGBE_INC_VECTOR
> -#define RTE_IXGBE_VPMD_RX_BURST         32
> -#define RTE_IXGBE_VPMD_TX_BURST         32
> -#define RTE_IXGBE_RXQ_REARM_THRESH      RTE_IXGBE_VPMD_RX_BURST
> +#define RTE_IXGBE_RXQ_REARM_THRESH      32
> +#define RTE_IXGBE_MAX_RX_BURST          RTE_IXGBE_RXQ_REARM_THRESH
>   #define RTE_IXGBE_TX_MAX_FREE_BUF_SZ    64
>   #endif
>
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> index cf25a53..2ca0e4c 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> @@ -245,13 +245,13 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
>   #endif
>
>   /*
> - * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
> - * in one loop
> + * vPMD raw receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP)
>    *
>    * Notice:
> - * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
> - * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
> + * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
> + * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
>    *   numbers of DD bit
> + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
>    * - don't support ol_flags for rss and csum err
>    */
>   static inline uint16_t
> @@ -286,8 +286,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>   	__m128i dd_check, eop_check;
>   #endif /* RTE_NEXT_ABI */
>
> -	if (unlikely(nb_pkts < RTE_IXGBE_VPMD_RX_BURST))
> -		return 0;
> +	/* nb_pkts shall be less equal than RTE_IXGBE_MAX_RX_BURST */
> +	nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_MAX_RX_BURST);
> +
> +	/* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */
> +	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
>
>   	/* Just the act of getting into the function from the application is
>   	 * going to cost about 7 cycles */
> @@ -356,7 +359,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>   	 * D. fill info. from desc to mbuf
>   	 */
>   #endif /* RTE_NEXT_ABI */
> -	for (pos = 0, nb_pkts_recd = 0; pos < RTE_IXGBE_VPMD_RX_BURST;
> +	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
>   			pos += RTE_IXGBE_DESCS_PER_LOOP,
>   			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
>   #ifdef RTE_NEXT_ABI
> @@ -518,13 +521,13 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>   }
>
>   /*
> - * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
> - * in one loop
> + * vPMD receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP)
>    *
>    * Notice:
> - * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
> - * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
> + * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
> + * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
>    *   numbers of DD bit
> + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
>    * - don't support ol_flags for rss and csum err
>    */
>   uint16_t
> @@ -538,12 +541,11 @@ static inline uint16_t
>   reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
>   		uint16_t nb_bufs, uint8_t *split_flags)
>   {
> -	struct rte_mbuf *pkts[RTE_IXGBE_VPMD_RX_BURST]; /*finished pkts*/
> +	struct rte_mbuf *pkts[nb_bufs]; /*finished pkts*/
>   	struct rte_mbuf *start = rxq->pkt_first_seg;
>   	struct rte_mbuf *end =  rxq->pkt_last_seg;
>   	unsigned pkt_idx, buf_idx;
>
> -
>   	for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
>   		if (end != NULL) {
>   			/* processing a split packet */
> @@ -603,14 +605,17 @@ reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
>    *
>    * Notice:
>    * - don't support ol_flags for rss and csum err
> - * - now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
> + * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
> + * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
> + *   numbers of DD bit
> + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
>    */
>   uint16_t
>   ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>   		uint16_t nb_pkts)
>   {
>   	struct ixgbe_rx_queue *rxq = rx_queue;
> -	uint8_t split_flags[RTE_IXGBE_VPMD_RX_BURST] = {0};
> +	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
>
>   	/* get some new buffers */
>   	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,

I don't know if it actually matters from performance point of view, but 
you check the whole split_flags array, even if you received only 4 
packets. Though the overhead of the a for loop might be bigger.

> @@ -735,8 +740,8 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>   	uint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;
>   	int i;
>
> -	if (unlikely(nb_pkts > RTE_IXGBE_VPMD_TX_BURST))
> -		nb_pkts = RTE_IXGBE_VPMD_TX_BURST;
> +	/* cross rx_thresh boundary is not allowed */
> +	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
>
>   	if (txq->nb_tx_free < txq->tx_free_thresh)
>   		ixgbe_tx_free_bufs(txq);
>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v2] ixgbe: remove vector pmd burst size restriction
  2015-08-04  9:02   ` Zoltan Kiss
@ 2015-08-04 11:15     ` Liang, Cunming
  0 siblings, 0 replies; 18+ messages in thread
From: Liang, Cunming @ 2015-08-04 11:15 UTC (permalink / raw)
  To: Zoltan Kiss, dev

Hi Zoltan,

> >   	} else if (adapter->rx_vec_allowed) {
> >   		PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
> > -				   "burst size no less than 32.");
> > +				    "burst size no less than "
> > +				    "RTE_IXGBE_DESCS_PER_LOOP(=4) (port=%d).",
> 
> I think you should write in this line:
> 
> "%d (port=%d)", RTE_IXGBE_DESCS_PER_LOOP,
> > +			     dev->data->port_id);
> >
Ok, it looks better, will take it.

[...]
> >   uint16_t
> >   ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> >   		uint16_t nb_pkts)
> >   {
> >   	struct ixgbe_rx_queue *rxq = rx_queue;
> > -	uint8_t split_flags[RTE_IXGBE_VPMD_RX_BURST] = {0};
> > +	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
> >
> >   	/* get some new buffers */
> >   	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
> 
> I don't know if it actually matters from performance point of view, but
> you check the whole split_flags array, even if you received only 4
> packets. Though the overhead of the a for loop might be bigger.
> 
v2 here just roll back the change.
The size of array is constant. It won't loop much, always compare 4 times 'split_fl64[]==0'.
As you said, I once sent another variant using a variable-length split_flags with a loop; there was only a very tiny performance difference.
As the patch is not trying to improve the performance here, any improvement I propose to make it in another patch.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [dpdk-dev] [PATCH v3] ixgbe: remove vector pmd burst size restriction
  2015-08-04  7:32 ` [dpdk-dev] [PATCH v2] " Cunming Liang
  2015-08-04  9:02   ` Zoltan Kiss
@ 2015-08-04 11:47   ` Cunming Liang
  2015-08-04 16:26     ` Zoltan Kiss
  2015-08-05  9:31     ` Ananyev, Konstantin
  1 sibling, 2 replies; 18+ messages in thread
From: Cunming Liang @ 2015-08-04 11:47 UTC (permalink / raw)
  To: dev

On receive side, the burst size now floor aligns to RTE_IXGBE_DESCS_PER_LOOP power of 2.
According to this rule, the burst size less than 4 still won't receive anything.
(Before this change, the burst size less than 32 can't receive anything.)
_recv_*_pkts_vec returns no more than 32(RTE_IXGBE_RXQ_REARM_THRESH) packets.

On transmit side, the max burst size no longer bind with a constant, however it still
require to check the cross tx_rs_thresh violation.

There's no obvious performance drop found on both recv_pkts_vec
and recv_scattered_pkts_vec on burst size 32.

Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
 v3 change:
  - reword the init print log

 v2 change:
  - keep max rx burst size in 32
  - reword some comments

 drivers/net/ixgbe/ixgbe_rxtx.c     |  4 +++-
 drivers/net/ixgbe/ixgbe_rxtx.h     |  5 ++---
 drivers/net/ixgbe/ixgbe_rxtx_vec.c | 39 +++++++++++++++++++++-----------------
 3 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 91023b9..03eb45d 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -4008,7 +4008,9 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
 	 */
 	} else if (adapter->rx_vec_allowed) {
 		PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
-				   "burst size no less than 32.");
+				    "burst size no less than %d (port=%d).",
+			     RTE_IXGBE_DESCS_PER_LOOP,
+			     dev->data->port_id);
 
 		dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
 	} else if (adapter->rx_bulk_alloc_allowed) {
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
index 113682a..b9eca67 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx.h
@@ -47,9 +47,8 @@
 	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
 
 #ifdef RTE_IXGBE_INC_VECTOR
-#define RTE_IXGBE_VPMD_RX_BURST         32
-#define RTE_IXGBE_VPMD_TX_BURST         32
-#define RTE_IXGBE_RXQ_REARM_THRESH      RTE_IXGBE_VPMD_RX_BURST
+#define RTE_IXGBE_RXQ_REARM_THRESH      32
+#define RTE_IXGBE_MAX_RX_BURST          RTE_IXGBE_RXQ_REARM_THRESH
 #define RTE_IXGBE_TX_MAX_FREE_BUF_SZ    64
 #endif
 
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
index cf25a53..2ca0e4c 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
@@ -245,13 +245,13 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
 #endif
 
 /*
- * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
- * in one loop
+ * vPMD raw receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP)
  *
  * Notice:
- * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
- * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
+ * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
  *   numbers of DD bit
+ * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
  * - don't support ol_flags for rss and csum err
  */
 static inline uint16_t
@@ -286,8 +286,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	__m128i dd_check, eop_check;
 #endif /* RTE_NEXT_ABI */
 
-	if (unlikely(nb_pkts < RTE_IXGBE_VPMD_RX_BURST))
-		return 0;
+	/* nb_pkts shall be less equal than RTE_IXGBE_MAX_RX_BURST */
+	nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_MAX_RX_BURST);
+
+	/* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
 
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles */
@@ -356,7 +359,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	 * D. fill info. from desc to mbuf
 	 */
 #endif /* RTE_NEXT_ABI */
-	for (pos = 0, nb_pkts_recd = 0; pos < RTE_IXGBE_VPMD_RX_BURST;
+	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
 			pos += RTE_IXGBE_DESCS_PER_LOOP,
 			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
 #ifdef RTE_NEXT_ABI
@@ -518,13 +521,13 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 }
 
 /*
- * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
- * in one loop
+ * vPMD receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP)
  *
  * Notice:
- * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
- * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
+ * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
  *   numbers of DD bit
+ * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
  * - don't support ol_flags for rss and csum err
  */
 uint16_t
@@ -538,12 +541,11 @@ static inline uint16_t
 reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
 		uint16_t nb_bufs, uint8_t *split_flags)
 {
-	struct rte_mbuf *pkts[RTE_IXGBE_VPMD_RX_BURST]; /*finished pkts*/
+	struct rte_mbuf *pkts[nb_bufs]; /*finished pkts*/
 	struct rte_mbuf *start = rxq->pkt_first_seg;
 	struct rte_mbuf *end =  rxq->pkt_last_seg;
 	unsigned pkt_idx, buf_idx;
 
-
 	for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
 		if (end != NULL) {
 			/* processing a split packet */
@@ -603,14 +605,17 @@ reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
  *
  * Notice:
  * - don't support ol_flags for rss and csum err
- * - now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
+ * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
+ *   numbers of DD bit
+ * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
  */
 uint16_t
 ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts)
 {
 	struct ixgbe_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[RTE_IXGBE_VPMD_RX_BURST] = {0};
+	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
@@ -735,8 +740,8 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;
 	int i;
 
-	if (unlikely(nb_pkts > RTE_IXGBE_VPMD_TX_BURST))
-		nb_pkts = RTE_IXGBE_VPMD_TX_BURST;
+	/* cross rx_thresh boundary is not allowed */
+	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
 
 	if (txq->nb_tx_free < txq->tx_free_thresh)
 		ixgbe_tx_free_bufs(txq);
-- 
2.4.3

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v3] ixgbe: remove vector pmd burst size restriction
  2015-08-04 11:47   ` [dpdk-dev] [PATCH v3] " Cunming Liang
@ 2015-08-04 16:26     ` Zoltan Kiss
  2015-08-05  6:28       ` Liang, Cunming
  2015-08-05  9:31     ` Ananyev, Konstantin
  1 sibling, 1 reply; 18+ messages in thread
From: Zoltan Kiss @ 2015-08-04 16:26 UTC (permalink / raw)
  To: Cunming Liang, dev



On 04/08/15 12:47, Cunming Liang wrote:
> On receive side, the burst size now floor aligns to RTE_IXGBE_DESCS_PER_LOOP power of 2.
> According to this rule, the burst size less than 4 still won't receive anything.
> (Before this change, the burst size less than 32 can't receive anything.)
> _recv_*_pkts_vec returns no more than 32(RTE_IXGBE_RXQ_REARM_THRESH) packets.
>
> On transmit side, the max burst size no longer bind with a constant, however it still
> require to check the cross tx_rs_thresh violation.
>
> There's no obvious performance drop found on both recv_pkts_vec
> and recv_scattered_pkts_vec on burst size 32.
>
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> ---
>   v3 change:
>    - reword the init print log
>
>   v2 change:
>    - keep max rx burst size in 32
>    - reword some comments
>
>   drivers/net/ixgbe/ixgbe_rxtx.c     |  4 +++-
>   drivers/net/ixgbe/ixgbe_rxtx.h     |  5 ++---
>   drivers/net/ixgbe/ixgbe_rxtx_vec.c | 39 +++++++++++++++++++++-----------------
>   3 files changed, 27 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> index 91023b9..03eb45d 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -4008,7 +4008,9 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
>   	 */
>   	} else if (adapter->rx_vec_allowed) {
>   		PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
> -				   "burst size no less than 32.");
> +				    "burst size no less than %d (port=%d).",
> +			     RTE_IXGBE_DESCS_PER_LOOP,
> +			     dev->data->port_id);

A tab seems to be missing from the indentation, otherwise:

Reviewed-by: Zoltan Kiss <zoltan.kiss@linaro.org>

>
>   		dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
>   	} else if (adapter->rx_bulk_alloc_allowed) {
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
> index 113682a..b9eca67 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> @@ -47,9 +47,8 @@
>   	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
>
>   #ifdef RTE_IXGBE_INC_VECTOR
> -#define RTE_IXGBE_VPMD_RX_BURST         32
> -#define RTE_IXGBE_VPMD_TX_BURST         32
> -#define RTE_IXGBE_RXQ_REARM_THRESH      RTE_IXGBE_VPMD_RX_BURST
> +#define RTE_IXGBE_RXQ_REARM_THRESH      32
> +#define RTE_IXGBE_MAX_RX_BURST          RTE_IXGBE_RXQ_REARM_THRESH
>   #define RTE_IXGBE_TX_MAX_FREE_BUF_SZ    64
>   #endif
>
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> index cf25a53..2ca0e4c 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> @@ -245,13 +245,13 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
>   #endif
>
>   /*
> - * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
> - * in one loop
> + * vPMD raw receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP)
>    *
>    * Notice:
> - * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
> - * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
> + * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
> + * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
>    *   numbers of DD bit
> + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
>    * - don't support ol_flags for rss and csum err
>    */
>   static inline uint16_t
> @@ -286,8 +286,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>   	__m128i dd_check, eop_check;
>   #endif /* RTE_NEXT_ABI */
>
> -	if (unlikely(nb_pkts < RTE_IXGBE_VPMD_RX_BURST))
> -		return 0;
> +	/* nb_pkts shall be less equal than RTE_IXGBE_MAX_RX_BURST */
> +	nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_MAX_RX_BURST);
> +
> +	/* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */
> +	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
>
>   	/* Just the act of getting into the function from the application is
>   	 * going to cost about 7 cycles */
> @@ -356,7 +359,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>   	 * D. fill info. from desc to mbuf
>   	 */
>   #endif /* RTE_NEXT_ABI */
> -	for (pos = 0, nb_pkts_recd = 0; pos < RTE_IXGBE_VPMD_RX_BURST;
> +	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
>   			pos += RTE_IXGBE_DESCS_PER_LOOP,
>   			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
>   #ifdef RTE_NEXT_ABI
> @@ -518,13 +521,13 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>   }
>
>   /*
> - * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
> - * in one loop
> + * vPMD receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP)
>    *
>    * Notice:
> - * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
> - * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
> + * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
> + * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
>    *   numbers of DD bit
> + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
>    * - don't support ol_flags for rss and csum err
>    */
>   uint16_t
> @@ -538,12 +541,11 @@ static inline uint16_t
>   reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
>   		uint16_t nb_bufs, uint8_t *split_flags)
>   {
> -	struct rte_mbuf *pkts[RTE_IXGBE_VPMD_RX_BURST]; /*finished pkts*/
> +	struct rte_mbuf *pkts[nb_bufs]; /*finished pkts*/
>   	struct rte_mbuf *start = rxq->pkt_first_seg;
>   	struct rte_mbuf *end =  rxq->pkt_last_seg;
>   	unsigned pkt_idx, buf_idx;
>
> -
>   	for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
>   		if (end != NULL) {
>   			/* processing a split packet */
> @@ -603,14 +605,17 @@ reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
>    *
>    * Notice:
>    * - don't support ol_flags for rss and csum err
> - * - now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
> + * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
> + * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
> + *   numbers of DD bit
> + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
>    */
>   uint16_t
>   ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>   		uint16_t nb_pkts)
>   {
>   	struct ixgbe_rx_queue *rxq = rx_queue;
> -	uint8_t split_flags[RTE_IXGBE_VPMD_RX_BURST] = {0};
> +	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
>
>   	/* get some new buffers */
>   	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
> @@ -735,8 +740,8 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>   	uint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;
>   	int i;
>
> -	if (unlikely(nb_pkts > RTE_IXGBE_VPMD_TX_BURST))
> -		nb_pkts = RTE_IXGBE_VPMD_TX_BURST;
> +	/* cross rx_thresh boundary is not allowed */
> +	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
>
>   	if (txq->nb_tx_free < txq->tx_free_thresh)
>   		ixgbe_tx_free_bufs(txq);
>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v3] ixgbe: remove vector pmd burst size restriction
  2015-08-04 16:26     ` Zoltan Kiss
@ 2015-08-05  6:28       ` Liang, Cunming
  2015-08-05 15:59         ` Zoltan Kiss
  0 siblings, 1 reply; 18+ messages in thread
From: Liang, Cunming @ 2015-08-05  6:28 UTC (permalink / raw)
  To: Zoltan Kiss, dev

Hi Zoltan,

> -----Original Message-----
> From: Zoltan Kiss [mailto:zoltan.kiss@linaro.org]
> Sent: Wednesday, August 05, 2015 12:26 AM
> To: Liang, Cunming; dev@dpdk.org
> Cc: Ananyev, Konstantin
> Subject: Re: [PATCH v3] ixgbe: remove vector pmd burst size restriction
> 
> 
> 
> On 04/08/15 12:47, Cunming Liang wrote:
> > On receive side, the burst size now floor aligns to RTE_IXGBE_DESCS_PER_LOOP
> power of 2.
> > According to this rule, the burst size less than 4 still won't receive anything.
> > (Before this change, the burst size less than 32 can't receive anything.)
> > _recv_*_pkts_vec returns no more than 32(RTE_IXGBE_RXQ_REARM_THRESH)
> packets.
> >
> > On transmit side, the max burst size no longer bind with a constant, however it
> still
> > require to check the cross tx_rs_thresh violation.
> >
> > There's no obvious performance drop found on both recv_pkts_vec
> > and recv_scattered_pkts_vec on burst size 32.
> >
> > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > ---
> >   v3 change:
> >    - reword the init print log
> >
> >   v2 change:
> >    - keep max rx burst size in 32
> >    - reword some comments
> >
> >   drivers/net/ixgbe/ixgbe_rxtx.c     |  4 +++-
> >   drivers/net/ixgbe/ixgbe_rxtx.h     |  5 ++---
> >   drivers/net/ixgbe/ixgbe_rxtx_vec.c | 39
> +++++++++++++++++++++-----------------
> >   3 files changed, 27 insertions(+), 21 deletions(-)
> >
> > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> > index 91023b9..03eb45d 100644
> > --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> > +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> > @@ -4008,7 +4008,9 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
> >   	 */
> >   	} else if (adapter->rx_vec_allowed) {
> >   		PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
> > -				   "burst size no less than 32.");
> > +				    "burst size no less than %d (port=%d).",
> > +			     RTE_IXGBE_DESCS_PER_LOOP,
> > +			     dev->data->port_id);
> 
> A tab seems to be missing from the indentation, otherwise:
> 
> Reviewed-by: Zoltan Kiss <zoltan.kiss@linaro.org>
> 
Thanks for the review. I double-checked the indentation again, and it looks fine.
The 1st string line uses 4x tab indentation + space alignment; the other variable lines use 3x tab indentation + space alignment.
According to the 'coding_style.rst' Indentation section - 'As with all style guidelines, code should match style already in use in an existing file.'
The style is kept the same as the condition check that follows it. It passes 'checkpatch.pl' checking as well.

Thanks,
/Steve
> >
> >   		dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
> >   	} else if (adapter->rx_bulk_alloc_allowed) {
> > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
> > index 113682a..b9eca67 100644
> > --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> > +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> > @@ -47,9 +47,8 @@
> >   	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
> >
> >   #ifdef RTE_IXGBE_INC_VECTOR
> > -#define RTE_IXGBE_VPMD_RX_BURST         32
> > -#define RTE_IXGBE_VPMD_TX_BURST         32
> > -#define RTE_IXGBE_RXQ_REARM_THRESH      RTE_IXGBE_VPMD_RX_BURST
> > +#define RTE_IXGBE_RXQ_REARM_THRESH      32
> > +#define RTE_IXGBE_MAX_RX_BURST
> RTE_IXGBE_RXQ_REARM_THRESH
> >   #define RTE_IXGBE_TX_MAX_FREE_BUF_SZ    64
> >   #endif
> >
> > diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> > index cf25a53..2ca0e4c 100644
> > --- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> > +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> > @@ -245,13 +245,13 @@ desc_to_olflags_v(__m128i descs[4], struct
> rte_mbuf **rx_pkts)
> >   #endif
> >
> >   /*
> > - * vPMD receive routine, now only accept (nb_pkts ==
> RTE_IXGBE_VPMD_RX_BURST)
> > - * in one loop
> > + * vPMD raw receive routine, only accept(nb_pkts >=
> RTE_IXGBE_DESCS_PER_LOOP)
> >    *
> >    * Notice:
> > - * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
> > - * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan
> RTE_IXGBE_VPMD_RX_BURST
> > + * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
> > + * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan
> RTE_IXGBE_MAX_RX_BURST
> >    *   numbers of DD bit
> > + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
> >    * - don't support ol_flags for rss and csum err
> >    */
> >   static inline uint16_t
> > @@ -286,8 +286,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq,
> struct rte_mbuf **rx_pkts,
> >   	__m128i dd_check, eop_check;
> >   #endif /* RTE_NEXT_ABI */
> >
> > -	if (unlikely(nb_pkts < RTE_IXGBE_VPMD_RX_BURST))
> > -		return 0;
> > +	/* nb_pkts shall be less equal than RTE_IXGBE_MAX_RX_BURST */
> > +	nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_MAX_RX_BURST);
> > +
> > +	/* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */
> > +	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
> >
> >   	/* Just the act of getting into the function from the application is
> >   	 * going to cost about 7 cycles */
> > @@ -356,7 +359,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq,
> struct rte_mbuf **rx_pkts,
> >   	 * D. fill info. from desc to mbuf
> >   	 */
> >   #endif /* RTE_NEXT_ABI */
> > -	for (pos = 0, nb_pkts_recd = 0; pos < RTE_IXGBE_VPMD_RX_BURST;
> > +	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
> >   			pos += RTE_IXGBE_DESCS_PER_LOOP,
> >   			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
> >   #ifdef RTE_NEXT_ABI
> > @@ -518,13 +521,13 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq,
> struct rte_mbuf **rx_pkts,
> >   }
> >
> >   /*
> > - * vPMD receive routine, now only accept (nb_pkts ==
> RTE_IXGBE_VPMD_RX_BURST)
> > - * in one loop
> > + * vPMD receive routine, only accept(nb_pkts >=
> RTE_IXGBE_DESCS_PER_LOOP)
> >    *
> >    * Notice:
> > - * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
> > - * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan
> RTE_IXGBE_VPMD_RX_BURST
> > + * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
> > + * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan
> RTE_IXGBE_MAX_RX_BURST
> >    *   numbers of DD bit
> > + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
> >    * - don't support ol_flags for rss and csum err
> >    */
> >   uint16_t
> > @@ -538,12 +541,11 @@ static inline uint16_t
> >   reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
> >   		uint16_t nb_bufs, uint8_t *split_flags)
> >   {
> > -	struct rte_mbuf *pkts[RTE_IXGBE_VPMD_RX_BURST]; /*finished pkts*/
> > +	struct rte_mbuf *pkts[nb_bufs]; /*finished pkts*/
> >   	struct rte_mbuf *start = rxq->pkt_first_seg;
> >   	struct rte_mbuf *end =  rxq->pkt_last_seg;
> >   	unsigned pkt_idx, buf_idx;
> >
> > -
> >   	for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
> >   		if (end != NULL) {
> >   			/* processing a split packet */
> > @@ -603,14 +605,17 @@ reassemble_packets(struct ixgbe_rx_queue *rxq,
> struct rte_mbuf **rx_bufs,
> >    *
> >    * Notice:
> >    * - don't support ol_flags for rss and csum err
> > - * - now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
> > + * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
> > + * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan
> RTE_IXGBE_MAX_RX_BURST
> > + *   numbers of DD bit
> > + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
> >    */
> >   uint16_t
> >   ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> >   		uint16_t nb_pkts)
> >   {
> >   	struct ixgbe_rx_queue *rxq = rx_queue;
> > -	uint8_t split_flags[RTE_IXGBE_VPMD_RX_BURST] = {0};
> > +	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
> >
> >   	/* get some new buffers */
> >   	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
> > @@ -735,8 +740,8 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf
> **tx_pkts,
> >   	uint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;
> >   	int i;
> >
> > -	if (unlikely(nb_pkts > RTE_IXGBE_VPMD_TX_BURST))
> > -		nb_pkts = RTE_IXGBE_VPMD_TX_BURST;
> > +	/* cross rx_thresh boundary is not allowed */
> > +	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
> >
> >   	if (txq->nb_tx_free < txq->tx_free_thresh)
> >   		ixgbe_tx_free_bufs(txq);
> >

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v3] ixgbe: remove vector pmd burst size restriction
  2015-08-04 11:47   ` [dpdk-dev] [PATCH v3] " Cunming Liang
  2015-08-04 16:26     ` Zoltan Kiss
@ 2015-08-05  9:31     ` Ananyev, Konstantin
  2015-09-09 13:16       ` Thomas Monjalon
  1 sibling, 1 reply; 18+ messages in thread
From: Ananyev, Konstantin @ 2015-08-05  9:31 UTC (permalink / raw)
  To: Liang, Cunming, dev



> -----Original Message-----
> From: Liang, Cunming
> Sent: Tuesday, August 04, 2015 12:47 PM
> To: dev@dpdk.org
> Cc: Ananyev, Konstantin; zoltan.kiss@linaro.org; Liang, Cunming
> Subject: [PATCH v3] ixgbe: remove vector pmd burst size restriction
> 
> On receive side, the burst size now floor aligns to RTE_IXGBE_DESCS_PER_LOOP power of 2.
> According to this rule, the burst size less than 4 still won't receive anything.
> (Before this change, the burst size less than 32 can't receive anything.)
> _recv_*_pkts_vec returns no more than 32(RTE_IXGBE_RXQ_REARM_THRESH) packets.
> 
> On transmit side, the max burst size no longer bind with a constant, however it still
> require to check the cross tx_rs_thresh violation.
> 
> There's no obvious performance drop found on both recv_pkts_vec
> and recv_scattered_pkts_vec on burst size 32.
> 
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> ---
>  v3 change:
>   - reword the init print log
> 
>  v2 change:
>   - keep max rx burst size in 32
>   - reword some comments
> 
>  drivers/net/ixgbe/ixgbe_rxtx.c     |  4 +++-
>  drivers/net/ixgbe/ixgbe_rxtx.h     |  5 ++---
>  drivers/net/ixgbe/ixgbe_rxtx_vec.c | 39 +++++++++++++++++++++-----------------
>  3 files changed, 27 insertions(+), 21 deletions(-)
> 

Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v3] ixgbe: remove vector pmd burst size restriction
  2015-08-05  6:28       ` Liang, Cunming
@ 2015-08-05 15:59         ` Zoltan Kiss
  0 siblings, 0 replies; 18+ messages in thread
From: Zoltan Kiss @ 2015-08-05 15:59 UTC (permalink / raw)
  To: Liang, Cunming, dev



On 05/08/15 07:28, Liang, Cunming wrote:
> Hi Zoltan,
>
>> -----Original Message-----
>> From: Zoltan Kiss [mailto:zoltan.kiss@linaro.org]
>> Sent: Wednesday, August 05, 2015 12:26 AM
>> To: Liang, Cunming; dev@dpdk.org
>> Cc: Ananyev, Konstantin
>> Subject: Re: [PATCH v3] ixgbe: remove vector pmd burst size restriction
>>
>>
>>
>> On 04/08/15 12:47, Cunming Liang wrote:
>>> On receive side, the burst size now floor aligns to RTE_IXGBE_DESCS_PER_LOOP
>> power of 2.
>>> According to this rule, the burst size less than 4 still won't receive anything.
>>> (Before this change, the burst size less than 32 can't receive anything.)
>>> _recv_*_pkts_vec returns no more than 32(RTE_IXGBE_RXQ_REARM_THRESH)
>> packets.
>>>
>>> On transmit side, the max burst size no longer bind with a constant, however it
>> still
>>> require to check the cross tx_rs_thresh violation.
>>>
>>> There's no obvious performance drop found on both recv_pkts_vec
>>> and recv_scattered_pkts_vec on burst size 32.
>>>
>>> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
>>> ---
>>>    v3 change:
>>>     - reword the init print log
>>>
>>>    v2 change:
>>>     - keep max rx burst size in 32
>>>     - reword some comments
>>>
>>>    drivers/net/ixgbe/ixgbe_rxtx.c     |  4 +++-
>>>    drivers/net/ixgbe/ixgbe_rxtx.h     |  5 ++---
>>>    drivers/net/ixgbe/ixgbe_rxtx_vec.c | 39
>> +++++++++++++++++++++-----------------
>>>    3 files changed, 27 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
>>> index 91023b9..03eb45d 100644
>>> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
>>> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
>>> @@ -4008,7 +4008,9 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
>>>    	 */
>>>    	} else if (adapter->rx_vec_allowed) {
>>>    		PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
>>> -				   "burst size no less than 32.");
>>> +				    "burst size no less than %d (port=%d).",
>>> +			     RTE_IXGBE_DESCS_PER_LOOP,
>>> +			     dev->data->port_id);
>>
>> A tab seems to be missing from the indentation, otherwise:
>>
>> Reviewed-by: Zoltan Kiss <zoltan.kiss@linaro.org>
>>
> Thanks for the review. I double checked indentation agian, it looks fine.
> 1st string line 4x/tab intention + space alignment, the other variable lines 3x/tab indentation + space alignment.
> According to the 'coding_style.rst' Indentation section - 'As with all style guideline, code should match style already in use in an existing file.'
> The style keeps the same as its following condition check. It passes 'checkpatch.pl' checking as well.

Sorry for the noise, I was confused because the string is indented to 
the start of itself, while the other parameters to the opening bracket.
>
> Thanks,
> /Steve
>>>
>>>    		dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
>>>    	} else if (adapter->rx_bulk_alloc_allowed) {
>>> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
>>> index 113682a..b9eca67 100644
>>> --- a/drivers/net/ixgbe/ixgbe_rxtx.h
>>> +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
>>> @@ -47,9 +47,8 @@
>>>    	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
>>>
>>>    #ifdef RTE_IXGBE_INC_VECTOR
>>> -#define RTE_IXGBE_VPMD_RX_BURST         32
>>> -#define RTE_IXGBE_VPMD_TX_BURST         32
>>> -#define RTE_IXGBE_RXQ_REARM_THRESH      RTE_IXGBE_VPMD_RX_BURST
>>> +#define RTE_IXGBE_RXQ_REARM_THRESH      32
>>> +#define RTE_IXGBE_MAX_RX_BURST
>> RTE_IXGBE_RXQ_REARM_THRESH
>>>    #define RTE_IXGBE_TX_MAX_FREE_BUF_SZ    64
>>>    #endif
>>>
>>> diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
>> b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
>>> index cf25a53..2ca0e4c 100644
>>> --- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
>>> +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
>>> @@ -245,13 +245,13 @@ desc_to_olflags_v(__m128i descs[4], struct
>> rte_mbuf **rx_pkts)
>>>    #endif
>>>
>>>    /*
>>> - * vPMD receive routine, now only accept (nb_pkts ==
>> RTE_IXGBE_VPMD_RX_BURST)
>>> - * in one loop
>>> + * vPMD raw receive routine, only accept(nb_pkts >=
>> RTE_IXGBE_DESCS_PER_LOOP)
>>>     *
>>>     * Notice:
>>> - * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
>>> - * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan
>> RTE_IXGBE_VPMD_RX_BURST
>>> + * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
>>> + * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan
>> RTE_IXGBE_MAX_RX_BURST
>>>     *   numbers of DD bit
>>> + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
>>>     * - don't support ol_flags for rss and csum err
>>>     */
>>>    static inline uint16_t
>>> @@ -286,8 +286,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq,
>> struct rte_mbuf **rx_pkts,
>>>    	__m128i dd_check, eop_check;
>>>    #endif /* RTE_NEXT_ABI */
>>>
>>> -	if (unlikely(nb_pkts < RTE_IXGBE_VPMD_RX_BURST))
>>> -		return 0;
>>> +	/* nb_pkts shall be less equal than RTE_IXGBE_MAX_RX_BURST */
>>> +	nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_MAX_RX_BURST);
>>> +
>>> +	/* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */
>>> +	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
>>>
>>>    	/* Just the act of getting into the function from the application is
>>>    	 * going to cost about 7 cycles */
>>> @@ -356,7 +359,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq,
>> struct rte_mbuf **rx_pkts,
>>>    	 * D. fill info. from desc to mbuf
>>>    	 */
>>>    #endif /* RTE_NEXT_ABI */
>>> -	for (pos = 0, nb_pkts_recd = 0; pos < RTE_IXGBE_VPMD_RX_BURST;
>>> +	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
>>>    			pos += RTE_IXGBE_DESCS_PER_LOOP,
>>>    			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
>>>    #ifdef RTE_NEXT_ABI
>>> @@ -518,13 +521,13 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq,
>> struct rte_mbuf **rx_pkts,
>>>    }
>>>
>>>    /*
>>> - * vPMD receive routine, now only accept (nb_pkts ==
>> RTE_IXGBE_VPMD_RX_BURST)
>>> - * in one loop
>>> + * vPMD receive routine, only accept(nb_pkts >=
>> RTE_IXGBE_DESCS_PER_LOOP)
>>>     *
>>>     * Notice:
>>> - * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
>>> - * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan
>> RTE_IXGBE_VPMD_RX_BURST
>>> + * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
>>> + * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan
>> RTE_IXGBE_MAX_RX_BURST
>>>     *   numbers of DD bit
>>> + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
>>>     * - don't support ol_flags for rss and csum err
>>>     */
>>>    uint16_t
>>> @@ -538,12 +541,11 @@ static inline uint16_t
>>>    reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
>>>    		uint16_t nb_bufs, uint8_t *split_flags)
>>>    {
>>> -	struct rte_mbuf *pkts[RTE_IXGBE_VPMD_RX_BURST]; /*finished pkts*/
>>> +	struct rte_mbuf *pkts[nb_bufs]; /*finished pkts*/
>>>    	struct rte_mbuf *start = rxq->pkt_first_seg;
>>>    	struct rte_mbuf *end =  rxq->pkt_last_seg;
>>>    	unsigned pkt_idx, buf_idx;
>>>
>>> -
>>>    	for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
>>>    		if (end != NULL) {
>>>    			/* processing a split packet */
>>> @@ -603,14 +605,17 @@ reassemble_packets(struct ixgbe_rx_queue *rxq,
>> struct rte_mbuf **rx_bufs,
>>>     *
>>>     * Notice:
>>>     * - don't support ol_flags for rss and csum err
>>> - * - now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
>>> + * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
>>> + * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan
>> RTE_IXGBE_MAX_RX_BURST
>>> + *   numbers of DD bit
>>> + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
>>>     */
>>>    uint16_t
>>>    ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>>>    		uint16_t nb_pkts)
>>>    {
>>>    	struct ixgbe_rx_queue *rxq = rx_queue;
>>> -	uint8_t split_flags[RTE_IXGBE_VPMD_RX_BURST] = {0};
>>> +	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
>>>
>>>    	/* get some new buffers */
>>>    	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
>>> @@ -735,8 +740,8 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf
>> **tx_pkts,
>>>    	uint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;
>>>    	int i;
>>>
>>> -	if (unlikely(nb_pkts > RTE_IXGBE_VPMD_TX_BURST))
>>> -		nb_pkts = RTE_IXGBE_VPMD_TX_BURST;
>>> +	/* cross rx_thresh boundary is not allowed */
>>> +	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
>>>
>>>    	if (txq->nb_tx_free < txq->tx_free_thresh)
>>>    		ixgbe_tx_free_bufs(txq);
>>>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v3] ixgbe: remove vector pmd burst size restriction
  2015-08-05  9:31     ` Ananyev, Konstantin
@ 2015-09-09 13:16       ` Thomas Monjalon
  0 siblings, 0 replies; 18+ messages in thread
From: Thomas Monjalon @ 2015-09-09 13:16 UTC (permalink / raw)
  To: Liang, Cunming; +Cc: dev

> > On receive side, the burst size now floor aligns to RTE_IXGBE_DESCS_PER_LOOP power of 2.
> > According to this rule, the burst size less than 4 still won't receive anything.
> > (Before this change, the burst size less than 32 can't receive anything.)
> > _recv_*_pkts_vec returns no more than 32(RTE_IXGBE_RXQ_REARM_THRESH) packets.
> > 
> > On transmit side, the max burst size no longer bind with a constant, however it still
> > require to check the cross tx_rs_thresh violation.
> > 
> > There's no obvious performance drop found on both recv_pkts_vec
> > and recv_scattered_pkts_vec on burst size 32.
> > 
> > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> 
> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

Applied, thanks

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2015-09-09 13:17 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-31  8:17 [dpdk-dev] [PATCH v1] ixgbe: remove vector pmd burst size restriction Cunming Liang
2015-07-31  9:21 ` Ananyev, Konstantin
2015-07-31 10:03 ` Zoltan Kiss
2015-07-31 10:21   ` Ananyev, Konstantin
2015-07-31 11:57     ` Zoltan Kiss
2015-07-31 14:49       ` Zoltan Kiss
2015-08-03  7:41     ` Liang, Cunming
2015-08-03  9:59       ` Liang, Cunming
2015-08-03  2:40   ` Liang, Cunming
2015-08-04  7:32 ` [dpdk-dev] [PATCH v2] " Cunming Liang
2015-08-04  9:02   ` Zoltan Kiss
2015-08-04 11:15     ` Liang, Cunming
2015-08-04 11:47   ` [dpdk-dev] [PATCH v3] " Cunming Liang
2015-08-04 16:26     ` Zoltan Kiss
2015-08-05  6:28       ` Liang, Cunming
2015-08-05 15:59         ` Zoltan Kiss
2015-08-05  9:31     ` Ananyev, Konstantin
2015-09-09 13:16       ` Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).