patches for DPDK stable branches
 help / color / mirror / Atom feed
* [PATCH] net/mlx5: add 128B padding of Rx completion entry
@ 2022-05-24 10:10 Xing Zhang
  0 siblings, 0 replies; 4+ messages in thread
From: Xing Zhang @ 2022-05-24 10:10 UTC (permalink / raw)
  To: zhangxing_neurs; +Cc: stable

From: "Xing Zhang zhangxing_neurs@163.com" <zhangxing_neurs@163.com>

When using the Verbs flow engine ("dv_flow_en"=0) and the build defines the
HAVE_IBV_MLX5_MOD_CQE_128B_PAD macro, the vendor capability flag is set to
MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD. But if the device does not support the
capability, this will cause CQ creation failure. So use padded CQEs only when
the system cache-line size is 128B and the device has the capability of
CQE padding. The CQE padding device argument should not have been
deleted.

Fixes: 4a7f979af28e ("net/mlx5: remove CQE padding device argument")
Cc: stable@dpdk.org

Signed-off-by: Xing Zhang <zhangxing_neurs@163.com>
---
 doc/guides/nics/mlx5.rst            | 19 ++++++++++++++++++-
 drivers/net/mlx5/linux/mlx5_os.c    |  5 +++++
 drivers/net/mlx5/linux/mlx5_verbs.c |  2 +-
 drivers/net/mlx5/mlx5.c             | 13 ++++++++++++-
 drivers/net/mlx5/mlx5.h             |  2 ++
 5 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index a0b9284..bd11e5e 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -14,7 +14,6 @@ ConnectX-6 Lx**, **Mellanox BlueField** and **Mellanox BlueField-2** families
 of 10/25/40/50/100/200 Gb/s adapters as well as their virtual functions (VF)
 in SR-IOV context.
 
-
 Design
 ------
 
@@ -580,6 +579,24 @@ for an additional list of options shared with other mlx5 drivers.
   - POWER9 and ARMv8 with ConnectX-4 Lx, ConnectX-5, ConnectX-6, ConnectX-6 Dx,
     ConnectX-6 Lx, BlueField and BlueField-2.
 
+- ``rxq_cqe_pad_en`` parameter [int]
+
+  A nonzero value enables 128B padding of CQE on RX side. The size of CQE
+  is aligned with the size of a cacheline of the core. If cacheline size is
+  128B, the CQE size is configured to be 128B even though the device writes
+  only 64B data on the cacheline. This is to avoid unnecessary cache
+  invalidation by device's two consecutive writes on to one cacheline.
+  However in some architecture, it is more beneficial to update entire
+  cacheline with padding the rest 64B rather than striding because
+  read-modify-write could drop performance a lot. On the other hand,
+  writing extra data will consume more PCIe bandwidth and could also drop
+  the maximum throughput. It is recommended to empirically set this
+  parameter. Disabled by default.
+
+  Supported on:
+
+  - CPU having 128B cacheline with ConnectX-5 and BlueField.
+
 - ``rxq_pkt_pad_en`` parameter [int]
 
   A nonzero value enables padding Rx packet to the size of cacheline on PCI
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index a821153..ef26d94 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -298,6 +298,11 @@ mlx5_os_capabilities_prepare(struct mlx5_dev_ctx_shared *sh)
 		DRV_LOG(DEBUG, "Device supports Multi-Packet RQ.");
 	}
 #endif
+#ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
+	/* Whether device supports 128B Rx CQE padding. */
+	sh->dev_cap.cqe_pad = RTE_CACHE_LINE_SIZE == 128 &&
+		(dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_PAD);
+#endif
 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
 	if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS) {
 		sh->dev_cap.tunnel_en = dv_attr.tunnel_offloads_caps &
diff --git a/drivers/net/mlx5/linux/mlx5_verbs.c b/drivers/net/mlx5/linux/mlx5_verbs.c
index b6ba21c..4ced94a 100644
--- a/drivers/net/mlx5/linux/mlx5_verbs.c
+++ b/drivers/net/mlx5/linux/mlx5_verbs.c
@@ -200,7 +200,7 @@ mlx5_rxq_ibv_cq_create(struct mlx5_rxq_priv *rxq)
 			priv->dev_data->port_id);
 	}
 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
-	if (RTE_CACHE_LINE_SIZE == 128) {
+	if (priv->config.cqe_pad) {
 		cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;
 		cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
 	}
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 72b1e35..51c92b2 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -46,6 +46,9 @@
 /* Device parameter to enable RX completion queue compression. */
 #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"
 
+/* Device parameter to enable RX completion entry padding to 128B. */
+#define MLX5_RXQ_CQE_PAD_EN "rxq_cqe_pad_en"
+
 /* Device parameter to enable padding Rx packet to cacheline size. */
 #define MLX5_RXQ_PKT_PAD_EN "rxq_pkt_pad_en"
 
@@ -2176,7 +2179,9 @@ mlx5_port_args_check_handler(const char *key, const char *val, void *opaque)
 		}
 		config->cqe_comp = !!tmp;
 		config->cqe_comp_fmt = tmp;
-	} else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) {
+	} else if (strcmp(MLX5_RXQ_CQE_PAD_EN, key) == 0) {
+		config->cqe_pad = !!tmp;
+ 	} else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) {
 		config->hw_padding = !!tmp;
 	} else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) {
 		config->mprq.enabled = !!tmp;
@@ -2352,6 +2357,12 @@ mlx5_port_args_config(struct mlx5_priv *priv, struct mlx5_kvargs_ctrl *mkvlist,
 	}
 	DRV_LOG(DEBUG, "Rx CQE compression is %ssupported.",
 		config->cqe_comp ? "" : "not ");
+	if (config->cqe_pad && !dev_cap->cqe_pad) {
+		DRV_LOG(WARNING, "Rx CQE padding isn't supported.");
+		config->cqe_pad = 0;
+	} else if (config->cqe_pad) {
+		DRV_LOG(INFO, "Rx CQE padding is enabled.");
+	}
 	if ((config->std_delay_drop || config->hp_delay_drop) &&
 	    !dev_cap->rq_delay_drop_en) {
 		config->std_delay_drop = 0;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 23a28f6..fa41b44 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -140,6 +140,7 @@ struct mlx5_dev_cap {
 	uint32_t txpp_en:1; /* Tx packet pacing is supported. */
 	uint32_t mpls_en:1; /* MPLS over GRE/UDP is supported. */
 	uint32_t cqe_comp:1; /* CQE compression is supported. */
+	uint32_t cqe_pad:1; /* CQE padding is supported. */
 	uint32_t hw_csum:1; /* Checksum offload is supported. */
 	uint32_t hw_padding:1; /* End alignment padding is supported. */
 	uint32_t dest_tir:1; /* Whether advanced DR API is available. */
@@ -264,6 +265,7 @@ struct mlx5_port_config {
 	unsigned int hw_vlan_insert:1; /* VLAN insertion in WQE is supported. */
 	unsigned int hw_padding:1; /* End alignment padding is supported. */
 	unsigned int cqe_comp:1; /* CQE compression is enabled. */
+	unsigned int cqe_pad:1; /* CQE padding is enabled. */
 	unsigned int cqe_comp_fmt:3; /* CQE compression format. */
 	unsigned int rx_vec_en:1; /* Rx vector is enabled. */
 	unsigned int std_delay_drop:1; /* Enable standard Rxq delay drop. */
-- 
2.7.4


^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH] net/mlx5: add 128B padding of Rx completion entry
  2022-05-24 10:47 Xing Zhang
@ 2022-06-21  5:19 ` Slava Ovsiienko
  0 siblings, 0 replies; 4+ messages in thread
From: Slava Ovsiienko @ 2022-06-21  5:19 UTC (permalink / raw)
  To: Xing Zhang, Matan Azrad; +Cc: dev, stable

Hi, Xing

Thank you for the patch, I have some questions about.

As far as I can understand we have three flags:
1. HAVE_IBV_MLX5_MOD_CQE_128B_PAD, rdma-core library supports CQE size management
2. dv_attr.flags  (flag MLX5DV_CONTEXT_FLAGS_CQE_128B_PAD), reports device supports CQE size management
3. RTE_CACHE_LINE_SIZE == 128  compile time static option, tells us it might make sense to configure 128B CQEs

What use cases have we got ?

1. HAVE_IBV_MLX5_MOD_CQE_128B_PAD is not defined, no choice, 64B CQE is only option
2. dv_attr.flags - flag is zero, no choice, 64B CQE is only option 
3. RTE_CACHE_LINE_SIZE == 64, configuring CQE to 128B is meaningless, 64B is only reasonable option

So, the meaningful use case for the patch looks like the following:
HAVE_IBV_MLX5_MOD_CQE_128B_PAD is defined, device supports CQE size mgt, cache line size is 128.
Right?

AFAIU, the patch covers two issues:

1. Fixes the check of dv_attr flag to see whether device supports CQE size settings
   This was missed in the existing PMD code and should be fixed anyway.
   Also, please see mlx5_devx_cq_create() routine - it should be updated to check dv_attr flag as well.

2. Introduces the new devarg "rxq_cqe_pad_en".
   We have overwhelming amount of devargs in mlx5 and to introduce the new one, we must have
   the strong motivation. What would the new "rxq_cqe_pad_en" allow us? The only reasonable option I see
   - explicitly configure 64B CQE size for arch with 128B cacheline. Does it make sense?
   Have you seen real use case of the forcing CQE size to 64B improved the performance?

> +  A nonzero value enables 128B padding of CQE on RX side. The size of
> + CQE  is aligned with the size of a cacheline of the core. If cacheline
> + size is  128B, the CQE size is configured to be 128B even though the
> + device writes  only 64B data on the cacheline. This is to avoid
> + unnecessary cache  invalidation by device's two consecutive writes on to
> one cacheline.

Sorry, I do not follow. How does 64Bx2 writing cause the cache line invalidation?
AFAIK, the HW uses L3 cache to store data from PCIe transaction. Writing of not
complete line might cause extra reads from memory to fill up the gaps in line
data. L1/L2 caches should be invalidated anyway, regardless of data size.

> +  However in some architecture, it is more beneficial to update entire
> + cacheline with padding the rest 64B rather than striding because
> + read-modify-write could drop performance a lot. On the other hand,
> + writing extra data will consume more PCIe bandwidth and could also
> + drop  the maximum throughput. It is recommended to empirically set
> + this  parameter. Disabled by default.

I would consider "enabled by default" to keep the existing behavior.

With best regards,
Slava


> -----Original Message-----
> From: Xing Zhang <zhangxing_neurs@163.com>
> Sent: Tuesday, May 24, 2022 13:48
> To: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> <viacheslavo@nvidia.com>
> Cc: dev@dpdk.org; zhangxing_neurs@163.com; stable@dpdk.org
> Subject: [PATCH] net/mlx5: add 128B padding of Rx completion entry
> 
> From: "Xing Zhang zhangxing_neurs@163.com" <zhangxing_neurs@163.com>
> 
> When using Verbs flow engine("dv_flow_en"=0)and compile macro contain
> HAVE_IBV_MLX5_MOD_CQE_128B_PAD, vendor cap lag is setted to
> MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD.But if the device is not support the
> capability, this will cause CQ creation failure.So use padded CQE when system
> cache-line size is 128B and the device has the capability of CQE padding. The
> patch that CQE padding device argument should not be deleted.
> 
> Fixes: 4a7f979af28e ("net/mlx5: remove CQE padding device argument")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Xing Zhang <zhangxing_neurs@163.com>
> ---
>  doc/guides/nics/mlx5.rst            | 19 ++++++++++++++++++-
>  drivers/net/mlx5/linux/mlx5_os.c    |  5 +++++
>  drivers/net/mlx5/linux/mlx5_verbs.c |  2 +-
>  drivers/net/mlx5/mlx5.c             | 13 ++++++++++++-
>  drivers/net/mlx5/mlx5.h             |  2 ++
>  5 files changed, 38 insertions(+), 3 deletions(-)
> 
> diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index
> a0b9284..bd11e5e 100644
> --- a/doc/guides/nics/mlx5.rst
> +++ b/doc/guides/nics/mlx5.rst
> @@ -14,7 +14,6 @@ ConnectX-6 Lx**, **Mellanox BlueField** and
> **Mellanox BlueField-2** families  of 10/25/40/50/100/200 Gb/s adapters as
> well as their virtual functions (VF)  in SR-IOV context.
> 
> -
>  Design
>  ------
> 
> @@ -580,6 +579,24 @@ for an additional list of options shared with other
> mlx5 drivers.
>    - POWER9 and ARMv8 with ConnectX-4 Lx, ConnectX-5, ConnectX-6,
> ConnectX-6 Dx,
>      ConnectX-6 Lx, BlueField and BlueField-2.
> 
> +- ``rxq_cqe_pad_en`` parameter [int]
> +
> +  A nonzero value enables 128B padding of CQE on RX side. The size of
> + CQE  is aligned with the size of a cacheline of the core. If cacheline
> + size is  128B, the CQE size is configured to be 128B even though the
> + device writes  only 64B data on the cacheline. This is to avoid
> + unnecessary cache  invalidation by device's two consecutive writes on to
> one cacheline.
> +  However in some architecture, it is more beneficial to update entire
> + cacheline with padding the rest 64B rather than striding because
> + read-modify-write could drop performance a lot. On the other hand,
> + writing extra data will consume more PCIe bandwidth and could also
> + drop  the maximum throughput. It is recommended to empirically set
> + this  parameter. Disabled by default.
> +
> +  Supported on:
> +
> +  - CPU having 128B cacheline with ConnectX-5 and BlueField.
> +
>  - ``rxq_pkt_pad_en`` parameter [int]
> 
>    A nonzero value enables padding Rx packet to the size of cacheline on PCI
> diff --git a/drivers/net/mlx5/linux/mlx5_os.c
> b/drivers/net/mlx5/linux/mlx5_os.c
> index a821153..ef26d94 100644
> --- a/drivers/net/mlx5/linux/mlx5_os.c
> +++ b/drivers/net/mlx5/linux/mlx5_os.c
> @@ -298,6 +298,11 @@ mlx5_os_capabilities_prepare(struct
> mlx5_dev_ctx_shared *sh)
>  		DRV_LOG(DEBUG, "Device supports Multi-Packet RQ.");
>  	}
>  #endif
> +#ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
> +	/* Whether device supports 128B Rx CQE padding. */
> +	sh->dev_cap.cqe_pad = RTE_CACHE_LINE_SIZE == 128 &&
> +		(dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_PAD);
> +#endif
>  #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
>  	if (dv_attr.comp_mask &
> MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS) {
>  		sh->dev_cap.tunnel_en = dv_attr.tunnel_offloads_caps & diff
> --git a/drivers/net/mlx5/linux/mlx5_verbs.c
> b/drivers/net/mlx5/linux/mlx5_verbs.c
> index b6ba21c..4ced94a 100644
> --- a/drivers/net/mlx5/linux/mlx5_verbs.c
> +++ b/drivers/net/mlx5/linux/mlx5_verbs.c
> @@ -200,7 +200,7 @@ mlx5_rxq_ibv_cq_create(struct mlx5_rxq_priv *rxq)
>  			priv->dev_data->port_id);
>  	}
>  #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
> -	if (RTE_CACHE_LINE_SIZE == 128) {
> +	if (priv->config.cqe_pad) {
>  		cq_attr.mlx5.comp_mask |=
> MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;
>  		cq_attr.mlx5.flags |=
> MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
>  	}
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> 72b1e35..51c92b2 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -46,6 +46,9 @@
>  /* Device parameter to enable RX completion queue compression. */  #define
> MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"
> 
> +/* Device parameter to enable RX completion entry padding to 128B. */
> +#define MLX5_RXQ_CQE_PAD_EN "rxq_cqe_pad_en"
> +
>  /* Device parameter to enable padding Rx packet to cacheline size. */
> #define MLX5_RXQ_PKT_PAD_EN "rxq_pkt_pad_en"
> 
> @@ -2176,7 +2179,9 @@ mlx5_port_args_check_handler(const char *key,
> const char *val, void *opaque)
>  		}
>  		config->cqe_comp = !!tmp;
>  		config->cqe_comp_fmt = tmp;
> -	} else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) {
> +	} else if (strcmp(MLX5_RXQ_CQE_PAD_EN, key) == 0) {
> +		config->cqe_pad = !!tmp;
> + 	} else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) {
>  		config->hw_padding = !!tmp;
>  	} else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) {
>  		config->mprq.enabled = !!tmp;
> @@ -2352,6 +2357,12 @@ mlx5_port_args_config(struct mlx5_priv *priv,
> struct mlx5_kvargs_ctrl *mkvlist,
>  	}
>  	DRV_LOG(DEBUG, "Rx CQE compression is %ssupported.",
>  		config->cqe_comp ? "" : "not ");
> +	if (config->cqe_pad && !dev_cap->cqe_pad) {
> +		DRV_LOG(WARNING, "Rx CQE padding isn't supported.");
> +		config->cqe_pad = 0;
> +	} else if (config->cqe_pad) {
> +		DRV_LOG(INFO, "Rx CQE padding is enabled.");
> +	}
>  	if ((config->std_delay_drop || config->hp_delay_drop) &&
>  	    !dev_cap->rq_delay_drop_en) {
>  		config->std_delay_drop = 0;
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 23a28f6..fa41b44 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -140,6 +140,7 @@ struct mlx5_dev_cap {
>  	uint32_t txpp_en:1; /* Tx packet pacing is supported. */
>  	uint32_t mpls_en:1; /* MPLS over GRE/UDP is supported. */
>  	uint32_t cqe_comp:1; /* CQE compression is supported. */
> +	uint32_t cqe_pad:1; /* CQE padding is supported. */
>  	uint32_t hw_csum:1; /* Checksum offload is supported. */
>  	uint32_t hw_padding:1; /* End alignment padding is supported. */
>  	uint32_t dest_tir:1; /* Whether advanced DR API is available. */ @@ -
> 264,6 +265,7 @@ struct mlx5_port_config {
>  	unsigned int hw_vlan_insert:1; /* VLAN insertion in WQE is
> supported. */
>  	unsigned int hw_padding:1; /* End alignment padding is supported.
> */
>  	unsigned int cqe_comp:1; /* CQE compression is enabled. */
> +	unsigned int cqe_pad:1; /* CQE padding is enabled. */
>  	unsigned int cqe_comp_fmt:3; /* CQE compression format. */
>  	unsigned int rx_vec_en:1; /* Rx vector is enabled. */
>  	unsigned int std_delay_drop:1; /* Enable standard Rxq delay drop. */
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] net/mlx5: add 128B padding of Rx completion entry
@ 2022-05-24 10:47 Xing Zhang
  2022-06-21  5:19 ` Slava Ovsiienko
  0 siblings, 1 reply; 4+ messages in thread
From: Xing Zhang @ 2022-05-24 10:47 UTC (permalink / raw)
  To: matan, viacheslavo; +Cc: dev, zhangxing_neurs, stable

From: "Xing Zhang zhangxing_neurs@163.com" <zhangxing_neurs@163.com>

When using the Verbs flow engine ("dv_flow_en"=0) and the build defines the
HAVE_IBV_MLX5_MOD_CQE_128B_PAD macro, the vendor capability flag is set to
MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD. But if the device does not support the
capability, this will cause CQ creation failure. So use padded CQEs only when
the system cache-line size is 128B and the device has the capability of
CQE padding. The CQE padding device argument should not have been
deleted.

Fixes: 4a7f979af28e ("net/mlx5: remove CQE padding device argument")
Cc: stable@dpdk.org

Signed-off-by: Xing Zhang <zhangxing_neurs@163.com>
---
 doc/guides/nics/mlx5.rst            | 19 ++++++++++++++++++-
 drivers/net/mlx5/linux/mlx5_os.c    |  5 +++++
 drivers/net/mlx5/linux/mlx5_verbs.c |  2 +-
 drivers/net/mlx5/mlx5.c             | 13 ++++++++++++-
 drivers/net/mlx5/mlx5.h             |  2 ++
 5 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index a0b9284..bd11e5e 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -14,7 +14,6 @@ ConnectX-6 Lx**, **Mellanox BlueField** and **Mellanox BlueField-2** families
 of 10/25/40/50/100/200 Gb/s adapters as well as their virtual functions (VF)
 in SR-IOV context.
 
-
 Design
 ------
 
@@ -580,6 +579,24 @@ for an additional list of options shared with other mlx5 drivers.
   - POWER9 and ARMv8 with ConnectX-4 Lx, ConnectX-5, ConnectX-6, ConnectX-6 Dx,
     ConnectX-6 Lx, BlueField and BlueField-2.
 
+- ``rxq_cqe_pad_en`` parameter [int]
+
+  A nonzero value enables 128B padding of CQE on RX side. The size of CQE
+  is aligned with the size of a cacheline of the core. If cacheline size is
+  128B, the CQE size is configured to be 128B even though the device writes
+  only 64B data on the cacheline. This is to avoid unnecessary cache
+  invalidation by device's two consecutive writes on to one cacheline.
+  However in some architecture, it is more beneficial to update entire
+  cacheline with padding the rest 64B rather than striding because
+  read-modify-write could drop performance a lot. On the other hand,
+  writing extra data will consume more PCIe bandwidth and could also drop
+  the maximum throughput. It is recommended to empirically set this
+  parameter. Disabled by default.
+
+  Supported on:
+
+  - CPU having 128B cacheline with ConnectX-5 and BlueField.
+
 - ``rxq_pkt_pad_en`` parameter [int]
 
   A nonzero value enables padding Rx packet to the size of cacheline on PCI
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index a821153..ef26d94 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -298,6 +298,11 @@ mlx5_os_capabilities_prepare(struct mlx5_dev_ctx_shared *sh)
 		DRV_LOG(DEBUG, "Device supports Multi-Packet RQ.");
 	}
 #endif
+#ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
+	/* Whether device supports 128B Rx CQE padding. */
+	sh->dev_cap.cqe_pad = RTE_CACHE_LINE_SIZE == 128 &&
+		(dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_PAD);
+#endif
 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
 	if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS) {
 		sh->dev_cap.tunnel_en = dv_attr.tunnel_offloads_caps &
diff --git a/drivers/net/mlx5/linux/mlx5_verbs.c b/drivers/net/mlx5/linux/mlx5_verbs.c
index b6ba21c..4ced94a 100644
--- a/drivers/net/mlx5/linux/mlx5_verbs.c
+++ b/drivers/net/mlx5/linux/mlx5_verbs.c
@@ -200,7 +200,7 @@ mlx5_rxq_ibv_cq_create(struct mlx5_rxq_priv *rxq)
 			priv->dev_data->port_id);
 	}
 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
-	if (RTE_CACHE_LINE_SIZE == 128) {
+	if (priv->config.cqe_pad) {
 		cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;
 		cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
 	}
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 72b1e35..51c92b2 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -46,6 +46,9 @@
 /* Device parameter to enable RX completion queue compression. */
 #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"
 
+/* Device parameter to enable RX completion entry padding to 128B. */
+#define MLX5_RXQ_CQE_PAD_EN "rxq_cqe_pad_en"
+
 /* Device parameter to enable padding Rx packet to cacheline size. */
 #define MLX5_RXQ_PKT_PAD_EN "rxq_pkt_pad_en"
 
@@ -2176,7 +2179,9 @@ mlx5_port_args_check_handler(const char *key, const char *val, void *opaque)
 		}
 		config->cqe_comp = !!tmp;
 		config->cqe_comp_fmt = tmp;
-	} else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) {
+	} else if (strcmp(MLX5_RXQ_CQE_PAD_EN, key) == 0) {
+		config->cqe_pad = !!tmp;
+ 	} else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) {
 		config->hw_padding = !!tmp;
 	} else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) {
 		config->mprq.enabled = !!tmp;
@@ -2352,6 +2357,12 @@ mlx5_port_args_config(struct mlx5_priv *priv, struct mlx5_kvargs_ctrl *mkvlist,
 	}
 	DRV_LOG(DEBUG, "Rx CQE compression is %ssupported.",
 		config->cqe_comp ? "" : "not ");
+	if (config->cqe_pad && !dev_cap->cqe_pad) {
+		DRV_LOG(WARNING, "Rx CQE padding isn't supported.");
+		config->cqe_pad = 0;
+	} else if (config->cqe_pad) {
+		DRV_LOG(INFO, "Rx CQE padding is enabled.");
+	}
 	if ((config->std_delay_drop || config->hp_delay_drop) &&
 	    !dev_cap->rq_delay_drop_en) {
 		config->std_delay_drop = 0;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 23a28f6..fa41b44 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -140,6 +140,7 @@ struct mlx5_dev_cap {
 	uint32_t txpp_en:1; /* Tx packet pacing is supported. */
 	uint32_t mpls_en:1; /* MPLS over GRE/UDP is supported. */
 	uint32_t cqe_comp:1; /* CQE compression is supported. */
+	uint32_t cqe_pad:1; /* CQE padding is supported. */
 	uint32_t hw_csum:1; /* Checksum offload is supported. */
 	uint32_t hw_padding:1; /* End alignment padding is supported. */
 	uint32_t dest_tir:1; /* Whether advanced DR API is available. */
@@ -264,6 +265,7 @@ struct mlx5_port_config {
 	unsigned int hw_vlan_insert:1; /* VLAN insertion in WQE is supported. */
 	unsigned int hw_padding:1; /* End alignment padding is supported. */
 	unsigned int cqe_comp:1; /* CQE compression is enabled. */
+	unsigned int cqe_pad:1; /* CQE padding is enabled. */
 	unsigned int cqe_comp_fmt:3; /* CQE compression format. */
 	unsigned int rx_vec_en:1; /* Rx vector is enabled. */
 	unsigned int std_delay_drop:1; /* Enable standard Rxq delay drop. */
-- 
2.7.4


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] net/mlx5:add 128B padding of Rx completion entry
@ 2022-05-24  8:41 Xing Zhang
  0 siblings, 0 replies; 4+ messages in thread
From: Xing Zhang @ 2022-05-24  8:41 UTC (permalink / raw)
  To: zhangxing_neurs; +Cc: stable

From: "Xing Zhang zhangxing_neurs@163.com" <zhangxing_neurs@163.com>

When using the Verbs flow engine ("dv_flow_en"=0) and the build defines the HAVE_IBV_MLX5_MOD_CQE_128B_PAD macro, the vendor capability flag is set to MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD. But if the device does not support the capability, this will cause CQ creation failure. So the condition for using padded CQEs should be that the system cache-line size is 128B and the device has the capability of CQE padding.

Fixes: 4a7f979af28e ("net/mlx5: remove CQE padding device argument")
Cc: stable@dpdk.org

Signed-off-by: Xing Zhang <zhangxing_neurs@163.com>
---
 doc/guides/nics/mlx5.rst            | 19 ++++++++++++++++++-
 drivers/net/mlx5/linux/mlx5_os.c    |  5 +++++
 drivers/net/mlx5/linux/mlx5_verbs.c |  2 +-
 drivers/net/mlx5/mlx5.c             | 13 ++++++++++++-
 drivers/net/mlx5/mlx5.h             |  2 ++
 5 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index a0b9284..bd11e5e 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -14,7 +14,6 @@ ConnectX-6 Lx**, **Mellanox BlueField** and **Mellanox BlueField-2** families
 of 10/25/40/50/100/200 Gb/s adapters as well as their virtual functions (VF)
 in SR-IOV context.
 
-
 Design
 ------
 
@@ -580,6 +579,24 @@ for an additional list of options shared with other mlx5 drivers.
   - POWER9 and ARMv8 with ConnectX-4 Lx, ConnectX-5, ConnectX-6, ConnectX-6 Dx,
     ConnectX-6 Lx, BlueField and BlueField-2.
 
+- ``rxq_cqe_pad_en`` parameter [int]
+
+  A nonzero value enables 128B padding of CQE on RX side. The size of CQE
+  is aligned with the size of a cacheline of the core. If cacheline size is
+  128B, the CQE size is configured to be 128B even though the device writes
+  only 64B data on the cacheline. This is to avoid unnecessary cache
+  invalidation by device's two consecutive writes on to one cacheline.
+  However in some architecture, it is more beneficial to update entire
+  cacheline with padding the rest 64B rather than striding because
+  read-modify-write could drop performance a lot. On the other hand,
+  writing extra data will consume more PCIe bandwidth and could also drop
+  the maximum throughput. It is recommended to empirically set this
+  parameter. Disabled by default.
+
+  Supported on:
+
+  - CPU having 128B cacheline with ConnectX-5 and BlueField.
+
 - ``rxq_pkt_pad_en`` parameter [int]
 
   A nonzero value enables padding Rx packet to the size of cacheline on PCI
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index a821153..ef26d94 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -298,6 +298,11 @@ mlx5_os_capabilities_prepare(struct mlx5_dev_ctx_shared *sh)
 		DRV_LOG(DEBUG, "Device supports Multi-Packet RQ.");
 	}
 #endif
+#ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
+	/* Whether device supports 128B Rx CQE padding. */
+	sh->dev_cap.cqe_pad = RTE_CACHE_LINE_SIZE == 128 &&
+		(dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_PAD);
+#endif
 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
 	if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS) {
 		sh->dev_cap.tunnel_en = dv_attr.tunnel_offloads_caps &
diff --git a/drivers/net/mlx5/linux/mlx5_verbs.c b/drivers/net/mlx5/linux/mlx5_verbs.c
index b6ba21c..4ced94a 100644
--- a/drivers/net/mlx5/linux/mlx5_verbs.c
+++ b/drivers/net/mlx5/linux/mlx5_verbs.c
@@ -200,7 +200,7 @@ mlx5_rxq_ibv_cq_create(struct mlx5_rxq_priv *rxq)
 			priv->dev_data->port_id);
 	}
 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
-	if (RTE_CACHE_LINE_SIZE == 128) {
+	if (priv->config.cqe_pad) {
 		cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;
 		cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
 	}
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 72b1e35..51c92b2 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -46,6 +46,9 @@
 /* Device parameter to enable RX completion queue compression. */
 #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"
 
+/* Device parameter to enable RX completion entry padding to 128B. */
+#define MLX5_RXQ_CQE_PAD_EN "rxq_cqe_pad_en"
+
 /* Device parameter to enable padding Rx packet to cacheline size. */
 #define MLX5_RXQ_PKT_PAD_EN "rxq_pkt_pad_en"
 
@@ -2176,7 +2179,9 @@ mlx5_port_args_check_handler(const char *key, const char *val, void *opaque)
 		}
 		config->cqe_comp = !!tmp;
 		config->cqe_comp_fmt = tmp;
-	} else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) {
+	} else if (strcmp(MLX5_RXQ_CQE_PAD_EN, key) == 0) {
+		config->cqe_pad = !!tmp;
+ 	} else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) {
 		config->hw_padding = !!tmp;
 	} else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) {
 		config->mprq.enabled = !!tmp;
@@ -2352,6 +2357,12 @@ mlx5_port_args_config(struct mlx5_priv *priv, struct mlx5_kvargs_ctrl *mkvlist,
 	}
 	DRV_LOG(DEBUG, "Rx CQE compression is %ssupported.",
 		config->cqe_comp ? "" : "not ");
+	if (config->cqe_pad && !dev_cap->cqe_pad) {
+		DRV_LOG(WARNING, "Rx CQE padding isn't supported.");
+		config->cqe_pad = 0;
+	} else if (config->cqe_pad) {
+		DRV_LOG(INFO, "Rx CQE padding is enabled.");
+	}
 	if ((config->std_delay_drop || config->hp_delay_drop) &&
 	    !dev_cap->rq_delay_drop_en) {
 		config->std_delay_drop = 0;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 23a28f6..fa41b44 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -140,6 +140,7 @@ struct mlx5_dev_cap {
 	uint32_t txpp_en:1; /* Tx packet pacing is supported. */
 	uint32_t mpls_en:1; /* MPLS over GRE/UDP is supported. */
 	uint32_t cqe_comp:1; /* CQE compression is supported. */
+	uint32_t cqe_pad:1; /* CQE padding is supported. */
 	uint32_t hw_csum:1; /* Checksum offload is supported. */
 	uint32_t hw_padding:1; /* End alignment padding is supported. */
 	uint32_t dest_tir:1; /* Whether advanced DR API is available. */
@@ -264,6 +265,7 @@ struct mlx5_port_config {
 	unsigned int hw_vlan_insert:1; /* VLAN insertion in WQE is supported. */
 	unsigned int hw_padding:1; /* End alignment padding is supported. */
 	unsigned int cqe_comp:1; /* CQE compression is enabled. */
+	unsigned int cqe_pad:1; /* CQE padding is enabled. */
 	unsigned int cqe_comp_fmt:3; /* CQE compression format. */
 	unsigned int rx_vec_en:1; /* Rx vector is enabled. */
 	unsigned int std_delay_drop:1; /* Enable standard Rxq delay drop. */
-- 
2.7.4


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-06-21  5:19 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-24 10:10 [PATCH] net/mlx5: add 128B padding of Rx completion entry Xing Zhang
  -- strict thread matches above, loose matches on Subject: below --
2022-05-24 10:47 Xing Zhang
2022-06-21  5:19 ` Slava Ovsiienko
2022-05-24  8:41 [PATCH] net/mlx5:add " Xing Zhang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).