DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH 0/3] Windows performance enhancements
@ 2023-05-03 11:49 Tal Shnaiderman
  2023-05-03 11:49 ` [PATCH 1/3] net/mlx5: support multi-packet RQ on Windows Tal Shnaiderman
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Tal Shnaiderman @ 2023-05-03 11:49 UTC (permalink / raw)
  To: dev; +Cc: thomas, matan, viacheslavo, rasland, pdamouny, tamerh

The following series enables support of 3 hardware offloads on Windows which improve PMD throughput.

RX throughput improvements:
* Multi-packet RQ.
* CQE compression.

TX throughput improvement:
* Multi-packet send.

Tal Shnaiderman (3):
  net/mlx5: support multi-packet RQ on Windows
  net/mlx5: support CQE compression on Windows
  net/mlx5: support enhanced multi-packet write on Windows

 doc/guides/rel_notes/release_23_07.rst      | 33 +++------------------
 drivers/common/mlx5/mlx5_devx_cmds.c        | 11 +++++++
 drivers/common/mlx5/mlx5_devx_cmds.h        |  5 ++++
 drivers/common/mlx5/windows/mlx5_win_defs.h |  8 ++++-
 drivers/net/mlx5/windows/mlx5_os.c          | 46 ++++++++++++++++++++++++++++-
 5 files changed, 72 insertions(+), 31 deletions(-)

-- 
2.16.1.windows.4


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/3] net/mlx5: support multi-packet RQ on Windows
  2023-05-03 11:49 [PATCH 0/3] Windows performance enhancements Tal Shnaiderman
@ 2023-05-03 11:49 ` Tal Shnaiderman
  2023-05-08  7:12   ` Pier Damouny
  2023-05-03 11:49 ` [PATCH 2/3] net/mlx5: support CQE compression " Tal Shnaiderman
  2023-05-03 11:49 ` [PATCH 3/3] net/mlx5: support enhanced multi-packet write " Tal Shnaiderman
  2 siblings, 1 reply; 7+ messages in thread
From: Tal Shnaiderman @ 2023-05-03 11:49 UTC (permalink / raw)
  To: dev; +Cc: thomas, matan, viacheslavo, rasland, pdamouny, tamerh

Multi-Packet RQ can further save PCIe bandwidth by posting a single large
buffer for multiple packets.

Instead of posting one buffer per packet, one large buffer is posted
to receive multiple packets.

Add support for multi-packet RQ on Windows.
The feature is disabled by default and can be enabled
by setting mprq_en=1 in the PMD specific arguments.

Signed-off-by: Tal Shnaiderman <talshn@nvidia.com>
---
 drivers/common/mlx5/mlx5_devx_cmds.c        |  3 +++
 drivers/common/mlx5/mlx5_devx_cmds.h        |  2 ++
 drivers/common/mlx5/windows/mlx5_win_defs.h |  8 +++++++-
 drivers/net/mlx5/windows/mlx5_os.c          | 26 ++++++++++++++++++++++++++
 4 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index d0907fcd49..096bd1d520 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -1076,6 +1076,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
 					 general_obj_types) &
 			      MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD);
 	attr->rq_delay_drop = MLX5_GET(cmd_hca_cap, hcattr, rq_delay_drop);
+	attr->striding_rq = MLX5_GET(cmd_hca_cap, hcattr, striding_rq);
+	attr->ext_stride_num_range =
+		MLX5_GET(cmd_hca_cap, hcattr, ext_stride_num_range);
 	attr->max_flow_counter_15_0 = MLX5_GET(cmd_hca_cap, hcattr,
 			max_flow_counter_15_0);
 	attr->max_flow_counter_31_16 = MLX5_GET(cmd_hca_cap, hcattr,
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index ce173bc36a..9e7992b1c6 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -282,6 +282,8 @@ struct mlx5_hca_attr {
 	uint32_t crypto_wrapped_import_method:1;
 	uint16_t esw_mgr_vport_id; /* E-Switch Mgr vport ID . */
 	uint16_t max_wqe_sz_sq;
+	uint32_t striding_rq:1;
+	uint32_t ext_stride_num_range:1;
 	uint32_t set_reg_c:8;
 	uint32_t nic_flow_table:1;
 	uint32_t modify_outer_ip_ecn:1;
diff --git a/drivers/common/mlx5/windows/mlx5_win_defs.h b/drivers/common/mlx5/windows/mlx5_win_defs.h
index 65da820c5e..885114655f 100644
--- a/drivers/common/mlx5/windows/mlx5_win_defs.h
+++ b/drivers/common/mlx5/windows/mlx5_win_defs.h
@@ -270,4 +270,10 @@ enum {
 	MLX5_MATCH_INNER_HEADERS        = RTE_BIT32(2),
 };
 
-#endif /* MLX5_WIN_DEFS_H */
+#define MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES 9
+#define MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES 16
+#define MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES 6
+#define MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES 13
+#define MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES 3
+#define IB_QPT_RAW_PACKET 8
+#endif /* __MLX5_WIN_DEFS_H__ */
diff --git a/drivers/net/mlx5/windows/mlx5_os.c b/drivers/net/mlx5/windows/mlx5_os.c
index f401264b61..0caa8931e4 100644
--- a/drivers/net/mlx5/windows/mlx5_os.c
+++ b/drivers/net/mlx5/windows/mlx5_os.c
@@ -187,6 +187,32 @@ mlx5_os_capabilities_prepare(struct mlx5_dev_ctx_shared *sh)
 	if (sh->dev_cap.tso)
 		sh->dev_cap.tso_max_payload_sz = 1 << hca_attr->max_lso_cap;
 	DRV_LOG(DEBUG, "Counters are not supported.");
+	if (hca_attr->striding_rq) {
+		sh->dev_cap.mprq.enabled = 1;
+		sh->dev_cap.mprq.log_min_stride_size =
+			MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES;
+		sh->dev_cap.mprq.log_max_stride_size =
+			MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES;
+		if (hca_attr->ext_stride_num_range)
+			sh->dev_cap.mprq.log_min_stride_num =
+				MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
+		else
+			sh->dev_cap.mprq.log_min_stride_num =
+				MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
+		sh->dev_cap.mprq.log_max_stride_num =
+			MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES;
+		DRV_LOG(DEBUG, "\tmin_single_stride_log_num_of_bytes: %u",
+			sh->dev_cap.mprq.log_min_stride_size);
+		DRV_LOG(DEBUG, "\tmax_single_stride_log_num_of_bytes: %u",
+			sh->dev_cap.mprq.log_max_stride_size);
+		DRV_LOG(DEBUG, "\tmin_single_wqe_log_num_of_strides: %u",
+			sh->dev_cap.mprq.log_min_stride_num);
+		DRV_LOG(DEBUG, "\tmax_single_wqe_log_num_of_strides: %u",
+			sh->dev_cap.mprq.log_max_stride_num);
+		DRV_LOG(DEBUG, "\tmin_stride_wqe_log_size: %u",
+			sh->dev_cap.mprq.log_min_stride_wqe_size);
+		DRV_LOG(DEBUG, "Device supports Multi-Packet RQ.");
+	}
 	if (hca_attr->rss_ind_tbl_cap) {
 		/*
 		 * DPDK doesn't support larger/variable indirection tables.
-- 
2.16.1.windows.4


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/3] net/mlx5: support CQE compression on Windows
  2023-05-03 11:49 [PATCH 0/3] Windows performance enhancements Tal Shnaiderman
  2023-05-03 11:49 ` [PATCH 1/3] net/mlx5: support multi-packet RQ on Windows Tal Shnaiderman
@ 2023-05-03 11:49 ` Tal Shnaiderman
  2023-05-08  7:12   ` Pier Damouny
  2023-05-03 11:49 ` [PATCH 3/3] net/mlx5: support enhanced multi-packet write " Tal Shnaiderman
  2 siblings, 1 reply; 7+ messages in thread
From: Tal Shnaiderman @ 2023-05-03 11:49 UTC (permalink / raw)
  To: dev; +Cc: thomas, matan, viacheslavo, rasland, pdamouny, tamerh

CQE Compression reduces PCI overhead by coalescing and compressing
multiple CQEs into a single merged CQE.

Add support for the CQE compression feature on Windows.
The feature is enabled by default unless it is not supported by the HW
or the rxq_cqe_comp_en PMD argument is explicitly disabled.

Signed-off-by: Tal Shnaiderman <talshn@nvidia.com>
---
 drivers/common/mlx5/mlx5_devx_cmds.c |  2 ++
 drivers/common/mlx5/mlx5_devx_cmds.h |  1 +
 drivers/net/mlx5/windows/mlx5_os.c   | 12 ++++++++++++
 3 files changed, 15 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 096bd1d520..a31e4995f5 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -1062,6 +1062,8 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
 	attr->cqe_compression = MLX5_GET(cmd_hca_cap, hcattr, cqe_compression);
 	attr->mini_cqe_resp_flow_tag = MLX5_GET(cmd_hca_cap, hcattr,
 						mini_cqe_resp_flow_tag);
+	attr->cqe_compression_128 = MLX5_GET(cmd_hca_cap, hcattr,
+						cqe_compression_128);
 	attr->mini_cqe_resp_l3_l4_tag = MLX5_GET(cmd_hca_cap, hcattr,
 						 mini_cqe_resp_l3_l4_tag);
 	attr->enhanced_cqe_compression = MLX5_GET(cmd_hca_cap, hcattr,
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 9e7992b1c6..edcd867c4e 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -284,6 +284,7 @@ struct mlx5_hca_attr {
 	uint16_t max_wqe_sz_sq;
 	uint32_t striding_rq:1;
 	uint32_t ext_stride_num_range:1;
+	uint32_t cqe_compression_128:1;
 	uint32_t set_reg_c:8;
 	uint32_t nic_flow_table:1;
 	uint32_t modify_outer_ip_ecn:1;
diff --git a/drivers/net/mlx5/windows/mlx5_os.c b/drivers/net/mlx5/windows/mlx5_os.c
index 0caa8931e4..6527269663 100644
--- a/drivers/net/mlx5/windows/mlx5_os.c
+++ b/drivers/net/mlx5/windows/mlx5_os.c
@@ -237,6 +237,18 @@ mlx5_os_capabilities_prepare(struct mlx5_dev_ctx_shared *sh)
 	} else {
 		DRV_LOG(DEBUG, "Tunnel offloading is not supported.");
 	}
+	sh->dev_cap.cqe_comp = 0;
+#if (RTE_CACHE_LINE_SIZE == 128)
+	if (hca_attr->cqe_compression_128)
+		sh->dev_cap.cqe_comp = 1;
+	DRV_LOG(DEBUG, "Rx CQE 128B compression is %ssupported.",
+		sh->dev_cap.cqe_comp ? "" : "not ");
+#else
+	if (hca_attr->cqe_compression)
+		sh->dev_cap.cqe_comp = 1;
+	DRV_LOG(DEBUG, "Rx CQE compression is %ssupported.",
+		sh->dev_cap.cqe_comp ? "" : "not ");
+#endif
 	snprintf(sh->dev_cap.fw_ver, 64, "%x.%x.%04x",
 		 MLX5_GET(initial_seg, pv_iseg, fw_rev_major),
 		 MLX5_GET(initial_seg, pv_iseg, fw_rev_minor),
-- 
2.16.1.windows.4


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 3/3] net/mlx5: support enhanced multi-packet write on Windows
  2023-05-03 11:49 [PATCH 0/3] Windows performance enhancements Tal Shnaiderman
  2023-05-03 11:49 ` [PATCH 1/3] net/mlx5: support multi-packet RQ on Windows Tal Shnaiderman
  2023-05-03 11:49 ` [PATCH 2/3] net/mlx5: support CQE compression " Tal Shnaiderman
@ 2023-05-03 11:49 ` Tal Shnaiderman
  2023-05-08  7:13   ` Pier Damouny
  2 siblings, 1 reply; 7+ messages in thread
From: Tal Shnaiderman @ 2023-05-03 11:49 UTC (permalink / raw)
  To: dev; +Cc: thomas, matan, viacheslavo, rasland, pdamouny, tamerh

Add support for enhanced multi-packet write on Windows.

Enhanced multi-packet write allows the Tx burst function to pack up
multiple packets in a single descriptor session to save PCI bandwidth
and improve performance.

The feature can be controlled by the txq_mpw_en PMD argument:

txq_mpw_en=1 - PMD will first attempt to use "enhanced multi packet write".
If the feature is not supported by the HW, the legacy "multi packet write"
will be used.
If both are unsupported, the multi packet write feature is disabled.

txq_mpw_en=0 - multi packet write is disabled.

txq_mpw_en unset (default) - enhanced multi packet write
will be activated if supported.
If unsupported, the multi packet write feature is disabled.

Signed-off-by: Tal Shnaiderman <talshn@nvidia.com>
---
 doc/guides/rel_notes/release_23_07.rst | 33 ++++-----------------------------
 drivers/common/mlx5/mlx5_devx_cmds.c   |  6 ++++++
 drivers/common/mlx5/mlx5_devx_cmds.h   |  2 ++
 drivers/net/mlx5/windows/mlx5_os.c     |  8 +++++++-
 4 files changed, 19 insertions(+), 30 deletions(-)

diff --git a/doc/guides/rel_notes/release_23_07.rst b/doc/guides/rel_notes/release_23_07.rst
index a9b1293689..d74551414d 100644
--- a/doc/guides/rel_notes/release_23_07.rst
+++ b/doc/guides/rel_notes/release_23_07.rst
@@ -24,36 +24,11 @@ DPDK Release 23.07
 New Features
 ------------
 
-.. This section should contain new features added in this release.
-   Sample format:
+* **Updated NVIDIA mlx5 driver.**
 
-   * **Add a title in the past tense with a full stop.**
-
-     Add a short 1-2 sentence description in the past tense.
-     The description should be enough to allow someone scanning
-     the release notes to understand the new feature.
-
-     If the feature adds a lot of sub-features you can use a bullet list
-     like this:
-
-     * Added feature foo to do something.
-     * Enhanced feature bar to do something else.
-
-     Refer to the previous release notes for examples.
-
-     Suggested order in release notes items:
-     * Core libs (EAL, mempool, ring, mbuf, buses)
-     * Device abstraction libs and PMDs (ordered alphabetically by vendor name)
-       - ethdev (lib, PMDs)
-       - cryptodev (lib, PMDs)
-       - eventdev (lib, PMDs)
-       - etc
-     * Other libs
-     * Apps, Examples, Tools (if significant)
-
-     This section is a comment. Do not overwrite or remove it.
-     Also, make sure to start the actual text at the margin.
-     =======================================================
+  * Added support for multi-packet RQ on Windows.
+  * Added support for CQE compression on Windows.
+  * Added support for enhanced multi-packet write on Windows.
 
 
 Removed Items
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index a31e4995f5..b2abc742cf 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -1298,6 +1298,12 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
 	attr->rss_ind_tbl_cap = MLX5_GET
 					(per_protocol_networking_offload_caps,
 					 hcattr, rss_ind_tbl_cap);
+	attr->multi_pkt_send_wqe = MLX5_GET
+					(per_protocol_networking_offload_caps,
+					 hcattr, multi_pkt_send_wqe);
+	attr->enhanced_multi_pkt_send_wqe = MLX5_GET
+					(per_protocol_networking_offload_caps,
+					 hcattr, enhanced_multi_pkt_send_wqe);
 	/* Query HCA attribute for ROCE. */
 	if (attr->roce) {
 		hcattr = mlx5_devx_get_hca_cap(ctx, in, out, &rc,
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index edcd867c4e..c8427d2dbb 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -285,6 +285,8 @@ struct mlx5_hca_attr {
 	uint32_t striding_rq:1;
 	uint32_t ext_stride_num_range:1;
 	uint32_t cqe_compression_128:1;
+	uint32_t multi_pkt_send_wqe:1;
+	uint32_t enhanced_multi_pkt_send_wqe:1;
 	uint32_t set_reg_c:8;
 	uint32_t nic_flow_table:1;
 	uint32_t modify_outer_ip_ecn:1;
diff --git a/drivers/net/mlx5/windows/mlx5_os.c b/drivers/net/mlx5/windows/mlx5_os.c
index 6527269663..b731bdff06 100644
--- a/drivers/net/mlx5/windows/mlx5_os.c
+++ b/drivers/net/mlx5/windows/mlx5_os.c
@@ -173,7 +173,6 @@ mlx5_os_capabilities_prepare(struct mlx5_dev_ctx_shared *sh)
 	sh->dev_cap.max_qp = 1 << hca_attr->log_max_qp;
 	sh->dev_cap.max_qp_wr = 1 << hca_attr->log_max_qp_sz;
 	sh->dev_cap.dv_flow_en = 1;
-	sh->dev_cap.mps = MLX5_MPW_DISABLED;
 	DRV_LOG(DEBUG, "MPW isn't supported.");
 	DRV_LOG(DEBUG, "MPLS over GRE/UDP tunnel offloading is no supported.");
 	sh->dev_cap.hw_csum = hca_attr->csum_cap;
@@ -224,6 +223,13 @@ mlx5_os_capabilities_prepare(struct mlx5_dev_ctx_shared *sh)
 		DRV_LOG(DEBUG, "Maximum Rx indirection table size is %u",
 			sh->dev_cap.ind_table_max_size);
 	}
+	if (hca_attr->enhanced_multi_pkt_send_wqe)
+		sh->dev_cap.mps = MLX5_MPW_ENHANCED;
+	else if (hca_attr->multi_pkt_send_wqe &&
+		 sh->dev_cap.mps != MLX5_ARG_UNSET)
+		sh->dev_cap.mps = MLX5_MPW;
+	else
+		sh->dev_cap.mps = MLX5_MPW_DISABLED;
 	sh->dev_cap.swp = mlx5_get_supported_sw_parsing_offloads(hca_attr);
 	sh->dev_cap.tunnel_en = mlx5_get_supported_tunneling_offloads(hca_attr);
 	if (sh->dev_cap.tunnel_en) {
-- 
2.16.1.windows.4


^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH 1/3] net/mlx5: support multi-packet RQ on Windows
  2023-05-03 11:49 ` [PATCH 1/3] net/mlx5: support multi-packet RQ on Windows Tal Shnaiderman
@ 2023-05-08  7:12   ` Pier Damouny
  0 siblings, 0 replies; 7+ messages in thread
From: Pier Damouny @ 2023-05-08  7:12 UTC (permalink / raw)
  To: Tal Shnaiderman, dev
  Cc: NBU-Contact-Thomas Monjalon (EXTERNAL),
	Matan Azrad, Slava Ovsiienko, Raslan Darawsheh, Tamer Hleihel

> Subject: [PATCH 1/3] net/mlx5: support multi-packet RQ on Windows
> 
> Multi-Packet RQ can further save PCIe bandwidth by posting a single large
> buffer for multiple packets.
> 
> Instead of posting a buffer per a packet, one large buffer is posted to receive
> multiple packets on the buffer.
> 
> Add support for multi-packet RQ on Windows.
> The feature is disabled by default and can by enabled by setting mprq_en=1
> in the PMD specific arguments.
> 
> Signed-off-by: Tal Shnaiderman <talshn@nvidia.com>
> ---
>  drivers/common/mlx5/mlx5_devx_cmds.c        |  3 +++
>  drivers/common/mlx5/mlx5_devx_cmds.h        |  2 ++
>  drivers/common/mlx5/windows/mlx5_win_defs.h |  8 +++++++-
>  drivers/net/mlx5/windows/mlx5_os.c          | 26
> ++++++++++++++++++++++++++
>  4 files changed, 38 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c
> b/drivers/common/mlx5/mlx5_devx_cmds.c
> index d0907fcd49..096bd1d520 100644
> --- a/drivers/common/mlx5/mlx5_devx_cmds.c
> +++ b/drivers/common/mlx5/mlx5_devx_cmds.c
> @@ -1076,6 +1076,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
>  					 general_obj_types) &
> 
> MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD);
>  	attr->rq_delay_drop = MLX5_GET(cmd_hca_cap, hcattr,
> rq_delay_drop);
> +	attr->striding_rq = MLX5_GET(cmd_hca_cap, hcattr, striding_rq);
> +	attr->ext_stride_num_range =
> +		MLX5_GET(cmd_hca_cap, hcattr, ext_stride_num_range);
>  	attr->max_flow_counter_15_0 = MLX5_GET(cmd_hca_cap, hcattr,
>  			max_flow_counter_15_0);
>  	attr->max_flow_counter_31_16 = MLX5_GET(cmd_hca_cap, hcattr,
> diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h
> b/drivers/common/mlx5/mlx5_devx_cmds.h
> index ce173bc36a..9e7992b1c6 100644
> --- a/drivers/common/mlx5/mlx5_devx_cmds.h
> +++ b/drivers/common/mlx5/mlx5_devx_cmds.h
> @@ -282,6 +282,8 @@ struct mlx5_hca_attr {
>  	uint32_t crypto_wrapped_import_method:1;
>  	uint16_t esw_mgr_vport_id; /* E-Switch Mgr vport ID . */
>  	uint16_t max_wqe_sz_sq;
> +	uint32_t striding_rq:1;
> +	uint32_t ext_stride_num_range:1;
>  	uint32_t set_reg_c:8;
>  	uint32_t nic_flow_table:1;
>  	uint32_t modify_outer_ip_ecn:1;
> diff --git a/drivers/common/mlx5/windows/mlx5_win_defs.h
> b/drivers/common/mlx5/windows/mlx5_win_defs.h
> index 65da820c5e..885114655f 100644
> --- a/drivers/common/mlx5/windows/mlx5_win_defs.h
> +++ b/drivers/common/mlx5/windows/mlx5_win_defs.h
> @@ -270,4 +270,10 @@ enum {
>  	MLX5_MATCH_INNER_HEADERS        = RTE_BIT32(2),
>  };
> 
> -#endif /* MLX5_WIN_DEFS_H */
> +#define MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES 9 #define
> +MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES 16 #define
> +MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES 6 #define
> +MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES 13 #define
> +MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES 3 #define
> IB_QPT_RAW_PACKET 8
> +#endif /* __MLX5_WIN_DEFS_H__ */
> diff --git a/drivers/net/mlx5/windows/mlx5_os.c
> b/drivers/net/mlx5/windows/mlx5_os.c
> index f401264b61..0caa8931e4 100644
> --- a/drivers/net/mlx5/windows/mlx5_os.c
> +++ b/drivers/net/mlx5/windows/mlx5_os.c
> @@ -187,6 +187,32 @@ mlx5_os_capabilities_prepare(struct
> mlx5_dev_ctx_shared *sh)
>  	if (sh->dev_cap.tso)
>  		sh->dev_cap.tso_max_payload_sz = 1 << hca_attr-
> >max_lso_cap;
>  	DRV_LOG(DEBUG, "Counters are not supported.");
> +	if (hca_attr->striding_rq) {
> +		sh->dev_cap.mprq.enabled = 1;
> +		sh->dev_cap.mprq.log_min_stride_size =
> +			MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES;
> +		sh->dev_cap.mprq.log_max_stride_size =
> +			MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES;
> +		if (hca_attr->ext_stride_num_range)
> +			sh->dev_cap.mprq.log_min_stride_num =
> +
> 	MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
> +		else
> +			sh->dev_cap.mprq.log_min_stride_num =
> +
> 	MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
> +		sh->dev_cap.mprq.log_max_stride_num =
> +			MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES;
> +		DRV_LOG(DEBUG, "\tmin_single_stride_log_num_of_bytes:
> %u",
> +			sh->dev_cap.mprq.log_min_stride_size);
> +		DRV_LOG(DEBUG,
> "\tmax_single_stride_log_num_of_bytes: %u",
> +			sh->dev_cap.mprq.log_max_stride_size);
> +		DRV_LOG(DEBUG, "\tmin_single_wqe_log_num_of_strides:
> %u",
> +			sh->dev_cap.mprq.log_min_stride_num);
> +		DRV_LOG(DEBUG, "\tmax_single_wqe_log_num_of_strides:
> %u",
> +			sh->dev_cap.mprq.log_max_stride_num);
> +		DRV_LOG(DEBUG, "\tmin_stride_wqe_log_size: %u",
> +			sh->dev_cap.mprq.log_min_stride_wqe_size);
> +		DRV_LOG(DEBUG, "Device supports Multi-Packet RQ.");
> +	}
>  	if (hca_attr->rss_ind_tbl_cap) {
>  		/*
>  		 * DPDK doesn't support larger/variable indirection tables.
> --
> 2.16.1.windows.4

Tested-by: Pier Damouny  <pdamouny@nvidia.com>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH 2/3] net/mlx5: support CQE compression on Windows
  2023-05-03 11:49 ` [PATCH 2/3] net/mlx5: support CQE compression " Tal Shnaiderman
@ 2023-05-08  7:12   ` Pier Damouny
  0 siblings, 0 replies; 7+ messages in thread
From: Pier Damouny @ 2023-05-08  7:12 UTC (permalink / raw)
  To: Tal Shnaiderman, dev
  Cc: NBU-Contact-Thomas Monjalon (EXTERNAL),
	Matan Azrad, Slava Ovsiienko, Raslan Darawsheh, Tamer Hleihel

> Subject: [PATCH 2/3] net/mlx5: support CQE compression on Windows
> 
> CQE Compression reduces PCI overhead by coalescing and compressing
> multiple CQEs into a single merged CQE.
> 
> Add supported for the CQE compression feature on Windows.
> feature is enabled by default unless not supported by the HW or if the
> rxq_cqe_comp_en PMD argument is explicitly disabled.
> 
> Signed-off-by: Tal Shnaiderman <talshn@nvidia.com>
> ---
>  drivers/common/mlx5/mlx5_devx_cmds.c |  2 ++
> drivers/common/mlx5/mlx5_devx_cmds.h |  1 +
>  drivers/net/mlx5/windows/mlx5_os.c   | 12 ++++++++++++
>  3 files changed, 15 insertions(+)
> 
> diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c
> b/drivers/common/mlx5/mlx5_devx_cmds.c
> index 096bd1d520..a31e4995f5 100644
> --- a/drivers/common/mlx5/mlx5_devx_cmds.c
> +++ b/drivers/common/mlx5/mlx5_devx_cmds.c
> @@ -1062,6 +1062,8 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
>  	attr->cqe_compression = MLX5_GET(cmd_hca_cap, hcattr,
> cqe_compression);
>  	attr->mini_cqe_resp_flow_tag = MLX5_GET(cmd_hca_cap, hcattr,
>  						mini_cqe_resp_flow_tag);
> +	attr->cqe_compression_128 = MLX5_GET(cmd_hca_cap, hcattr,
> +						cqe_compression_128);
>  	attr->mini_cqe_resp_l3_l4_tag = MLX5_GET(cmd_hca_cap, hcattr,
>  						 mini_cqe_resp_l3_l4_tag);
>  	attr->enhanced_cqe_compression = MLX5_GET(cmd_hca_cap,
> hcattr, diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h
> b/drivers/common/mlx5/mlx5_devx_cmds.h
> index 9e7992b1c6..edcd867c4e 100644
> --- a/drivers/common/mlx5/mlx5_devx_cmds.h
> +++ b/drivers/common/mlx5/mlx5_devx_cmds.h
> @@ -284,6 +284,7 @@ struct mlx5_hca_attr {
>  	uint16_t max_wqe_sz_sq;
>  	uint32_t striding_rq:1;
>  	uint32_t ext_stride_num_range:1;
> +	uint32_t cqe_compression_128:1;
>  	uint32_t set_reg_c:8;
>  	uint32_t nic_flow_table:1;
>  	uint32_t modify_outer_ip_ecn:1;
> diff --git a/drivers/net/mlx5/windows/mlx5_os.c
> b/drivers/net/mlx5/windows/mlx5_os.c
> index 0caa8931e4..6527269663 100644
> --- a/drivers/net/mlx5/windows/mlx5_os.c
> +++ b/drivers/net/mlx5/windows/mlx5_os.c
> @@ -237,6 +237,18 @@ mlx5_os_capabilities_prepare(struct
> mlx5_dev_ctx_shared *sh)
>  	} else {
>  		DRV_LOG(DEBUG, "Tunnel offloading is not supported.");
>  	}
> +	sh->dev_cap.cqe_comp = 0;
> +#if (RTE_CACHE_LINE_SIZE == 128)
> +	if (hca_attr->cqe_compression_128)
> +		sh->dev_cap.cqe_comp = 1;
> +	DRV_LOG(DEBUG, "Rx CQE 128B compression is %ssupported.",
> +		sh->dev_cap.cqe_comp ? "" : "not ");
> +#else
> +	if (hca_attr->cqe_compression)
> +		sh->dev_cap.cqe_comp = 1;
> +	DRV_LOG(DEBUG, "Rx CQE compression is %ssupported.",
> +		sh->dev_cap.cqe_comp ? "" : "not ");
> +#endif
>  	snprintf(sh->dev_cap.fw_ver, 64, "%x.%x.%04x",
>  		 MLX5_GET(initial_seg, pv_iseg, fw_rev_major),
>  		 MLX5_GET(initial_seg, pv_iseg, fw_rev_minor),
> --
> 2.16.1.windows.4

Tested-by: Pier Damouny  <pdamouny@nvidia.com>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH 3/3] net/mlx5: support enhanced multi-packet write on Windows
  2023-05-03 11:49 ` [PATCH 3/3] net/mlx5: support enhanced multi-packet write " Tal Shnaiderman
@ 2023-05-08  7:13   ` Pier Damouny
  0 siblings, 0 replies; 7+ messages in thread
From: Pier Damouny @ 2023-05-08  7:13 UTC (permalink / raw)
  To: Tal Shnaiderman, dev
  Cc: NBU-Contact-Thomas Monjalon (EXTERNAL),
	Matan Azrad, Slava Ovsiienko, Raslan Darawsheh, Tamer Hleihel

> Subject: [PATCH 3/3] net/mlx5: support enhanced multi-packet write on
> Windows
> 
> Add support for enhanced multi-packet write on Windows.
> 
> Enhanced multi-packet write allows the Tx burst function to pack up multiple
> packets in a single descriptor session to save PCI bandwidth and improve
> performance.
> 
> The feature can be controlled by the txq_mpw_en PMD argument:
> 
> txq_mpw_en=1 - PMD will first attempt to use "enhanced multi packet
> write"
> if the feature is not supported by the HW the legacy "multi packet write"
> will be used.
> if both are unsupported the multi packet write feature is disabled.
> 
> txq_mpw_en=0 - multi packet write is disabled.
> 
> txq_mpw_en unset(default) - enhanced multi packet write will be activated
> if supported.
> if unsupported the multi packet write feature is disabled.
> 
> Signed-off-by: Tal Shnaiderman <talshn@nvidia.com>
> ---
>  doc/guides/rel_notes/release_23_07.rst | 33 ++++-----------------------------
>  drivers/common/mlx5/mlx5_devx_cmds.c   |  6 ++++++
>  drivers/common/mlx5/mlx5_devx_cmds.h   |  2 ++
>  drivers/net/mlx5/windows/mlx5_os.c     |  8 +++++++-
>  4 files changed, 19 insertions(+), 30 deletions(-)
> 
> diff --git a/doc/guides/rel_notes/release_23_07.rst
> b/doc/guides/rel_notes/release_23_07.rst
> index a9b1293689..d74551414d 100644
> --- a/doc/guides/rel_notes/release_23_07.rst
> +++ b/doc/guides/rel_notes/release_23_07.rst
> @@ -24,36 +24,11 @@ DPDK Release 23.07
>  New Features
>  ------------
> 
> -.. This section should contain new features added in this release.
> -   Sample format:
> +* **Updated NVIDIA mlx5 driver.**
> 
> -   * **Add a title in the past tense with a full stop.**
> -
> -     Add a short 1-2 sentence description in the past tense.
> -     The description should be enough to allow someone scanning
> -     the release notes to understand the new feature.
> -
> -     If the feature adds a lot of sub-features you can use a bullet list
> -     like this:
> -
> -     * Added feature foo to do something.
> -     * Enhanced feature bar to do something else.
> -
> -     Refer to the previous release notes for examples.
> -
> -     Suggested order in release notes items:
> -     * Core libs (EAL, mempool, ring, mbuf, buses)
> -     * Device abstraction libs and PMDs (ordered alphabetically by vendor
> name)
> -       - ethdev (lib, PMDs)
> -       - cryptodev (lib, PMDs)
> -       - eventdev (lib, PMDs)
> -       - etc
> -     * Other libs
> -     * Apps, Examples, Tools (if significant)
> -
> -     This section is a comment. Do not overwrite or remove it.
> -     Also, make sure to start the actual text at the margin.
> -     =======================================================
> +  * Added support for multi-packet RQ on Windows.
> +  * Added support for CQE compression on Windows.
> +  * Added support for enhanced multi-packet write on Windows.
> 
> 
>  Removed Items
> diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c
> b/drivers/common/mlx5/mlx5_devx_cmds.c
> index a31e4995f5..b2abc742cf 100644
> --- a/drivers/common/mlx5/mlx5_devx_cmds.c
> +++ b/drivers/common/mlx5/mlx5_devx_cmds.c
> @@ -1298,6 +1298,12 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
>  	attr->rss_ind_tbl_cap = MLX5_GET
> 
> 	(per_protocol_networking_offload_caps,
>  					 hcattr, rss_ind_tbl_cap);
> +	attr->multi_pkt_send_wqe = MLX5_GET
> +
> 	(per_protocol_networking_offload_caps,
> +					 hcattr, multi_pkt_send_wqe);
> +	attr->enhanced_multi_pkt_send_wqe = MLX5_GET
> +
> 	(per_protocol_networking_offload_caps,
> +					 hcattr,
> enhanced_multi_pkt_send_wqe);
>  	/* Query HCA attribute for ROCE. */
>  	if (attr->roce) {
>  		hcattr = mlx5_devx_get_hca_cap(ctx, in, out, &rc, diff --git
> a/drivers/common/mlx5/mlx5_devx_cmds.h
> b/drivers/common/mlx5/mlx5_devx_cmds.h
> index edcd867c4e..c8427d2dbb 100644
> --- a/drivers/common/mlx5/mlx5_devx_cmds.h
> +++ b/drivers/common/mlx5/mlx5_devx_cmds.h
> @@ -285,6 +285,8 @@ struct mlx5_hca_attr {
>  	uint32_t striding_rq:1;
>  	uint32_t ext_stride_num_range:1;
>  	uint32_t cqe_compression_128:1;
> +	uint32_t multi_pkt_send_wqe:1;
> +	uint32_t enhanced_multi_pkt_send_wqe:1;
>  	uint32_t set_reg_c:8;
>  	uint32_t nic_flow_table:1;
>  	uint32_t modify_outer_ip_ecn:1;
> diff --git a/drivers/net/mlx5/windows/mlx5_os.c
> b/drivers/net/mlx5/windows/mlx5_os.c
> index 6527269663..b731bdff06 100644
> --- a/drivers/net/mlx5/windows/mlx5_os.c
> +++ b/drivers/net/mlx5/windows/mlx5_os.c
> @@ -173,7 +173,6 @@ mlx5_os_capabilities_prepare(struct
> mlx5_dev_ctx_shared *sh)
>  	sh->dev_cap.max_qp = 1 << hca_attr->log_max_qp;
>  	sh->dev_cap.max_qp_wr = 1 << hca_attr->log_max_qp_sz;
>  	sh->dev_cap.dv_flow_en = 1;
> -	sh->dev_cap.mps = MLX5_MPW_DISABLED;
>  	DRV_LOG(DEBUG, "MPW isn't supported.");
>  	DRV_LOG(DEBUG, "MPLS over GRE/UDP tunnel offloading is no
> supported.");
>  	sh->dev_cap.hw_csum = hca_attr->csum_cap; @@ -224,6 +223,13
> @@ mlx5_os_capabilities_prepare(struct mlx5_dev_ctx_shared *sh)
>  		DRV_LOG(DEBUG, "Maximum Rx indirection table size is %u",
>  			sh->dev_cap.ind_table_max_size);
>  	}
> +	if (hca_attr->enhanced_multi_pkt_send_wqe)
> +		sh->dev_cap.mps = MLX5_MPW_ENHANCED;
> +	else if (hca_attr->multi_pkt_send_wqe &&
> +		 sh->dev_cap.mps != MLX5_ARG_UNSET)
> +		sh->dev_cap.mps = MLX5_MPW;
> +	else
> +		sh->dev_cap.mps = MLX5_MPW_DISABLED;
>  	sh->dev_cap.swp =
> mlx5_get_supported_sw_parsing_offloads(hca_attr);
>  	sh->dev_cap.tunnel_en =
> mlx5_get_supported_tunneling_offloads(hca_attr);
>  	if (sh->dev_cap.tunnel_en) {
> --
> 2.16.1.windows.4

Tested-by: Pier Damouny  <pdamouny@nvidia.com>

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-05-08  7:13 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-03 11:49 [PATCH 0/3] Windows performance enhancements Tal Shnaiderman
2023-05-03 11:49 ` [PATCH 1/3] net/mlx5: support multi-packet RQ on Windows Tal Shnaiderman
2023-05-08  7:12   ` Pier Damouny
2023-05-03 11:49 ` [PATCH 2/3] net/mlx5: support CQE compression " Tal Shnaiderman
2023-05-08  7:12   ` Pier Damouny
2023-05-03 11:49 ` [PATCH 3/3] net/mlx5: support enhanced multi-packet write " Tal Shnaiderman
2023-05-08  7:13   ` Pier Damouny

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).