* [RFC PATCH 1/3] net/mlx5: support multi-packet RQ on Windows
2022-04-12 14:59 [RFC PATCH 0/3] Windows performance enhancements Tal Shnaiderman
@ 2022-04-12 14:59 ` Tal Shnaiderman
2022-04-12 14:59 ` [RFC PATCH 2/3] net/mlx5: support CQE compression " Tal Shnaiderman
2022-04-12 15:00 ` [RFC PATCH 3/3] net/mlx5: support enhanced multi-packet write " Tal Shnaiderman
2 siblings, 0 replies; 4+ messages in thread
From: Tal Shnaiderman @ 2022-04-12 14:59 UTC (permalink / raw)
To: dev; +Cc: thomas, matan
Multi-Packet RQ can further save PCIe bandwidth by posting a single large
buffer for multiple packets.
Instead of posting a buffer per packet, one large buffer is posted
to receive multiple packets on the buffer.
Add support for multi-packet RQ on Windows.
The feature is disabled by default and can be enabled
by setting mprq_en=1 in the PMD specific arguments.
Signed-off-by: Tal Shnaiderman <talshn@nvidia.com>
---
drivers/common/mlx5/mlx5_devx_cmds.c | 3 +++
drivers/common/mlx5/mlx5_devx_cmds.h | 2 ++
drivers/common/mlx5/windows/mlx5_win_defs.h | 7 +++++++
drivers/net/mlx5/windows/mlx5_os.c | 26 ++++++++++++++++++++++++++
4 files changed, 38 insertions(+)
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index d02ac2a678..7900254287 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -970,6 +970,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
general_obj_types) &
MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD);
attr->rq_delay_drop = MLX5_GET(cmd_hca_cap, hcattr, rq_delay_drop);
+ attr->striding_rq = MLX5_GET(cmd_hca_cap, hcattr, striding_rq);
+ attr->ext_stride_num_range =
+ MLX5_GET(cmd_hca_cap, hcattr, ext_stride_num_range);
if (hca_cap_2_sup) {
hcattr = mlx5_devx_get_hca_cap(ctx, in, out, &rc,
MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE_2 |
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 1bac18c59d..2d813c0fdc 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -256,6 +256,8 @@ struct mlx5_hca_attr {
uint32_t esw_mgr_vport_id_valid:1; /* E-Switch Mgr vport ID is valid. */
uint16_t esw_mgr_vport_id; /* E-Switch Mgr vport ID . */
uint16_t max_wqe_sz_sq;
+ uint32_t striding_rq:1;
+ uint32_t ext_stride_num_range:1;
};
/* LAG Context. */
diff --git a/drivers/common/mlx5/windows/mlx5_win_defs.h b/drivers/common/mlx5/windows/mlx5_win_defs.h
index 3554e4a7ff..822104a109 100644
--- a/drivers/common/mlx5/windows/mlx5_win_defs.h
+++ b/drivers/common/mlx5/windows/mlx5_win_defs.h
@@ -258,4 +258,11 @@ enum {
MLX5_MATCH_MISC_PARAMETERS = 1 << 1,
MLX5_MATCH_INNER_HEADERS = 1 << 2,
};
+
+#define MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES 9
+#define MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES 16
+#define MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES 6
+#define MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES 13
+#define MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES 3
+#define IB_QPT_RAW_PACKET 8
#endif /* __MLX5_WIN_DEFS_H__ */
diff --git a/drivers/net/mlx5/windows/mlx5_os.c b/drivers/net/mlx5/windows/mlx5_os.c
index c7bb81549e..02438ece56 100644
--- a/drivers/net/mlx5/windows/mlx5_os.c
+++ b/drivers/net/mlx5/windows/mlx5_os.c
@@ -187,6 +187,32 @@ mlx5_os_capabilities_prepare(struct mlx5_dev_ctx_shared *sh)
if (sh->dev_cap.tso)
sh->dev_cap.tso_max_payload_sz = 1 << hca_attr->max_lso_cap;
DRV_LOG(DEBUG, "Counters are not supported.");
+ if (hca_attr->striding_rq) {
+ sh->dev_cap.mprq.enabled = 1;
+ sh->dev_cap.mprq.log_min_stride_size =
+ MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES;
+ sh->dev_cap.mprq.log_max_stride_size =
+ MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES;
+ if (hca_attr->ext_stride_num_range)
+ sh->dev_cap.mprq.log_min_stride_num =
+ MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
+ else
+ sh->dev_cap.mprq.log_min_stride_num =
+ MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
+ sh->dev_cap.mprq.log_max_stride_num =
+ MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES;
+ DRV_LOG(DEBUG, "\tmin_single_stride_log_num_of_bytes: %u",
+ sh->dev_cap.mprq.log_min_stride_size);
+ DRV_LOG(DEBUG, "\tmax_single_stride_log_num_of_bytes: %u",
+ sh->dev_cap.mprq.log_max_stride_size);
+ DRV_LOG(DEBUG, "\tmin_single_wqe_log_num_of_strides: %u",
+ sh->dev_cap.mprq.log_min_stride_num);
+ DRV_LOG(DEBUG, "\tmax_single_wqe_log_num_of_strides: %u",
+ sh->dev_cap.mprq.log_max_stride_num);
+ DRV_LOG(DEBUG, "\tmin_stride_wqe_log_size: %u",
+ sh->dev_cap.mprq.log_min_stride_wqe_size);
+ DRV_LOG(DEBUG, "Device supports Multi-Packet RQ.");
+ }
if (hca_attr->rss_ind_tbl_cap) {
/*
* DPDK doesn't support larger/variable indirection tables.
--
2.16.1.windows.4
^ permalink raw reply [flat|nested] 4+ messages in thread
* [RFC PATCH 2/3] net/mlx5: support CQE compression on Windows
2022-04-12 14:59 [RFC PATCH 0/3] Windows performance enhancements Tal Shnaiderman
2022-04-12 14:59 ` [RFC PATCH 1/3] net/mlx5: support multi-packet RQ on Windows Tal Shnaiderman
@ 2022-04-12 14:59 ` Tal Shnaiderman
2022-04-12 15:00 ` [RFC PATCH 3/3] net/mlx5: support enhanced multi-packet write " Tal Shnaiderman
2 siblings, 0 replies; 4+ messages in thread
From: Tal Shnaiderman @ 2022-04-12 14:59 UTC (permalink / raw)
To: dev; +Cc: thomas, matan
CQE Compression reduces PCI overhead by coalescing and compressing
multiple CQEs into a single merged CQE.
Add support for the CQE compression feature on Windows.
The feature is enabled by default unless it is not supported by the HW
or the rxq_cqe_comp_en PMD argument is explicitly disabled.
Signed-off-by: Tal Shnaiderman <talshn@nvidia.com>
---
drivers/common/mlx5/mlx5_devx_cmds.c | 2 ++
drivers/common/mlx5/mlx5_devx_cmds.h | 1 +
drivers/net/mlx5/windows/mlx5_os.c | 8 ++++++++
3 files changed, 11 insertions(+)
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 7900254287..a109341d02 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -956,6 +956,8 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
attr->cqe_compression = MLX5_GET(cmd_hca_cap, hcattr, cqe_compression);
attr->mini_cqe_resp_flow_tag = MLX5_GET(cmd_hca_cap, hcattr,
mini_cqe_resp_flow_tag);
+ attr->cqe_compression_128 = MLX5_GET(cmd_hca_cap, hcattr,
+ cqe_compression_128);
attr->mini_cqe_resp_l3_l4_tag = MLX5_GET(cmd_hca_cap, hcattr,
mini_cqe_resp_l3_l4_tag);
attr->umr_indirect_mkey_disabled =
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 2d813c0fdc..6413176f2e 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -258,6 +258,7 @@ struct mlx5_hca_attr {
uint16_t max_wqe_sz_sq;
uint32_t striding_rq:1;
uint32_t ext_stride_num_range:1;
+ uint32_t cqe_compression_128:1;
};
/* LAG Context. */
diff --git a/drivers/net/mlx5/windows/mlx5_os.c b/drivers/net/mlx5/windows/mlx5_os.c
index 02438ece56..232f22232b 100644
--- a/drivers/net/mlx5/windows/mlx5_os.c
+++ b/drivers/net/mlx5/windows/mlx5_os.c
@@ -237,6 +237,14 @@ mlx5_os_capabilities_prepare(struct mlx5_dev_ctx_shared *sh)
} else {
DRV_LOG(DEBUG, "Tunnel offloading is not supported.");
}
+#if (RTE_CACHE_LINE_SIZE == 128)
+ if (hca_attr->cqe_compression_128)
+ sh->dev_cap.cqe_comp = 1;
+ DRV_LOG(DEBUG, "Rx CQE 128B compression is %ssupported.",
+ sh->dev_cap.cqe_comp ? "" : "not ");
+#else
+ sh->dev_cap.cqe_comp = 1;
+#endif
snprintf(sh->dev_cap.fw_ver, 64, "%x.%x.%04x",
MLX5_GET(initial_seg, pv_iseg, fw_rev_major),
MLX5_GET(initial_seg, pv_iseg, fw_rev_minor),
--
2.16.1.windows.4
^ permalink raw reply [flat|nested] 4+ messages in thread
* [RFC PATCH 3/3] net/mlx5: support enhanced multi-packet write on Windows
2022-04-12 14:59 [RFC PATCH 0/3] Windows performance enhancements Tal Shnaiderman
2022-04-12 14:59 ` [RFC PATCH 1/3] net/mlx5: support multi-packet RQ on Windows Tal Shnaiderman
2022-04-12 14:59 ` [RFC PATCH 2/3] net/mlx5: support CQE compression " Tal Shnaiderman
@ 2022-04-12 15:00 ` Tal Shnaiderman
2 siblings, 0 replies; 4+ messages in thread
From: Tal Shnaiderman @ 2022-04-12 15:00 UTC (permalink / raw)
To: dev; +Cc: thomas, matan
Add support for enhanced multi-packet write on Windows.
Enhanced multi-packet write allows the Tx burst function to pack up
multiple packets in a single descriptor session to save PCI bandwidth
and improve performance.
The feature can be controlled by the txq_mpw_en PMD argument:
txq_mpw_en=1 - PMD will first attempt to use "enhanced multi-packet write";
if the feature is not supported by the HW, the legacy "multi-packet write"
will be used.
If both are unsupported, the multi-packet write feature is disabled.
txq_mpw_en=0 - multi-packet write is disabled.
txq_mpw_en unset (default) - enhanced multi-packet write
will be activated if supported.
If unsupported, the multi-packet write feature is disabled.
Signed-off-by: Tal Shnaiderman <talshn@nvidia.com>
---
drivers/common/mlx5/mlx5_devx_cmds.c | 6 ++++++
drivers/common/mlx5/mlx5_devx_cmds.h | 2 ++
drivers/net/mlx5/windows/mlx5_os.c | 14 ++++++++++++--
3 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index a109341d02..c6fc59dbbe 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -1119,6 +1119,12 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
attr->rss_ind_tbl_cap = MLX5_GET
(per_protocol_networking_offload_caps,
hcattr, rss_ind_tbl_cap);
+ attr->multi_pkt_send_wqe = MLX5_GET
+ (per_protocol_networking_offload_caps,
+ hcattr, multi_pkt_send_wqe);
+ attr->enhanced_multi_pkt_send_wqe = MLX5_GET
+ (per_protocol_networking_offload_caps,
+ hcattr, enhanced_multi_pkt_send_wqe);
/* Query HCA attribute for ROCE. */
if (attr->roce) {
hcattr = mlx5_devx_get_hca_cap(ctx, in, out, &rc,
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 6413176f2e..db6f1b2e71 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -259,6 +259,8 @@ struct mlx5_hca_attr {
uint32_t striding_rq:1;
uint32_t ext_stride_num_range:1;
uint32_t cqe_compression_128:1;
+ uint32_t multi_pkt_send_wqe:1;
+ uint32_t enhanced_multi_pkt_send_wqe:1;
};
/* LAG Context. */
diff --git a/drivers/net/mlx5/windows/mlx5_os.c b/drivers/net/mlx5/windows/mlx5_os.c
index 232f22232b..a99f4ea183 100644
--- a/drivers/net/mlx5/windows/mlx5_os.c
+++ b/drivers/net/mlx5/windows/mlx5_os.c
@@ -173,8 +173,6 @@ mlx5_os_capabilities_prepare(struct mlx5_dev_ctx_shared *sh)
sh->dev_cap.max_qp = 1 << hca_attr->log_max_qp;
sh->dev_cap.max_qp_wr = 1 << hca_attr->log_max_qp_sz;
sh->dev_cap.dv_flow_en = 1;
- sh->dev_cap.mps = MLX5_MPW_DISABLED;
- DRV_LOG(DEBUG, "MPW isn't supported.");
DRV_LOG(DEBUG, "MPLS over GRE/UDP tunnel offloading is no supported.");
sh->dev_cap.hw_csum = hca_attr->csum_cap;
DRV_LOG(DEBUG, "Checksum offloading is %ssupported.",
@@ -224,6 +222,18 @@ mlx5_os_capabilities_prepare(struct mlx5_dev_ctx_shared *sh)
DRV_LOG(DEBUG, "Maximum Rx indirection table size is %u",
sh->dev_cap.ind_table_max_size);
}
+ if (hca_attr->enhanced_multi_pkt_send_wqe) {
+ sh->dev_cap.mps = MLX5_MPW_ENHANCED;
+ DRV_LOG(DEBUG, "Enhanced MPW is supported.");
+ }
+ else if (hca_attr->multi_pkt_send_wqe &&
+ sh->dev_cap.mps != MLX5_ARG_UNSET) {
+ sh->dev_cap.mps = MLX5_MPW;
+ DRV_LOG(DEBUG, "MPW is supported.");
+ } else {
+ sh->dev_cap.mps = MLX5_MPW_DISABLED;
+ DRV_LOG(DEBUG, "MPW isn't supported.");
+ }
sh->dev_cap.swp = mlx5_get_supported_sw_parsing_offloads(hca_attr);
sh->dev_cap.tunnel_en = mlx5_get_supported_tunneling_offloads(hca_attr);
if (sh->dev_cap.tunnel_en) {
--
2.16.1.windows.4
^ permalink raw reply [flat|nested] 4+ messages in thread