From: Gavin Li <gavinl@nvidia.com>
To: <dev@dpdk.org>, <dsosnowski@nvidia.com>, <viacheslavo@nvidia.com>,
<orika@nvidia.com>, <suanmingm@nvidia.com>, <matan@nvidia.com>
Cc: <jiaweiw@nvidia.com>, <rasland@nvidia.com>
Subject: [V1 2/2] net/mlx5: use traffic class PRM field for IPv6 modification
Date: Fri, 12 Jan 2024 09:50:55 +0200 [thread overview]
Message-ID: <20240112075055.1288263-3-gavinl@nvidia.com> (raw)
In-Reply-To: <20240112075055.1288263-1-gavinl@nvidia.com>
The new PRM defines a new field, OUT_IPV6_TRAFFIC_CLASS, for IPv6, which is
used by both IPv6 ECN and DSCP. A new capability bit,
modify_outer_ipv6_traffic_class, is added. It can be used to check whether
the new field is supported by FW.
However, IPv6 ECN and DSCP start at different offsets within the same byte.
Update SWS and HWS to use the new field and introduce an extra offset for
the IPv6 DSCP data and mask to solve the issue.
Signed-off-by: Gavin Li <gavinl@nvidia.com>
Acked-by: Suanming Mou <suanmingm@nvidia.com>
---
drivers/common/mlx5/mlx5_devx_cmds.c | 3 ++
drivers/common/mlx5/mlx5_devx_cmds.h | 1 +
drivers/common/mlx5/mlx5_prm.h | 8 ++-
drivers/net/mlx5/linux/mlx5_os.c | 5 +-
drivers/net/mlx5/mlx5_flow.h | 3 ++
drivers/net/mlx5/mlx5_flow_dv.c | 78 ++++++++++++++++++++++++----
drivers/net/mlx5/mlx5_flow_hw.c | 7 +++
7 files changed, 92 insertions(+), 13 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 4d8818924a..3a894f894a 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -1229,6 +1229,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
attr->modify_outer_ip_ecn = MLX5_GET
(flow_table_nic_cap, hcattr,
ft_header_modify_nic_receive.outer_ip_ecn);
+ attr->modify_outer_ipv6_traffic_class = MLX5_GET
+ (flow_table_nic_cap, hcattr,
+ ft_header_modify_nic_receive.outer_ipv6_traffic_class);
attr->set_reg_c = 0xffff;
if (attr->nic_flow_table) {
#define GET_RX_REG_X_BITS \
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 7f23e925a5..4a6008dc1a 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -304,6 +304,7 @@ struct mlx5_hca_attr {
uint32_t set_reg_c:16;
uint32_t nic_flow_table:1;
uint32_t modify_outer_ip_ecn:1;
+ uint32_t modify_outer_ipv6_traffic_class:1;
union {
uint32_t max_flow_counter;
struct {
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 0d46ba9c40..69404b5ed8 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -848,6 +848,7 @@ enum mlx5_modification_field {
MLX5_MODI_META_REG_C_13 = 0x94,
MLX5_MODI_META_REG_C_14 = 0x95,
MLX5_MODI_META_REG_C_15 = 0x96,
+ MLX5_MODI_OUT_IPV6_TRAFFIC_CLASS = 0x11C,
MLX5_MODI_OUT_IPV4_TOTAL_LEN = 0x11D,
MLX5_MODI_OUT_IPV6_PAYLOAD_LEN = 0x11E,
MLX5_MODI_OUT_IPV4_IHL = 0x11F,
@@ -2202,7 +2203,9 @@ struct mlx5_ifc_ft_fields_support_bits {
u8 metadata_reg_c_x[0x8];
}; /* end of DW3 */
/* set_action_field_support_2 */
- u8 reserved_at_80[0x80];
+ u8 reserved_at_80[0x37];
+ u8 outer_ipv6_traffic_class[0x1];
+ u8 reserved_at_B8[0x48];
/* add_action_field_support */
u8 reserved_at_100[0x80];
/* add_action_field_support_2 */
@@ -2240,7 +2243,8 @@ struct mlx5_ifc_ft_fields_support_2_bits {
u8 inner_l4_checksum_ok[0x1];
u8 outer_ipv4_checksum_ok[0x1];
u8 outer_l4_checksum_ok[0x1]; /* end of DW0 */
- u8 reserved_at_20[0x18];
+ u8 reserved_at_20[0x17];
+ u8 outer_ipv6_traffic_class[0x1];
union {
struct {
u8 metadata_reg_c_15[0x1];
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 5ae31c88f4..6ea0296109 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1602,9 +1602,10 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
goto error;
}
rte_rwlock_init(&priv->ind_tbls_lock);
- if (sh->config.dv_flow_en == 1 &&
+ if (!priv->sh->cdev->config.hca_attr.modify_outer_ipv6_traffic_class ||
+ (sh->config.dv_flow_en == 1 &&
!priv->sh->ipv6_tc_fallback &&
- mlx5_flow_discover_ipv6_tc_support(eth_dev))
+ mlx5_flow_discover_ipv6_tc_support(eth_dev)))
priv->sh->ipv6_tc_fallback = 1;
if (priv->sh->config.dv_flow_en == 2) {
#ifdef HAVE_MLX5_HWS_SUPPORT
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 33d4a28077..fe4f46724b 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -413,6 +413,9 @@ enum mlx5_feature_name {
#define IPPROTO_MPLS 137
#endif
+#define MLX5_IPV6_HDR_ECN_MASK 0x3
+#define MLX5_IPV6_HDR_DSCP_SHIFT 2
+
/* UDP port number for MPLS */
#define MLX5_UDP_PORT_MPLS 6635
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 97f55003c3..ecf86d861d 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -253,6 +253,11 @@ struct field_modify_info modify_ipv6[] = {
{0, 0, 0},
};
+struct field_modify_info modify_ipv6_traffic_class[] = {
+ {1, 0, MLX5_MODI_OUT_IPV6_TRAFFIC_CLASS},
+ {0, 0, 0},
+};
+
struct field_modify_info modify_udp[] = {
{2, 0, MLX5_MODI_OUT_UDP_SPORT},
{2, 2, MLX5_MODI_OUT_UDP_DPORT},
@@ -1323,6 +1328,7 @@ static int
flow_dv_convert_action_modify_ipv6_dscp
(struct mlx5_flow_dv_modify_hdr_resource *resource,
const struct rte_flow_action *action,
+ uint32_t ipv6_tc_off,
struct rte_flow_error *error)
{
const struct rte_flow_action_set_dscp *conf =
@@ -1330,6 +1336,7 @@ flow_dv_convert_action_modify_ipv6_dscp
struct rte_flow_item item = { .type = RTE_FLOW_ITEM_TYPE_IPV6 };
struct rte_flow_item_ipv6 ipv6;
struct rte_flow_item_ipv6 ipv6_mask;
+ struct field_modify_info *modify_info;
memset(&ipv6, 0, sizeof(ipv6));
memset(&ipv6_mask, 0, sizeof(ipv6_mask));
@@ -1338,12 +1345,19 @@ flow_dv_convert_action_modify_ipv6_dscp
* rdma-core only accept the DSCP bits byte aligned start from
* bit 0 to 5 as to be compatible with IPv4. No need to shift the
* bits in IPv6 case as rdma-core requires byte aligned value.
+ * IPV6 DSCP uses OUT_IPV6_TRAFFIC_CLASS as ID but it starts from 2
+ * bits left. Shift the mask left for IPV6 DSCP. Do it here because
+ * it's needed to distinguish DSCP from ECN in data field construct
*/
- ipv6.hdr.vtc_flow = conf->dscp;
- ipv6_mask.hdr.vtc_flow = RTE_IPV6_HDR_DSCP_MASK >> 22;
+ ipv6.hdr.vtc_flow = conf->dscp << ipv6_tc_off;
+ ipv6_mask.hdr.vtc_flow = RTE_IPV6_HDR_DSCP_MASK >> (22 - ipv6_tc_off);
item.spec = &ipv6;
item.mask = &ipv6_mask;
- return flow_dv_convert_modify_action(&item, modify_ipv6, NULL, resource,
+ if (ipv6_tc_off)
+ modify_info = modify_ipv6_traffic_class;
+ else
+ modify_info = modify_ipv6;
+ return flow_dv_convert_modify_action(&item, modify_info, NULL, resource,
MLX5_MODIFICATION_TYPE_SET, error);
}
@@ -1576,6 +1590,12 @@ mlx5_modify_flex_item(const struct rte_eth_dev *dev,
}
}
+static inline bool
+mlx5_dv_modify_ipv6_traffic_class_supported(struct mlx5_priv *priv)
+{
+ return !priv->sh->ipv6_tc_fallback;
+}
+
void
mlx5_flow_field_id_to_modify_info
(const struct rte_flow_action_modify_data *data,
@@ -1731,9 +1751,20 @@ mlx5_flow_field_id_to_modify_info
break;
case RTE_FLOW_FIELD_IPV6_DSCP:
MLX5_ASSERT(data->offset + width <= 6);
- off_be = 6 - (data->offset + width);
- info[idx] = (struct field_modify_info){1, 0,
- MLX5_MODI_OUT_IP_DSCP};
+ /*
+ * IPV6 DSCP uses OUT_IPV6_TRAFFIC_CLASS as ID but it starts from 2
+ * bits left. Shift the mask left for IPV6 DSCP. Do it here because
+ * it's needed to distinguish DSCP from ECN in data field construct
+ */
+ if (mlx5_dv_modify_ipv6_traffic_class_supported(priv)) {
+ off_be = 6 - (data->offset + width) + MLX5_IPV6_HDR_DSCP_SHIFT;
+ info[idx] = (struct field_modify_info){1, 0,
+ MLX5_MODI_OUT_IPV6_TRAFFIC_CLASS};
+ } else {
+ off_be = 6 - (data->offset + width);
+ info[idx] = (struct field_modify_info){1, 0,
+ MLX5_MODI_OUT_IP_DSCP};
+ }
if (mask)
mask[idx] = flow_modify_info_mask_8(width, off_be);
else
@@ -2029,7 +2060,6 @@ mlx5_flow_field_id_to_modify_info
}
break;
case RTE_FLOW_FIELD_IPV4_ECN:
- case RTE_FLOW_FIELD_IPV6_ECN:
MLX5_ASSERT(data->offset + width <= 2);
off_be = 2 - (data->offset + width);
info[idx] = (struct field_modify_info){1, 0,
@@ -2039,6 +2069,20 @@ mlx5_flow_field_id_to_modify_info
else
info[idx].offset = off_be;
break;
+ case RTE_FLOW_FIELD_IPV6_ECN:
+ MLX5_ASSERT(data->offset + width <= 2);
+ off_be = 2 - (data->offset + width);
+ if (mlx5_dv_modify_ipv6_traffic_class_supported(priv))
+ info[idx] = (struct field_modify_info){1, 0,
+ MLX5_MODI_OUT_IPV6_TRAFFIC_CLASS};
+ else
+ info[idx] = (struct field_modify_info){1, 0,
+ MLX5_MODI_OUT_IP_ECN};
+ if (mask)
+ mask[idx] = flow_modify_info_mask_8(width, off_be);
+ else
+ info[idx].offset = off_be;
+ break;
case RTE_FLOW_FIELD_GTP_PSC_QFI:
MLX5_ASSERT(data->offset + width <= 8);
off_be = data->offset + 8;
@@ -2161,7 +2205,7 @@ flow_dv_convert_action_modify_field
struct field_modify_info dcopy[MLX5_ACT_MAX_MOD_FIELDS] = {
{0, 0, 0} };
uint32_t mask[MLX5_ACT_MAX_MOD_FIELDS] = {0, 0, 0, 0, 0};
- uint32_t type, meta = 0;
+ uint32_t type, meta = 0, dscp = 0;
if (conf->src.field == RTE_FLOW_FIELD_POINTER ||
conf->src.field == RTE_FLOW_FIELD_VALUE) {
@@ -2181,6 +2225,17 @@ flow_dv_convert_action_modify_field
meta = rte_cpu_to_be_32(meta);
item.spec = &meta;
}
+ if (mlx5_dv_modify_ipv6_traffic_class_supported(dev->data->dev_private) &&
+ conf->dst.field == RTE_FLOW_FIELD_IPV6_DSCP &&
+ !(mask[0] & MLX5_IPV6_HDR_ECN_MASK)) {
+ dscp = *(const unaligned_uint32_t *)item.spec;
+ /*
+ * IPV6 DSCP uses OUT_IPV6_TRAFFIC_CLASS as ID but it starts from 2
+ * bits left. Shift the data left for IPV6 DSCP
+ */
+ dscp <<= MLX5_IPV6_HDR_DSCP_SHIFT;
+ item.spec = &dscp;
+ }
} else {
type = MLX5_MODIFICATION_TYPE_COPY;
/** For COPY fill the destination field (dcopy) without mask. */
@@ -14385,6 +14440,7 @@ flow_dv_translate(struct rte_eth_dev *dev,
struct mlx5_flow_sub_actions_list *sample_act;
uint32_t sample_act_pos = UINT32_MAX;
uint32_t age_act_pos = UINT32_MAX;
+ uint32_t ipv6_tc_off = 0;
uint32_t num_of_dest = 0;
int tmp_actions_n = 0;
uint32_t table;
@@ -14941,8 +14997,12 @@ flow_dv_translate(struct rte_eth_dev *dev,
action_flags |= MLX5_FLOW_ACTION_SET_IPV4_DSCP;
break;
case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
+ if (mlx5_dv_modify_ipv6_traffic_class_supported(priv))
+ ipv6_tc_off = MLX5_IPV6_HDR_DSCP_SHIFT;
+ else
+ ipv6_tc_off = 0;
if (flow_dv_convert_action_modify_ipv6_dscp(mhdr_res,
- actions, error))
+ actions, ipv6_tc_off, error))
return -rte_errno;
action_flags |= MLX5_FLOW_ACTION_SET_IPV6_DSCP;
break;
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index c4a90a3690..504a250e44 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -2862,6 +2862,13 @@ flow_hw_modify_field_construct(struct mlx5_hw_q_job *job,
}
off_b = rte_bsf32(mask);
data = flow_dv_fetch_field(values + field->offset, field->size);
+ /*
+ * IPV6 DSCP uses OUT_IPV6_TRAFFIC_CLASS as ID but it starts from 2
+ * bits left. Shift the data left for IPV6 DSCP
+ */
+ if (field->id == MLX5_MODI_OUT_IPV6_TRAFFIC_CLASS &&
+ !(mask & MLX5_IPV6_HDR_ECN_MASK))
+ data <<= MLX5_IPV6_HDR_DSCP_SHIFT;
data = (data & mask) >> off_b;
job->mhdr_cmd[i++].data1 = rte_cpu_to_be_32(data);
++field;
--
2.39.1
next prev parent reply other threads:[~2024-01-12 7:51 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-12 7:50 [V1 0/2] " Gavin Li
2024-01-12 7:50 ` [V1 1/2] net/mlx5: discover IPv6 traffic class support in RDMA core Gavin Li
2024-01-12 7:50 ` Gavin Li [this message]
2024-01-17 13:41 ` [V1 0/2] use traffic class PRM field for IPv6 modification Raslan Darawsheh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240112075055.1288263-3-gavinl@nvidia.com \
--to=gavinl@nvidia.com \
--cc=dev@dpdk.org \
--cc=dsosnowski@nvidia.com \
--cc=jiaweiw@nvidia.com \
--cc=matan@nvidia.com \
--cc=orika@nvidia.com \
--cc=rasland@nvidia.com \
--cc=suanmingm@nvidia.com \
--cc=viacheslavo@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).