* [PATCH 0/3] add new PHY affinity support in MLX5 PMD
@ 2023-02-03 5:21 Jiawei Wang
2023-02-03 5:21 ` [PATCH 1/3] drivers: add lag Rx port affinity in PRM Jiawei Wang
` (3 more replies)
0 siblings, 4 replies; 9+ messages in thread
From: Jiawei Wang @ 2023-02-03 5:21 UTC (permalink / raw)
To: viacheslavo, orika; +Cc: dev, rasland
This patch series implements PHY affinity item support in the MLX5 PMD
and adds support for TIS creation with the tx_phy_affinity value of the
Tx queue.
This patch series relies on the preceding ethdev API:
http://patches.dpdk.org/project/dpdk/patch/20230203050717.46914-2-jiaweiw@nvidia.com/
Jiawei Wang (3):
drivers: add lag Rx port affinity in PRM
net/mlx5: add PHY affinity item support
drivers: enhance the Tx queue affinity
doc/guides/nics/features/default.ini | 1 +
doc/guides/nics/features/mlx5.ini | 1 +
doc/guides/nics/mlx5.rst | 8 ++-
drivers/common/mlx5/mlx5_devx_cmds.c | 3 +
drivers/common/mlx5/mlx5_devx_cmds.h | 1 +
drivers/common/mlx5/mlx5_prm.h | 15 ++--
drivers/net/mlx5/linux/mlx5_os.c | 6 ++
drivers/net/mlx5/mlx5.c | 43 +++++-------
drivers/net/mlx5/mlx5.h | 3 +
drivers/net/mlx5/mlx5_devx.c | 24 ++++---
drivers/net/mlx5/mlx5_ethdev.c | 1 +
drivers/net/mlx5/mlx5_flow.h | 3 +
drivers/net/mlx5/mlx5_flow_dv.c | 100 ++++++++++++++++++++++++++-
drivers/net/mlx5/mlx5_flow_hw.c | 14 ++++
drivers/net/mlx5/mlx5_tx.h | 1 +
drivers/net/mlx5/mlx5_txq.c | 8 +++
16 files changed, 184 insertions(+), 48 deletions(-)
--
2.18.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/3] drivers: add lag Rx port affinity in PRM
2023-02-03 5:21 [PATCH 0/3] add new PHY affinity support in MLX5 PMD Jiawei Wang
@ 2023-02-03 5:21 ` Jiawei Wang
2023-02-03 5:21 ` [PATCH 2/3] net/mlx5: add PHY affinity item support Jiawei Wang
` (2 subsequent siblings)
3 siblings, 0 replies; 9+ messages in thread
From: Jiawei Wang @ 2023-02-03 5:21 UTC (permalink / raw)
To: viacheslavo, orika, Matan Azrad; +Cc: dev, rasland
This patch adds a function to query the HCA capability for
lag_rx_port_affinity via DevX.
Signed-off-by: Jiawei Wang <jiaweiw@nvidia.com>
---
drivers/common/mlx5/mlx5_devx_cmds.c | 3 +++
drivers/common/mlx5/mlx5_devx_cmds.h | 1 +
drivers/common/mlx5/mlx5_prm.h | 7 +++++--
drivers/net/mlx5/linux/mlx5_os.c | 4 ++++
drivers/net/mlx5/mlx5.h | 2 ++
5 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index e3a4927d0f..a157ab4b6c 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -1141,6 +1141,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
attr->outer_ipv4_ihl = MLX5_GET
(flow_table_nic_cap, hcattr,
ft_field_support_2_nic_receive.outer_ipv4_ihl);
+ attr->lag_rx_port_affinity = MLX5_GET
+ (flow_table_nic_cap, hcattr,
+ ft_field_support_2_nic_receive.lag_rx_port_affinity);
/* Query HCA offloads for Ethernet protocol. */
hcattr = mlx5_devx_get_hca_cap(ctx, in, out, &rc,
MLX5_GET_HCA_CAP_OP_MOD_ETHERNET_OFFLOAD_CAPS |
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index c94b9eac06..9cf045ccaa 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -288,6 +288,7 @@ struct mlx5_hca_attr {
uint32_t alloc_flow_counter_pd:1;
uint32_t flow_counter_access_aso:1;
uint32_t flow_access_aso_opc_mod:8;
+ uint32_t lag_rx_port_affinity:1;
};
/* LAG Context. */
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 9294f65e24..8bbb800206 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -905,7 +905,8 @@ struct mlx5_ifc_fte_match_set_misc_bits {
u8 vxlan_vni[0x18];
u8 reserved_at_b8[0x8];
u8 geneve_vni[0x18];
- u8 reserved_at_e4[0x6];
+ u8 lag_rx_port_affinity[0x4];
+ u8 reserved_at_e8[0x2];
u8 geneve_tlv_option_0_exist[0x1];
u8 geneve_oam[0x1];
u8 reserved_at_e0[0xc];
@@ -2044,7 +2045,9 @@ struct mlx5_ifc_ft_fields_support_bits {
* Table 1872 - Flow Table Fields Supported 2 Format
*/
struct mlx5_ifc_ft_fields_support_2_bits {
- u8 reserved_at_0[0xd];
+ u8 reserved_at_0[0xa];
+ u8 lag_rx_port_affinity[0x1];
+ u8 reserved_at_c[0x2];
u8 hash_result[0x1];
u8 reserved_at_e[0x1];
u8 tunnel_header_2_3[0x1];
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index a71474c90a..60462da39d 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1390,6 +1390,10 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
DRV_LOG(DEBUG, "DV flow is not supported!");
}
#endif
+ if (hca_attr->lag_rx_port_affinity) {
+ sh->lag_rx_port_affinity_en = 1;
+ DRV_LOG(DEBUG, "LAG Rx Port Affinity enabled");
+ }
}
/* Process parameters and store port configuration on priv structure. */
err = mlx5_port_args_config(priv, mkvlist, &priv->config);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 16b33e1548..bbd7262a51 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1374,6 +1374,8 @@ struct mlx5_dev_ctx_shared {
uint32_t hws_tags:1; /* Check if tags info for HWS initialized. */
uint32_t shared_mark_enabled:1;
/* If mark action is enabled on Rxqs (shared E-Switch domain). */
+ uint32_t lag_rx_port_affinity_en:1;
+ /* lag_rx_port_affinity is supported. */
uint32_t hws_max_log_bulk_sz:5;
/* Log of minimal HWS counters created hard coded. */
uint32_t hws_max_nb_counters; /* Maximal number for HWS counters. */
--
2.18.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 2/3] net/mlx5: add PHY affinity item support
2023-02-03 5:21 [PATCH 0/3] add new PHY affinity support in MLX5 PMD Jiawei Wang
2023-02-03 5:21 ` [PATCH 1/3] drivers: add lag Rx port affinity in PRM Jiawei Wang
@ 2023-02-03 5:21 ` Jiawei Wang
2023-02-03 5:21 ` [PATCH 3/3] drivers: enhance the Tx queue affinity Jiawei Wang
2023-02-22 12:26 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Jiawei Wang
3 siblings, 0 replies; 9+ messages in thread
From: Jiawei Wang @ 2023-02-03 5:21 UTC (permalink / raw)
To: viacheslavo, orika, Ferruh Yigit, Matan Azrad; +Cc: dev, rasland
This patch adds the new phy affinity item support in PMD:
RTE_FLOW_ITEM_TYPE_PHY_AFFINITY.
This patch adds the validation function for the new item;
it works for NIC-Rx and FDB rules on the root table only.
Signed-off-by: Jiawei Wang <jiaweiw@nvidia.com>
---
doc/guides/nics/features/default.ini | 1 +
doc/guides/nics/features/mlx5.ini | 1 +
doc/guides/nics/mlx5.rst | 8 ++-
drivers/net/mlx5/linux/mlx5_os.c | 2 +
drivers/net/mlx5/mlx5.h | 1 +
drivers/net/mlx5/mlx5_flow.h | 3 +
drivers/net/mlx5/mlx5_flow_dv.c | 100 ++++++++++++++++++++++++++-
drivers/net/mlx5/mlx5_flow_hw.c | 14 ++++
8 files changed, 127 insertions(+), 3 deletions(-)
diff --git a/doc/guides/nics/features/default.ini b/doc/guides/nics/features/default.ini
index 510cc6679d..fd5edd11c8 100644
--- a/doc/guides/nics/features/default.ini
+++ b/doc/guides/nics/features/default.ini
@@ -141,6 +141,7 @@ udp =
vlan =
vxlan =
vxlan_gpe =
+phy_affinity =
[rte_flow actions]
age =
diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index 62fd330e2b..9142f04f93 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -87,6 +87,7 @@ vlan = Y
vxlan = Y
vxlan_gpe = Y
represented_port = Y
+phy_affinity = Y
[rte_flow actions]
age = I
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index f137f156f9..5569b045d7 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -106,6 +106,7 @@ Features
- Sub-Function representors.
- Sub-Function.
- Matching on represented port.
+- Matching on phy affinity.
Limitations
@@ -595,13 +596,18 @@ Limitations
- key
- sequence
- Matching on checksum and sequence needs MLNX_OFED 5.6+.
+- Matching on checksum and sequence needs MLNX_OFED 5.6+.
- The NIC egress flow rules on representor port are not supported.
- When using DV/verbs flow engine (``dv_flow_en`` = 1/0 respectively), Match on SPI field
in ESP header for group 0 needs MLNX_OFED 5.6+.
+- Match on phy affinity:
+
+ - Supports NIC ingress flow in group 0.
+ - Supports E-Switch flow in group 0 and depends on
+ device-managed flow steering (DMFS) mode.
Statistics
----------
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 60462da39d..1c26b30702 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1394,6 +1394,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
sh->lag_rx_port_affinity_en = 1;
DRV_LOG(DEBUG, "LAG Rx Port Affinity enabled");
}
+ priv->num_lag_ports = hca_attr->num_lag_ports;
+ DRV_LOG(DEBUG, "The number of lag ports is %d", priv->num_lag_ports);
}
/* Process parameters and store port configuration on priv structure. */
err = mlx5_port_args_config(priv, mkvlist, &priv->config);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index bbd7262a51..c7a7b176b8 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1668,6 +1668,7 @@ struct mlx5_priv {
unsigned int mtr_reg_share:1; /* Whether support meter REG_C share. */
unsigned int lb_used:1; /* Loopback queue is referred to. */
uint32_t mark_enabled:1; /* If mark action is enabled on rxqs. */
+ uint32_t num_lag_ports:4; /* Number of ports can be bonded. */
uint16_t domain_id; /* Switch domain identifier. */
uint16_t vport_id; /* Associated VF vport index (if any). */
uint32_t vport_meta_tag; /* Used for vport index match ove VF LAG. */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index e376dcae93..64b1b7c37c 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -219,6 +219,9 @@ enum mlx5_feature_name {
/* Meter color item */
#define MLX5_FLOW_ITEM_METER_COLOR (UINT64_C(1) << 44)
+/* PHY affinity item */
+#define MLX5_FLOW_ITEM_PHY_AFFINITY (UINT64_C(1) << 49)
+
/* Outer Masks. */
#define MLX5_FLOW_LAYER_OUTER_L3 \
(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 7ca909999b..994f184aaf 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -3764,6 +3764,75 @@ flow_dv_validate_item_meter_color(struct rte_eth_dev *dev,
return 0;
}
+/**
+ * Validate Phy affinity item.
+ *
+ * @param[in] dev
+ * Pointer to the rte_eth_dev structure.
+ * @param[in] item
+ * Item specification.
+ * @param[in] attr
+ * Attributes of flow that includes this item.
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_item_phy_affinity(struct rte_eth_dev *dev,
+ const struct rte_flow_item *item,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ const struct rte_flow_item_phy_affinity *spec = item->spec;
+ const struct rte_flow_item_phy_affinity *mask = item->mask;
+ struct rte_flow_item_phy_affinity nic_mask = {
+ .affinity = UINT8_MAX
+ };
+ int ret;
+
+ if (!priv->sh->lag_rx_port_affinity_en)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+ "Unsupported phy affinity with Older FW");
+ if ((attr->transfer && priv->fdb_def_rule) ||
+ attr->egress || attr->group)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+ item->spec,
+ "phy affinity is not supported with egress or FDB on non root table");
+ if (!spec)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+ item->spec,
+ "data cannot be empty");
+ if (spec->affinity == 0)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+ item->spec,
+ "zero affinity number not supported");
+ if (spec->affinity > priv->num_lag_ports)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+ item->spec,
+ "exceed max affinity number in lag ports");
+ if (!mask)
+ mask = &rte_flow_item_phy_affinity_mask;
+ if (!mask->affinity)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM_SPEC, NULL,
+ "mask cannot be zero");
+ ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+ (const uint8_t *)&nic_mask,
+ sizeof(struct rte_flow_item_phy_affinity),
+ MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
int
flow_dv_encap_decap_match_cb(void *tool_ctx __rte_unused,
struct mlx5_list_entry *entry, void *cb_ctx)
@@ -7443,6 +7512,13 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
return ret;
last_item = MLX5_FLOW_ITEM_METER_COLOR;
break;
+ case RTE_FLOW_ITEM_TYPE_PHY_AFFINITY:
+ ret = flow_dv_validate_item_phy_affinity(dev, items,
+ attr, error);
+ if (ret < 0)
+ return ret;
+ last_item = MLX5_FLOW_ITEM_PHY_AFFINITY;
+ break;
default:
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM,
@@ -9981,7 +10057,7 @@ flow_dv_translate_item_tag(struct rte_eth_dev *dev, void *key,
const struct rte_flow_item_tag *tag_vv = item->spec;
const struct rte_flow_item_tag *tag_v;
const struct rte_flow_item_tag *tag_m;
- enum modify_reg reg;
+ int reg;
uint32_t index;
if (MLX5_ITEM_VALID(item, key_type))
@@ -9996,7 +10072,7 @@ flow_dv_translate_item_tag(struct rte_eth_dev *dev, void *key,
else
reg = flow_hw_get_reg_id(RTE_FLOW_ITEM_TYPE_TAG, index);
MLX5_ASSERT(reg > 0);
- flow_dv_match_meta_reg(key, reg, tag_v->data, tag_m->data);
+ flow_dv_match_meta_reg(key, (enum modify_reg)reg, tag_v->data, tag_m->data);
}
/**
@@ -10639,6 +10715,22 @@ flow_dv_translate_item_meter_color(struct rte_eth_dev *dev, void *key,
flow_dv_match_meta_reg(key, (enum modify_reg)reg, value, mask);
}
+static void
+flow_dv_translate_item_phy_affinity(void *key,
+ const struct rte_flow_item *item,
+ uint32_t key_type)
+{
+ const struct rte_flow_item_phy_affinity *affinity_v;
+ const struct rte_flow_item_phy_affinity *affinity_m;
+ void *misc_v;
+
+ MLX5_ITEM_UPDATE(item, key_type, affinity_v, affinity_m,
+ &rte_flow_item_phy_affinity_mask);
+ misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc_v, lag_rx_port_affinity,
+ affinity_v->affinity & affinity_m->affinity);
+}
+
static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
#define HEADER_IS_ZERO(match_criteria, headers) \
@@ -13430,6 +13522,10 @@ flow_dv_translate_items(struct rte_eth_dev *dev,
last_item = flow_dv_translate_item_integrity(items,
wks, key_type);
break;
+ case RTE_FLOW_ITEM_TYPE_PHY_AFFINITY:
+ flow_dv_translate_item_phy_affinity(key, items, key_type);
+ last_item = MLX5_FLOW_ITEM_PHY_AFFINITY;
+ break;
default:
break;
}
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index 20c71ff7f0..e5ca86ca4b 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -4715,6 +4715,20 @@ flow_hw_pattern_validate(struct rte_eth_dev *dev,
"Unsupported meter color register");
break;
}
+ case RTE_FLOW_ITEM_TYPE_PHY_AFFINITY:
+ {
+ if (!priv->sh->lag_rx_port_affinity_en)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+ "Unsupported phy affinity with Older FW");
+ if ((attr->transfer && priv->fdb_def_rule) || attr->egress)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+ "Phy affinity item not supported"
+ " with egress or transfer"
+ " attribute");
+ break;
+ }
case RTE_FLOW_ITEM_TYPE_VOID:
case RTE_FLOW_ITEM_TYPE_ETH:
case RTE_FLOW_ITEM_TYPE_VLAN:
--
2.18.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 3/3] drivers: enhance the Tx queue affinity
2023-02-03 5:21 [PATCH 0/3] add new PHY affinity support in MLX5 PMD Jiawei Wang
2023-02-03 5:21 ` [PATCH 1/3] drivers: add lag Rx port affinity in PRM Jiawei Wang
2023-02-03 5:21 ` [PATCH 2/3] net/mlx5: add PHY affinity item support Jiawei Wang
@ 2023-02-03 5:21 ` Jiawei Wang
2023-02-22 12:26 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Jiawei Wang
3 siblings, 0 replies; 9+ messages in thread
From: Jiawei Wang @ 2023-02-03 5:21 UTC (permalink / raw)
To: viacheslavo, orika, Matan Azrad; +Cc: dev, rasland
A previous patch added the tx_phy_affinity configuration to the Tx
queue API, allowing the affinity value to be set per queue.
This patch updates TIS creation with tx_phy_affinity value of
Tx queue, TIS index 1 goes to hardware port 0, TIS index 2 goes to
hardware port 1, and TIS index 0 is reserved for default HWS hash mode.
Signed-off-by: Jiawei Wang <jiaweiw@nvidia.com>
---
drivers/common/mlx5/mlx5_prm.h | 8 -------
drivers/net/mlx5/mlx5.c | 43 +++++++++++++++-------------------
drivers/net/mlx5/mlx5_devx.c | 24 ++++++++++---------
drivers/net/mlx5/mlx5_ethdev.c | 1 +
drivers/net/mlx5/mlx5_tx.h | 1 +
drivers/net/mlx5/mlx5_txq.c | 8 +++++++
6 files changed, 42 insertions(+), 43 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 8bbb800206..ded001d0b2 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -2331,14 +2331,6 @@ struct mlx5_ifc_query_nic_vport_context_in_bits {
u8 reserved_at_68[0x18];
};
-/*
- * lag_tx_port_affinity: 0 auto-selection, 1 PF1, 2 PF2 vice versa.
- * Each TIS binds to one PF by setting lag_tx_port_affinity (>0).
- * Once LAG enabled, we create multiple TISs and bind each one to
- * different PFs, then TIS[i] gets affinity i+1 and goes to PF i+1.
- */
-#define MLX5_IFC_LAG_MAP_TIS_AFFINITY(index, num) ((num) ? \
- (index) % (num) + 1 : 0)
struct mlx5_ifc_tisc_bits {
u8 strict_lag_tx_port_affinity[0x1];
u8 reserved_at_1[0x3];
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index b8643cebdd..c75c98b8b0 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1162,9 +1162,9 @@ mlx5_dev_ctx_shared_mempool_subscribe(struct rte_eth_dev *dev)
static int
mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh)
{
- int i;
struct mlx5_devx_lag_context lag_ctx = { 0 };
struct mlx5_devx_tis_attr tis_attr = { 0 };
+ int i;
tis_attr.transport_domain = sh->td->id;
if (sh->bond.n_port) {
@@ -1178,35 +1178,30 @@ mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh)
DRV_LOG(ERR, "Failed to query lag affinity.");
return -1;
}
- if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) {
- for (i = 0; i < sh->bond.n_port; i++) {
- tis_attr.lag_tx_port_affinity =
- MLX5_IFC_LAG_MAP_TIS_AFFINITY(i,
- sh->bond.n_port);
- sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx,
- &tis_attr);
- if (!sh->tis[i]) {
- DRV_LOG(ERR, "Failed to TIS %d/%d for bonding device"
- " %s.", i, sh->bond.n_port,
- sh->ibdev_name);
- return -1;
- }
- }
+ if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS)
DRV_LOG(DEBUG, "LAG number of ports : %d, affinity_1 & 2 : pf%d & %d.\n",
sh->bond.n_port, lag_ctx.tx_remap_affinity_1,
lag_ctx.tx_remap_affinity_2);
- return 0;
- }
- if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH)
+ else if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH)
DRV_LOG(INFO, "Device %s enabled HW hash based LAG.",
sh->ibdev_name);
}
- tis_attr.lag_tx_port_affinity = 0;
- sh->tis[0] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr);
- if (!sh->tis[0]) {
- DRV_LOG(ERR, "Failed to TIS 0 for bonding device"
- " %s.", sh->ibdev_name);
- return -1;
+ for (i = 0; i <= sh->bond.n_port; i++) {
+ /*
+ * lag_tx_port_affinity: 0 auto-selection, 1 PF1, 2 PF2 vice versa.
+ * Each TIS binds to one PF by setting lag_tx_port_affinity (> 0).
+ * Once LAG enabled, we create multiple TISs and bind each one to
+ * different PFs, then TIS[i+1] gets affinity i+1 and goes to PF i+1.
+ * TIS[0] is reserved for HW Hash mode.
+ */
+ tis_attr.lag_tx_port_affinity = i;
+ sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr);
+ if (!sh->tis[i]) {
+ DRV_LOG(ERR, "Failed to create TIS %d/%d for [bonding] device"
+ " %s.", i, sh->bond.n_port,
+ sh->ibdev_name);
+ return -1;
+ }
}
return 0;
}
diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index f6e1943fd7..a3fe0b3b98 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -1190,17 +1190,19 @@ static uint32_t
mlx5_get_txq_tis_num(struct rte_eth_dev *dev, uint16_t queue_idx)
{
struct mlx5_priv *priv = dev->data->dev_private;
- int tis_idx;
-
- if (priv->sh->bond.n_port && priv->sh->lag.affinity_mode ==
- MLX5_LAG_MODE_TIS) {
- tis_idx = (priv->lag_affinity_idx + queue_idx) %
- priv->sh->bond.n_port;
- DRV_LOG(INFO, "port %d txq %d gets affinity %d and maps to PF %d.",
- dev->data->port_id, queue_idx, tis_idx + 1,
- priv->sh->lag.tx_remap_affinity[tis_idx]);
- } else {
- tis_idx = 0;
+ struct mlx5_txq_data *txq_data = (*priv->txqs)[queue_idx];
+ int tis_idx = 0;
+
+ if (priv->sh->bond.n_port) {
+ if (txq_data->tx_phy_affinity) {
+ tis_idx = txq_data->tx_phy_affinity;
+ } else if (priv->sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) {
+ tis_idx = (priv->lag_affinity_idx + queue_idx) %
+ priv->sh->bond.n_port + 1;
+ DRV_LOG(INFO, "port %d txq %d gets affinity %d and maps to PF %d.",
+ dev->data->port_id, queue_idx, tis_idx,
+ priv->sh->lag.tx_remap_affinity[tis_idx - 1]);
+ }
}
MLX5_ASSERT(priv->sh->tis[tis_idx]);
return priv->sh->tis[tis_idx]->id;
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 4a85415ff3..354bb4f022 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -352,6 +352,7 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
info->switch_info.domain_id = priv->domain_id;
info->switch_info.port_id = priv->representor_id;
info->switch_info.rx_domain = 0; /* No sub Rx domains. */
+ info->nb_phy_ports = priv->sh->bond.n_port;
if (priv->representor) {
uint16_t port_id;
diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h
index a056be7ca8..674c2aebe5 100644
--- a/drivers/net/mlx5/mlx5_tx.h
+++ b/drivers/net/mlx5/mlx5_tx.h
@@ -144,6 +144,7 @@ struct mlx5_txq_data {
uint16_t inlen_send; /* Ordinary send data inline size. */
uint16_t inlen_empw; /* eMPW max packet size to inline. */
uint16_t inlen_mode; /* Minimal data length to inline. */
+ uint8_t tx_phy_affinity; /* TxQ affinity configuration. */
uint32_t qp_num_8s; /* QP number shifted by 8. */
uint64_t offloads; /* Offloads for Tx Queue. */
struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 5543f2c570..4e53706807 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -392,6 +392,13 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
container_of(txq, struct mlx5_txq_ctrl, txq);
int res;
+ if (conf->tx_phy_affinity > priv->num_lag_ports) {
+ rte_errno = EINVAL;
+ DRV_LOG(ERR, "port %u unable to setup Tx queue index %u"
+ " affinity is %u exceeds the maximum %u", dev->data->port_id,
+ idx, conf->tx_phy_affinity, priv->num_lag_ports);
+ return -rte_errno;
+ }
res = mlx5_tx_queue_pre_setup(dev, idx, &desc);
if (res)
return res;
@@ -1095,6 +1102,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
tmpl->txq.elts_m = desc - 1;
tmpl->txq.port_id = dev->data->port_id;
tmpl->txq.idx = idx;
+ tmpl->txq.tx_phy_affinity = conf->tx_phy_affinity;
txq_set_params(tmpl);
if (txq_adjust_params(tmpl))
goto error;
--
2.18.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD
2023-02-03 5:21 [PATCH 0/3] add new PHY affinity support in MLX5 PMD Jiawei Wang
` (2 preceding siblings ...)
2023-02-03 5:21 ` [PATCH 3/3] drivers: enhance the Tx queue affinity Jiawei Wang
@ 2023-02-22 12:26 ` Jiawei Wang
2023-02-22 12:26 ` [PATCH v2 1/3] net/mlx5: add lag Rx port affinity in PRM Jiawei Wang
` (3 more replies)
3 siblings, 4 replies; 9+ messages in thread
From: Jiawei Wang @ 2023-02-22 12:26 UTC (permalink / raw)
To: viacheslavo, orika; +Cc: dev, rasland
This patch implements RTE_FLOW_ITEM_TYPE_AGGR_AFFINITY
support in MLX5 PMD.
This patch adds the MLX5 PMD support for two device ops:
- map_aggr_tx_affinity
- count_aggr_ports
This patch series relies on the preceding RTE API:
http://patches.dpdk.org/project/dpdk/list/?series=27064
v2:
* update the PMD code based on the new RTE API.
Jiawei Wang (3):
net/mlx5: add lag Rx port affinity in PRM
net/mlx5: add aggregated affinity item support
net/mlx5: enhance the Tx queue affinity
doc/guides/nics/features/default.ini | 1 +
doc/guides/nics/features/mlx5.ini | 1 +
doc/guides/nics/mlx5.rst | 6 ++
drivers/common/mlx5/mlx5_devx_cmds.c | 3 +
drivers/common/mlx5/mlx5_devx_cmds.h | 1 +
drivers/common/mlx5/mlx5_prm.h | 15 ++--
drivers/net/mlx5/linux/mlx5_os.c | 6 ++
drivers/net/mlx5/mlx5.c | 49 ++++++-------
drivers/net/mlx5/mlx5.h | 3 +
drivers/net/mlx5/mlx5_devx.c | 24 ++++---
drivers/net/mlx5/mlx5_flow.h | 3 +
drivers/net/mlx5/mlx5_flow_dv.c | 100 ++++++++++++++++++++++++++-
drivers/net/mlx5/mlx5_flow_hw.c | 14 ++++
drivers/net/mlx5/mlx5_tx.h | 4 ++
drivers/net/mlx5/mlx5_txq.c | 38 ++++++++++
15 files changed, 221 insertions(+), 47 deletions(-)
--
2.18.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v2 1/3] net/mlx5: add lag Rx port affinity in PRM
2023-02-22 12:26 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Jiawei Wang
@ 2023-02-22 12:26 ` Jiawei Wang
2023-02-22 12:26 ` [PATCH v2 2/3] net/mlx5: add aggregated affinity item support Jiawei Wang
` (2 subsequent siblings)
3 siblings, 0 replies; 9+ messages in thread
From: Jiawei Wang @ 2023-02-22 12:26 UTC (permalink / raw)
To: viacheslavo, orika, Matan Azrad; +Cc: dev, rasland
This patch adds a function to query the HCA capability for
lag_rx_port_affinity via DevX.
Signed-off-by: Jiawei Wang <jiaweiw@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
drivers/common/mlx5/mlx5_devx_cmds.c | 3 +++
drivers/common/mlx5/mlx5_devx_cmds.h | 1 +
drivers/common/mlx5/mlx5_prm.h | 7 +++++--
drivers/net/mlx5/linux/mlx5_os.c | 4 ++++
drivers/net/mlx5/mlx5.h | 2 ++
5 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index fb670950ef..bfc6e09eac 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -1163,6 +1163,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
attr->outer_ipv4_ihl = MLX5_GET
(flow_table_nic_cap, hcattr,
ft_field_support_2_nic_receive.outer_ipv4_ihl);
+ attr->lag_rx_port_affinity = MLX5_GET
+ (flow_table_nic_cap, hcattr,
+ ft_field_support_2_nic_receive.lag_rx_port_affinity);
/* Query HCA offloads for Ethernet protocol. */
hcattr = mlx5_devx_get_hca_cap(ctx, in, out, &rc,
MLX5_GET_HCA_CAP_OP_MOD_ETHERNET_OFFLOAD_CAPS |
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 6b5a6b8667..8e68eeaf37 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -293,6 +293,7 @@ struct mlx5_hca_attr {
uint32_t flow_counter_access_aso:1;
uint32_t flow_access_aso_opc_mod:8;
uint32_t cross_vhca:1;
+ uint32_t lag_rx_port_affinity:1;
};
/* LAG Context. */
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 613cc6face..26a1f0717d 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -906,7 +906,8 @@ struct mlx5_ifc_fte_match_set_misc_bits {
u8 vxlan_vni[0x18];
u8 reserved_at_b8[0x8];
u8 geneve_vni[0x18];
- u8 reserved_at_e4[0x6];
+ u8 lag_rx_port_affinity[0x4];
+ u8 reserved_at_e8[0x2];
u8 geneve_tlv_option_0_exist[0x1];
u8 geneve_oam[0x1];
u8 reserved_at_e0[0xc];
@@ -2069,7 +2070,9 @@ struct mlx5_ifc_ft_fields_support_bits {
* Table 1872 - Flow Table Fields Supported 2 Format
*/
struct mlx5_ifc_ft_fields_support_2_bits {
- u8 reserved_at_0[0xd];
+ u8 reserved_at_0[0xa];
+ u8 lag_rx_port_affinity[0x1];
+ u8 reserved_at_c[0x2];
u8 hash_result[0x1];
u8 reserved_at_e[0x1];
u8 tunnel_header_2_3[0x1];
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index a71474c90a..60462da39d 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1390,6 +1390,10 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
DRV_LOG(DEBUG, "DV flow is not supported!");
}
#endif
+ if (hca_attr->lag_rx_port_affinity) {
+ sh->lag_rx_port_affinity_en = 1;
+ DRV_LOG(DEBUG, "LAG Rx Port Affinity enabled");
+ }
}
/* Process parameters and store port configuration on priv structure. */
err = mlx5_port_args_config(priv, mkvlist, &priv->config);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index a766fb408e..32797008c1 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1386,6 +1386,8 @@ struct mlx5_dev_ctx_shared {
uint32_t hws_tags:1; /* Check if tags info for HWS initialized. */
uint32_t shared_mark_enabled:1;
/* If mark action is enabled on Rxqs (shared E-Switch domain). */
+ uint32_t lag_rx_port_affinity_en:1;
+ /* lag_rx_port_affinity is supported. */
uint32_t hws_max_log_bulk_sz:5;
/* Log of minimal HWS counters created hard coded. */
uint32_t hws_max_nb_counters; /* Maximal number for HWS counters. */
--
2.18.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v2 2/3] net/mlx5: add aggregated affinity item support
2023-02-22 12:26 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Jiawei Wang
2023-02-22 12:26 ` [PATCH v2 1/3] net/mlx5: add lag Rx port affinity in PRM Jiawei Wang
@ 2023-02-22 12:26 ` Jiawei Wang
2023-02-22 12:26 ` [PATCH v2 3/3] net/mlx5: enhance the Tx queue affinity Jiawei Wang
2023-02-26 14:35 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Raslan Darawsheh
3 siblings, 0 replies; 9+ messages in thread
From: Jiawei Wang @ 2023-02-22 12:26 UTC (permalink / raw)
To: viacheslavo, orika, Ferruh Yigit, Matan Azrad; +Cc: dev, rasland
This patch adds the new aggregated affinity item support in PMD:
RTE_FLOW_ITEM_TYPE_AGGR_AFFINITY.
This patch adds the validation function for the new item;
it works for NIC-Rx and FDB rules on the root table only.
Signed-off-by: Jiawei Wang <jiaweiw@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
doc/guides/nics/features/default.ini | 1 +
doc/guides/nics/features/mlx5.ini | 1 +
doc/guides/nics/mlx5.rst | 6 ++
drivers/net/mlx5/linux/mlx5_os.c | 2 +
drivers/net/mlx5/mlx5.h | 1 +
drivers/net/mlx5/mlx5_flow.h | 3 +
drivers/net/mlx5/mlx5_flow_dv.c | 100 ++++++++++++++++++++++++++-
drivers/net/mlx5/mlx5_flow_hw.c | 14 ++++
8 files changed, 126 insertions(+), 2 deletions(-)
diff --git a/doc/guides/nics/features/default.ini b/doc/guides/nics/features/default.ini
index e249f62f31..0834bdc300 100644
--- a/doc/guides/nics/features/default.ini
+++ b/doc/guides/nics/features/default.ini
@@ -145,6 +145,7 @@ udp =
vlan =
vxlan =
vxlan_gpe =
+aggr_affinity =
[rte_flow actions]
age =
diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index c58e1934e9..7276e5bd1a 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -90,6 +90,7 @@ vlan = Y
vxlan = Y
vxlan_gpe = Y
represented_port = Y
+aggr_affinity = Y
[rte_flow actions]
age = I
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 6510e74fb9..dbf0c7a4bc 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -107,6 +107,7 @@ Features
- Sub-Function representors.
- Sub-Function.
- Matching on represented port.
+- Matching on aggregated affinity.
Limitations
@@ -615,6 +616,11 @@ Limitations
- The NIC egress flow rules on representor port are not supported.
+- Match on aggregated affinity:
+
+ - Supports NIC ingress flow in group 0.
+ - Supports E-Switch flow in group 0 and depends on
+ device-managed flow steering (DMFS) mode.
Statistics
----------
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 60462da39d..1c26b30702 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1394,6 +1394,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
sh->lag_rx_port_affinity_en = 1;
DRV_LOG(DEBUG, "LAG Rx Port Affinity enabled");
}
+ priv->num_lag_ports = hca_attr->num_lag_ports;
+ DRV_LOG(DEBUG, "The number of lag ports is %d", priv->num_lag_ports);
}
/* Process parameters and store port configuration on priv structure. */
err = mlx5_port_args_config(priv, mkvlist, &priv->config);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 32797008c1..581179cecd 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1681,6 +1681,7 @@ struct mlx5_priv {
unsigned int mtr_reg_share:1; /* Whether support meter REG_C share. */
unsigned int lb_used:1; /* Loopback queue is referred to. */
uint32_t mark_enabled:1; /* If mark action is enabled on rxqs. */
+ uint32_t num_lag_ports:4; /* Number of ports can be bonded. */
uint16_t domain_id; /* Switch domain identifier. */
uint16_t vport_id; /* Associated VF vport index (if any). */
uint32_t vport_meta_tag; /* Used for vport index match ove VF LAG. */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 4bef2296b8..3a72f4eb4d 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -223,6 +223,9 @@ enum mlx5_feature_name {
#define MLX5_FLOW_ITEM_OUTER_IPV6_ROUTING_EXT (UINT64_C(1) << 45)
#define MLX5_FLOW_ITEM_INNER_IPV6_ROUTING_EXT (UINT64_C(1) << 46)
+/* Aggregated affinity item */
+#define MLX5_FLOW_ITEM_AGGR_AFFINITY (UINT64_C(1) << 49)
+
/* Outer Masks. */
#define MLX5_FLOW_LAYER_OUTER_L3 \
(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index f93dd4073c..94d944bade 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -3774,6 +3774,75 @@ flow_dv_validate_item_meter_color(struct rte_eth_dev *dev,
return 0;
}
+/**
+ * Validate aggregated affinity item.
+ *
+ * @param[in] dev
+ * Pointer to the rte_eth_dev structure.
+ * @param[in] item
+ * Item specification.
+ * @param[in] attr
+ * Attributes of flow that includes this item.
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_item_aggr_affinity(struct rte_eth_dev *dev,
+ const struct rte_flow_item *item,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ const struct rte_flow_item_aggr_affinity *spec = item->spec;
+ const struct rte_flow_item_aggr_affinity *mask = item->mask;
+ struct rte_flow_item_aggr_affinity nic_mask = {
+ .affinity = UINT8_MAX
+ };
+ int ret;
+
+ if (!priv->sh->lag_rx_port_affinity_en)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+ "Unsupported aggregated affinity with Older FW");
+ if ((attr->transfer && priv->fdb_def_rule) ||
+ attr->egress || attr->group)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+ item->spec,
+ "aggregated affinity is not supported with egress or FDB on non root table");
+ if (!spec)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+ item->spec,
+ "data cannot be empty");
+ if (spec->affinity == 0)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+ item->spec,
+ "zero affinity number not supported");
+ if (spec->affinity > priv->num_lag_ports)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+ item->spec,
+ "exceed max affinity number in lag ports");
+ if (!mask)
+ mask = &rte_flow_item_aggr_affinity_mask;
+ if (!mask->affinity)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM_SPEC, NULL,
+ "mask cannot be zero");
+ ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+ (const uint8_t *)&nic_mask,
+ sizeof(struct rte_flow_item_aggr_affinity),
+ MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
int
flow_dv_encap_decap_match_cb(void *tool_ctx __rte_unused,
struct mlx5_list_entry *entry, void *cb_ctx)
@@ -7464,6 +7533,13 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
return ret;
last_item = MLX5_FLOW_ITEM_METER_COLOR;
break;
+ case RTE_FLOW_ITEM_TYPE_AGGR_AFFINITY:
+ ret = flow_dv_validate_item_aggr_affinity(dev, items,
+ attr, error);
+ if (ret < 0)
+ return ret;
+ last_item = MLX5_FLOW_ITEM_AGGR_AFFINITY;
+ break;
default:
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM,
@@ -10002,7 +10078,7 @@ flow_dv_translate_item_tag(struct rte_eth_dev *dev, void *key,
const struct rte_flow_item_tag *tag_vv = item->spec;
const struct rte_flow_item_tag *tag_v;
const struct rte_flow_item_tag *tag_m;
- enum modify_reg reg;
+ int reg;
uint32_t index;
if (MLX5_ITEM_VALID(item, key_type))
@@ -10017,7 +10093,7 @@ flow_dv_translate_item_tag(struct rte_eth_dev *dev, void *key,
else
reg = flow_hw_get_reg_id(RTE_FLOW_ITEM_TYPE_TAG, index);
MLX5_ASSERT(reg > 0);
- flow_dv_match_meta_reg(key, reg, tag_v->data, tag_m->data);
+ flow_dv_match_meta_reg(key, (enum modify_reg)reg, tag_v->data, tag_m->data);
}
/**
@@ -10719,6 +10795,22 @@ flow_dv_translate_item_meter_color(struct rte_eth_dev *dev, void *key,
flow_dv_match_meta_reg(key, (enum modify_reg)reg, value, mask);
}
+static void
+flow_dv_translate_item_aggr_affinity(void *key,
+ const struct rte_flow_item *item,
+ uint32_t key_type)
+{
+ const struct rte_flow_item_aggr_affinity *affinity_v;
+ const struct rte_flow_item_aggr_affinity *affinity_m;
+ void *misc_v;
+
+ MLX5_ITEM_UPDATE(item, key_type, affinity_v, affinity_m,
+ &rte_flow_item_aggr_affinity_mask);
+ misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc_v, lag_rx_port_affinity,
+ affinity_v->affinity & affinity_m->affinity);
+}
+
static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
#define HEADER_IS_ZERO(match_criteria, headers) \
@@ -13516,6 +13608,10 @@ flow_dv_translate_items(struct rte_eth_dev *dev,
last_item = flow_dv_translate_item_integrity(items,
wks, key_type);
break;
+ case RTE_FLOW_ITEM_TYPE_AGGR_AFFINITY:
+ flow_dv_translate_item_aggr_affinity(key, items, key_type);
+ last_item = MLX5_FLOW_ITEM_AGGR_AFFINITY;
+ break;
default:
break;
}
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index a9c7045a3e..07766dd8f1 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -4723,6 +4723,20 @@ flow_hw_pattern_validate(struct rte_eth_dev *dev,
"Unsupported meter color register");
break;
}
+ case RTE_FLOW_ITEM_TYPE_AGGR_AFFINITY:
+ {
+ if (!priv->sh->lag_rx_port_affinity_en)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+ "Unsupported aggregated affinity with Older FW");
+ if ((attr->transfer && priv->fdb_def_rule) || attr->egress)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+ "Aggregated affinity item not supported"
+ " with egress or transfer"
+ " attribute");
+ break;
+ }
case RTE_FLOW_ITEM_TYPE_VOID:
case RTE_FLOW_ITEM_TYPE_ETH:
case RTE_FLOW_ITEM_TYPE_VLAN:
--
2.18.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v2 3/3] net/mlx5: enhance the Tx queue affinity
2023-02-22 12:26 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Jiawei Wang
2023-02-22 12:26 ` [PATCH v2 1/3] net/mlx5: add lag Rx port affinity in PRM Jiawei Wang
2023-02-22 12:26 ` [PATCH v2 2/3] net/mlx5: add aggregated affinity item support Jiawei Wang
@ 2023-02-22 12:26 ` Jiawei Wang
2023-02-26 14:35 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Raslan Darawsheh
3 siblings, 0 replies; 9+ messages in thread
From: Jiawei Wang @ 2023-02-22 12:26 UTC (permalink / raw)
To: viacheslavo, orika, Matan Azrad; +Cc: dev, rasland
The rte_eth_dev_map_aggr_tx_affinity() API was introduced in the
ethdev lib; it is used to set the affinity value per Tx queue.
This patch adds the MLX5 PMD support for two device ops:
- map_aggr_tx_affinity
- count_aggr_ports
After a Tx queue is mapped to an aggregated port by calling
map_aggr_tx_affinity() and traffic starts being sent, the MLX5 PMD
updates TIS creation with the tx_aggr_affinity value of the Tx queue.
TIS index 1 goes to the first physical port, TIS index 2 goes to the
second physical port, and so on, while TIS index 0 is reserved for the
default HW hash mode.
Signed-off-by: Jiawei Wang <jiaweiw@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
drivers/common/mlx5/mlx5_prm.h | 8 ------
drivers/net/mlx5/mlx5.c | 49 +++++++++++++++++-----------------
drivers/net/mlx5/mlx5_devx.c | 24 +++++++++--------
drivers/net/mlx5/mlx5_tx.h | 4 +++
drivers/net/mlx5/mlx5_txq.c | 38 ++++++++++++++++++++++++++
5 files changed, 80 insertions(+), 43 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 26a1f0717d..2f5aeecaa9 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -2363,14 +2363,6 @@ struct mlx5_ifc_query_nic_vport_context_in_bits {
u8 reserved_at_68[0x18];
};
-/*
- * lag_tx_port_affinity: 0 auto-selection, 1 PF1, 2 PF2 vice versa.
- * Each TIS binds to one PF by setting lag_tx_port_affinity (>0).
- * Once LAG enabled, we create multiple TISs and bind each one to
- * different PFs, then TIS[i] gets affinity i+1 and goes to PF i+1.
- */
-#define MLX5_IFC_LAG_MAP_TIS_AFFINITY(index, num) ((num) ? \
- (index) % (num) + 1 : 0)
struct mlx5_ifc_tisc_bits {
u8 strict_lag_tx_port_affinity[0x1];
u8 reserved_at_1[0x3];
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index f55c1caca0..8c8f71d508 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1257,9 +1257,9 @@ mlx5_dev_ctx_shared_mempool_subscribe(struct rte_eth_dev *dev)
static int
mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh)
{
- int i;
struct mlx5_devx_lag_context lag_ctx = { 0 };
struct mlx5_devx_tis_attr tis_attr = { 0 };
+ int i;
tis_attr.transport_domain = sh->td->id;
if (sh->bond.n_port) {
@@ -1273,35 +1273,30 @@ mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh)
DRV_LOG(ERR, "Failed to query lag affinity.");
return -1;
}
- if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) {
- for (i = 0; i < sh->bond.n_port; i++) {
- tis_attr.lag_tx_port_affinity =
- MLX5_IFC_LAG_MAP_TIS_AFFINITY(i,
- sh->bond.n_port);
- sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx,
- &tis_attr);
- if (!sh->tis[i]) {
- DRV_LOG(ERR, "Failed to TIS %d/%d for bonding device"
- " %s.", i, sh->bond.n_port,
- sh->ibdev_name);
- return -1;
- }
- }
+ if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS)
DRV_LOG(DEBUG, "LAG number of ports : %d, affinity_1 & 2 : pf%d & %d.\n",
sh->bond.n_port, lag_ctx.tx_remap_affinity_1,
lag_ctx.tx_remap_affinity_2);
- return 0;
- }
- if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH)
+ else if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH)
DRV_LOG(INFO, "Device %s enabled HW hash based LAG.",
sh->ibdev_name);
}
- tis_attr.lag_tx_port_affinity = 0;
- sh->tis[0] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr);
- if (!sh->tis[0]) {
- DRV_LOG(ERR, "Failed to TIS 0 for bonding device"
- " %s.", sh->ibdev_name);
- return -1;
+ for (i = 0; i <= sh->bond.n_port; i++) {
+ /*
+ * lag_tx_port_affinity: 0 auto-selection, 1 PF1, 2 PF2 vice versa.
+ * Each TIS binds to one PF by setting lag_tx_port_affinity (> 0).
+ * Once LAG enabled, we create multiple TISs and bind each one to
+ * different PFs, then TIS[i+1] gets affinity i+1 and goes to PF i+1.
+ * TIS[0] is reserved for HW Hash mode.
+ */
+ tis_attr.lag_tx_port_affinity = i;
+ sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr);
+ if (!sh->tis[i]) {
+ DRV_LOG(ERR, "Failed to create TIS %d/%d for [bonding] device"
+ " %s.", i, sh->bond.n_port,
+ sh->ibdev_name);
+ return -1;
+ }
}
return 0;
}
@@ -2335,6 +2330,8 @@ const struct eth_dev_ops mlx5_dev_ops = {
.hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind,
.hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind,
.get_monitor_addr = mlx5_get_monitor_addr,
+ .count_aggr_ports = mlx5_count_aggr_ports,
+ .map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
};
/* Available operations from secondary process. */
@@ -2358,6 +2355,8 @@ const struct eth_dev_ops mlx5_dev_sec_ops = {
.tx_burst_mode_get = mlx5_tx_burst_mode_get,
.get_module_info = mlx5_get_module_info,
.get_module_eeprom = mlx5_get_module_eeprom,
+ .count_aggr_ports = mlx5_count_aggr_ports,
+ .map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
};
/* Available operations in flow isolated mode. */
@@ -2422,6 +2421,8 @@ const struct eth_dev_ops mlx5_dev_ops_isolate = {
.hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind,
.hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind,
.get_monitor_addr = mlx5_get_monitor_addr,
+ .count_aggr_ports = mlx5_count_aggr_ports,
+ .map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
};
/**
diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index f6e1943fd7..d02cedb202 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -1190,17 +1190,19 @@ static uint32_t
mlx5_get_txq_tis_num(struct rte_eth_dev *dev, uint16_t queue_idx)
{
struct mlx5_priv *priv = dev->data->dev_private;
- int tis_idx;
-
- if (priv->sh->bond.n_port && priv->sh->lag.affinity_mode ==
- MLX5_LAG_MODE_TIS) {
- tis_idx = (priv->lag_affinity_idx + queue_idx) %
- priv->sh->bond.n_port;
- DRV_LOG(INFO, "port %d txq %d gets affinity %d and maps to PF %d.",
- dev->data->port_id, queue_idx, tis_idx + 1,
- priv->sh->lag.tx_remap_affinity[tis_idx]);
- } else {
- tis_idx = 0;
+ struct mlx5_txq_data *txq_data = (*priv->txqs)[queue_idx];
+ int tis_idx = 0;
+
+ if (priv->sh->bond.n_port) {
+ if (txq_data->tx_aggr_affinity) {
+ tis_idx = txq_data->tx_aggr_affinity;
+ } else if (priv->sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) {
+ tis_idx = (priv->lag_affinity_idx + queue_idx) %
+ priv->sh->bond.n_port + 1;
+ DRV_LOG(INFO, "port %d txq %d gets affinity %d and maps to PF %d.",
+ dev->data->port_id, queue_idx, tis_idx,
+ priv->sh->lag.tx_remap_affinity[tis_idx - 1]);
+ }
}
MLX5_ASSERT(priv->sh->tis[tis_idx]);
return priv->sh->tis[tis_idx]->id;
diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h
index a056be7ca8..d0c6303a2d 100644
--- a/drivers/net/mlx5/mlx5_tx.h
+++ b/drivers/net/mlx5/mlx5_tx.h
@@ -144,6 +144,7 @@ struct mlx5_txq_data {
uint16_t inlen_send; /* Ordinary send data inline size. */
uint16_t inlen_empw; /* eMPW max packet size to inline. */
uint16_t inlen_mode; /* Minimal data length to inline. */
+ uint8_t tx_aggr_affinity; /* TxQ affinity configuration. */
uint32_t qp_num_8s; /* QP number shifted by 8. */
uint64_t offloads; /* Offloads for Tx Queue. */
struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
@@ -218,6 +219,9 @@ void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl);
void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl);
uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev);
void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev);
+int mlx5_count_aggr_ports(struct rte_eth_dev *dev);
+int mlx5_map_aggr_tx_affinity(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+ uint8_t affinity);
/* mlx5_tx.c */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 419e913559..1e0e61a620 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -1365,3 +1365,41 @@ mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev)
ts_mask : 0;
}
}
+
+int mlx5_count_aggr_ports(struct rte_eth_dev *dev)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+
+ return priv->sh->bond.n_port;
+}
+
+int mlx5_map_aggr_tx_affinity(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+ uint8_t affinity)
+{
+ struct mlx5_txq_ctrl *txq_ctrl;
+ struct mlx5_txq_data *txq;
+ struct mlx5_priv *priv;
+
+ priv = dev->data->dev_private;
+ txq = (*priv->txqs)[tx_queue_id];
+ if (!txq)
+ return -1;
+ txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
+ if (tx_queue_id >= priv->txqs_n) {
+ DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)",
+ dev->data->port_id, tx_queue_id, priv->txqs_n);
+ rte_errno = EOVERFLOW;
+ return -rte_errno;
+ }
+ if (affinity > priv->num_lag_ports) {
+ DRV_LOG(ERR, "port %u unable to setup Tx queue index %u"
+ " affinity is %u exceeds the maximum %u", dev->data->port_id,
+ tx_queue_id, affinity, priv->num_lag_ports);
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ DRV_LOG(DEBUG, "port %u configuring queue %u for aggregated affinity %u",
+ dev->data->port_id, tx_queue_id, affinity);
+ txq_ctrl->txq.tx_aggr_affinity = affinity;
+ return 0;
+}
--
2.18.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD
2023-02-22 12:26 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Jiawei Wang
` (2 preceding siblings ...)
2023-02-22 12:26 ` [PATCH v2 3/3] net/mlx5: enhance the Tx queue affinity Jiawei Wang
@ 2023-02-26 14:35 ` Raslan Darawsheh
3 siblings, 0 replies; 9+ messages in thread
From: Raslan Darawsheh @ 2023-02-26 14:35 UTC (permalink / raw)
To: Jiawei(Jonny) Wang, Slava Ovsiienko, Ori Kam; +Cc: dev
Hi,
> -----Original Message-----
> From: Jiawei(Jonny) Wang <jiaweiw@nvidia.com>
> Sent: Wednesday, February 22, 2023 2:26 PM
> To: Slava Ovsiienko <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>
> Cc: dev@dpdk.org; Raslan Darawsheh <rasland@nvidia.com>
> Subject: [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5
> PMD
>
> This patch implements RTE_FLOW_ITEM_TYPE_AGGR_AFFINITY support in
> MLX5 PMD.
>
> This patch adds the MLX5 PMD support for two device ops:
> - map_aggr_tx_affinity
> - count_aggr_ports
>
> This patch series relies on the preceding RTE API:
> http://patches.dpdk.org/project/dpdk/list/?series=27064
>
> v2:
> * update the PMD code based on the new RTE API.
>
> Jiawei Wang (3):
> net/mlx5: add lag Rx port affinity in PRM
> net/mlx5: add aggregated affinity item support
> net/mlx5: enhance the Tx queue affinity
>
> doc/guides/nics/features/default.ini | 1 +
> doc/guides/nics/features/mlx5.ini | 1 +
> doc/guides/nics/mlx5.rst | 6 ++
> drivers/common/mlx5/mlx5_devx_cmds.c | 3 +
> drivers/common/mlx5/mlx5_devx_cmds.h | 1 +
> drivers/common/mlx5/mlx5_prm.h | 15 ++--
> drivers/net/mlx5/linux/mlx5_os.c | 6 ++
> drivers/net/mlx5/mlx5.c | 49 ++++++-------
> drivers/net/mlx5/mlx5.h | 3 +
> drivers/net/mlx5/mlx5_devx.c | 24 ++++---
> drivers/net/mlx5/mlx5_flow.h | 3 +
> drivers/net/mlx5/mlx5_flow_dv.c | 100
> ++++++++++++++++++++++++++-
> drivers/net/mlx5/mlx5_flow_hw.c | 14 ++++
> drivers/net/mlx5/mlx5_tx.h | 4 ++
> drivers/net/mlx5/mlx5_txq.c | 38 ++++++++++
> 15 files changed, 221 insertions(+), 47 deletions(-)
>
> --
> 2.18.1
Series applied to next-net-mlx,
Kindest regards,
Raslan Darawsheh
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2023-02-26 14:35 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-03 5:21 [PATCH 0/3] add new PHY affinity support in MLX5 PMD Jiawei Wang
2023-02-03 5:21 ` [PATCH 1/3] drivers: add lag Rx port affinity in PRM Jiawei Wang
2023-02-03 5:21 ` [PATCH 2/3] net/mlx5: add PHY affinity item support Jiawei Wang
2023-02-03 5:21 ` [PATCH 3/3] drivers: enhance the Tx queue affinity Jiawei Wang
2023-02-22 12:26 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Jiawei Wang
2023-02-22 12:26 ` [PATCH v2 1/3] net/mlx5: add lag Rx port affinity in PRM Jiawei Wang
2023-02-22 12:26 ` [PATCH v2 2/3] net/mlx5: add aggregated affinity item support Jiawei Wang
2023-02-22 12:26 ` [PATCH v2 3/3] net/mlx5: enhance the Tx queue affinity Jiawei Wang
2023-02-26 14:35 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Raslan Darawsheh
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).