* [PATCH 0/3] add new PHY affinity support in MLX5 PMD @ 2023-02-03 5:21 Jiawei Wang 2023-02-03 5:21 ` [PATCH 1/3] drivers: add lag Rx port affinity in PRM Jiawei Wang ` (3 more replies) 0 siblings, 4 replies; 9+ messages in thread From: Jiawei Wang @ 2023-02-03 5:21 UTC (permalink / raw) To: viacheslavo, orika; +Cc: dev, rasland This patch series implements PHY affinity item support in the MLX5 PMD and adds support for TIS creation using the tx_phy_affinity value of the Tx queue. This patch series relies on the preceding ethdev API: http://patches.dpdk.org/project/dpdk/patch/20230203050717.46914-2-jiaweiw@nvidia.com/ Jiawei Wang (3): drivers: add lag Rx port affinity in PRM net/mlx5: add PHY affinity item support drivers: enhance the Tx queue affinity doc/guides/nics/features/default.ini | 1 + doc/guides/nics/features/mlx5.ini | 1 + doc/guides/nics/mlx5.rst | 8 ++- drivers/common/mlx5/mlx5_devx_cmds.c | 3 + drivers/common/mlx5/mlx5_devx_cmds.h | 1 + drivers/common/mlx5/mlx5_prm.h | 15 ++-- drivers/net/mlx5/linux/mlx5_os.c | 6 ++ drivers/net/mlx5/mlx5.c | 43 +++++------- drivers/net/mlx5/mlx5.h | 3 + drivers/net/mlx5/mlx5_devx.c | 24 ++++--- drivers/net/mlx5/mlx5_ethdev.c | 1 + drivers/net/mlx5/mlx5_flow.h | 3 + drivers/net/mlx5/mlx5_flow_dv.c | 100 ++++++++++++++++++++++++++- drivers/net/mlx5/mlx5_flow_hw.c | 14 ++++ drivers/net/mlx5/mlx5_tx.h | 1 + drivers/net/mlx5/mlx5_txq.c | 8 +++ 16 files changed, 184 insertions(+), 48 deletions(-) -- 2.18.1 ^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/3] drivers: add lag Rx port affinity in PRM 2023-02-03 5:21 [PATCH 0/3] add new PHY affinity support in MLX5 PMD Jiawei Wang @ 2023-02-03 5:21 ` Jiawei Wang 2023-02-03 5:21 ` [PATCH 2/3] net/mlx5: add PHY affinity item support Jiawei Wang ` (2 subsequent siblings) 3 siblings, 0 replies; 9+ messages in thread From: Jiawei Wang @ 2023-02-03 5:21 UTC (permalink / raw) To: viacheslavo, orika, Matan Azrad; +Cc: dev, rasland This patch adds function to query hca capability via Devx for lag_rx_port_affinity. Signed-off-by: Jiawei Wang <jiaweiw@nvidia.com> --- drivers/common/mlx5/mlx5_devx_cmds.c | 3 +++ drivers/common/mlx5/mlx5_devx_cmds.h | 1 + drivers/common/mlx5/mlx5_prm.h | 7 +++++-- drivers/net/mlx5/linux/mlx5_os.c | 4 ++++ drivers/net/mlx5/mlx5.h | 2 ++ 5 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c index e3a4927d0f..a157ab4b6c 100644 --- a/drivers/common/mlx5/mlx5_devx_cmds.c +++ b/drivers/common/mlx5/mlx5_devx_cmds.c @@ -1141,6 +1141,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx, attr->outer_ipv4_ihl = MLX5_GET (flow_table_nic_cap, hcattr, ft_field_support_2_nic_receive.outer_ipv4_ihl); + attr->lag_rx_port_affinity = MLX5_GET + (flow_table_nic_cap, hcattr, + ft_field_support_2_nic_receive.lag_rx_port_affinity); /* Query HCA offloads for Ethernet protocol. */ hcattr = mlx5_devx_get_hca_cap(ctx, in, out, &rc, MLX5_GET_HCA_CAP_OP_MOD_ETHERNET_OFFLOAD_CAPS | diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h index c94b9eac06..9cf045ccaa 100644 --- a/drivers/common/mlx5/mlx5_devx_cmds.h +++ b/drivers/common/mlx5/mlx5_devx_cmds.h @@ -288,6 +288,7 @@ struct mlx5_hca_attr { uint32_t alloc_flow_counter_pd:1; uint32_t flow_counter_access_aso:1; uint32_t flow_access_aso_opc_mod:8; + uint32_t lag_rx_port_affinity:1; }; /* LAG Context. 
*/ diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h index 9294f65e24..8bbb800206 100644 --- a/drivers/common/mlx5/mlx5_prm.h +++ b/drivers/common/mlx5/mlx5_prm.h @@ -905,7 +905,8 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 vxlan_vni[0x18]; u8 reserved_at_b8[0x8]; u8 geneve_vni[0x18]; - u8 reserved_at_e4[0x6]; + u8 lag_rx_port_affinity[0x4]; + u8 reserved_at_e8[0x2]; u8 geneve_tlv_option_0_exist[0x1]; u8 geneve_oam[0x1]; u8 reserved_at_e0[0xc]; @@ -2044,7 +2045,9 @@ struct mlx5_ifc_ft_fields_support_bits { * Table 1872 - Flow Table Fields Supported 2 Format */ struct mlx5_ifc_ft_fields_support_2_bits { - u8 reserved_at_0[0xd]; + u8 reserved_at_0[0xa]; + u8 lag_rx_port_affinity[0x1]; + u8 reserved_at_c[0x2]; u8 hash_result[0x1]; u8 reserved_at_e[0x1]; u8 tunnel_header_2_3[0x1]; diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index a71474c90a..60462da39d 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -1390,6 +1390,10 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, DRV_LOG(DEBUG, "DV flow is not supported!"); } #endif + if (hca_attr->lag_rx_port_affinity) { + sh->lag_rx_port_affinity_en = 1; + DRV_LOG(DEBUG, "LAG Rx Port Affinity enabled"); + } } /* Process parameters and store port configuration on priv structure. */ err = mlx5_port_args_config(priv, mkvlist, &priv->config); diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 16b33e1548..bbd7262a51 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -1374,6 +1374,8 @@ struct mlx5_dev_ctx_shared { uint32_t hws_tags:1; /* Check if tags info for HWS initialized. */ uint32_t shared_mark_enabled:1; /* If mark action is enabled on Rxqs (shared E-Switch domain). */ + uint32_t lag_rx_port_affinity_en:1; + /* lag_rx_port_affinity is supported. */ uint32_t hws_max_log_bulk_sz:5; /* Log of minimal HWS counters created hard coded. 
*/ uint32_t hws_max_nb_counters; /* Maximal number for HWS counters. */ -- 2.18.1 ^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 2/3] net/mlx5: add PHY affinity item support 2023-02-03 5:21 [PATCH 0/3] add new PHY affinity support in MLX5 PMD Jiawei Wang 2023-02-03 5:21 ` [PATCH 1/3] drivers: add lag Rx port affinity in PRM Jiawei Wang @ 2023-02-03 5:21 ` Jiawei Wang 2023-02-03 5:21 ` [PATCH 3/3] drivers: enhance the Tx queue affinity Jiawei Wang 2023-02-22 12:26 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Jiawei Wang 3 siblings, 0 replies; 9+ messages in thread From: Jiawei Wang @ 2023-02-03 5:21 UTC (permalink / raw) To: viacheslavo, orika, Ferruh Yigit, Matan Azrad; +Cc: dev, rasland This patch adds the new phy affinity item support in PMD: RTE_FLOW_ITEM_TYPE_PHY_AFFINITY. This patch adds the validation function for the new item, it works for NIC-RX and FDB rule on ROOT-table only. Signed-off-by: Jiawei Wang <jiaweiw@nvidia.com> --- doc/guides/nics/features/default.ini | 1 + doc/guides/nics/features/mlx5.ini | 1 + doc/guides/nics/mlx5.rst | 8 ++- drivers/net/mlx5/linux/mlx5_os.c | 2 + drivers/net/mlx5/mlx5.h | 1 + drivers/net/mlx5/mlx5_flow.h | 3 + drivers/net/mlx5/mlx5_flow_dv.c | 100 ++++++++++++++++++++++++++- drivers/net/mlx5/mlx5_flow_hw.c | 14 ++++ 8 files changed, 127 insertions(+), 3 deletions(-) diff --git a/doc/guides/nics/features/default.ini b/doc/guides/nics/features/default.ini index 510cc6679d..fd5edd11c8 100644 --- a/doc/guides/nics/features/default.ini +++ b/doc/guides/nics/features/default.ini @@ -141,6 +141,7 @@ udp = vlan = vxlan = vxlan_gpe = +phy_affinity = [rte_flow actions] age = diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini index 62fd330e2b..9142f04f93 100644 --- a/doc/guides/nics/features/mlx5.ini +++ b/doc/guides/nics/features/mlx5.ini @@ -87,6 +87,7 @@ vlan = Y vxlan = Y vxlan_gpe = Y represented_port = Y +phy_affinity = Y [rte_flow actions] age = I diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index f137f156f9..5569b045d7 100644 --- a/doc/guides/nics/mlx5.rst +++ 
b/doc/guides/nics/mlx5.rst @@ -106,6 +106,7 @@ Features - Sub-Function representors. - Sub-Function. - Matching on represented port. +- Matching on phy affinity. Limitations @@ -595,13 +596,18 @@ Limitations - key - sequence - Matching on checksum and sequence needs MLNX_OFED 5.6+. +- Matching on checksum and sequence needs MLNX_OFED 5.6+. - The NIC egress flow rules on representor port are not supported. - When using DV/verbs flow engine (``dv_flow_en`` = 1/0 respectively), Match on SPI field in ESP header for group 0 needs MLNX_OFED 5.6+. +- Match on phy affinity: + + - Supports NIC ingress flow in group 0. + - Supports E-Switch flow in group 0 and depends on + device-managed flow steering (DMFS) mode. Statistics ---------- diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 60462da39d..1c26b30702 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -1394,6 +1394,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, sh->lag_rx_port_affinity_en = 1; DRV_LOG(DEBUG, "LAG Rx Port Affinity enabled"); } + priv->num_lag_ports = hca_attr->num_lag_ports; + DRV_LOG(DEBUG, "The number of lag ports is %d", priv->num_lag_ports); } /* Process parameters and store port configuration on priv structure. */ err = mlx5_port_args_config(priv, mkvlist, &priv->config); diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index bbd7262a51..c7a7b176b8 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -1668,6 +1668,7 @@ struct mlx5_priv { unsigned int mtr_reg_share:1; /* Whether support meter REG_C share. */ unsigned int lb_used:1; /* Loopback queue is referred to. */ uint32_t mark_enabled:1; /* If mark action is enabled on rxqs. */ + uint32_t num_lag_ports:4; /* Number of ports can be bonded. */ uint16_t domain_id; /* Switch domain identifier. */ uint16_t vport_id; /* Associated VF vport index (if any). */ uint32_t vport_meta_tag; /* Used for vport index match ove VF LAG. 
*/ diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index e376dcae93..64b1b7c37c 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -219,6 +219,9 @@ enum mlx5_feature_name { /* Meter color item */ #define MLX5_FLOW_ITEM_METER_COLOR (UINT64_C(1) << 44) +/* PHY affinity item */ +#define MLX5_FLOW_ITEM_PHY_AFFINITY (UINT64_C(1) << 49) + /* Outer Masks. */ #define MLX5_FLOW_LAYER_OUTER_L3 \ (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6) diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 7ca909999b..994f184aaf 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -3764,6 +3764,75 @@ flow_dv_validate_item_meter_color(struct rte_eth_dev *dev, return 0; } +/** + * Validate Phy affinity item. + * + * @param[in] dev + * Pointer to the rte_eth_dev structure. + * @param[in] item + * Item specification. + * @param[in] attr + * Attributes of flow that includes this item. + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ +static int +flow_dv_validate_item_phy_affinity(struct rte_eth_dev *dev, + const struct rte_flow_item *item, + const struct rte_flow_attr *attr, + struct rte_flow_error *error) +{ + struct mlx5_priv *priv = dev->data->dev_private; + const struct rte_flow_item_phy_affinity *spec = item->spec; + const struct rte_flow_item_phy_affinity *mask = item->mask; + struct rte_flow_item_phy_affinity nic_mask = { + .affinity = UINT8_MAX + }; + int ret; + + if (!priv->sh->lag_rx_port_affinity_en) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, NULL, + "Unsupported phy affinity with Older FW"); + if ((attr->transfer && priv->fdb_def_rule) || + attr->egress || attr->group) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_SPEC, + item->spec, + "phy affinity is not supported with egress or FDB on non root table"); + if (!spec) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_SPEC, + item->spec, + "data cannot be empty"); + if (spec->affinity == 0) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_SPEC, + item->spec, + "zero affinity number not supported"); + if (spec->affinity > priv->num_lag_ports) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_SPEC, + item->spec, + "exceed max affinity number in lag ports"); + if (!mask) + mask = &rte_flow_item_phy_affinity_mask; + if (!mask->affinity) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_SPEC, NULL, + "mask cannot be zero"); + ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, + (const uint8_t *)&nic_mask, + sizeof(struct rte_flow_item_phy_affinity), + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); + if (ret < 0) + return ret; + return 0; +} + int flow_dv_encap_decap_match_cb(void *tool_ctx __rte_unused, struct mlx5_list_entry *entry, void *cb_ctx) @@ -7443,6 +7512,13 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, return ret; last_item = MLX5_FLOW_ITEM_METER_COLOR; 
break; + case RTE_FLOW_ITEM_TYPE_PHY_AFFINITY: + ret = flow_dv_validate_item_phy_affinity(dev, items, + attr, error); + if (ret < 0) + return ret; + last_item = MLX5_FLOW_ITEM_PHY_AFFINITY; + break; default: return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, @@ -9981,7 +10057,7 @@ flow_dv_translate_item_tag(struct rte_eth_dev *dev, void *key, const struct rte_flow_item_tag *tag_vv = item->spec; const struct rte_flow_item_tag *tag_v; const struct rte_flow_item_tag *tag_m; - enum modify_reg reg; + int reg; uint32_t index; if (MLX5_ITEM_VALID(item, key_type)) @@ -9996,7 +10072,7 @@ flow_dv_translate_item_tag(struct rte_eth_dev *dev, void *key, else reg = flow_hw_get_reg_id(RTE_FLOW_ITEM_TYPE_TAG, index); MLX5_ASSERT(reg > 0); - flow_dv_match_meta_reg(key, reg, tag_v->data, tag_m->data); + flow_dv_match_meta_reg(key, (enum modify_reg)reg, tag_v->data, tag_m->data); } /** @@ -10639,6 +10715,22 @@ flow_dv_translate_item_meter_color(struct rte_eth_dev *dev, void *key, flow_dv_match_meta_reg(key, (enum modify_reg)reg, value, mask); } +static void +flow_dv_translate_item_phy_affinity(void *key, + const struct rte_flow_item *item, + uint32_t key_type) +{ + const struct rte_flow_item_phy_affinity *affinity_v; + const struct rte_flow_item_phy_affinity *affinity_m; + void *misc_v; + + MLX5_ITEM_UPDATE(item, key_type, affinity_v, affinity_m, + &rte_flow_item_phy_affinity_mask); + misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters); + MLX5_SET(fte_match_set_misc, misc_v, lag_rx_port_affinity, + affinity_v->affinity & affinity_m->affinity); +} + static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 }; #define HEADER_IS_ZERO(match_criteria, headers) \ @@ -13430,6 +13522,10 @@ flow_dv_translate_items(struct rte_eth_dev *dev, last_item = flow_dv_translate_item_integrity(items, wks, key_type); break; + case RTE_FLOW_ITEM_TYPE_PHY_AFFINITY: + flow_dv_translate_item_phy_affinity(key, items, key_type); + last_item = MLX5_FLOW_ITEM_PHY_AFFINITY; + 
break; default: break; } diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c index 20c71ff7f0..e5ca86ca4b 100644 --- a/drivers/net/mlx5/mlx5_flow_hw.c +++ b/drivers/net/mlx5/mlx5_flow_hw.c @@ -4715,6 +4715,20 @@ flow_hw_pattern_validate(struct rte_eth_dev *dev, "Unsupported meter color register"); break; } + case RTE_FLOW_ITEM_TYPE_PHY_AFFINITY: + { + if (!priv->sh->lag_rx_port_affinity_en) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, NULL, + "Unsupported phy affinity with Older FW"); + if ((attr->transfer && priv->fdb_def_rule) || attr->egress) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, NULL, + "Phy affinity item not supported" + " with egress or transfer" + " attribute"); + break; + } case RTE_FLOW_ITEM_TYPE_VOID: case RTE_FLOW_ITEM_TYPE_ETH: case RTE_FLOW_ITEM_TYPE_VLAN: -- 2.18.1 ^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 3/3] drivers: enhance the Tx queue affinity 2023-02-03 5:21 [PATCH 0/3] add new PHY affinity support in MLX5 PMD Jiawei Wang 2023-02-03 5:21 ` [PATCH 1/3] drivers: add lag Rx port affinity in PRM Jiawei Wang 2023-02-03 5:21 ` [PATCH 2/3] net/mlx5: add PHY affinity item support Jiawei Wang @ 2023-02-03 5:21 ` Jiawei Wang 2023-02-22 12:26 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Jiawei Wang 3 siblings, 0 replies; 9+ messages in thread From: Jiawei Wang @ 2023-02-03 5:21 UTC (permalink / raw) To: viacheslavo, orika, Matan Azrad; +Cc: dev, rasland Previous patch supports the tx phy affinity configuration in the Tx queue API, it supports to set the affinity value per Queue. This patch updates TIS creation with tx_phy_affinity value of Tx queue, TIS index 1 goes to hardware port 0, TIS index 2 goes to hardware port 1, and TIS index 0 is reserved for default HWS hash mode. Signed-off-by: Jiawei Wang <jiaweiw@nvidia.com> --- drivers/common/mlx5/mlx5_prm.h | 8 ------- drivers/net/mlx5/mlx5.c | 43 +++++++++++++++------------------- drivers/net/mlx5/mlx5_devx.c | 24 ++++++++++--------- drivers/net/mlx5/mlx5_ethdev.c | 1 + drivers/net/mlx5/mlx5_tx.h | 1 + drivers/net/mlx5/mlx5_txq.c | 8 +++++++ 6 files changed, 42 insertions(+), 43 deletions(-) diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h index 8bbb800206..ded001d0b2 100644 --- a/drivers/common/mlx5/mlx5_prm.h +++ b/drivers/common/mlx5/mlx5_prm.h @@ -2331,14 +2331,6 @@ struct mlx5_ifc_query_nic_vport_context_in_bits { u8 reserved_at_68[0x18]; }; -/* - * lag_tx_port_affinity: 0 auto-selection, 1 PF1, 2 PF2 vice versa. - * Each TIS binds to one PF by setting lag_tx_port_affinity (>0). - * Once LAG enabled, we create multiple TISs and bind each one to - * different PFs, then TIS[i] gets affinity i+1 and goes to PF i+1. - */ -#define MLX5_IFC_LAG_MAP_TIS_AFFINITY(index, num) ((num) ? 
\ - (index) % (num) + 1 : 0) struct mlx5_ifc_tisc_bits { u8 strict_lag_tx_port_affinity[0x1]; u8 reserved_at_1[0x3]; diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index b8643cebdd..c75c98b8b0 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -1162,9 +1162,9 @@ mlx5_dev_ctx_shared_mempool_subscribe(struct rte_eth_dev *dev) static int mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh) { - int i; struct mlx5_devx_lag_context lag_ctx = { 0 }; struct mlx5_devx_tis_attr tis_attr = { 0 }; + int i; tis_attr.transport_domain = sh->td->id; if (sh->bond.n_port) { @@ -1178,35 +1178,30 @@ mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh) DRV_LOG(ERR, "Failed to query lag affinity."); return -1; } - if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) { - for (i = 0; i < sh->bond.n_port; i++) { - tis_attr.lag_tx_port_affinity = - MLX5_IFC_LAG_MAP_TIS_AFFINITY(i, - sh->bond.n_port); - sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, - &tis_attr); - if (!sh->tis[i]) { - DRV_LOG(ERR, "Failed to TIS %d/%d for bonding device" - " %s.", i, sh->bond.n_port, - sh->ibdev_name); - return -1; - } - } + if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) DRV_LOG(DEBUG, "LAG number of ports : %d, affinity_1 & 2 : pf%d & %d.\n", sh->bond.n_port, lag_ctx.tx_remap_affinity_1, lag_ctx.tx_remap_affinity_2); - return 0; - } - if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH) + else if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH) DRV_LOG(INFO, "Device %s enabled HW hash based LAG.", sh->ibdev_name); } - tis_attr.lag_tx_port_affinity = 0; - sh->tis[0] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr); - if (!sh->tis[0]) { - DRV_LOG(ERR, "Failed to TIS 0 for bonding device" - " %s.", sh->ibdev_name); - return -1; + for (i = 0; i <= sh->bond.n_port; i++) { + /* + * lag_tx_port_affinity: 0 auto-selection, 1 PF1, 2 PF2 vice versa. + * Each TIS binds to one PF by setting lag_tx_port_affinity (> 0). 
+ * Once LAG enabled, we create multiple TISs and bind each one to + * different PFs, then TIS[i+1] gets affinity i+1 and goes to PF i+1. + * TIS[0] is reserved for HW Hash mode. + */ + tis_attr.lag_tx_port_affinity = i; + sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr); + if (!sh->tis[i]) { + DRV_LOG(ERR, "Failed to create TIS %d/%d for [bonding] device" + " %s.", i, sh->bond.n_port, + sh->ibdev_name); + return -1; + } } return 0; } diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c index f6e1943fd7..a3fe0b3b98 100644 --- a/drivers/net/mlx5/mlx5_devx.c +++ b/drivers/net/mlx5/mlx5_devx.c @@ -1190,17 +1190,19 @@ static uint32_t mlx5_get_txq_tis_num(struct rte_eth_dev *dev, uint16_t queue_idx) { struct mlx5_priv *priv = dev->data->dev_private; - int tis_idx; - - if (priv->sh->bond.n_port && priv->sh->lag.affinity_mode == - MLX5_LAG_MODE_TIS) { - tis_idx = (priv->lag_affinity_idx + queue_idx) % - priv->sh->bond.n_port; - DRV_LOG(INFO, "port %d txq %d gets affinity %d and maps to PF %d.", - dev->data->port_id, queue_idx, tis_idx + 1, - priv->sh->lag.tx_remap_affinity[tis_idx]); - } else { - tis_idx = 0; + struct mlx5_txq_data *txq_data = (*priv->txqs)[queue_idx]; + int tis_idx = 0; + + if (priv->sh->bond.n_port) { + if (txq_data->tx_phy_affinity) { + tis_idx = txq_data->tx_phy_affinity; + } else if (priv->sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) { + tis_idx = (priv->lag_affinity_idx + queue_idx) % + priv->sh->bond.n_port + 1; + DRV_LOG(INFO, "port %d txq %d gets affinity %d and maps to PF %d.", + dev->data->port_id, queue_idx, tis_idx, + priv->sh->lag.tx_remap_affinity[tis_idx - 1]); + } } MLX5_ASSERT(priv->sh->tis[tis_idx]); return priv->sh->tis[tis_idx]->id; diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index 4a85415ff3..354bb4f022 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -352,6 +352,7 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *info) info->switch_info.domain_id = priv->domain_id; info->switch_info.port_id = priv->representor_id; info->switch_info.rx_domain = 0; /* No sub Rx domains. */ + info->nb_phy_ports = priv->sh->bond.n_port; if (priv->representor) { uint16_t port_id; diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h index a056be7ca8..674c2aebe5 100644 --- a/drivers/net/mlx5/mlx5_tx.h +++ b/drivers/net/mlx5/mlx5_tx.h @@ -144,6 +144,7 @@ struct mlx5_txq_data { uint16_t inlen_send; /* Ordinary send data inline size. */ uint16_t inlen_empw; /* eMPW max packet size to inline. */ uint16_t inlen_mode; /* Minimal data length to inline. */ + uint8_t tx_phy_affinity; /* TxQ affinity configuration. */ uint32_t qp_num_8s; /* QP number shifted by 8. */ uint64_t offloads; /* Offloads for Tx Queue. */ struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */ diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c index 5543f2c570..4e53706807 100644 --- a/drivers/net/mlx5/mlx5_txq.c +++ b/drivers/net/mlx5/mlx5_txq.c @@ -392,6 +392,13 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, container_of(txq, struct mlx5_txq_ctrl, txq); int res; + if (conf->tx_phy_affinity > priv->num_lag_ports) { + rte_errno = EINVAL; + DRV_LOG(ERR, "port %u unable to setup Tx queue index %u" + " affinity is %u exceeds the maximum %u", dev->data->port_id, + idx, conf->tx_phy_affinity, priv->num_lag_ports); + return -rte_errno; + } res = mlx5_tx_queue_pre_setup(dev, idx, &desc); if (res) return res; @@ -1095,6 +1102,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, tmpl->txq.elts_m = desc - 1; tmpl->txq.port_id = dev->data->port_id; tmpl->txq.idx = idx; + tmpl->txq.tx_phy_affinity = conf->tx_phy_affinity; txq_set_params(tmpl); if (txq_adjust_params(tmpl)) goto error; -- 2.18.1 ^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD 2023-02-03 5:21 [PATCH 0/3] add new PHY affinity support in MLX5 PMD Jiawei Wang ` (2 preceding siblings ...) 2023-02-03 5:21 ` [PATCH 3/3] drivers: enhance the Tx queue affinity Jiawei Wang @ 2023-02-22 12:26 ` Jiawei Wang 2023-02-22 12:26 ` [PATCH v2 1/3] net/mlx5: add lag Rx port affinity in PRM Jiawei Wang ` (3 more replies) 3 siblings, 4 replies; 9+ messages in thread From: Jiawei Wang @ 2023-02-22 12:26 UTC (permalink / raw) To: viacheslavo, orika; +Cc: dev, rasland This patch series implements RTE_FLOW_ITEM_TYPE_AGGR_AFFINITY support in the MLX5 PMD. It also adds MLX5 PMD support for two device ops: - map_aggr_tx_affinity - count_aggr_ports This patch series relies on the preceding RTE API: http://patches.dpdk.org/project/dpdk/list/?series=27064 v2: * update the PMD code based on the new RTE API. Jiawei Wang (3): net/mlx5: add lag Rx port affinity in PRM net/mlx5: add aggregated affinity item support net/mlx5: enhance the Tx queue affinity doc/guides/nics/features/default.ini | 1 + doc/guides/nics/features/mlx5.ini | 1 + doc/guides/nics/mlx5.rst | 6 ++ drivers/common/mlx5/mlx5_devx_cmds.c | 3 + drivers/common/mlx5/mlx5_devx_cmds.h | 1 + drivers/common/mlx5/mlx5_prm.h | 15 ++-- drivers/net/mlx5/linux/mlx5_os.c | 6 ++ drivers/net/mlx5/mlx5.c | 49 ++++++------- drivers/net/mlx5/mlx5.h | 3 + drivers/net/mlx5/mlx5_devx.c | 24 ++++--- drivers/net/mlx5/mlx5_flow.h | 3 + drivers/net/mlx5/mlx5_flow_dv.c | 100 ++++++++++++++++++++++++++- drivers/net/mlx5/mlx5_flow_hw.c | 14 ++++ drivers/net/mlx5/mlx5_tx.h | 4 ++ drivers/net/mlx5/mlx5_txq.c | 38 ++++++++++ 15 files changed, 221 insertions(+), 47 deletions(-) -- 2.18.1 ^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v2 1/3] net/mlx5: add lag Rx port affinity in PRM 2023-02-22 12:26 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Jiawei Wang @ 2023-02-22 12:26 ` Jiawei Wang 2023-02-22 12:26 ` [PATCH v2 2/3] net/mlx5: add aggregated affinity item support Jiawei Wang ` (2 subsequent siblings) 3 siblings, 0 replies; 9+ messages in thread From: Jiawei Wang @ 2023-02-22 12:26 UTC (permalink / raw) To: viacheslavo, orika, Matan Azrad; +Cc: dev, rasland This patch adds function to query hca capability via Devx for lag_rx_port_affinity. Signed-off-by: Jiawei Wang <jiaweiw@nvidia.com> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com> --- drivers/common/mlx5/mlx5_devx_cmds.c | 3 +++ drivers/common/mlx5/mlx5_devx_cmds.h | 1 + drivers/common/mlx5/mlx5_prm.h | 7 +++++-- drivers/net/mlx5/linux/mlx5_os.c | 4 ++++ drivers/net/mlx5/mlx5.h | 2 ++ 5 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c index fb670950ef..bfc6e09eac 100644 --- a/drivers/common/mlx5/mlx5_devx_cmds.c +++ b/drivers/common/mlx5/mlx5_devx_cmds.c @@ -1163,6 +1163,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx, attr->outer_ipv4_ihl = MLX5_GET (flow_table_nic_cap, hcattr, ft_field_support_2_nic_receive.outer_ipv4_ihl); + attr->lag_rx_port_affinity = MLX5_GET + (flow_table_nic_cap, hcattr, + ft_field_support_2_nic_receive.lag_rx_port_affinity); /* Query HCA offloads for Ethernet protocol. */ hcattr = mlx5_devx_get_hca_cap(ctx, in, out, &rc, MLX5_GET_HCA_CAP_OP_MOD_ETHERNET_OFFLOAD_CAPS | diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h index 6b5a6b8667..8e68eeaf37 100644 --- a/drivers/common/mlx5/mlx5_devx_cmds.h +++ b/drivers/common/mlx5/mlx5_devx_cmds.h @@ -293,6 +293,7 @@ struct mlx5_hca_attr { uint32_t flow_counter_access_aso:1; uint32_t flow_access_aso_opc_mod:8; uint32_t cross_vhca:1; + uint32_t lag_rx_port_affinity:1; }; /* LAG Context. 
*/ diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h index 613cc6face..26a1f0717d 100644 --- a/drivers/common/mlx5/mlx5_prm.h +++ b/drivers/common/mlx5/mlx5_prm.h @@ -906,7 +906,8 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 vxlan_vni[0x18]; u8 reserved_at_b8[0x8]; u8 geneve_vni[0x18]; - u8 reserved_at_e4[0x6]; + u8 lag_rx_port_affinity[0x4]; + u8 reserved_at_e8[0x2]; u8 geneve_tlv_option_0_exist[0x1]; u8 geneve_oam[0x1]; u8 reserved_at_e0[0xc]; @@ -2069,7 +2070,9 @@ struct mlx5_ifc_ft_fields_support_bits { * Table 1872 - Flow Table Fields Supported 2 Format */ struct mlx5_ifc_ft_fields_support_2_bits { - u8 reserved_at_0[0xd]; + u8 reserved_at_0[0xa]; + u8 lag_rx_port_affinity[0x1]; + u8 reserved_at_c[0x2]; u8 hash_result[0x1]; u8 reserved_at_e[0x1]; u8 tunnel_header_2_3[0x1]; diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index a71474c90a..60462da39d 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -1390,6 +1390,10 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, DRV_LOG(DEBUG, "DV flow is not supported!"); } #endif + if (hca_attr->lag_rx_port_affinity) { + sh->lag_rx_port_affinity_en = 1; + DRV_LOG(DEBUG, "LAG Rx Port Affinity enabled"); + } } /* Process parameters and store port configuration on priv structure. */ err = mlx5_port_args_config(priv, mkvlist, &priv->config); diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index a766fb408e..32797008c1 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -1386,6 +1386,8 @@ struct mlx5_dev_ctx_shared { uint32_t hws_tags:1; /* Check if tags info for HWS initialized. */ uint32_t shared_mark_enabled:1; /* If mark action is enabled on Rxqs (shared E-Switch domain). */ + uint32_t lag_rx_port_affinity_en:1; + /* lag_rx_port_affinity is supported. */ uint32_t hws_max_log_bulk_sz:5; /* Log of minimal HWS counters created hard coded. 
*/ uint32_t hws_max_nb_counters; /* Maximal number for HWS counters. */ -- 2.18.1 ^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v2 2/3] net/mlx5: add aggregated affinity item support 2023-02-22 12:26 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Jiawei Wang 2023-02-22 12:26 ` [PATCH v2 1/3] net/mlx5: add lag Rx port affinity in PRM Jiawei Wang @ 2023-02-22 12:26 ` Jiawei Wang 2023-02-22 12:26 ` [PATCH v2 3/3] net/mlx5: enhance the Tx queue affinity Jiawei Wang 2023-02-26 14:35 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Raslan Darawsheh 3 siblings, 0 replies; 9+ messages in thread From: Jiawei Wang @ 2023-02-22 12:26 UTC (permalink / raw) To: viacheslavo, orika, Ferruh Yigit, Matan Azrad; +Cc: dev, rasland This patch adds the new aggregated affinity item support in PMD: RTE_FLOW_ITEM_TYPE_AGGR_AFFINITY. This patch adds the validation function for the new item, it works for NIC-RX and FDB rule on ROOT-table only. Signed-off-by: Jiawei Wang <jiaweiw@nvidia.com> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com> --- doc/guides/nics/features/default.ini | 1 + doc/guides/nics/features/mlx5.ini | 1 + doc/guides/nics/mlx5.rst | 6 ++ drivers/net/mlx5/linux/mlx5_os.c | 2 + drivers/net/mlx5/mlx5.h | 1 + drivers/net/mlx5/mlx5_flow.h | 3 + drivers/net/mlx5/mlx5_flow_dv.c | 100 ++++++++++++++++++++++++++- drivers/net/mlx5/mlx5_flow_hw.c | 14 ++++ 8 files changed, 126 insertions(+), 2 deletions(-) diff --git a/doc/guides/nics/features/default.ini b/doc/guides/nics/features/default.ini index e249f62f31..0834bdc300 100644 --- a/doc/guides/nics/features/default.ini +++ b/doc/guides/nics/features/default.ini @@ -145,6 +145,7 @@ udp = vlan = vxlan = vxlan_gpe = +aggr_affinity = [rte_flow actions] age = diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini index c58e1934e9..7276e5bd1a 100644 --- a/doc/guides/nics/features/mlx5.ini +++ b/doc/guides/nics/features/mlx5.ini @@ -90,6 +90,7 @@ vlan = Y vxlan = Y vxlan_gpe = Y represented_port = Y +aggr_affinity = Y [rte_flow actions] age = I diff --git 
a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index 6510e74fb9..dbf0c7a4bc 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -107,6 +107,7 @@ Features - Sub-Function representors. - Sub-Function. - Matching on represented port. +- Matching on aggregated affinity. Limitations @@ -615,6 +616,11 @@ Limitations - The NIC egress flow rules on representor port are not supported. +- Match on aggregated affinity: + + - Supports NIC ingress flow in group 0. + - Supports E-Switch flow in group 0 and depends on + device-managed flow steering (DMFS) mode. Statistics ---------- diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 60462da39d..1c26b30702 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -1394,6 +1394,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, sh->lag_rx_port_affinity_en = 1; DRV_LOG(DEBUG, "LAG Rx Port Affinity enabled"); } + priv->num_lag_ports = hca_attr->num_lag_ports; + DRV_LOG(DEBUG, "The number of lag ports is %d", priv->num_lag_ports); } /* Process parameters and store port configuration on priv structure. */ err = mlx5_port_args_config(priv, mkvlist, &priv->config); diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 32797008c1..581179cecd 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -1681,6 +1681,7 @@ struct mlx5_priv { unsigned int mtr_reg_share:1; /* Whether support meter REG_C share. */ unsigned int lb_used:1; /* Loopback queue is referred to. */ uint32_t mark_enabled:1; /* If mark action is enabled on rxqs. */ + uint32_t num_lag_ports:4; /* Number of ports can be bonded. */ uint16_t domain_id; /* Switch domain identifier. */ uint16_t vport_id; /* Associated VF vport index (if any). */ uint32_t vport_meta_tag; /* Used for vport index match ove VF LAG. 
*/ diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index 4bef2296b8..3a72f4eb4d 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -223,6 +223,9 @@ enum mlx5_feature_name { #define MLX5_FLOW_ITEM_OUTER_IPV6_ROUTING_EXT (UINT64_C(1) << 45) #define MLX5_FLOW_ITEM_INNER_IPV6_ROUTING_EXT (UINT64_C(1) << 46) +/* Aggregated affinity item */ +#define MLX5_FLOW_ITEM_AGGR_AFFINITY (UINT64_C(1) << 49) + /* Outer Masks. */ #define MLX5_FLOW_LAYER_OUTER_L3 \ (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6) diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index f93dd4073c..94d944bade 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -3774,6 +3774,75 @@ flow_dv_validate_item_meter_color(struct rte_eth_dev *dev, return 0; } +/** + * Validate aggregated affinity item. + * + * @param[in] dev + * Pointer to the rte_eth_dev structure. + * @param[in] item + * Item specification. + * @param[in] attr + * Attributes of flow that includes this item. + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ +static int +flow_dv_validate_item_aggr_affinity(struct rte_eth_dev *dev, + const struct rte_flow_item *item, + const struct rte_flow_attr *attr, + struct rte_flow_error *error) +{ + struct mlx5_priv *priv = dev->data->dev_private; + const struct rte_flow_item_aggr_affinity *spec = item->spec; + const struct rte_flow_item_aggr_affinity *mask = item->mask; + struct rte_flow_item_aggr_affinity nic_mask = { + .affinity = UINT8_MAX + }; + int ret; + + if (!priv->sh->lag_rx_port_affinity_en) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, NULL, + "Unsupported aggregated affinity with Older FW"); + if ((attr->transfer && priv->fdb_def_rule) || + attr->egress || attr->group) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_SPEC, + item->spec, + "aggregated affinity is not supported with egress or FDB on non root table"); + if (!spec) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_SPEC, + item->spec, + "data cannot be empty"); + if (spec->affinity == 0) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_SPEC, + item->spec, + "zero affinity number not supported"); + if (spec->affinity > priv->num_lag_ports) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_SPEC, + item->spec, + "exceed max affinity number in lag ports"); + if (!mask) + mask = &rte_flow_item_aggr_affinity_mask; + if (!mask->affinity) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_SPEC, NULL, + "mask cannot be zero"); + ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, + (const uint8_t *)&nic_mask, + sizeof(struct rte_flow_item_aggr_affinity), + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); + if (ret < 0) + return ret; + return 0; +} + int flow_dv_encap_decap_match_cb(void *tool_ctx __rte_unused, struct mlx5_list_entry *entry, void *cb_ctx) @@ -7464,6 +7533,13 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, return ret; last_item = 
MLX5_FLOW_ITEM_METER_COLOR; break; + case RTE_FLOW_ITEM_TYPE_AGGR_AFFINITY: + ret = flow_dv_validate_item_aggr_affinity(dev, items, + attr, error); + if (ret < 0) + return ret; + last_item = MLX5_FLOW_ITEM_AGGR_AFFINITY; + break; default: return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, @@ -10002,7 +10078,7 @@ flow_dv_translate_item_tag(struct rte_eth_dev *dev, void *key, const struct rte_flow_item_tag *tag_vv = item->spec; const struct rte_flow_item_tag *tag_v; const struct rte_flow_item_tag *tag_m; - enum modify_reg reg; + int reg; uint32_t index; if (MLX5_ITEM_VALID(item, key_type)) @@ -10017,7 +10093,7 @@ flow_dv_translate_item_tag(struct rte_eth_dev *dev, void *key, else reg = flow_hw_get_reg_id(RTE_FLOW_ITEM_TYPE_TAG, index); MLX5_ASSERT(reg > 0); - flow_dv_match_meta_reg(key, reg, tag_v->data, tag_m->data); + flow_dv_match_meta_reg(key, (enum modify_reg)reg, tag_v->data, tag_m->data); } /** @@ -10719,6 +10795,22 @@ flow_dv_translate_item_meter_color(struct rte_eth_dev *dev, void *key, flow_dv_match_meta_reg(key, (enum modify_reg)reg, value, mask); } +static void +flow_dv_translate_item_aggr_affinity(void *key, + const struct rte_flow_item *item, + uint32_t key_type) +{ + const struct rte_flow_item_aggr_affinity *affinity_v; + const struct rte_flow_item_aggr_affinity *affinity_m; + void *misc_v; + + MLX5_ITEM_UPDATE(item, key_type, affinity_v, affinity_m, + &rte_flow_item_aggr_affinity_mask); + misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters); + MLX5_SET(fte_match_set_misc, misc_v, lag_rx_port_affinity, + affinity_v->affinity & affinity_m->affinity); +} + static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 }; #define HEADER_IS_ZERO(match_criteria, headers) \ @@ -13516,6 +13608,10 @@ flow_dv_translate_items(struct rte_eth_dev *dev, last_item = flow_dv_translate_item_integrity(items, wks, key_type); break; + case RTE_FLOW_ITEM_TYPE_AGGR_AFFINITY: + flow_dv_translate_item_aggr_affinity(key, items, key_type); + 
last_item = MLX5_FLOW_ITEM_AGGR_AFFINITY; + break; default: break; } diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c index a9c7045a3e..07766dd8f1 100644 --- a/drivers/net/mlx5/mlx5_flow_hw.c +++ b/drivers/net/mlx5/mlx5_flow_hw.c @@ -4723,6 +4723,20 @@ flow_hw_pattern_validate(struct rte_eth_dev *dev, "Unsupported meter color register"); break; } + case RTE_FLOW_ITEM_TYPE_AGGR_AFFINITY: + { + if (!priv->sh->lag_rx_port_affinity_en) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, NULL, + "Unsupported aggregated affinity with Older FW"); + if ((attr->transfer && priv->fdb_def_rule) || attr->egress) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, NULL, + "Aggregated affinity item not supported" + " with egress or transfer" + " attribute"); + break; + } case RTE_FLOW_ITEM_TYPE_VOID: case RTE_FLOW_ITEM_TYPE_ETH: case RTE_FLOW_ITEM_TYPE_VLAN: -- 2.18.1 ^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v2 3/3] net/mlx5: enhance the Tx queue affinity 2023-02-22 12:26 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Jiawei Wang 2023-02-22 12:26 ` [PATCH v2 1/3] net/mlx5: add lag Rx port affinity in PRM Jiawei Wang 2023-02-22 12:26 ` [PATCH v2 2/3] net/mlx5: add aggregated affinity item support Jiawei Wang @ 2023-02-22 12:26 ` Jiawei Wang 2023-02-26 14:35 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Raslan Darawsheh 3 siblings, 0 replies; 9+ messages in thread From: Jiawei Wang @ 2023-02-22 12:26 UTC (permalink / raw) To: viacheslavo, orika, Matan Azrad; +Cc: dev, rasland rte_eth_dev_map_aggr_tx_affinity() was introduced in the ethdev library; it is used to set the affinity value per Tx queue. This patch adds MLX5 PMD support for two device ops: - map_aggr_tx_affinity - count_aggr_ports After the application maps a Tx queue to an aggregated port by calling map_aggr_tx_affinity() and starts sending traffic, the MLX5 PMD updates TIS creation with the tx_aggr_affinity value of the Tx queue. TIS index 1 goes to the first physical port, TIS index 2 goes to the second physical port, and so on; TIS index 0 is reserved for the default HW hash mode. Signed-off-by: Jiawei Wang <jiaweiw@nvidia.com> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com> --- drivers/common/mlx5/mlx5_prm.h | 8 ------ drivers/net/mlx5/mlx5.c | 49 +++++++++++++++++----------------- drivers/net/mlx5/mlx5_devx.c | 24 +++++++++-------- drivers/net/mlx5/mlx5_tx.h | 4 +++ drivers/net/mlx5/mlx5_txq.c | 38 ++++++++++++++++++++++++++ 5 files changed, 80 insertions(+), 43 deletions(-) diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h index 26a1f0717d..2f5aeecaa9 100644 --- a/drivers/common/mlx5/mlx5_prm.h +++ b/drivers/common/mlx5/mlx5_prm.h @@ -2363,14 +2363,6 @@ struct mlx5_ifc_query_nic_vport_context_in_bits { u8 reserved_at_68[0x18]; }; -/* - * lag_tx_port_affinity: 0 auto-selection, 1 PF1, 2 PF2 vice versa. 
- * Each TIS binds to one PF by setting lag_tx_port_affinity (>0). - * Once LAG enabled, we create multiple TISs and bind each one to - * different PFs, then TIS[i] gets affinity i+1 and goes to PF i+1. - */ -#define MLX5_IFC_LAG_MAP_TIS_AFFINITY(index, num) ((num) ? \ - (index) % (num) + 1 : 0) struct mlx5_ifc_tisc_bits { u8 strict_lag_tx_port_affinity[0x1]; u8 reserved_at_1[0x3]; diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index f55c1caca0..8c8f71d508 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -1257,9 +1257,9 @@ mlx5_dev_ctx_shared_mempool_subscribe(struct rte_eth_dev *dev) static int mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh) { - int i; struct mlx5_devx_lag_context lag_ctx = { 0 }; struct mlx5_devx_tis_attr tis_attr = { 0 }; + int i; tis_attr.transport_domain = sh->td->id; if (sh->bond.n_port) { @@ -1273,35 +1273,30 @@ mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh) DRV_LOG(ERR, "Failed to query lag affinity."); return -1; } - if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) { - for (i = 0; i < sh->bond.n_port; i++) { - tis_attr.lag_tx_port_affinity = - MLX5_IFC_LAG_MAP_TIS_AFFINITY(i, - sh->bond.n_port); - sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, - &tis_attr); - if (!sh->tis[i]) { - DRV_LOG(ERR, "Failed to TIS %d/%d for bonding device" - " %s.", i, sh->bond.n_port, - sh->ibdev_name); - return -1; - } - } + if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) DRV_LOG(DEBUG, "LAG number of ports : %d, affinity_1 & 2 : pf%d & %d.\n", sh->bond.n_port, lag_ctx.tx_remap_affinity_1, lag_ctx.tx_remap_affinity_2); - return 0; - } - if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH) + else if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH) DRV_LOG(INFO, "Device %s enabled HW hash based LAG.", sh->ibdev_name); } - tis_attr.lag_tx_port_affinity = 0; - sh->tis[0] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr); - if (!sh->tis[0]) { - DRV_LOG(ERR, "Failed to TIS 0 for bonding device" - " %s.", sh->ibdev_name); - return 
-1; + for (i = 0; i <= sh->bond.n_port; i++) { + /* + * lag_tx_port_affinity: 0 auto-selection, 1 PF1, 2 PF2 vice versa. + * Each TIS binds to one PF by setting lag_tx_port_affinity (> 0). + * Once LAG enabled, we create multiple TISs and bind each one to + * different PFs, then TIS[i+1] gets affinity i+1 and goes to PF i+1. + * TIS[0] is reserved for HW Hash mode. + */ + tis_attr.lag_tx_port_affinity = i; + sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr); + if (!sh->tis[i]) { + DRV_LOG(ERR, "Failed to create TIS %d/%d for [bonding] device" + " %s.", i, sh->bond.n_port, + sh->ibdev_name); + return -1; + } } return 0; } @@ -2335,6 +2330,8 @@ const struct eth_dev_ops mlx5_dev_ops = { .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind, .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind, .get_monitor_addr = mlx5_get_monitor_addr, + .count_aggr_ports = mlx5_count_aggr_ports, + .map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity, }; /* Available operations from secondary process. */ @@ -2358,6 +2355,8 @@ const struct eth_dev_ops mlx5_dev_sec_ops = { .tx_burst_mode_get = mlx5_tx_burst_mode_get, .get_module_info = mlx5_get_module_info, .get_module_eeprom = mlx5_get_module_eeprom, + .count_aggr_ports = mlx5_count_aggr_ports, + .map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity, }; /* Available operations in flow isolated mode. 
*/ @@ -2422,6 +2421,8 @@ const struct eth_dev_ops mlx5_dev_ops_isolate = { .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind, .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind, .get_monitor_addr = mlx5_get_monitor_addr, + .count_aggr_ports = mlx5_count_aggr_ports, + .map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity, }; /** diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c index f6e1943fd7..d02cedb202 100644 --- a/drivers/net/mlx5/mlx5_devx.c +++ b/drivers/net/mlx5/mlx5_devx.c @@ -1190,17 +1190,19 @@ static uint32_t mlx5_get_txq_tis_num(struct rte_eth_dev *dev, uint16_t queue_idx) { struct mlx5_priv *priv = dev->data->dev_private; - int tis_idx; - - if (priv->sh->bond.n_port && priv->sh->lag.affinity_mode == - MLX5_LAG_MODE_TIS) { - tis_idx = (priv->lag_affinity_idx + queue_idx) % - priv->sh->bond.n_port; - DRV_LOG(INFO, "port %d txq %d gets affinity %d and maps to PF %d.", - dev->data->port_id, queue_idx, tis_idx + 1, - priv->sh->lag.tx_remap_affinity[tis_idx]); - } else { - tis_idx = 0; + struct mlx5_txq_data *txq_data = (*priv->txqs)[queue_idx]; + int tis_idx = 0; + + if (priv->sh->bond.n_port) { + if (txq_data->tx_aggr_affinity) { + tis_idx = txq_data->tx_aggr_affinity; + } else if (priv->sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) { + tis_idx = (priv->lag_affinity_idx + queue_idx) % + priv->sh->bond.n_port + 1; + DRV_LOG(INFO, "port %d txq %d gets affinity %d and maps to PF %d.", + dev->data->port_id, queue_idx, tis_idx, + priv->sh->lag.tx_remap_affinity[tis_idx - 1]); + } } MLX5_ASSERT(priv->sh->tis[tis_idx]); return priv->sh->tis[tis_idx]->id; diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h index a056be7ca8..d0c6303a2d 100644 --- a/drivers/net/mlx5/mlx5_tx.h +++ b/drivers/net/mlx5/mlx5_tx.h @@ -144,6 +144,7 @@ struct mlx5_txq_data { uint16_t inlen_send; /* Ordinary send data inline size. */ uint16_t inlen_empw; /* eMPW max packet size to inline. 
*/ uint16_t inlen_mode; /* Minimal data length to inline. */ + uint8_t tx_aggr_affinity; /* TxQ affinity configuration. */ uint32_t qp_num_8s; /* QP number shifted by 8. */ uint64_t offloads; /* Offloads for Tx Queue. */ struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */ @@ -218,6 +219,9 @@ void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl); void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl); uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev); void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev); +int mlx5_count_aggr_ports(struct rte_eth_dev *dev); +int mlx5_map_aggr_tx_affinity(struct rte_eth_dev *dev, uint16_t tx_queue_id, + uint8_t affinity); /* mlx5_tx.c */ diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c index 419e913559..1e0e61a620 100644 --- a/drivers/net/mlx5/mlx5_txq.c +++ b/drivers/net/mlx5/mlx5_txq.c @@ -1365,3 +1365,41 @@ mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev) ts_mask : 0; } } + +int mlx5_count_aggr_ports(struct rte_eth_dev *dev) +{ + struct mlx5_priv *priv = dev->data->dev_private; + + return priv->sh->bond.n_port; +} + +int mlx5_map_aggr_tx_affinity(struct rte_eth_dev *dev, uint16_t tx_queue_id, + uint8_t affinity) +{ + struct mlx5_txq_ctrl *txq_ctrl; + struct mlx5_txq_data *txq; + struct mlx5_priv *priv; + + priv = dev->data->dev_private; + txq = (*priv->txqs)[tx_queue_id]; + if (!txq) + return -1; + txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); + if (tx_queue_id >= priv->txqs_n) { + DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)", + dev->data->port_id, tx_queue_id, priv->txqs_n); + rte_errno = EOVERFLOW; + return -rte_errno; + } + if (affinity > priv->num_lag_ports) { + DRV_LOG(ERR, "port %u unable to setup Tx queue index %u" + " affinity is %u exceeds the maximum %u", dev->data->port_id, + tx_queue_id, affinity, priv->num_lag_ports); + rte_errno = EINVAL; + return -rte_errno; + } + DRV_LOG(DEBUG, "port %u configuring queue %u for aggregated affinity %u", + 
dev->data->port_id, tx_queue_id, affinity); + txq_ctrl->txq.tx_aggr_affinity = affinity; + return 0; +} -- 2.18.1 ^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD 2023-02-22 12:26 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Jiawei Wang ` (2 preceding siblings ...) 2023-02-22 12:26 ` [PATCH v2 3/3] net/mlx5: enhance the Tx queue affinity Jiawei Wang @ 2023-02-26 14:35 ` Raslan Darawsheh 3 siblings, 0 replies; 9+ messages in thread From: Raslan Darawsheh @ 2023-02-26 14:35 UTC (permalink / raw) To: Jiawei(Jonny) Wang, Slava Ovsiienko, Ori Kam; +Cc: dev Hi, > -----Original Message----- > From: Jiawei(Jonny) Wang <jiaweiw@nvidia.com> > Sent: Wednesday, February 22, 2023 2:26 PM > To: Slava Ovsiienko <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com> > Cc: dev@dpdk.org; Raslan Darawsheh <rasland@nvidia.com> > Subject: [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 > PMD > > This patch implements RTE_FLOW_ITEM_TYPE_AGGR_AFFINITY support in > MLX5 PMD. > > This patch adds the MLX5 PMD support for two device ops: > - map_aggr_tx_affinity > - count_aggr_ports > > This patch series relies on the preceding RTE API: > http://patches.dpdk.org/project/dpdk/list/?series=27064 > > v2: > * update the PMD code based on the new RTE API. 
> > Jiawei Wang (3): > net/mlx5: add lag Rx port affinity in PRM > net/mlx5: add aggregated affinity item support > net/mlx5: enhance the Tx queue affinity > > doc/guides/nics/features/default.ini | 1 + > doc/guides/nics/features/mlx5.ini | 1 + > doc/guides/nics/mlx5.rst | 6 ++ > drivers/common/mlx5/mlx5_devx_cmds.c | 3 + > drivers/common/mlx5/mlx5_devx_cmds.h | 1 + > drivers/common/mlx5/mlx5_prm.h | 15 ++-- > drivers/net/mlx5/linux/mlx5_os.c | 6 ++ > drivers/net/mlx5/mlx5.c | 49 ++++++------- > drivers/net/mlx5/mlx5.h | 3 + > drivers/net/mlx5/mlx5_devx.c | 24 ++++--- > drivers/net/mlx5/mlx5_flow.h | 3 + > drivers/net/mlx5/mlx5_flow_dv.c | 100 > ++++++++++++++++++++++++++- > drivers/net/mlx5/mlx5_flow_hw.c | 14 ++++ > drivers/net/mlx5/mlx5_tx.h | 4 ++ > drivers/net/mlx5/mlx5_txq.c | 38 ++++++++++ > 15 files changed, 221 insertions(+), 47 deletions(-) > > -- > 2.18.1 Series applied to next-net-mlx, Kindest regards, Raslan Darawsheh ^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2023-02-26 14:35 UTC | newest] Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2023-02-03 5:21 [PATCH 0/3] add new PHY affinity support in MLX5 PMD Jiawei Wang 2023-02-03 5:21 ` [PATCH 1/3] drivers: add lag Rx port affinity in PRM Jiawei Wang 2023-02-03 5:21 ` [PATCH 2/3] net/mlx5: add PHY affinity item support Jiawei Wang 2023-02-03 5:21 ` [PATCH 3/3] drivers: enhance the Tx queue affinity Jiawei Wang 2023-02-22 12:26 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Jiawei Wang 2023-02-22 12:26 ` [PATCH v2 1/3] net/mlx5: add lag Rx port affinity in PRM Jiawei Wang 2023-02-22 12:26 ` [PATCH v2 2/3] net/mlx5: add aggregated affinity item support Jiawei Wang 2023-02-22 12:26 ` [PATCH v2 3/3] net/mlx5: enhance the Tx queue affinity Jiawei Wang 2023-02-26 14:35 ` [PATCH v2 0/3] Add Tx queue mapping of aggregated ports in MLX5 PMD Raslan Darawsheh
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).