DPDK patches and discussions
 help / color / mirror / Atom feed
From: Xueming Li <xuemingl@nvidia.com>
To: <dev@dpdk.org>
Cc: <xuemingl@nvidia.com>, Matan Azrad <matan@nvidia.com>,
	"Viacheslav Ovsiienko" <viacheslavo@nvidia.com>
Subject: [dpdk-dev] [PATCH 8/8] net/mlx5: enable DevX Tx queue creation
Date: Mon, 27 Sep 2021 16:32:56 +0800	[thread overview]
Message-ID: <20210927083256.337450-9-xuemingl@nvidia.com> (raw)
In-Reply-To: <20210927083256.337450-1-xuemingl@nvidia.com>

By design, the Verbs API cannot support port numbers larger than 255.
To support more representors on a single Verbs device, the DevX API
must be enabled.

DevX SQ was disabled since all representors need an FDB default miss
flow to redirect packets sent from the CPU to the peer port (SF, VF or
HPF).

The kernel creates the representor default miss flow automatically for
a Verbs QP. For a DevX SQ, the PMD must create it manually.

The default miss root flow matches the esw-manager vport and sqn. Since
root table flows are created through the kernel, the vport redirect
action is not supported there, so split the default miss flow into:
1. per eswitch FDB root flow that matches ESW manager vport ID, jump to
   group 1.
2. per sq FDB flow in group 1 that matches ESW manager vport ID and sqn,
   redirect packet to peer vport.

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_os.c | 62 +-------------------------
 drivers/net/mlx5/mlx5.h          |  2 +
 drivers/net/mlx5/mlx5_devx.c     | 10 ++---
 drivers/net/mlx5/mlx5_devx.h     |  2 +
 drivers/net/mlx5/mlx5_flow.c     | 74 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_trigger.c  | 11 ++++-
 6 files changed, 94 insertions(+), 67 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index e9256ad5245..bcf040a8524 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -697,56 +697,6 @@ mlx5_init_once(void)
 	return ret;
 }
 
-/**
- * Create the Tx queue DevX/Verbs object.
- *
- * @param dev
- *   Pointer to Ethernet device.
- * @param idx
- *   Queue index in DPDK Tx queue array.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_os_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx)
-{
-	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
-	struct mlx5_txq_ctrl *txq_ctrl =
-			container_of(txq_data, struct mlx5_txq_ctrl, txq);
-
-	if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN)
-		return mlx5_txq_devx_obj_new(dev, idx);
-#ifdef HAVE_MLX5DV_DEVX_UAR_OFFSET
-	if (!priv->config.dv_esw_en)
-		return mlx5_txq_devx_obj_new(dev, idx);
-#endif
-	return mlx5_txq_ibv_obj_new(dev, idx);
-}
-
-/**
- * Release an Tx DevX/verbs queue object.
- *
- * @param txq_obj
- *   DevX/Verbs Tx queue object.
- */
-static void
-mlx5_os_txq_obj_release(struct mlx5_txq_obj *txq_obj)
-{
-	if (txq_obj->txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN) {
-		mlx5_txq_devx_obj_release(txq_obj);
-		return;
-	}
-#ifdef HAVE_MLX5DV_DEVX_UAR_OFFSET
-	if (!txq_obj->txq_ctrl->priv->config.dv_esw_en) {
-		mlx5_txq_devx_obj_release(txq_obj);
-		return;
-	}
-#endif
-	mlx5_txq_ibv_obj_release(txq_obj);
-}
-
 /**
  * DV flow counter mode detect and config.
  *
@@ -1812,16 +1762,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 						ibv_obj_ops.drop_action_create;
 		priv->obj_ops.drop_action_destroy =
 						ibv_obj_ops.drop_action_destroy;
-#ifndef HAVE_MLX5DV_DEVX_UAR_OFFSET
-		priv->obj_ops.txq_obj_modify = ibv_obj_ops.txq_obj_modify;
-#else
-		if (config->dv_esw_en)
-			priv->obj_ops.txq_obj_modify =
-						ibv_obj_ops.txq_obj_modify;
-#endif
-		/* Use specific wrappers for Tx object. */
-		priv->obj_ops.txq_obj_new = mlx5_os_txq_obj_new;
-		priv->obj_ops.txq_obj_release = mlx5_os_txq_obj_release;
 		mlx5_queue_counter_id_prepare(eth_dev);
 		priv->obj_ops.lb_dummy_queue_create =
 					mlx5_rxq_ibv_obj_dummy_lb_create;
@@ -1832,7 +1772,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	}
 	if (config->tx_pp &&
 	    (priv->config.dv_esw_en ||
-	     priv->obj_ops.txq_obj_new != mlx5_os_txq_obj_new)) {
+	     priv->obj_ops.txq_obj_new != mlx5_txq_devx_obj_new)) {
 		/*
 		 * HAVE_MLX5DV_DEVX_UAR_OFFSET is required to support
 		 * packet pacing and already checked above.
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index e02714e2319..63737a1dafe 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1698,6 +1698,8 @@ int mlx5_ctrl_flow(struct rte_eth_dev *dev,
 		   struct rte_flow_item_eth *eth_mask);
 int mlx5_flow_lacp_miss(struct rte_eth_dev *dev);
 struct rte_flow *mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev);
+uint32_t mlx5_flow_create_devx_sq_miss_flow(struct rte_eth_dev *dev,
+					    uint32_t txq);
 void mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
 				       uint64_t async_id, int status);
 void mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh);
diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index a1db53577a2..a49602cb957 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -102,9 +102,9 @@ mlx5_devx_modify_rq(struct mlx5_rxq_obj *rxq_obj, uint8_t type)
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-mlx5_devx_modify_sq(struct mlx5_txq_obj *obj, enum mlx5_txq_modify_type type,
-		    uint8_t dev_port)
+int
+mlx5_txq_devx_modify(struct mlx5_txq_obj *obj, enum mlx5_txq_modify_type type,
+		     uint8_t dev_port)
 {
 	struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
 	int ret;
@@ -1118,7 +1118,7 @@ mlx5_txq_devx_obj_new(struct rte_eth_dev *dev, uint16_t idx)
 	*txq_data->qp_db = 0;
 	txq_data->qp_num_8s = txq_obj->sq_obj.sq->id << 8;
 	/* Change Send Queue state to Ready-to-Send. */
-	ret = mlx5_devx_modify_sq(txq_obj, MLX5_TXQ_MOD_RST2RDY, 0);
+	ret = mlx5_txq_devx_modify(txq_obj, MLX5_TXQ_MOD_RST2RDY, 0);
 	if (ret) {
 		rte_errno = errno;
 		DRV_LOG(ERR,
@@ -1187,7 +1187,7 @@ struct mlx5_obj_ops devx_obj_ops = {
 	.drop_action_create = mlx5_devx_drop_action_create,
 	.drop_action_destroy = mlx5_devx_drop_action_destroy,
 	.txq_obj_new = mlx5_txq_devx_obj_new,
-	.txq_obj_modify = mlx5_devx_modify_sq,
+	.txq_obj_modify = mlx5_txq_devx_modify,
 	.txq_obj_release = mlx5_txq_devx_obj_release,
 	.lb_dummy_queue_create = NULL,
 	.lb_dummy_queue_release = NULL,
diff --git a/drivers/net/mlx5/mlx5_devx.h b/drivers/net/mlx5/mlx5_devx.h
index bc8a8d6b73c..a95207a6b9a 100644
--- a/drivers/net/mlx5/mlx5_devx.h
+++ b/drivers/net/mlx5/mlx5_devx.h
@@ -8,6 +8,8 @@
 #include "mlx5.h"
 
 int mlx5_txq_devx_obj_new(struct rte_eth_dev *dev, uint16_t idx);
+int mlx5_txq_devx_modify(struct mlx5_txq_obj *obj,
+			 enum mlx5_txq_modify_type type, uint8_t dev_port);
 void mlx5_txq_devx_obj_release(struct mlx5_txq_obj *txq_obj);
 
 extern struct mlx5_obj_ops devx_obj_ops;
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 8dc79340f2d..71933e03772 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -6571,6 +6571,80 @@ mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
 						   actions, false, &error);
 }
 
+/**
+ * Create a dedicated flow rule on e-switch table 1, matches ESW manager
+ * and sq number, directs all packets to peer vport.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param txq
+ *   Txq index.
+ *
+ * @return
+ *   Flow ID on success, 0 otherwise and rte_errno is set.
+ */
+uint32_t
+mlx5_flow_create_devx_sq_miss_flow(struct rte_eth_dev *dev, uint32_t txq)
+{
+	struct rte_flow_attr attr = {
+		.group = 0,
+		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
+		.ingress = 1,
+		.egress = 0,
+		.transfer = 1,
+	};
+	struct rte_flow_item_port_id port_spec = {
+		.id = MLX5_PORT_ESW_MGR,
+	};
+	struct mlx5_rte_flow_item_tx_queue txq_spec = {
+		.queue = txq,
+	};
+	struct rte_flow_item pattern[] = {
+		{
+			.type = RTE_FLOW_ITEM_TYPE_PORT_ID,
+			.spec = &port_spec,
+		},
+		{
+			.type = (enum rte_flow_item_type)
+				MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
+			.spec = &txq_spec,
+		},
+		{
+			.type = RTE_FLOW_ITEM_TYPE_END,
+		},
+	};
+	struct rte_flow_action_jump jump = {
+		.group = 1,
+	};
+	struct rte_flow_action_port_id port = {
+		.id = dev->data->port_id,
+	};
+	struct rte_flow_action actions[] = {
+		{
+			.type = RTE_FLOW_ACTION_TYPE_JUMP,
+			.conf = &jump,
+		},
+		{
+			.type = RTE_FLOW_ACTION_TYPE_END,
+		},
+	};
+	struct rte_flow_error error;
+
+	/*
+	 * Creates group 0, highest priority jump flow.
+	 * Matches txq to bypass kernel packets.
+	 */
+	if (flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern, actions,
+			     false, &error) == 0)
+		return 0;
+	/* Create group 1, lowest priority redirect flow for txq. */
+	attr.group = 1;
+	actions[0].conf = &port;
+	actions[0].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
+	return flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern,
+				actions, false, &error);
+}
+
 /**
  * Validate a flow supported by the NIC.
  *
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 54173bfacb2..42d8bb31128 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -1255,9 +1255,18 @@ mlx5_traffic_enable(struct rte_eth_dev *dev)
 				goto error;
 			}
 		}
+		if ((priv->representor || priv->master) &&
+		    priv->config.dv_esw_en) {
+			if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
+				DRV_LOG(ERR,
+					"Port %u Tx queue %u SQ create representor devx default miss rule failed.",
+					dev->data->port_id, i);
+				goto error;
+			}
+		}
 		mlx5_txq_release(dev, i);
 	}
-	if (priv->config.dv_esw_en && !priv->config.vf && !priv->config.sf) {
+	if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
 		if (mlx5_flow_create_esw_table_zero_flow(dev))
 			priv->fdb_def_rule = 1;
 		else
-- 
2.33.0


  parent reply	other threads:[~2021-09-27  8:34 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-27  8:32 [dpdk-dev] [PATCH 0/8] net/mlx5: support more than 255 representors Xueming Li
2021-09-27  8:32 ` [dpdk-dev] [PATCH 1/8] common/mlx5: add netlink API to get RDMA port state Xueming Li
2021-09-27  8:32 ` [dpdk-dev] [PATCH 2/8] net/mlx5: use netlink when IB port greater than 255 Xueming Li
2021-09-27  8:32 ` [dpdk-dev] [PATCH 3/8] net/mlx5: improve Verbs flow priority discover for scalable Xueming Li
2021-09-27  8:32 ` [dpdk-dev] [PATCH 4/8] net/mlx5: check DevX to support more Verb ports Xueming Li
2021-09-27  8:32 ` [dpdk-dev] [PATCH 5/8] net/mlx5: support flow item port of switch manager Xueming Li
2021-09-27  8:32 ` [dpdk-dev] [PATCH 6/8] net/mlx5: supports flow item of normal Tx queue Xueming Li
2021-09-27  8:32 ` [dpdk-dev] [PATCH 7/8] net/mlx5: fix internal root table flow priroity Xueming Li
2021-09-27  8:32 ` Xueming Li [this message]
2021-10-16  8:07 ` [dpdk-dev] [PATCH v2 0/8] net/mlx5: support more than 255 representors Xueming Li
2021-10-16  8:07   ` [dpdk-dev] [PATCH v2 1/8] common/mlx5: add netlink API to get RDMA port state Xueming Li
2021-10-19  8:23     ` Slava Ovsiienko
2021-10-16  8:07   ` [dpdk-dev] [PATCH v2 2/8] net/mlx5: use netlink when IB port greater than 255 Xueming Li
2021-10-19  8:24     ` Slava Ovsiienko
2021-10-16  8:07   ` [dpdk-dev] [PATCH v2 3/8] net/mlx5: improve Verbs flow priority discover for scalable Xueming Li
2021-10-19  8:26     ` Slava Ovsiienko
2021-10-16  8:07   ` [dpdk-dev] [PATCH v2 4/8] net/mlx5: support E-Switch manager egress traffic match Xueming Li
2021-10-19  8:26     ` Slava Ovsiienko
2021-10-16  8:07   ` [dpdk-dev] [PATCH v2 5/8] net/mlx5: supports flow item of normal Tx queue Xueming Li
2021-10-19  8:27     ` Slava Ovsiienko
2021-10-16  8:07   ` [dpdk-dev] [PATCH v2 6/8] net/mlx5: fix internal root table flow priroity Xueming Li
2021-10-19  8:28     ` Slava Ovsiienko
2021-10-16  8:07   ` [dpdk-dev] [PATCH v2 7/8] net/mlx5: enable DevX Tx queue creation Xueming Li
2021-10-19  8:29     ` Slava Ovsiienko
2021-10-16  8:07   ` [dpdk-dev] [PATCH v2 8/8] net/mlx5: check DevX to support more Verbs ports Xueming Li
2021-10-19  8:30     ` Slava Ovsiienko
2021-10-19 10:34 ` [dpdk-dev] [PATCH v3 0/8] net/mlx5: support more than 255 representors Xueming Li
2021-10-19 10:34   ` [dpdk-dev] [PATCH v3 1/8] common/mlx5: add netlink API to get RDMA port state Xueming Li
2021-10-21 13:34     ` Ferruh Yigit
2021-10-19 10:34   ` [dpdk-dev] [PATCH v3 2/8] net/mlx5: use netlink when IB port greater than 255 Xueming Li
2021-10-19 10:34   ` [dpdk-dev] [PATCH v3 3/8] net/mlx5: improve Verbs flow priority discover for scalable Xueming Li
2021-10-19 10:34   ` [dpdk-dev] [PATCH v3 4/8] net/mlx5: support E-Switch manager egress traffic match Xueming Li
2021-10-19 10:34   ` [dpdk-dev] [PATCH v3 5/8] net/mlx5: supports flow item of normal Tx queue Xueming Li
2021-10-19 10:34   ` [dpdk-dev] [PATCH v3 6/8] net/mlx5: fix internal root table flow priroity Xueming Li
2021-10-19 10:35   ` [dpdk-dev] [PATCH v3 7/8] net/mlx5: enable DevX Tx queue creation Xueming Li
2021-10-19 10:35   ` [dpdk-dev] [PATCH v3 8/8] net/mlx5: check DevX to support more Verbs ports Xueming Li
2021-10-20 13:40   ` [dpdk-dev] [PATCH v3 0/8] net/mlx5: support more than 255 representors Raslan Darawsheh
2021-10-20 16:00     ` Xueming(Steven) Li
2021-10-22  9:11 ` [dpdk-dev] [PATCH v4 " Xueming Li
2021-10-22  9:11   ` [dpdk-dev] [PATCH v4 1/8] common/mlx5: add netlink API to get RDMA port state Xueming Li
2021-10-22  9:11   ` [dpdk-dev] [PATCH v4 2/8] net/mlx5: use netlink when IB port greater than 255 Xueming Li
2021-10-22  9:11   ` [dpdk-dev] [PATCH v4 3/8] net/mlx5: improve Verbs flow priority discover for scalable Xueming Li
2021-10-22  9:11   ` [dpdk-dev] [PATCH v4 4/8] net/mlx5: support E-Switch manager egress traffic match Xueming Li
2021-10-22  9:11   ` [dpdk-dev] [PATCH v4 5/8] net/mlx5: supports flow item of normal Tx queue Xueming Li
2021-10-22  9:11   ` [dpdk-dev] [PATCH v4 6/8] net/mlx5: fix internal root table flow priroity Xueming Li
2021-10-22  9:11   ` [dpdk-dev] [PATCH v4 7/8] net/mlx5: enable DevX Tx queue creation Xueming Li
2021-10-22  9:11   ` [dpdk-dev] [PATCH v4 8/8] net/mlx5: check DevX to support more Verbs ports Xueming Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210927083256.337450-9-xuemingl@nvidia.com \
    --to=xuemingl@nvidia.com \
    --cc=dev@dpdk.org \
    --cc=matan@nvidia.com \
    --cc=viacheslavo@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).