From: Dariusz Sosnowski <dsosnowski@nvidia.com>
To: Viacheslav Ovsiienko <viacheslavo@nvidia.com>,
	Ori Kam <orika@nvidia.com>,  Suanming Mou <suanmingm@nvidia.com>,
	Matan Azrad <matan@nvidia.com>
Cc: <dev@dpdk.org>, Raslan Darawsheh <rasland@nvidia.com>,
	Bing Zhao <bingz@nvidia.com>
Subject: [PATCH v2 01/11] net/mlx5: allocate local DR rule action buffers
Date: Thu, 29 Feb 2024 12:51:46 +0100
Message-ID: <20240229115157.201671-2-dsosnowski@nvidia.com>
In-Reply-To: <20240229115157.201671-1-dsosnowski@nvidia.com>

The goal of this patch is to remove unnecessary copying of the
precalculated mlx5dr_rule_action structures used to create HWS flow rules.

Before this patch, during template table creation, an array of these
structures was calculated for each actions template used.
Each of these structures contained either a full or a partial action
definition, depending on the mask configuration.
During flow creation, this array was copied to the stack and then passed
to mlx5dr_rule_create().
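
For illustration, the removed per-flow pattern looked roughly like this
(a simplified sketch assembled from the lines removed in the diff below,
with unrelated arguments elided):

    struct mlx5dr_rule_action rule_acts[MLX5_HW_MAX_ACTS]; /* stack buffer */

    /* Copy the precalculated action template on every flow creation. */
    rte_memcpy(rule_acts, hw_acts->rule_acts,
               sizeof(*rule_acts) * at->dr_actions_num);
    /* ... construct the unmasked actions in rule_acts ... */
    /* ... pass rule_acts to mlx5dr_rule_create() ... */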

This patch removes this copy by implementing the following (the buffer
lookup is sketched after the list):

- Allocate an array of mlx5dr_rule_action structures for each actions
  template and queue.
- Populate them with precalculated data from the relevant actions
  templates.
- During flow creation, construct the unmasked actions in the array
  dedicated to the specific queue and actions template.
- Pass this buffer to mlx5dr_rule_create() directly.
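
The per-queue, per-template buffer is found with simple offset arithmetic
into a flexible array member appended to the table struct (a sketch of the
lookup implemented by flow_hw_get_dr_action_buffer() in the diff below;
Q is the number of configured queues):

    /*
     * rule_acts layout for a table with two actions templates and Q queues:
     *
     *   [AT0,q0][AT0,q1]...[AT0,qQ-1][AT1,q0][AT1,q1]...[AT1,qQ-1]
     *
     * so the container for (at_idx, queue) lives at index
     * at_idx * Q + queue, and the whole array adds
     * nb_action_templates * Q * sizeof(container) bytes to the table.
     */
    uint32_t offset = action_template_index * priv->nb_queue + queue;
    struct mlx5dr_rule_action *buf = &table->rule_acts[offset].acts[0];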

Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
Acked-by: Ori Kam <orika@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    | 13 +++++++++
 drivers/net/mlx5/mlx5_flow_hw.c | 51 +++++++++++++++++++++++++++++----
 2 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 7aa24f7c52..02af0a08fa 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -1566,6 +1566,10 @@ struct mlx5_matcher_info {
 	uint32_t refcnt;
 };
 
+struct mlx5_dr_rule_action_container {
+	struct mlx5dr_rule_action acts[MLX5_HW_MAX_ACTS];
+} __rte_cache_aligned;
+
 struct rte_flow_template_table {
 	LIST_ENTRY(rte_flow_template_table) next;
 	struct mlx5_flow_group *grp; /* The group rte_flow_template_table uses. */
@@ -1585,6 +1589,15 @@ struct rte_flow_template_table {
 	uint32_t refcnt; /* Table reference counter. */
 	struct mlx5_tbl_multi_pattern_ctx mpctx;
 	struct mlx5dr_matcher_attr matcher_attr;
+	/**
+	 * Variable length array of containers holding precalculated templates of DR action
+	 * arrays. This array is allocated at template table creation time and contains
+	 * one container per queue, per actions template.
+	 * Essentially, rule_acts is a 2-dimensional array indexed with an (AT index, queue) pair.
+	 * Each container provides a local "queue buffer" for flow creation
+	 * operations to work on when using a given actions template.
+	 */
+	struct mlx5_dr_rule_action_container rule_acts[];
 };
 
 static __rte_always_inline struct mlx5dr_matcher *
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index 9620b7f576..ef91a23a9b 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -2512,6 +2512,34 @@ __flow_hw_actions_translate(struct rte_eth_dev *dev,
 				  "fail to create rte table");
 }
 
+static __rte_always_inline struct mlx5dr_rule_action *
+flow_hw_get_dr_action_buffer(struct mlx5_priv *priv,
+			     struct rte_flow_template_table *table,
+			     uint8_t action_template_index,
+			     uint32_t queue)
+{
+	uint32_t offset = action_template_index * priv->nb_queue + queue;
+
+	return &table->rule_acts[offset].acts[0];
+}
+
+static void
+flow_hw_populate_rule_acts_caches(struct rte_eth_dev *dev,
+				  struct rte_flow_template_table *table,
+				  uint8_t at_idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	uint32_t q;
+
+	for (q = 0; q < priv->nb_queue; ++q) {
+		struct mlx5dr_rule_action *rule_acts =
+				flow_hw_get_dr_action_buffer(priv, table, at_idx, q);
+
+		rte_memcpy(rule_acts, table->ats[at_idx].acts.rule_acts,
+			   sizeof(table->ats[at_idx].acts.rule_acts));
+	}
+}
+
 /**
  * Translate rte_flow actions to DR action.
  *
@@ -2539,6 +2567,7 @@ flow_hw_actions_translate(struct rte_eth_dev *dev,
 						tbl->ats[i].action_template,
 						&tbl->mpctx, error))
 			goto err;
+		flow_hw_populate_rule_acts_caches(dev, tbl, i);
 	}
 	ret = mlx5_tbl_multi_pattern_process(dev, tbl, &tbl->mpctx.segments[0],
 					     rte_log2_u32(tbl->cfg.attr.nb_flows),
@@ -2928,7 +2957,6 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 	struct mlx5_aso_mtr *aso_mtr;
 	struct mlx5_multi_pattern_segment *mp_segment = NULL;
 
-	rte_memcpy(rule_acts, hw_acts->rule_acts, sizeof(*rule_acts) * at->dr_actions_num);
 	attr.group = table->grp->group_id;
 	ft_flag = mlx5_hw_act_flag[!!table->grp->group_id][table->type];
 	if (table->type == MLX5DR_TABLE_TYPE_FDB) {
@@ -3335,7 +3363,7 @@ flow_hw_async_flow_create(struct rte_eth_dev *dev,
 		.user_data = user_data,
 		.burst = attr->postpone,
 	};
-	struct mlx5dr_rule_action rule_acts[MLX5_HW_MAX_ACTS];
+	struct mlx5dr_rule_action *rule_acts;
 	struct rte_flow_hw *flow = NULL;
 	struct mlx5_hw_q_job *job = NULL;
 	const struct rte_flow_item *rule_items;
@@ -3358,6 +3386,7 @@ flow_hw_async_flow_create(struct rte_eth_dev *dev,
 	mlx5_ipool_malloc(table->resource, &res_idx);
 	if (!res_idx)
 		goto error;
+	rule_acts = flow_hw_get_dr_action_buffer(priv, table, action_template_index, queue);
 	/*
 	 * Set the table here in order to know the destination table
 	 * when free the flow afterward.
@@ -3479,7 +3508,7 @@ flow_hw_async_flow_create_by_index(struct rte_eth_dev *dev,
 		.user_data = user_data,
 		.burst = attr->postpone,
 	};
-	struct mlx5dr_rule_action rule_acts[MLX5_HW_MAX_ACTS];
+	struct mlx5dr_rule_action *rule_acts;
 	struct rte_flow_hw *flow = NULL;
 	struct mlx5_hw_q_job *job = NULL;
 	uint32_t flow_idx = 0;
@@ -3501,6 +3530,7 @@ flow_hw_async_flow_create_by_index(struct rte_eth_dev *dev,
 	mlx5_ipool_malloc(table->resource, &res_idx);
 	if (!res_idx)
 		goto error;
+	rule_acts = flow_hw_get_dr_action_buffer(priv, table, action_template_index, queue);
 	/*
 	 * Set the table here in order to know the destination table
 	 * when free the flow afterwards.
@@ -3610,7 +3640,7 @@ flow_hw_async_flow_update(struct rte_eth_dev *dev,
 		.user_data = user_data,
 		.burst = attr->postpone,
 	};
-	struct mlx5dr_rule_action rule_acts[MLX5_HW_MAX_ACTS];
+	struct mlx5dr_rule_action *rule_acts;
 	struct rte_flow_hw *of = (struct rte_flow_hw *)flow;
 	struct rte_flow_hw *nf;
 	struct rte_flow_template_table *table = of->table;
@@ -3628,6 +3658,7 @@ flow_hw_async_flow_update(struct rte_eth_dev *dev,
 		goto error;
 	nf = job->upd_flow;
 	memset(nf, 0, sizeof(struct rte_flow_hw));
+	rule_acts = flow_hw_get_dr_action_buffer(priv, table, action_template_index, queue);
 	/*
 	 * Set the table here in order to know the destination table
 	 * when free the flow afterwards.
@@ -4354,6 +4385,7 @@ mlx5_hw_build_template_table(struct rte_eth_dev *dev,
 			i++;
 			goto at_error;
 		}
+		flow_hw_populate_rule_acts_caches(dev, tbl, i);
 	}
 	tbl->nb_action_templates = nb_action_templates;
 	if (mlx5_is_multi_pattern_active(&tbl->mpctx)) {
@@ -4442,6 +4474,7 @@ flow_hw_table_create(struct rte_eth_dev *dev,
 	uint32_t i = 0, max_tpl = MLX5_HW_TBL_MAX_ITEM_TEMPLATE;
 	uint32_t nb_flows = rte_align32pow2(attr->nb_flows);
 	bool port_started = !!dev->data->dev_started;
+	size_t tbl_mem_size;
 	int err;
 
 	/* HWS layer accepts only 1 item template with root table. */
@@ -4461,8 +4494,16 @@ flow_hw_table_create(struct rte_eth_dev *dev,
 		rte_errno = EINVAL;
 		goto error;
 	}
+	/*
+	 * Amount of memory required for rte_flow_template_table struct:
+	 * - Size of the struct itself.
+	 * - VLA of DR rule action containers at the end =
+	 *     number of actions templates * number of queues * size of DR rule actions container.
+	 */
+	tbl_mem_size = sizeof(*tbl);
+	tbl_mem_size += nb_action_templates * priv->nb_queue * sizeof(tbl->rule_acts[0]);
 	/* Allocate the table memory. */
-	tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*tbl), 0, rte_socket_id());
+	tbl = mlx5_malloc(MLX5_MEM_ZERO, tbl_mem_size, RTE_CACHE_LINE_SIZE, rte_socket_id());
 	if (!tbl)
 		goto error;
 	tbl->cfg = *table_cfg;
-- 
2.39.2

