DPDK patches and discussions
* [RFC PATCH] ethdev: advertise flow restore in mbuf
@ 2023-05-05 10:31 David Marchand
  2023-05-24 12:57 ` David Marchand
                   ` (5 more replies)
  0 siblings, 6 replies; 25+ messages in thread
From: David Marchand @ 2023-05-05 10:31 UTC (permalink / raw)
  To: dev
  Cc: thomas, i.maximets, Aman Singh, Yuying Zhang, Matan Azrad,
	Viacheslav Ovsiienko, Andrew Rybchenko, Ferruh Yigit, Ori Kam

As reported by Ilya [1], unconditionally calling
rte_flow_get_restore_info() impacts application performance for drivers
that do not provide this operation.
It could also impact processing of packets that require no call to
rte_flow_get_restore_info() at all.

Advertise in mbuf (via a dynamic flag) whether the driver has more
metadata to provide via rte_flow_get_restore_info().
The application can then call it only when required.

Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-7f659bfa40de@ovn.org/
Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Note: I did not test this RFC patch yet but I hope we can resume and
maybe conclude on the discussion for the tunnel offloading API.
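
For context, here is a minimal sketch of the intended application-side
usage (it mirrors the test-pmd change below; everything except the two
new rte_flow_*() calls is illustrative):

	#include <rte_flow.h>
	#include <rte_mbuf.h>

	static uint64_t restore_info_dynflag;

	/* Resolve the flag once, after the ports are probed; it stays 0 when
	 * no driver registered it, so the per-packet test below never matches. */
	static void
	app_init_restore_info(void)
	{
		restore_info_dynflag = rte_flow_restore_info_dynflag();
	}

	static void
	app_handle_burst(uint16_t port_id, struct rte_mbuf **pkts, uint16_t nb_pkts)
	{
		uint16_t i;

		for (i = 0; i < nb_pkts; i++) {
			struct rte_flow_restore_info info = { 0, };
			struct rte_flow_error error;

			/* Only query the driver for packets it flagged. */
			if ((pkts[i]->ol_flags & restore_info_dynflag) == 0)
				continue;
			if (rte_flow_get_restore_info(port_id, pkts[i], &info, &error) == 0) {
				/* info.flags / info.tunnel describe the interrupted offload. */
			}
		}
	}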

---
 app/test-pmd/util.c              |  9 +++++----
 drivers/net/mlx5/linux/mlx5_os.c | 14 ++++++++++----
 drivers/net/mlx5/mlx5.h          |  3 ++-
 drivers/net/mlx5/mlx5_flow.c     | 26 ++++++++++++++++++++------
 drivers/net/mlx5/mlx5_rx.c       |  2 +-
 drivers/net/mlx5/mlx5_rx.h       |  1 +
 drivers/net/mlx5/mlx5_trigger.c  |  4 ++--
 drivers/net/sfc/sfc_dp.c         |  9 ++-------
 lib/ethdev/ethdev_driver.h       |  8 ++++++++
 lib/ethdev/rte_flow.c            | 29 +++++++++++++++++++++++++++++
 lib/ethdev/rte_flow.h            | 19 ++++++++++++++++++-
 lib/ethdev/version.map           |  4 ++++
 12 files changed, 102 insertions(+), 26 deletions(-)

diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
index f9df5f69ef..5aa69ed545 100644
--- a/app/test-pmd/util.c
+++ b/app/test-pmd/util.c
@@ -88,18 +88,20 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 	char print_buf[MAX_STRING_LEN];
 	size_t buf_size = MAX_STRING_LEN;
 	size_t cur_len = 0;
+	uint64_t restore_info_dynflag;
 
 	if (!nb_pkts)
 		return;
+	restore_info_dynflag = rte_flow_restore_info_dynflag();
 	MKDUMPSTR(print_buf, buf_size, cur_len,
 		  "port %u/queue %u: %s %u packets\n", port_id, queue,
 		  is_rx ? "received" : "sent", (unsigned int) nb_pkts);
 	for (i = 0; i < nb_pkts; i++) {
-		int ret;
 		struct rte_flow_error error;
 		struct rte_flow_restore_info info = { 0, };
 
 		mb = pkts[i];
+		ol_flags = mb->ol_flags;
 		if (rxq_share > 0)
 			MKDUMPSTR(print_buf, buf_size, cur_len, "port %u, ",
 				  mb->port);
@@ -107,8 +109,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 		eth_type = RTE_BE_TO_CPU_16(eth_hdr->ether_type);
 		packet_type = mb->packet_type;
 		is_encapsulation = RTE_ETH_IS_TUNNEL_PKT(packet_type);
-		ret = rte_flow_get_restore_info(port_id, mb, &info, &error);
-		if (!ret) {
+		if ((ol_flags & restore_info_dynflag) != 0 &&
+				rte_flow_get_restore_info(port_id, mb, &info, &error) == 0) {
 			MKDUMPSTR(print_buf, buf_size, cur_len,
 				  "restore info:");
 			if (info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL) {
@@ -153,7 +155,6 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
 			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
 			  (int)mb->nb_segs);
-		ol_flags = mb->ol_flags;
 		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
 			MKDUMPSTR(print_buf, buf_size, cur_len,
 				  " - RSS hash=0x%x",
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 980234e2ac..e6e3784013 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -575,11 +575,17 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv)
 		goto error;
 	}
 #endif
-	if (!sh->tunnel_hub && sh->config.dv_miss_info)
+	if (!sh->tunnel_hub && sh->config.dv_miss_info) {
+		err = mlx5_flow_restore_info_register();
+		if (err) {
+			DRV_LOG(ERR, "Could not register mbuf dynflag for rte_flow_get_restore_info");
+			goto error;
+		}
 		err = mlx5_alloc_tunnel_hub(sh);
-	if (err) {
-		DRV_LOG(ERR, "mlx5_alloc_tunnel_hub failed err=%d", err);
-		goto error;
+		if (err) {
+			DRV_LOG(ERR, "mlx5_alloc_tunnel_hub failed err=%d", err);
+			goto error;
+		}
 	}
 	if (sh->config.reclaim_mode == MLX5_RCM_AGGR) {
 		mlx5_glue->dr_reclaim_domain_memory(sh->rx_domain, 1);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 9eae692037..77cdc802da 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -2116,7 +2116,8 @@ int mlx5_flow_query_counter(struct rte_eth_dev *dev, struct rte_flow *flow,
 int mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev, struct rte_flow *flow,
 		FILE *file, struct rte_flow_error *error);
 #endif
-void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
+int mlx5_flow_restore_info_register(void);
+void mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev);
 int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
 			uint32_t nb_contexts, struct rte_flow_error *error);
 int mlx5_validate_action_ct(struct rte_eth_dev *dev,
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index d0275fdd00..715b7d327d 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1779,6 +1779,20 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
 	priv->sh->shared_mark_enabled = 0;
 }
 
+static uint64_t mlx5_restore_info_dynflag;
+
+int
+mlx5_flow_restore_info_register(void)
+{
+	int err = 0;
+
+	if (mlx5_restore_info_dynflag == 0) {
+		if (rte_flow_restore_info_dynflag_register(&mlx5_restore_info_dynflag) < 0)
+			err = ENOMEM;
+	}
+	return err;
+}
+
 /**
  * Set the Rx queue dynamic metadata (mask and offset) for a flow
  *
@@ -1786,7 +1800,7 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
  *   Pointer to the Ethernet device structure.
  */
 void
-mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
+mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	unsigned int i;
@@ -1809,6 +1823,9 @@ mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
 			data->flow_meta_offset = rte_flow_dynf_metadata_offs;
 			data->flow_meta_port_mask = priv->sh->dv_meta_mask;
 		}
+		data->mark_flag = RTE_MBUF_F_RX_FDIR_ID;
+		if (is_tunnel_offload_active(dev))
+			data->mark_flag |= mlx5_restore_info_dynflag;
 	}
 }
 
@@ -11453,11 +11470,8 @@ mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
 	const struct mlx5_flow_tbl_data_entry *tble;
 	const uint64_t mask = RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
 
-	if (!is_tunnel_offload_active(dev)) {
-		info->flags = 0;
-		return 0;
-	}
-
+	if ((ol_flags & mlx5_restore_info_dynflag) == 0)
+		goto err;
 	if ((ol_flags & mask) != mask)
 		goto err;
 	tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index a2be523e9e..71c4638251 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -857,7 +857,7 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
 		if (MLX5_FLOW_MARK_IS_VALID(mark)) {
 			pkt->ol_flags |= RTE_MBUF_F_RX_FDIR;
 			if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
-				pkt->ol_flags |= RTE_MBUF_F_RX_FDIR_ID;
+				pkt->ol_flags |= rxq->mark_flag;
 				pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
 			}
 		}
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 52c35c83f8..3514edd84e 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -136,6 +136,7 @@ struct mlx5_rxq_data {
 	struct mlx5_uar_data uar_data; /* CQ doorbell. */
 	uint32_t cqn; /* CQ number. */
 	uint8_t cq_arm_sn; /* CQ arm seq number. */
+	uint64_t mark_flag; /* ol_flags to set with marks. */
 	uint32_t tunnel; /* Tunnel information. */
 	int timestamp_offset; /* Dynamic mbuf field for timestamp. */
 	uint64_t timestamp_rx_flag; /* Dynamic mbuf flag for timestamp. */
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index bbaa7d2aa0..7bdb897612 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -1282,8 +1282,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 			dev->data->port_id);
 		goto error;
 	}
-	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
-	mlx5_flow_rxq_dynf_metadata_set(dev);
+	/* Set dynamic fields and flags into Rx queues. */
+	mlx5_flow_rxq_dynf_set(dev);
 	/* Set flags and context to convert Rx timestamps. */
 	mlx5_rxq_timestamp_set(dev);
 	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
diff --git a/drivers/net/sfc/sfc_dp.c b/drivers/net/sfc/sfc_dp.c
index 9f2093b353..8b2dbea325 100644
--- a/drivers/net/sfc/sfc_dp.c
+++ b/drivers/net/sfc/sfc_dp.c
@@ -11,6 +11,7 @@
 #include <string.h>
 #include <errno.h>
 
+#include <ethdev_driver.h>
 #include <rte_log.h>
 #include <rte_mbuf_dyn.h>
 
@@ -135,12 +136,8 @@ sfc_dp_ft_ctx_id_register(void)
 		.size = sizeof(uint8_t),
 		.align = __alignof__(uint8_t),
 	};
-	static const struct rte_mbuf_dynflag ft_ctx_id_valid = {
-		.name = "rte_net_sfc_dynflag_ft_ctx_id_valid",
-	};
 
 	int field_offset;
-	int flag;
 
 	SFC_GENERIC_LOG(INFO, "%s() entry", __func__);
 
@@ -156,15 +153,13 @@ sfc_dp_ft_ctx_id_register(void)
 		return -1;
 	}
 
-	flag = rte_mbuf_dynflag_register(&ft_ctx_id_valid);
-	if (flag < 0) {
+	if (rte_flow_restore_info_dynflag_register(&sfc_dp_ft_ctx_id_valid) < 0) {
 		SFC_GENERIC_LOG(ERR, "%s() failed to register ft_ctx_id dynflag",
 				__func__);
 		return -1;
 	}
 
 	sfc_dp_ft_ctx_id_offset = field_offset;
-	sfc_dp_ft_ctx_id_valid = UINT64_C(1) << flag;
 
 	SFC_GENERIC_LOG(INFO, "%s() done", __func__);
 
diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index 2c9d615fb5..6c17d84d1b 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -1949,6 +1949,14 @@ __rte_internal
 int
 rte_eth_ip_reassembly_dynfield_register(int *field_offset, int *flag);
 
+/**
+ * @internal
+ * Register mbuf dynamic flag for rte_flow_get_restore_info.
+ */
+__rte_internal
+int
+rte_flow_restore_info_dynflag_register(uint64_t *flag);
+
 
 /*
  * Legacy ethdev API used internally by drivers.
diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c
index 69e6e749f7..10cd9d12ba 100644
--- a/lib/ethdev/rte_flow.c
+++ b/lib/ethdev/rte_flow.c
@@ -1401,6 +1401,35 @@ rte_flow_get_restore_info(uint16_t port_id,
 				  NULL, rte_strerror(ENOTSUP));
 }
 
+static struct {
+	const struct rte_mbuf_dynflag desc;
+	uint64_t value;
+} flow_restore_info_dynflag = {
+	.desc = { .name = "RTE_MBUF_F_RX_RESTORE_INFO", },
+};
+
+uint64_t
+rte_flow_restore_info_dynflag(void)
+{
+	return flow_restore_info_dynflag.value;
+}
+
+int
+rte_flow_restore_info_dynflag_register(uint64_t *flag)
+{
+	if (flow_restore_info_dynflag.value == 0) {
+		int offset = rte_mbuf_dynflag_register(&flow_restore_info_dynflag.desc);
+
+		if (offset < 0)
+			return -1;
+		flow_restore_info_dynflag.value = RTE_BIT64(offset);
+	}
+	if (flag != NULL)
+		*flag = rte_flow_restore_info_dynflag();
+
+	return 0;
+}
+
 int
 rte_flow_tunnel_action_decap_release(uint16_t port_id,
 				     struct rte_flow_action *actions,
diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
index 713ba8b65c..5ce2db4bbd 100644
--- a/lib/ethdev/rte_flow.h
+++ b/lib/ethdev/rte_flow.h
@@ -4918,7 +4918,24 @@ rte_flow_tunnel_match(uint16_t port_id,
 		      struct rte_flow_error *error);
 
 /**
- * Populate the current packet processing state, if exists, for the given mbuf.
+ * On reception of a mbuf from HW, a call to rte_flow_get_restore_info() may be
+ * required to retrieve some metadata.
+ * This function returns the associated mbuf ol_flags.
+ *
+ * Note: the dynamic flag is registered during the probing of the first device
+ * that requires it. If this function returns a non 0 value, this value won't
+ * change for the rest of the life of the application.
+ *
+ * @return
+ *   The offload flag indicating rte_flow_get_restore_info() must be called.
+ */
+__rte_experimental
+uint64_t
+rte_flow_restore_info_dynflag(void);
+
+/**
+ * If a mbuf contains the rte_flow_restore_info_dynflag() flag in ol_flags,
+ * populate the current packet processing state.
  *
  * One should negotiate tunnel metadata delivery from the NIC to the HW.
  * @see rte_eth_rx_metadata_negotiate()
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 357d1a88c0..bf5668e928 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -299,6 +299,9 @@ EXPERIMENTAL {
 	rte_flow_action_handle_query_update;
 	rte_flow_async_action_handle_query_update;
 	rte_flow_async_create_by_index;
+
+	# added in 23.07
+	rte_flow_restore_info_dynflag;
 };
 
 INTERNAL {
@@ -328,4 +331,5 @@ INTERNAL {
 	rte_eth_representor_id_get;
 	rte_eth_switch_domain_alloc;
 	rte_eth_switch_domain_free;
+	rte_flow_restore_info_dynflag_register;
 };
-- 
2.40.0



* Re: [RFC PATCH] ethdev: advertise flow restore in mbuf
  2023-05-05 10:31 [RFC PATCH] ethdev: advertise flow restore in mbuf David Marchand
@ 2023-05-24 12:57 ` David Marchand
  2023-05-24 16:00 ` Ori Kam
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 25+ messages in thread
From: David Marchand @ 2023-05-24 12:57 UTC (permalink / raw)
  To: Ori Kam, thomas, Ferruh Yigit
  Cc: dev, i.maximets, Aman Singh, Yuying Zhang, Matan Azrad,
	Viacheslav Ovsiienko, Andrew Rybchenko

Hello guys,

On Fri, May 5, 2023 at 12:31 PM David Marchand
<david.marchand@redhat.com> wrote:
>
> As reported by Ilya [1], unconditionally calling
> rte_flow_get_restore_info() impacts an application performance for drivers
> that do not provide this ops.
> It could also impact processing of packets that require no call to
> rte_flow_get_restore_info() at all.
>
> Advertise in mbuf (via a dynamic flag) whether the driver has more
> metadata to provide via rte_flow_get_restore_info().
> The application can then call it only when required.
>
> Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-7f659bfa40de@ovn.org/
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
> Note: I did not test this RFC patch yet but I hope we can resume and
> maybe conclude on the discussion for the tunnel offloading API.

Any comment?
Thanks.


-- 
David Marchand



* RE: [RFC PATCH] ethdev: advertise flow restore in mbuf
  2023-05-05 10:31 [RFC PATCH] ethdev: advertise flow restore in mbuf David Marchand
  2023-05-24 12:57 ` David Marchand
@ 2023-05-24 16:00 ` Ori Kam
  2023-05-24 18:44   ` David Marchand
  2023-06-14 16:46 ` Slava Ovsiienko
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 25+ messages in thread
From: Ori Kam @ 2023-05-24 16:00 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: NBU-Contact-Thomas Monjalon (EXTERNAL),
	i.maximets, Aman Singh, Yuying Zhang, Matan Azrad,
	Slava Ovsiienko, Andrew Rybchenko, Ferruh Yigit

Hi David

> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Friday, May 5, 2023 1:31 PM
> 
> As reported by Ilya [1], unconditionally calling
> rte_flow_get_restore_info() impacts an application performance for drivers
> that do not provide this ops.
> It could also impact processing of packets that require no call to
> rte_flow_get_restore_info() at all.
> 
> Advertise in mbuf (via a dynamic flag) whether the driver has more
> metadata to provide via rte_flow_get_restore_info().
> The application can then call it only when required.
> 
> Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-
> 7f659bfa40de@ovn.org/
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
> Note: I did not test this RFC patch yet but I hope we can resume and
> maybe conclude on the discussion for the tunnel offloading API.
> 

I think your approach has a good base but what happens if 
it is not relevant for all flows? In this case your solution will not work.

In any case I think there are some issues with the implementation.


> ---
>  app/test-pmd/util.c              |  9 +++++----
>  drivers/net/mlx5/linux/mlx5_os.c | 14 ++++++++++----
>  drivers/net/mlx5/mlx5.h          |  3 ++-
>  drivers/net/mlx5/mlx5_flow.c     | 26 ++++++++++++++++++++------
>  drivers/net/mlx5/mlx5_rx.c       |  2 +-
>  drivers/net/mlx5/mlx5_rx.h       |  1 +
>  drivers/net/mlx5/mlx5_trigger.c  |  4 ++--
>  drivers/net/sfc/sfc_dp.c         |  9 ++-------
>  lib/ethdev/ethdev_driver.h       |  8 ++++++++
>  lib/ethdev/rte_flow.c            | 29 +++++++++++++++++++++++++++++
>  lib/ethdev/rte_flow.h            | 19 ++++++++++++++++++-
>  lib/ethdev/version.map           |  4 ++++
>  12 files changed, 102 insertions(+), 26 deletions(-)
> 
> diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
> index f9df5f69ef..5aa69ed545 100644
> --- a/app/test-pmd/util.c
> +++ b/app/test-pmd/util.c
> @@ -88,18 +88,20 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> struct rte_mbuf *pkts[],
>  	char print_buf[MAX_STRING_LEN];
>  	size_t buf_size = MAX_STRING_LEN;
>  	size_t cur_len = 0;
> +	uint64_t restore_info_dynflag;
> 
>  	if (!nb_pkts)
>  		return;
> +	restore_info_dynflag = rte_flow_restore_info_dynflag();
>  	MKDUMPSTR(print_buf, buf_size, cur_len,
>  		  "port %u/queue %u: %s %u packets\n", port_id, queue,
>  		  is_rx ? "received" : "sent", (unsigned int) nb_pkts);
>  	for (i = 0; i < nb_pkts; i++) {
> -		int ret;
>  		struct rte_flow_error error;
>  		struct rte_flow_restore_info info = { 0, };
> 
>  		mb = pkts[i];
> +		ol_flags = mb->ol_flags;
>  		if (rxq_share > 0)
>  			MKDUMPSTR(print_buf, buf_size, cur_len, "port %u, ",
>  				  mb->port);
> @@ -107,8 +109,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> struct rte_mbuf *pkts[],
>  		eth_type = RTE_BE_TO_CPU_16(eth_hdr->ether_type);
>  		packet_type = mb->packet_type;
>  		is_encapsulation = RTE_ETH_IS_TUNNEL_PKT(packet_type);
> -		ret = rte_flow_get_restore_info(port_id, mb, &info, &error);
> -		if (!ret) {
> +		if ((ol_flags & restore_info_dynflag) != 0 &&
> +				rte_flow_get_restore_info(port_id, mb, &info,
> &error) == 0) {
>  			MKDUMPSTR(print_buf, buf_size, cur_len,
>  				  "restore info:");
>  			if (info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL) {
> @@ -153,7 +155,6 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> struct rte_mbuf *pkts[],
>  			  " - pool=%s - type=0x%04x - length=%u -
> nb_segs=%d",
>  			  mb->pool->name, eth_type, (unsigned int) mb-
> >pkt_len,
>  			  (int)mb->nb_segs);
> -		ol_flags = mb->ol_flags;
>  		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
>  			MKDUMPSTR(print_buf, buf_size, cur_len,
>  				  " - RSS hash=0x%x",
> diff --git a/drivers/net/mlx5/linux/mlx5_os.c
> b/drivers/net/mlx5/linux/mlx5_os.c
> index 980234e2ac..e6e3784013 100644
> --- a/drivers/net/mlx5/linux/mlx5_os.c
> +++ b/drivers/net/mlx5/linux/mlx5_os.c
> @@ -575,11 +575,17 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv)
>  		goto error;
>  	}
>  #endif
> -	if (!sh->tunnel_hub && sh->config.dv_miss_info)
> +	if (!sh->tunnel_hub && sh->config.dv_miss_info) {
> +		err = mlx5_flow_restore_info_register();
> +		if (err) {
> +			DRV_LOG(ERR, "Could not register mbuf dynflag for
> rte_flow_get_restore_info");
> +			goto error;
> +		}
>  		err = mlx5_alloc_tunnel_hub(sh);
> -	if (err) {
> -		DRV_LOG(ERR, "mlx5_alloc_tunnel_hub failed err=%d", err);
> -		goto error;
> +		if (err) {
> +			DRV_LOG(ERR, "mlx5_alloc_tunnel_hub failed
> err=%d", err);
> +			goto error;
> +		}
>  	}
>  	if (sh->config.reclaim_mode == MLX5_RCM_AGGR) {
>  		mlx5_glue->dr_reclaim_domain_memory(sh->rx_domain, 1);
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index 9eae692037..77cdc802da 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -2116,7 +2116,8 @@ int mlx5_flow_query_counter(struct rte_eth_dev
> *dev, struct rte_flow *flow,
>  int mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev, struct rte_flow
> *flow,
>  		FILE *file, struct rte_flow_error *error);
>  #endif
> -void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
> +int mlx5_flow_restore_info_register(void);
> +void mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev);
>  int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
>  			uint32_t nb_contexts, struct rte_flow_error *error);
>  int mlx5_validate_action_ct(struct rte_eth_dev *dev,
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index d0275fdd00..715b7d327d 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -1779,6 +1779,20 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
>  	priv->sh->shared_mark_enabled = 0;
>  }
> 
> +static uint64_t mlx5_restore_info_dynflag;
> +
> +int
> +mlx5_flow_restore_info_register(void)
> +{
> +	int err = 0;
> +
> +	if (mlx5_restore_info_dynflag == 0) {
> +		if
> (rte_flow_restore_info_dynflag_register(&mlx5_restore_info_dynflag) < 0)
> +			err = ENOMEM;
> +	}
> +	return err;
> +}
> +
>  /**
>   * Set the Rx queue dynamic metadata (mask and offset) for a flow
>   *
> @@ -1786,7 +1800,7 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
>   *   Pointer to the Ethernet device structure.
>   */
>  void
> -mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
> +mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev)

Why did you change the name?

>  {
>  	struct mlx5_priv *priv = dev->data->dev_private;
>  	unsigned int i;
> @@ -1809,6 +1823,9 @@ mlx5_flow_rxq_dynf_metadata_set(struct
> rte_eth_dev *dev)
>  			data->flow_meta_offset =
> rte_flow_dynf_metadata_offs;
>  			data->flow_meta_port_mask = priv->sh-
> >dv_meta_mask;
>  		}
> +		data->mark_flag = RTE_MBUF_F_RX_FDIR_ID;
> +		if (is_tunnel_offload_active(dev))
> +			data->mark_flag |= mlx5_restore_info_dynflag;
>  	}
>  }
> 
> @@ -11453,11 +11470,8 @@ mlx5_flow_tunnel_get_restore_info(struct
> rte_eth_dev *dev,
>  	const struct mlx5_flow_tbl_data_entry *tble;
>  	const uint64_t mask = RTE_MBUF_F_RX_FDIR |
> RTE_MBUF_F_RX_FDIR_ID;
> 
> -	if (!is_tunnel_offload_active(dev)) {
> -		info->flags = 0;
> -		return 0;
> -	}
> -
> +	if ((ol_flags & mlx5_restore_info_dynflag) == 0)
> +		goto err;
>  	if ((ol_flags & mask) != mask)
>  		goto err;
>  	tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
> diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
> index a2be523e9e..71c4638251 100644
> --- a/drivers/net/mlx5/mlx5_rx.c
> +++ b/drivers/net/mlx5/mlx5_rx.c
> @@ -857,7 +857,7 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct
> rte_mbuf *pkt,
>  		if (MLX5_FLOW_MARK_IS_VALID(mark)) {
>  			pkt->ol_flags |= RTE_MBUF_F_RX_FDIR;
>  			if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
> -				pkt->ol_flags |= RTE_MBUF_F_RX_FDIR_ID;
> +				pkt->ol_flags |= rxq->mark_flag;
>  				pkt->hash.fdir.hi =
> mlx5_flow_mark_get(mark);
>  			}
>  		}
> diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
> index 52c35c83f8..3514edd84e 100644
> --- a/drivers/net/mlx5/mlx5_rx.h
> +++ b/drivers/net/mlx5/mlx5_rx.h
> @@ -136,6 +136,7 @@ struct mlx5_rxq_data {
>  	struct mlx5_uar_data uar_data; /* CQ doorbell. */
>  	uint32_t cqn; /* CQ number. */
>  	uint8_t cq_arm_sn; /* CQ arm seq number. */
> +	uint64_t mark_flag; /* ol_flags to set with marks. */
>  	uint32_t tunnel; /* Tunnel information. */
>  	int timestamp_offset; /* Dynamic mbuf field for timestamp. */
>  	uint64_t timestamp_rx_flag; /* Dynamic mbuf flag for timestamp. */
> diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
> index bbaa7d2aa0..7bdb897612 100644
> --- a/drivers/net/mlx5/mlx5_trigger.c
> +++ b/drivers/net/mlx5/mlx5_trigger.c
> @@ -1282,8 +1282,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
>  			dev->data->port_id);
>  		goto error;
>  	}
> -	/* Set a mask and offset of dynamic metadata flows into Rx queues.
> */
> -	mlx5_flow_rxq_dynf_metadata_set(dev);
> +	/* Set dynamic fields and flags into Rx queues. */
> +	mlx5_flow_rxq_dynf_set(dev);
>  	/* Set flags and context to convert Rx timestamps. */
>  	mlx5_rxq_timestamp_set(dev);
>  	/* Set a mask and offset of scheduling on timestamp into Tx queues.
> */
> diff --git a/drivers/net/sfc/sfc_dp.c b/drivers/net/sfc/sfc_dp.c
> index 9f2093b353..8b2dbea325 100644
> --- a/drivers/net/sfc/sfc_dp.c
> +++ b/drivers/net/sfc/sfc_dp.c
> @@ -11,6 +11,7 @@
>  #include <string.h>
>  #include <errno.h>
> 
> +#include <ethdev_driver.h>
>  #include <rte_log.h>
>  #include <rte_mbuf_dyn.h>
> 
> @@ -135,12 +136,8 @@ sfc_dp_ft_ctx_id_register(void)
>  		.size = sizeof(uint8_t),
>  		.align = __alignof__(uint8_t),
>  	};
> -	static const struct rte_mbuf_dynflag ft_ctx_id_valid = {
> -		.name = "rte_net_sfc_dynflag_ft_ctx_id_valid",
> -	};
> 
>  	int field_offset;
> -	int flag;
> 
>  	SFC_GENERIC_LOG(INFO, "%s() entry", __func__);
> 
> @@ -156,15 +153,13 @@ sfc_dp_ft_ctx_id_register(void)
>  		return -1;
>  	}
> 
> -	flag = rte_mbuf_dynflag_register(&ft_ctx_id_valid);
> -	if (flag < 0) {
> +	if (rte_flow_restore_info_dynflag_register(&sfc_dp_ft_ctx_id_valid) <
> 0) {
>  		SFC_GENERIC_LOG(ERR, "%s() failed to register ft_ctx_id
> dynflag",
>  				__func__);
>  		return -1;
>  	}
> 
>  	sfc_dp_ft_ctx_id_offset = field_offset;
> -	sfc_dp_ft_ctx_id_valid = UINT64_C(1) << flag;
> 
>  	SFC_GENERIC_LOG(INFO, "%s() done", __func__);
> 
> diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
> index 2c9d615fb5..6c17d84d1b 100644
> --- a/lib/ethdev/ethdev_driver.h
> +++ b/lib/ethdev/ethdev_driver.h
> @@ -1949,6 +1949,14 @@ __rte_internal
>  int
>  rte_eth_ip_reassembly_dynfield_register(int *field_offset, int *flag);
> 
> +/**
> + * @internal
> + * Register mbuf dynamic flag for rte_flow_get_restore_info.
> + */
> +__rte_internal
> +int
> +rte_flow_restore_info_dynflag_register(uint64_t *flag);
> +
> 
>  /*
>   * Legacy ethdev API used internally by drivers.
> diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c
> index 69e6e749f7..10cd9d12ba 100644
> --- a/lib/ethdev/rte_flow.c
> +++ b/lib/ethdev/rte_flow.c
> @@ -1401,6 +1401,35 @@ rte_flow_get_restore_info(uint16_t port_id,
>  				  NULL, rte_strerror(ENOTSUP));
>  }
> 
> +static struct {
> +	const struct rte_mbuf_dynflag desc;
> +	uint64_t value;
> +} flow_restore_info_dynflag = {
> +	.desc = { .name = "RTE_MBUF_F_RX_RESTORE_INFO", },
> +};

Why not have this structure in the register function?

> +
> +uint64_t
> +rte_flow_restore_info_dynflag(void)
> +{
> +	return flow_restore_info_dynflag.value;
> +}
> +
> +int
> +rte_flow_restore_info_dynflag_register(uint64_t *flag)
> +{
> +	if (flow_restore_info_dynflag.value == 0) {
> +		int offset =
> rte_mbuf_dynflag_register(&flow_restore_info_dynflag.desc);
> +
> +		if (offset < 0)
> +			return -1;
> +		flow_restore_info_dynflag.value = RTE_BIT64(offset);
> +	}
> +	if (*flag)
> +		*flag = rte_flow_restore_info_dynflag();
> +
> +	return 0;
> +}
> +
>  int
>  rte_flow_tunnel_action_decap_release(uint16_t port_id,
>  				     struct rte_flow_action *actions,
> diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
> index 713ba8b65c..5ce2db4bbd 100644
> --- a/lib/ethdev/rte_flow.h
> +++ b/lib/ethdev/rte_flow.h
> @@ -4918,7 +4918,24 @@ rte_flow_tunnel_match(uint16_t port_id,
>  		      struct rte_flow_error *error);
> 
>  /**
> - * Populate the current packet processing state, if exists, for the given mbuf.
> + * On reception of a mbuf from HW, a call to rte_flow_get_restore_info()
> may be
> + * required to retrieve some metadata.
> + * This function returns the associated mbuf ol_flags.
> + *
> + * Note: the dynamic flag is registered during the probing of the first device
> + * that requires it. If this function returns a non 0 value, this value won't
> + * change for the rest of the life of the application.
> + *
> + * @return
> + *   The offload flag indicating rte_flow_get_restore_info() must be called.
> + */
> +__rte_experimental
> +uint64_t
> +rte_flow_restore_info_dynflag(void);
> +
> +/**
> + * If a mbuf contains the rte_flow_restore_info_dynflag() flag in ol_flags,
> + * populate the current packet processing state.
>   *
>   * One should negotiate tunnel metadata delivery from the NIC to the HW.
>   * @see rte_eth_rx_metadata_negotiate()
> diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
> index 357d1a88c0..bf5668e928 100644
> --- a/lib/ethdev/version.map
> +++ b/lib/ethdev/version.map
> @@ -299,6 +299,9 @@ EXPERIMENTAL {
>  	rte_flow_action_handle_query_update;
>  	rte_flow_async_action_handle_query_update;
>  	rte_flow_async_create_by_index;
> +
> +	# added in 23.07
> +	rte_flow_restore_info_dynflag;
>  };
> 
>  INTERNAL {
> @@ -328,4 +331,5 @@ INTERNAL {
>  	rte_eth_representor_id_get;
>  	rte_eth_switch_domain_alloc;
>  	rte_eth_switch_domain_free;
> +	rte_flow_restore_info_dynflag_register;
>  };
> --
> 2.40.0

Best,
Ori



* Re: [RFC PATCH] ethdev: advertise flow restore in mbuf
  2023-05-24 16:00 ` Ori Kam
@ 2023-05-24 18:44   ` David Marchand
  2023-06-01  8:48     ` David Marchand
  0 siblings, 1 reply; 25+ messages in thread
From: David Marchand @ 2023-05-24 18:44 UTC (permalink / raw)
  To: Ori Kam
  Cc: dev, NBU-Contact-Thomas Monjalon (EXTERNAL),
	i.maximets, Aman Singh, Yuying Zhang, Matan Azrad,
	Slava Ovsiienko, Andrew Rybchenko, Ferruh Yigit

Hello Ori,

On Wed, May 24, 2023 at 6:00 PM Ori Kam <orika@nvidia.com> wrote:
> > As reported by Ilya [1], unconditionally calling
> > rte_flow_get_restore_info() impacts an application performance for drivers
> > that do not provide this ops.
> > It could also impact processing of packets that require no call to
> > rte_flow_get_restore_info() at all.
> >
> > Advertise in mbuf (via a dynamic flag) whether the driver has more
> > metadata to provide via rte_flow_get_restore_info().
> > The application can then call it only when required.
> >
> > Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-
> > 7f659bfa40de@ovn.org/
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > ---
> > Note: I did not test this RFC patch yet but I hope we can resume and
> > maybe conclude on the discussion for the tunnel offloading API.
> >
>
> I think your approach has a good base but what happens if
> it is not relevant for all flows? In this case your solution will not work.

Sorry, I am not following.
Could you develop?


>
> In any case I think there are some issues with the implementation.
>
>
> > ---
> >  app/test-pmd/util.c              |  9 +++++----
> >  drivers/net/mlx5/linux/mlx5_os.c | 14 ++++++++++----
> >  drivers/net/mlx5/mlx5.h          |  3 ++-
> >  drivers/net/mlx5/mlx5_flow.c     | 26 ++++++++++++++++++++------
> >  drivers/net/mlx5/mlx5_rx.c       |  2 +-
> >  drivers/net/mlx5/mlx5_rx.h       |  1 +
> >  drivers/net/mlx5/mlx5_trigger.c  |  4 ++--
> >  drivers/net/sfc/sfc_dp.c         |  9 ++-------
> >  lib/ethdev/ethdev_driver.h       |  8 ++++++++
> >  lib/ethdev/rte_flow.c            | 29 +++++++++++++++++++++++++++++
> >  lib/ethdev/rte_flow.h            | 19 ++++++++++++++++++-
> >  lib/ethdev/version.map           |  4 ++++
> >  12 files changed, 102 insertions(+), 26 deletions(-)
> >
> > diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
> > index f9df5f69ef..5aa69ed545 100644
> > --- a/app/test-pmd/util.c
> > +++ b/app/test-pmd/util.c
> > @@ -88,18 +88,20 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> > struct rte_mbuf *pkts[],
> >       char print_buf[MAX_STRING_LEN];
> >       size_t buf_size = MAX_STRING_LEN;
> >       size_t cur_len = 0;
> > +     uint64_t restore_info_dynflag;
> >
> >       if (!nb_pkts)
> >               return;
> > +     restore_info_dynflag = rte_flow_restore_info_dynflag();
> >       MKDUMPSTR(print_buf, buf_size, cur_len,
> >                 "port %u/queue %u: %s %u packets\n", port_id, queue,
> >                 is_rx ? "received" : "sent", (unsigned int) nb_pkts);
> >       for (i = 0; i < nb_pkts; i++) {
> > -             int ret;
> >               struct rte_flow_error error;
> >               struct rte_flow_restore_info info = { 0, };
> >
> >               mb = pkts[i];
> > +             ol_flags = mb->ol_flags;
> >               if (rxq_share > 0)
> >                       MKDUMPSTR(print_buf, buf_size, cur_len, "port %u, ",
> >                                 mb->port);
> > @@ -107,8 +109,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> > struct rte_mbuf *pkts[],
> >               eth_type = RTE_BE_TO_CPU_16(eth_hdr->ether_type);
> >               packet_type = mb->packet_type;
> >               is_encapsulation = RTE_ETH_IS_TUNNEL_PKT(packet_type);
> > -             ret = rte_flow_get_restore_info(port_id, mb, &info, &error);
> > -             if (!ret) {
> > +             if ((ol_flags & restore_info_dynflag) != 0 &&
> > +                             rte_flow_get_restore_info(port_id, mb, &info,
> > &error) == 0) {
> >                       MKDUMPSTR(print_buf, buf_size, cur_len,
> >                                 "restore info:");
> >                       if (info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL) {
> > @@ -153,7 +155,6 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> > struct rte_mbuf *pkts[],
> >                         " - pool=%s - type=0x%04x - length=%u -
> > nb_segs=%d",
> >                         mb->pool->name, eth_type, (unsigned int) mb-
> > >pkt_len,
> >                         (int)mb->nb_segs);
> > -             ol_flags = mb->ol_flags;
> >               if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
> >                       MKDUMPSTR(print_buf, buf_size, cur_len,
> >                                 " - RSS hash=0x%x",
> > diff --git a/drivers/net/mlx5/linux/mlx5_os.c
> > b/drivers/net/mlx5/linux/mlx5_os.c
> > index 980234e2ac..e6e3784013 100644
> > --- a/drivers/net/mlx5/linux/mlx5_os.c
> > +++ b/drivers/net/mlx5/linux/mlx5_os.c
> > @@ -575,11 +575,17 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv)
> >               goto error;
> >       }
> >  #endif
> > -     if (!sh->tunnel_hub && sh->config.dv_miss_info)
> > +     if (!sh->tunnel_hub && sh->config.dv_miss_info) {
> > +             err = mlx5_flow_restore_info_register();
> > +             if (err) {
> > +                     DRV_LOG(ERR, "Could not register mbuf dynflag for
> > rte_flow_get_restore_info");
> > +                     goto error;
> > +             }
> >               err = mlx5_alloc_tunnel_hub(sh);
> > -     if (err) {
> > -             DRV_LOG(ERR, "mlx5_alloc_tunnel_hub failed err=%d", err);
> > -             goto error;
> > +             if (err) {
> > +                     DRV_LOG(ERR, "mlx5_alloc_tunnel_hub failed
> > err=%d", err);
> > +                     goto error;
> > +             }
> >       }
> >       if (sh->config.reclaim_mode == MLX5_RCM_AGGR) {
> >               mlx5_glue->dr_reclaim_domain_memory(sh->rx_domain, 1);
> > diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> > index 9eae692037..77cdc802da 100644
> > --- a/drivers/net/mlx5/mlx5.h
> > +++ b/drivers/net/mlx5/mlx5.h
> > @@ -2116,7 +2116,8 @@ int mlx5_flow_query_counter(struct rte_eth_dev
> > *dev, struct rte_flow *flow,
> >  int mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev, struct rte_flow
> > *flow,
> >               FILE *file, struct rte_flow_error *error);
> >  #endif
> > -void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
> > +int mlx5_flow_restore_info_register(void);
> > +void mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev);
> >  int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
> >                       uint32_t nb_contexts, struct rte_flow_error *error);
> >  int mlx5_validate_action_ct(struct rte_eth_dev *dev,
> > diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> > index d0275fdd00..715b7d327d 100644
> > --- a/drivers/net/mlx5/mlx5_flow.c
> > +++ b/drivers/net/mlx5/mlx5_flow.c
> > @@ -1779,6 +1779,20 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
> >       priv->sh->shared_mark_enabled = 0;
> >  }
> >
> > +static uint64_t mlx5_restore_info_dynflag;
> > +
> > +int
> > +mlx5_flow_restore_info_register(void)
> > +{
> > +     int err = 0;
> > +
> > +     if (mlx5_restore_info_dynflag == 0) {
> > +             if
> > (rte_flow_restore_info_dynflag_register(&mlx5_restore_info_dynflag) < 0)
> > +                     err = ENOMEM;
> > +     }
> > +     return err;
> > +}
> > +
> >  /**
> >   * Set the Rx queue dynamic metadata (mask and offset) for a flow
> >   *
> > @@ -1786,7 +1800,7 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
> >   *   Pointer to the Ethernet device structure.
> >   */
> >  void
> > -mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
> > +mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev)
>
> Why did you change the name?

This helper now prepares multiple dynamic flags.
Do you have a better name in mind?


>
> >  {
> >       struct mlx5_priv *priv = dev->data->dev_private;
> >       unsigned int i;
> > @@ -1809,6 +1823,9 @@ mlx5_flow_rxq_dynf_metadata_set(struct
> > rte_eth_dev *dev)
> >                       data->flow_meta_offset =
> > rte_flow_dynf_metadata_offs;
> >                       data->flow_meta_port_mask = priv->sh-
> > >dv_meta_mask;
> >               }
> > +             data->mark_flag = RTE_MBUF_F_RX_FDIR_ID;
> > +             if (is_tunnel_offload_active(dev))
> > +                     data->mark_flag |= mlx5_restore_info_dynflag;
> >       }
> >  }
> >
> > @@ -11453,11 +11470,8 @@ mlx5_flow_tunnel_get_restore_info(struct
> > rte_eth_dev *dev,
> >       const struct mlx5_flow_tbl_data_entry *tble;
> >       const uint64_t mask = RTE_MBUF_F_RX_FDIR |
> > RTE_MBUF_F_RX_FDIR_ID;
> >
> > -     if (!is_tunnel_offload_active(dev)) {
> > -             info->flags = 0;
> > -             return 0;
> > -     }
> > -
> > +     if ((ol_flags & mlx5_restore_info_dynflag) == 0)
> > +             goto err;
> >       if ((ol_flags & mask) != mask)
> >               goto err;
> >       tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
> > diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
> > index a2be523e9e..71c4638251 100644
> > --- a/drivers/net/mlx5/mlx5_rx.c
> > +++ b/drivers/net/mlx5/mlx5_rx.c
> > @@ -857,7 +857,7 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct
> > rte_mbuf *pkt,
> >               if (MLX5_FLOW_MARK_IS_VALID(mark)) {
> >                       pkt->ol_flags |= RTE_MBUF_F_RX_FDIR;
> >                       if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
> > -                             pkt->ol_flags |= RTE_MBUF_F_RX_FDIR_ID;
> > +                             pkt->ol_flags |= rxq->mark_flag;
> >                               pkt->hash.fdir.hi =
> > mlx5_flow_mark_get(mark);
> >                       }
> >               }
> > diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
> > index 52c35c83f8..3514edd84e 100644
> > --- a/drivers/net/mlx5/mlx5_rx.h
> > +++ b/drivers/net/mlx5/mlx5_rx.h
> > @@ -136,6 +136,7 @@ struct mlx5_rxq_data {
> >       struct mlx5_uar_data uar_data; /* CQ doorbell. */
> >       uint32_t cqn; /* CQ number. */
> >       uint8_t cq_arm_sn; /* CQ arm seq number. */
> > +     uint64_t mark_flag; /* ol_flags to set with marks. */
> >       uint32_t tunnel; /* Tunnel information. */
> >       int timestamp_offset; /* Dynamic mbuf field for timestamp. */
> >       uint64_t timestamp_rx_flag; /* Dynamic mbuf flag for timestamp. */
> > diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
> > index bbaa7d2aa0..7bdb897612 100644
> > --- a/drivers/net/mlx5/mlx5_trigger.c
> > +++ b/drivers/net/mlx5/mlx5_trigger.c
> > @@ -1282,8 +1282,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
> >                       dev->data->port_id);
> >               goto error;
> >       }
> > -     /* Set a mask and offset of dynamic metadata flows into Rx queues.
> > */
> > -     mlx5_flow_rxq_dynf_metadata_set(dev);
> > +     /* Set dynamic fields and flags into Rx queues. */
> > +     mlx5_flow_rxq_dynf_set(dev);
> >       /* Set flags and context to convert Rx timestamps. */
> >       mlx5_rxq_timestamp_set(dev);
> >       /* Set a mask and offset of scheduling on timestamp into Tx queues.
> > */
> > diff --git a/drivers/net/sfc/sfc_dp.c b/drivers/net/sfc/sfc_dp.c
> > index 9f2093b353..8b2dbea325 100644
> > --- a/drivers/net/sfc/sfc_dp.c
> > +++ b/drivers/net/sfc/sfc_dp.c
> > @@ -11,6 +11,7 @@
> >  #include <string.h>
> >  #include <errno.h>
> >
> > +#include <ethdev_driver.h>
> >  #include <rte_log.h>
> >  #include <rte_mbuf_dyn.h>
> >
> > @@ -135,12 +136,8 @@ sfc_dp_ft_ctx_id_register(void)
> >               .size = sizeof(uint8_t),
> >               .align = __alignof__(uint8_t),
> >       };
> > -     static const struct rte_mbuf_dynflag ft_ctx_id_valid = {
> > -             .name = "rte_net_sfc_dynflag_ft_ctx_id_valid",
> > -     };
> >
> >       int field_offset;
> > -     int flag;
> >
> >       SFC_GENERIC_LOG(INFO, "%s() entry", __func__);
> >
> > @@ -156,15 +153,13 @@ sfc_dp_ft_ctx_id_register(void)
> >               return -1;
> >       }
> >
> > -     flag = rte_mbuf_dynflag_register(&ft_ctx_id_valid);
> > -     if (flag < 0) {
> > +     if (rte_flow_restore_info_dynflag_register(&sfc_dp_ft_ctx_id_valid) <
> > 0) {
> >               SFC_GENERIC_LOG(ERR, "%s() failed to register ft_ctx_id
> > dynflag",
> >                               __func__);
> >               return -1;
> >       }
> >
> >       sfc_dp_ft_ctx_id_offset = field_offset;
> > -     sfc_dp_ft_ctx_id_valid = UINT64_C(1) << flag;
> >
> >       SFC_GENERIC_LOG(INFO, "%s() done", __func__);
> >
> > diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
> > index 2c9d615fb5..6c17d84d1b 100644
> > --- a/lib/ethdev/ethdev_driver.h
> > +++ b/lib/ethdev/ethdev_driver.h
> > @@ -1949,6 +1949,14 @@ __rte_internal
> >  int
> >  rte_eth_ip_reassembly_dynfield_register(int *field_offset, int *flag);
> >
> > +/**
> > + * @internal
> > + * Register mbuf dynamic flag for rte_flow_get_restore_info.
> > + */
> > +__rte_internal
> > +int
> > +rte_flow_restore_info_dynflag_register(uint64_t *flag);
> > +
> >
> >  /*
> >   * Legacy ethdev API used internally by drivers.
> > diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c
> > index 69e6e749f7..10cd9d12ba 100644
> > --- a/lib/ethdev/rte_flow.c
> > +++ b/lib/ethdev/rte_flow.c
> > @@ -1401,6 +1401,35 @@ rte_flow_get_restore_info(uint16_t port_id,
> >                                 NULL, rte_strerror(ENOTSUP));
> >  }
> >
> > +static struct {
> > +     const struct rte_mbuf_dynflag desc;
> > +     uint64_t value;
> > +} flow_restore_info_dynflag = {
> > +     .desc = { .name = "RTE_MBUF_F_RX_RESTORE_INFO", },
> > +};
>
> Why not have this structure in the register function?

The dynamic flag value is resolved once, at register time.
The accessor below uses this symbol to skip a lookup.
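For illustration, without that cached value the accessor would have to do a
by-name lookup on every call, something like (a sketch, not in the patch):

	uint64_t
	rte_flow_restore_info_dynflag(void)
	{
		/* walks the dynamic flag registry on each call */
		int bit = rte_mbuf_dynflag_lookup("RTE_MBUF_F_RX_RESTORE_INFO", NULL);

		return bit < 0 ? 0 : RTE_BIT64(bit);
	}

Keeping the value next to the descriptor turns the accessor into a plain
read, which matters if applications call it in their datapath.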


>
> > +
> > +uint64_t
> > +rte_flow_restore_info_dynflag(void)
> > +{
> > +     return flow_restore_info_dynflag.value;
> > +}
> > +
> > +int
> > +rte_flow_restore_info_dynflag_register(uint64_t *flag)
> > +{
> > +     if (flow_restore_info_dynflag.value == 0) {
> > +             int offset =
> > rte_mbuf_dynflag_register(&flow_restore_info_dynflag.desc);
> > +
> > +             if (offset < 0)
> > +                     return -1;
> > +             flow_restore_info_dynflag.value = RTE_BIT64(offset);
> > +     }
> > +     if (*flag)
> > +             *flag = rte_flow_restore_info_dynflag();
> > +
> > +     return 0;
> > +}
> > +
> >  int
> >  rte_flow_tunnel_action_decap_release(uint16_t port_id,
> >                                    struct rte_flow_action *actions,

-- 
David Marchand



* Re: [RFC PATCH] ethdev: advertise flow restore in mbuf
  2023-05-24 18:44   ` David Marchand
@ 2023-06-01  8:48     ` David Marchand
  2023-06-01  9:31       ` Ori Kam
  0 siblings, 1 reply; 25+ messages in thread
From: David Marchand @ 2023-06-01  8:48 UTC (permalink / raw)
  To: Ori Kam
  Cc: dev, NBU-Contact-Thomas Monjalon (EXTERNAL),
	i.maximets, Aman Singh, Yuying Zhang, Matan Azrad,
	Slava Ovsiienko, Andrew Rybchenko, Ferruh Yigit

On Wed, May 24, 2023 at 8:44 PM David Marchand
<david.marchand@redhat.com> wrote:
> On Wed, May 24, 2023 at 6:00 PM Ori Kam <orika@nvidia.com> wrote:
> > > As reported by Ilya [1], unconditionally calling
> > > rte_flow_get_restore_info() impacts an application performance for drivers
> > > that do not provide this ops.
> > > It could also impact processing of packets that require no call to
> > > rte_flow_get_restore_info() at all.
> > >
> > > Advertise in mbuf (via a dynamic flag) whether the driver has more
> > > metadata to provide via rte_flow_get_restore_info().
> > > The application can then call it only when required.
> > >
> > > Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-
> > > 7f659bfa40de@ovn.org/
> > > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > > ---
> > > Note: I did not test this RFC patch yet but I hope we can resume and
> > > maybe conclude on the discussion for the tunnel offloading API.
> > >
> >
> > I think your approach has a good base but what happens if
> > it is not relevant for all flows? In this case your solution will not work.
>
> Sorry, I am not following.
> Could you develop?

I still don't get your comment; could you give an example/use case
where this approach can't work?
Thanks.


-- 
David Marchand



* RE: [RFC PATCH] ethdev: advertise flow restore in mbuf
  2023-06-01  8:48     ` David Marchand
@ 2023-06-01  9:31       ` Ori Kam
  2023-06-01  9:43         ` David Marchand
  0 siblings, 1 reply; 25+ messages in thread
From: Ori Kam @ 2023-06-01  9:31 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, NBU-Contact-Thomas Monjalon (EXTERNAL),
	i.maximets, Aman Singh, Yuying Zhang, Matan Azrad,
	Slava Ovsiienko, Andrew Rybchenko, Ferruh Yigit



> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Thursday, June 1, 2023 11:48 AM
> 
> On Wed, May 24, 2023 at 8:44 PM David Marchand
> <david.marchand@redhat.com> wrote:
> > On Wed, May 24, 2023 at 6:00 PM Ori Kam <orika@nvidia.com> wrote:
> > > > As reported by Ilya [1], unconditionally calling
> > > > rte_flow_get_restore_info() impacts an application performance for
> drivers
> > > > that do not provide this ops.
> > > > It could also impact processing of packets that require no call to
> > > > rte_flow_get_restore_info() at all.
> > > >
> > > > Advertise in mbuf (via a dynamic flag) whether the driver has more
> > > > metadata to provide via rte_flow_get_restore_info().
> > > > The application can then call it only when required.
> > > >
> > > > Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-
> > > > 7f659bfa40de@ovn.org/
> > > > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > > > ---
> > > > Note: I did not test this RFC patch yet but I hope we can resume and
> > > > maybe conclude on the discussion for the tunnel offloading API.
> > > >
> > >
> > > I think your approach has a good base but what happens if
> > > it is not relevant for all flows? In this case your solution will not work.
> >
> > Sorry, I am not following.
> > Could you develop?
> 
> I still don't get your comment, could you give an example/usecase
> where this approach can't work?
> Thanks.
> 
I'm thinking of a use case where some flows have the restore info while
others don't: for example, we get ARP packets or other packets that are
not tunneled, and we also get tunneled packets.

Or, for example, the PMD supports this flag but the application has not
offloaded such a rule yet.

In those cases the application will be slow even if it didn't offload
the rules. I assume we can say that if the application wants to use
this, it should know that other packets will have some performance
issues.

From my point of view, if the application requested the tunnel offload,
it should always check this function.

> 
> --
> David Marchand



* Re: [RFC PATCH] ethdev: advertise flow restore in mbuf
  2023-06-01  9:31       ` Ori Kam
@ 2023-06-01  9:43         ` David Marchand
  2023-06-01 10:02           ` Ori Kam
  0 siblings, 1 reply; 25+ messages in thread
From: David Marchand @ 2023-06-01  9:43 UTC (permalink / raw)
  To: Ori Kam
  Cc: dev, NBU-Contact-Thomas Monjalon (EXTERNAL),
	i.maximets, Aman Singh, Yuying Zhang, Matan Azrad,
	Slava Ovsiienko, Andrew Rybchenko, Ferruh Yigit

On Thu, Jun 1, 2023 at 11:31 AM Ori Kam <orika@nvidia.com> wrote:
>
>
>
> > -----Original Message-----
> > From: David Marchand <david.marchand@redhat.com>
> > Sent: Thursday, June 1, 2023 11:48 AM
> >
> > On Wed, May 24, 2023 at 8:44 PM David Marchand
> > <david.marchand@redhat.com> wrote:
> > > On Wed, May 24, 2023 at 6:00 PM Ori Kam <orika@nvidia.com> wrote:
> > > > > As reported by Ilya [1], unconditionally calling
> > > > > rte_flow_get_restore_info() impacts an application performance for
> > drivers
> > > > > that do not provide this ops.
> > > > > It could also impact processing of packets that require no call to
> > > > > rte_flow_get_restore_info() at all.
> > > > >
> > > > > Advertise in mbuf (via a dynamic flag) whether the driver has more
> > > > > metadata to provide via rte_flow_get_restore_info().
> > > > > The application can then call it only when required.
> > > > >
> > > > > Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-
> > > > > 7f659bfa40de@ovn.org/
> > > > > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > > > > ---
> > > > > Note: I did not test this RFC patch yet but I hope we can resume and
> > > > > maybe conclude on the discussion for the tunnel offloading API.
> > > > >
> > > >
> > > > I think your approach has a good base but what happens if
> > > > it is not relevant for all flows? In this case your solution will not work.
> > >
> > > Sorry, I am not following.
> > > Could you develop?
> >
> > I still don't get your comment, could you give an example/usecase
> > where this approach can't work?
> > Thanks.
> >
> I'm think of a use case that some flows have the restore info, while
> other don't for example, we get arp packets or some packets that
> are not tunneled, and we also get tunneled packets.
>
> Or for example PMD supports this flag but the application didn't offload such a rule yet.

Again, maybe I missed something, but my proposal is for a *per packet*
report from the driver.
I am not for a global driver capability, if this is what you have in mind.


>
> In those cases application will be slow even if he didn't offload the rules,
> I assume we can say that if application wants to use this he should know
> that other packets will have some performance issues.
>
> From my point of view if application requested the tunnel offload it should
> always check this function.

With a per packet flag, the application only calls restore_info when
such tunnel offload rules have been requested, and only for packets
that require it.


-- 
David Marchand



* RE: [RFC PATCH] ethdev: advertise flow restore in mbuf
  2023-06-01  9:43         ` David Marchand
@ 2023-06-01 10:02           ` Ori Kam
  2023-06-01 10:03             ` David Marchand
  0 siblings, 1 reply; 25+ messages in thread
From: Ori Kam @ 2023-06-01 10:02 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, NBU-Contact-Thomas Monjalon (EXTERNAL),
	i.maximets, Aman Singh, Yuying Zhang, Matan Azrad,
	Slava Ovsiienko, Andrew Rybchenko, Ferruh Yigit

Hi David,

> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Thursday, June 1, 2023 12:43 PM
> Subject: Re: [RFC PATCH] ethdev: advertise flow restore in mbuf
> 
> On Thu, Jun 1, 2023 at 11:31 AM Ori Kam <orika@nvidia.com> wrote:
> >
> >
> >
> > > -----Original Message-----
> > > From: David Marchand <david.marchand@redhat.com>
> > > Sent: Thursday, June 1, 2023 11:48 AM
> > >
> > > On Wed, May 24, 2023 at 8:44 PM David Marchand
> > > <david.marchand@redhat.com> wrote:
> > > > On Wed, May 24, 2023 at 6:00 PM Ori Kam <orika@nvidia.com> wrote:
> > > > > > As reported by Ilya [1], unconditionally calling
> > > > > > rte_flow_get_restore_info() impacts an application performance for
> > > drivers
> > > > > > that do not provide this ops.
> > > > > > It could also impact processing of packets that require no call to
> > > > > > rte_flow_get_restore_info() at all.
> > > > > >
> > > > > > Advertise in mbuf (via a dynamic flag) whether the driver has more
> > > > > > metadata to provide via rte_flow_get_restore_info().
> > > > > > The application can then call it only when required.
> > > > > >
> > > > > > Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-
> > > > > > 7f659bfa40de@ovn.org/
> > > > > > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > > > > > ---
> > > > > > Note: I did not test this RFC patch yet but I hope we can resume
> and
> > > > > > maybe conclude on the discussion for the tunnel offloading API.
> > > > > >
> > > > >
> > > > > I think your approach has a good base but what happens if
> > > > > it is not relevant for all flows? In this case your solution will not work.
> > > >
> > > > Sorry, I am not following.
> > > > Could you develop?
> > >
> > > I still don't get your comment, could you give an example/usecase
> > > where this approach can't work?
> > > Thanks.
> > >
> > I'm think of a use case that some flows have the restore info, while
> > other don't for example, we get arp packets or some packets that
> > are not tunneled, and we also get tunneled packets.
> >
> > Or for example PMD supports this flag but the application didn't offload
> such a rule yet.
> 
> Again, maybe I missed something, but my proposal is for a *per packet*
> report from the driver.
> I am not for a global driver capability, if this is what you have in mind.
> 
My bad; per-packet solves the issue I was talking about, but it raises
another concern: the PMD needs to add logic in its datapath, which may
affect all traffic.

> 
> >
> > In those cases application will be slow even if he didn't offload the rules,
> > I assume we can say that if application wants to use this he should know
> > that other packets will have some performance issues.
> >
> > From my point of view if application requested the tunnel offload it should
> > always check this function.
> 
> With a per packet flag, the application only calls restore_info when
> such tunnel offload rules have been requested, and only for packets
> that require it.
> 
> 
> --
> David Marchand



* Re: [RFC PATCH] ethdev: advertise flow restore in mbuf
  2023-06-01 10:02           ` Ori Kam
@ 2023-06-01 10:03             ` David Marchand
  0 siblings, 0 replies; 25+ messages in thread
From: David Marchand @ 2023-06-01 10:03 UTC (permalink / raw)
  To: Ori Kam
  Cc: dev, NBU-Contact-Thomas Monjalon (EXTERNAL),
	i.maximets, Aman Singh, Yuying Zhang, Matan Azrad,
	Slava Ovsiienko, Andrew Rybchenko, Ferruh Yigit

On Thu, Jun 1, 2023 at 12:02 PM Ori Kam <orika@nvidia.com> wrote:
>
> Hi David,
>
> > -----Original Message-----
> > From: David Marchand <david.marchand@redhat.com>
> > Sent: Thursday, June 1, 2023 12:43 PM
> > Subject: Re: [RFC PATCH] ethdev: advertise flow restore in mbuf
> >
> > On Thu, Jun 1, 2023 at 11:31 AM Ori Kam <orika@nvidia.com> wrote:
> > >
> > >
> > >
> > > > -----Original Message-----
> > > > From: David Marchand <david.marchand@redhat.com>
> > > > Sent: Thursday, June 1, 2023 11:48 AM
> > > >
> > > > On Wed, May 24, 2023 at 8:44 PM David Marchand
> > > > <david.marchand@redhat.com> wrote:
> > > > > On Wed, May 24, 2023 at 6:00 PM Ori Kam <orika@nvidia.com> wrote:
> > > > > > > As reported by Ilya [1], unconditionally calling
> > > > > > > rte_flow_get_restore_info() impacts an application performance for
> > > > drivers
> > > > > > > that do not provide this ops.
> > > > > > > It could also impact processing of packets that require no call to
> > > > > > > rte_flow_get_restore_info() at all.
> > > > > > >
> > > > > > > Advertise in mbuf (via a dynamic flag) whether the driver has more
> > > > > > > metadata to provide via rte_flow_get_restore_info().
> > > > > > > The application can then call it only when required.
> > > > > > >
> > > > > > > Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-
> > > > > > > 7f659bfa40de@ovn.org/
> > > > > > > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > > > > > > ---
> > > > > > > Note: I did not test this RFC patch yet but I hope we can resume
> > and
> > > > > > > maybe conclude on the discussion for the tunnel offloading API.
> > > > > > >
> > > > > >
> > > > > > I think your approach has a good base but what happens if
> > > > > > it is not relevant for all flows? In this case your solution will not work.
> > > > >
> > > > > Sorry, I am not following.
> > > > > Could you develop?
> > > >
> > > > I still don't get your comment, could you give an example/usecase
> > > > where this approach can't work?
> > > > Thanks.
> > > >
> > > I'm thinking of a use case where some flows have the restore info, while
> > > others don't. For example, we get ARP packets or some packets that
> > > are not tunneled, and we also get tunneled packets.
> > >
> > > Or, for example, the PMD supports this flag but the application hasn't
> > > offloaded such a rule yet.
> >
> > Again, maybe I missed something, but my proposal is for a *per packet*
> > report from the driver.
> > I am not for a global driver capability, if this is what you have in mind.
> >
> My bad, a per-packet flag solves the issue I was talking about, but it makes things
> worse in another way: the PMD needs to add logic in its datapath, and this may
> affect all traffic.

Well, have a look at the patch then: I already updated the driver.
I don't expect much of an impact with the current code, though.
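
For reference, the application-side counterpart is the check added to testpmd in
the patch; schematically it is just the following (a sketch, with port_id/mb coming
from the application's Rx loop and restore_flag fetched once via
rte_flow_restore_info_dynflag()):

#include <rte_flow.h>
#include <rte_mbuf.h>

static void
handle_restore_info(uint16_t port_id, struct rte_mbuf *mb, uint64_t restore_flag)
{
	struct rte_flow_restore_info info = { 0, };
	struct rte_flow_error error;

	/* Only packets carrying the dynamic flag pay for the extra call. */
	if ((mb->ol_flags & restore_flag) != 0 &&
	    rte_flow_get_restore_info(port_id, mb, &info, &error) == 0) {
		/* info.flags / info.tunnel now describe the offload state. */
	}
}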


-- 
David Marchand


^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [RFC PATCH] ethdev: advertise flow restore in mbuf
  2023-05-05 10:31 [RFC PATCH] ethdev: advertise flow restore in mbuf David Marchand
  2023-05-24 12:57 ` David Marchand
  2023-05-24 16:00 ` Ori Kam
@ 2023-06-14 16:46 ` Slava Ovsiienko
  2023-06-15  8:00   ` David Marchand
  2023-06-15 13:47 ` [RFC PATCH v2] " David Marchand
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 25+ messages in thread
From: Slava Ovsiienko @ 2023-06-14 16:46 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: NBU-Contact-Thomas Monjalon (EXTERNAL),
	i.maximets, Aman Singh, Yuying Zhang, Matan Azrad,
	Andrew Rybchenko, Ferruh Yigit, Ori Kam

Hi, David

It looks like a good application datapath optimization to me.
But I see some concerns:

1. Are we sure the PMD should register the flag, and not the application?
IIRC, usually the application registers the needed flags/fields and the PMDs just follow.

+       if (!sh->tunnel_hub && sh->config.dv_miss_info) {
+               err = mlx5_flow_restore_info_register();
+               if (err) {
+                       DRV_LOG(ERR, "Could not register mbuf dynflag for rte_flow_get_restore_info");
+                       goto error;
+               }

2. This might have some general mlx5 Rx datapath performance impact (very minor though).
It introduces an extra memory access (instead of an immediate value). We should test thoroughly.
 @@ -857,7 +857,7 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
                if (MLX5_FLOW_MARK_IS_VALID(mark)) {
                        pkt->ol_flags |= RTE_MBUF_F_RX_FDIR;
                        if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
-                               pkt->ol_flags |= RTE_MBUF_F_RX_FDIR_ID;
+                               pkt->ol_flags |= rxq->mark_flag;
                                pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
                        }
                }

3. RTE_MBUF_F_RX_FDIR_ID is also handled in vectorized rx_burst() routines.
Please, see:
 - mlx5_rxtx_vec_altivec.h
 - mlx5_rxtx_vec_neon.h
 - mlx5_rxtx_vec_sse.h
This code must be updated, and it also might have some general performance impact
(regardless of whether tunnel offload is used, i.e. for all cases).

With best regards,
Slava

> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Friday, May 5, 2023 1:31 PM
> To: dev@dpdk.org
> Cc: NBU-Contact-Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>;
> i.maximets@ovn.org; Aman Singh <aman.deep.singh@intel.com>; Yuying
> Zhang <yuying.zhang@intel.com>; Matan Azrad <matan@nvidia.com>; Slava
> Ovsiienko <viacheslavo@nvidia.com>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>; Ferruh Yigit <ferruh.yigit@amd.com>; Ori
> Kam <orika@nvidia.com>
> Subject: [RFC PATCH] ethdev: advertise flow restore in mbuf
> 
> As reported by Ilya [1], unconditionally calling
> rte_flow_get_restore_info() impacts an application performance for drivers
> that do not provide this ops.
> It could also impact processing of packets that require no call to
> rte_flow_get_restore_info() at all.
> 
> Advertise in mbuf (via a dynamic flag) whether the driver has more metadata
> to provide via rte_flow_get_restore_info().
> The application can then call it only when required.
> 
> Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-
> 7f659bfa40de@ovn.org/
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
> Note: I did not test this RFC patch yet but I hope we can resume and maybe
> conclude on the discussion for the tunnel offloading API.
> 
> ---
>  app/test-pmd/util.c              |  9 +++++----
>  drivers/net/mlx5/linux/mlx5_os.c | 14 ++++++++++----
>  drivers/net/mlx5/mlx5.h          |  3 ++-
>  drivers/net/mlx5/mlx5_flow.c     | 26 ++++++++++++++++++++------
>  drivers/net/mlx5/mlx5_rx.c       |  2 +-
>  drivers/net/mlx5/mlx5_rx.h       |  1 +
>  drivers/net/mlx5/mlx5_trigger.c  |  4 ++--
>  drivers/net/sfc/sfc_dp.c         |  9 ++-------
>  lib/ethdev/ethdev_driver.h       |  8 ++++++++
>  lib/ethdev/rte_flow.c            | 29 +++++++++++++++++++++++++++++
>  lib/ethdev/rte_flow.h            | 19 ++++++++++++++++++-
>  lib/ethdev/version.map           |  4 ++++
>  12 files changed, 102 insertions(+), 26 deletions(-)
> 
> diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c index
> f9df5f69ef..5aa69ed545 100644
> --- a/app/test-pmd/util.c
> +++ b/app/test-pmd/util.c
> @@ -88,18 +88,20 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> struct rte_mbuf *pkts[],
>  	char print_buf[MAX_STRING_LEN];
>  	size_t buf_size = MAX_STRING_LEN;
>  	size_t cur_len = 0;
> +	uint64_t restore_info_dynflag;
> 
>  	if (!nb_pkts)
>  		return;
> +	restore_info_dynflag = rte_flow_restore_info_dynflag();
>  	MKDUMPSTR(print_buf, buf_size, cur_len,
>  		  "port %u/queue %u: %s %u packets\n", port_id, queue,
>  		  is_rx ? "received" : "sent", (unsigned int) nb_pkts);
>  	for (i = 0; i < nb_pkts; i++) {
> -		int ret;
>  		struct rte_flow_error error;
>  		struct rte_flow_restore_info info = { 0, };
> 
>  		mb = pkts[i];
> +		ol_flags = mb->ol_flags;
>  		if (rxq_share > 0)
>  			MKDUMPSTR(print_buf, buf_size, cur_len, "port %u, ",
>  				  mb->port);
> @@ -107,8 +109,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> struct rte_mbuf *pkts[],
>  		eth_type = RTE_BE_TO_CPU_16(eth_hdr->ether_type);
>  		packet_type = mb->packet_type;
>  		is_encapsulation = RTE_ETH_IS_TUNNEL_PKT(packet_type);
> -		ret = rte_flow_get_restore_info(port_id, mb, &info, &error);
> -		if (!ret) {
> +		if ((ol_flags & restore_info_dynflag) != 0 &&
> +				rte_flow_get_restore_info(port_id, mb, &info,
> &error) == 0) {
>  			MKDUMPSTR(print_buf, buf_size, cur_len,
>  				  "restore info:");
>  			if (info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL) {
> @@ -153,7 +155,6 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> struct rte_mbuf *pkts[],
>  			  " - pool=%s - type=0x%04x - length=%u -
> nb_segs=%d",
>  			  mb->pool->name, eth_type, (unsigned int) mb-
> >pkt_len,
>  			  (int)mb->nb_segs);
> -		ol_flags = mb->ol_flags;
>  		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
>  			MKDUMPSTR(print_buf, buf_size, cur_len,
>  				  " - RSS hash=0x%x",
> diff --git a/drivers/net/mlx5/linux/mlx5_os.c
> b/drivers/net/mlx5/linux/mlx5_os.c
> index 980234e2ac..e6e3784013 100644
> --- a/drivers/net/mlx5/linux/mlx5_os.c
> +++ b/drivers/net/mlx5/linux/mlx5_os.c
> @@ -575,11 +575,17 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv)
>  		goto error;
>  	}
>  #endif
> -	if (!sh->tunnel_hub && sh->config.dv_miss_info)
> +	if (!sh->tunnel_hub && sh->config.dv_miss_info) {
> +		err = mlx5_flow_restore_info_register();
> +		if (err) {
> +			DRV_LOG(ERR, "Could not register mbuf dynflag for
> rte_flow_get_restore_info");
> +			goto error;
> +		}
>  		err = mlx5_alloc_tunnel_hub(sh);
> -	if (err) {
> -		DRV_LOG(ERR, "mlx5_alloc_tunnel_hub failed err=%d", err);
> -		goto error;
> +		if (err) {
> +			DRV_LOG(ERR, "mlx5_alloc_tunnel_hub failed
> err=%d", err);
> +			goto error;
> +		}
>  	}
>  	if (sh->config.reclaim_mode == MLX5_RCM_AGGR) {
>  		mlx5_glue->dr_reclaim_domain_memory(sh->rx_domain, 1);
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 9eae692037..77cdc802da 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -2116,7 +2116,8 @@ int mlx5_flow_query_counter(struct rte_eth_dev
> *dev, struct rte_flow *flow,  int mlx5_flow_dev_dump_ipool(struct
> rte_eth_dev *dev, struct rte_flow *flow,
>  		FILE *file, struct rte_flow_error *error);  #endif -void
> mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
> +int mlx5_flow_restore_info_register(void);
> +void mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev);
>  int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
>  			uint32_t nb_contexts, struct rte_flow_error *error);
> int mlx5_validate_action_ct(struct rte_eth_dev *dev, diff --git
> a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index
> d0275fdd00..715b7d327d 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -1779,6 +1779,20 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
>  	priv->sh->shared_mark_enabled = 0;
>  }
> 
> +static uint64_t mlx5_restore_info_dynflag;
> +
> +int
> +mlx5_flow_restore_info_register(void)
> +{
> +	int err = 0;
> +
> +	if (mlx5_restore_info_dynflag == 0) {
> +		if
> (rte_flow_restore_info_dynflag_register(&mlx5_restore_info_dynflag) < 0)
> +			err = ENOMEM;
> +	}
> +	return err;
> +}
> +
>  /**
>   * Set the Rx queue dynamic metadata (mask and offset) for a flow
>   *
> @@ -1786,7 +1800,7 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
>   *   Pointer to the Ethernet device structure.
>   */
>  void
> -mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
> +mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev)
>  {
>  	struct mlx5_priv *priv = dev->data->dev_private;
>  	unsigned int i;
> @@ -1809,6 +1823,9 @@ mlx5_flow_rxq_dynf_metadata_set(struct
> rte_eth_dev *dev)
>  			data->flow_meta_offset =
> rte_flow_dynf_metadata_offs;
>  			data->flow_meta_port_mask = priv->sh-
> >dv_meta_mask;
>  		}
> +		data->mark_flag = RTE_MBUF_F_RX_FDIR_ID;
> +		if (is_tunnel_offload_active(dev))
> +			data->mark_flag |= mlx5_restore_info_dynflag;
>  	}
>  }
> 
> @@ -11453,11 +11470,8 @@ mlx5_flow_tunnel_get_restore_info(struct
> rte_eth_dev *dev,
>  	const struct mlx5_flow_tbl_data_entry *tble;
>  	const uint64_t mask = RTE_MBUF_F_RX_FDIR |
> RTE_MBUF_F_RX_FDIR_ID;
> 
> -	if (!is_tunnel_offload_active(dev)) {
> -		info->flags = 0;
> -		return 0;
> -	}
> -
> +	if ((ol_flags & mlx5_restore_info_dynflag) == 0)
> +		goto err;
>  	if ((ol_flags & mask) != mask)
>  		goto err;
>  	tble = tunnel_mark_decode(dev, m->hash.fdir.hi); diff --git
> a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c index
> a2be523e9e..71c4638251 100644
> --- a/drivers/net/mlx5/mlx5_rx.c
> +++ b/drivers/net/mlx5/mlx5_rx.c
> @@ -857,7 +857,7 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct
> rte_mbuf *pkt,
>  		if (MLX5_FLOW_MARK_IS_VALID(mark)) {
>  			pkt->ol_flags |= RTE_MBUF_F_RX_FDIR;
>  			if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
> -				pkt->ol_flags |= RTE_MBUF_F_RX_FDIR_ID;
> +				pkt->ol_flags |= rxq->mark_flag;
>  				pkt->hash.fdir.hi =
> mlx5_flow_mark_get(mark);
>  			}
>  		}
> diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h index
> 52c35c83f8..3514edd84e 100644
> --- a/drivers/net/mlx5/mlx5_rx.h
> +++ b/drivers/net/mlx5/mlx5_rx.h
> @@ -136,6 +136,7 @@ struct mlx5_rxq_data {
>  	struct mlx5_uar_data uar_data; /* CQ doorbell. */
>  	uint32_t cqn; /* CQ number. */
>  	uint8_t cq_arm_sn; /* CQ arm seq number. */
> +	uint64_t mark_flag; /* ol_flags to set with marks. */
>  	uint32_t tunnel; /* Tunnel information. */
>  	int timestamp_offset; /* Dynamic mbuf field for timestamp. */
>  	uint64_t timestamp_rx_flag; /* Dynamic mbuf flag for timestamp. */
> diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
> index bbaa7d2aa0..7bdb897612 100644
> --- a/drivers/net/mlx5/mlx5_trigger.c
> +++ b/drivers/net/mlx5/mlx5_trigger.c
> @@ -1282,8 +1282,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
>  			dev->data->port_id);
>  		goto error;
>  	}
> -	/* Set a mask and offset of dynamic metadata flows into Rx queues.
> */
> -	mlx5_flow_rxq_dynf_metadata_set(dev);
> +	/* Set dynamic fields and flags into Rx queues. */
> +	mlx5_flow_rxq_dynf_set(dev);
>  	/* Set flags and context to convert Rx timestamps. */
>  	mlx5_rxq_timestamp_set(dev);
>  	/* Set a mask and offset of scheduling on timestamp into Tx queues.
> */ diff --git a/drivers/net/sfc/sfc_dp.c b/drivers/net/sfc/sfc_dp.c index
> 9f2093b353..8b2dbea325 100644
> --- a/drivers/net/sfc/sfc_dp.c
> +++ b/drivers/net/sfc/sfc_dp.c
> @@ -11,6 +11,7 @@
>  #include <string.h>
>  #include <errno.h>
> 
> +#include <ethdev_driver.h>
>  #include <rte_log.h>
>  #include <rte_mbuf_dyn.h>
> 
> @@ -135,12 +136,8 @@ sfc_dp_ft_ctx_id_register(void)
>  		.size = sizeof(uint8_t),
>  		.align = __alignof__(uint8_t),
>  	};
> -	static const struct rte_mbuf_dynflag ft_ctx_id_valid = {
> -		.name = "rte_net_sfc_dynflag_ft_ctx_id_valid",
> -	};
> 
>  	int field_offset;
> -	int flag;
> 
>  	SFC_GENERIC_LOG(INFO, "%s() entry", __func__);
> 
> @@ -156,15 +153,13 @@ sfc_dp_ft_ctx_id_register(void)
>  		return -1;
>  	}
> 
> -	flag = rte_mbuf_dynflag_register(&ft_ctx_id_valid);
> -	if (flag < 0) {
> +	if (rte_flow_restore_info_dynflag_register(&sfc_dp_ft_ctx_id_valid) <
> +0) {
>  		SFC_GENERIC_LOG(ERR, "%s() failed to register ft_ctx_id
> dynflag",
>  				__func__);
>  		return -1;
>  	}
> 
>  	sfc_dp_ft_ctx_id_offset = field_offset;
> -	sfc_dp_ft_ctx_id_valid = UINT64_C(1) << flag;
> 
>  	SFC_GENERIC_LOG(INFO, "%s() done", __func__);
> 
> diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h index
> 2c9d615fb5..6c17d84d1b 100644
> --- a/lib/ethdev/ethdev_driver.h
> +++ b/lib/ethdev/ethdev_driver.h
> @@ -1949,6 +1949,14 @@ __rte_internal
>  int
>  rte_eth_ip_reassembly_dynfield_register(int *field_offset, int *flag);
> 
> +/**
> + * @internal
> + * Register mbuf dynamic flag for rte_flow_get_restore_info.
> + */
> +__rte_internal
> +int
> +rte_flow_restore_info_dynflag_register(uint64_t *flag);
> +
> 
>  /*
>   * Legacy ethdev API used internally by drivers.
> diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c index
> 69e6e749f7..10cd9d12ba 100644
> --- a/lib/ethdev/rte_flow.c
> +++ b/lib/ethdev/rte_flow.c
> @@ -1401,6 +1401,35 @@ rte_flow_get_restore_info(uint16_t port_id,
>  				  NULL, rte_strerror(ENOTSUP));
>  }
> 
> +static struct {
> +	const struct rte_mbuf_dynflag desc;
> +	uint64_t value;
> +} flow_restore_info_dynflag = {
> +	.desc = { .name = "RTE_MBUF_F_RX_RESTORE_INFO", }, };
> +
> +uint64_t
> +rte_flow_restore_info_dynflag(void)
> +{
> +	return flow_restore_info_dynflag.value; }
> +
> +int
> +rte_flow_restore_info_dynflag_register(uint64_t *flag) {
> +	if (flow_restore_info_dynflag.value == 0) {
> +		int offset =
> +rte_mbuf_dynflag_register(&flow_restore_info_dynflag.desc);
> +
> +		if (offset < 0)
> +			return -1;
> +		flow_restore_info_dynflag.value = RTE_BIT64(offset);
> +	}
> +	if (*flag)
> +		*flag = rte_flow_restore_info_dynflag();
> +
> +	return 0;
> +}
> +
>  int
>  rte_flow_tunnel_action_decap_release(uint16_t port_id,
>  				     struct rte_flow_action *actions, diff --git
> a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h index
> 713ba8b65c..5ce2db4bbd 100644
> --- a/lib/ethdev/rte_flow.h
> +++ b/lib/ethdev/rte_flow.h
> @@ -4918,7 +4918,24 @@ rte_flow_tunnel_match(uint16_t port_id,
>  		      struct rte_flow_error *error);
> 
>  /**
> - * Populate the current packet processing state, if exists, for the given mbuf.
> + * On reception of a mbuf from HW, a call to
> +rte_flow_get_restore_info() may be
> + * required to retrieve some metadata.
> + * This function returns the associated mbuf ol_flags.
> + *
> + * Note: the dynamic flag is registered during the probing of the first
> +device
> + * that requires it. If this function returns a non 0 value, this value
> +won't
> + * change for the rest of the life of the application.
> + *
> + * @return
> + *   The offload flag indicating rte_flow_get_restore_info() must be called.
> + */
> +__rte_experimental
> +uint64_t
> +rte_flow_restore_info_dynflag(void);
> +
> +/**
> + * If a mbuf contains the rte_flow_restore_info_dynflag() flag in
> +ol_flags,
> + * populate the current packet processing state.
>   *
>   * One should negotiate tunnel metadata delivery from the NIC to the HW.
>   * @see rte_eth_rx_metadata_negotiate() diff --git a/lib/ethdev/version.map
> b/lib/ethdev/version.map index 357d1a88c0..bf5668e928 100644
> --- a/lib/ethdev/version.map
> +++ b/lib/ethdev/version.map
> @@ -299,6 +299,9 @@ EXPERIMENTAL {
>  	rte_flow_action_handle_query_update;
>  	rte_flow_async_action_handle_query_update;
>  	rte_flow_async_create_by_index;
> +
> +	# added in 23.07
> +	rte_flow_restore_info_dynflag;
>  };
> 
>  INTERNAL {
> @@ -328,4 +331,5 @@ INTERNAL {
>  	rte_eth_representor_id_get;
>  	rte_eth_switch_domain_alloc;
>  	rte_eth_switch_domain_free;
> +	rte_flow_restore_info_dynflag_register;
>  };
> --
> 2.40.0


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH] ethdev: advertise flow restore in mbuf
  2023-06-14 16:46 ` Slava Ovsiienko
@ 2023-06-15  8:00   ` David Marchand
  0 siblings, 0 replies; 25+ messages in thread
From: David Marchand @ 2023-06-15  8:00 UTC (permalink / raw)
  To: Slava Ovsiienko
  Cc: dev, NBU-Contact-Thomas Monjalon (EXTERNAL),
	i.maximets, Aman Singh, Yuying Zhang, Matan Azrad,
	Andrew Rybchenko, Ferruh Yigit, Ori Kam

On Wed, Jun 14, 2023 at 6:46 PM Slava Ovsiienko <viacheslavo@nvidia.com> wrote:
>
> Hi, David
>
> It looks like a good application datapath optimization to me.
> But I see some concerns:
>
> 1. Are we sure the PMD should register the flag, and not the application?
> IIRC, usually the application registers the needed flags/fields and the PMDs just follow.

I agree, this should come from the application.
Maybe this flag should be resolved when the application calls
rte_eth_rx_metadata_negotiate().
For an unknown reason, mlx5 does not implement this ops atm, but I
guess this would be a good place to check for the dv_xmeta_en stuff,
too.
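
Something like the below is what it would boil down to for the application once the
flag is tied to the negotiation step; a rough sketch of that direction (which the
later v2 takes), where setup_restore_info() is an assumed helper name and error
handling is trimmed:

#include <rte_ethdev.h>
#include <rte_flow.h>

/* Negotiate tunnel metadata delivery before configuring the port, then cache
 * the dynamic flag for use in the Rx path. */
static int
setup_restore_info(uint16_t port_id, uint64_t *restore_flag)
{
	uint64_t features = RTE_ETH_RX_METADATA_TUNNEL_ID;

	if (rte_eth_rx_metadata_negotiate(port_id, &features) != 0 ||
	    (features & RTE_ETH_RX_METADATA_TUNNEL_ID) == 0)
		return -1; /* restore info will not be flagged in mbufs */

	*restore_flag = rte_flow_restore_info_dynflag();
	return 0;
}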


>
> +       if (!sh->tunnel_hub && sh->config.dv_miss_info) {
> +               err = mlx5_flow_restore_info_register();
> +               if (err) {
> +                       DRV_LOG(ERR, "Could not register mbuf dynflag for rte_flow_get_restore_info");
> +                       goto error;
> +               }
>
> 2. This might have some general mlx5 Rx datapath performance impact (very minor though).
> It introduces an extra memory access (instead of an immediate value). We should test thoroughly.
>  @@ -857,7 +857,7 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
>                 if (MLX5_FLOW_MARK_IS_VALID(mark)) {
>                         pkt->ol_flags |= RTE_MBUF_F_RX_FDIR;
>                         if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
> -                               pkt->ol_flags |= RTE_MBUF_F_RX_FDIR_ID;
> +                               pkt->ol_flags |= rxq->mark_flag;
>                                 pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
>                         }
>                 }

I am curious about the impact of such a change too.


>
> 3. RTE_MBUF_F_RX_FDIR_ID is also handled in vectorized rx_burst() routines.
> Please, see:
>  - mlx5_rxtx_vec_altivec.h
>  - mlx5_rxtx_vec_neon.h
>  - mlx5_rxtx_vec_sse.h
> This code must be updated, and it also might have some general performance impact
> (regardless of whether tunnel offload is used, i.e. for all cases).

Indeed, I forgot to squash some later changes I was experimenting with.
Well, numbers will tell us.

I'll send an RFC v2.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFC PATCH v2] ethdev: advertise flow restore in mbuf
  2023-05-05 10:31 [RFC PATCH] ethdev: advertise flow restore in mbuf David Marchand
                   ` (2 preceding siblings ...)
  2023-06-14 16:46 ` Slava Ovsiienko
@ 2023-06-15 13:47 ` David Marchand
  2023-06-19 12:20   ` Andrew Rybchenko
                     ` (2 more replies)
  2023-06-20 11:10 ` [RFC PATCH v3] " David Marchand
  2023-06-21 14:43 ` [PATCH v4] " David Marchand
  5 siblings, 3 replies; 25+ messages in thread
From: David Marchand @ 2023-06-15 13:47 UTC (permalink / raw)
  To: dev
  Cc: thomas, i.maximets, Aman Singh, Yuying Zhang, Matan Azrad,
	Viacheslav Ovsiienko, Ori Kam, Suanming Mou, David Christensen,
	Ruifeng Wang, Bruce Richardson, Konstantin Ananyev,
	Andrew Rybchenko, Ferruh Yigit

As reported by Ilya [1], unconditionally calling
rte_flow_get_restore_info() impacts an application performance for drivers
that do not provide this ops.
It could also impact processing of packets that require no call to
rte_flow_get_restore_info() at all.

Register a dynamic mbuf flag when an application negotiates tunnel
metadata delivery (calling rte_eth_rx_metadata_negotiate() with
RTE_ETH_RX_METADATA_TUNNEL_ID).

Drivers then advertise that metadata can be extracted by setting this
dynamic flag in each mbuf.

The application then calls rte_flow_get_restore_info() only when required.

Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-7f659bfa40de@ovn.org/
Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since RFC v1:
- rebased,
- updated vectorized datapath functions for net/mlx5,
- moved dynamic flag register to rte_eth_rx_metadata_negotiate() and
  hid rte_flow_restore_info_dynflag_register() into ethdev internals,

---
 app/test-pmd/util.c                      |  9 +++--
 drivers/net/mlx5/mlx5.c                  |  2 +
 drivers/net/mlx5/mlx5.h                  |  5 ++-
 drivers/net/mlx5/mlx5_flow.c             | 47 +++++++++++++++++++++---
 drivers/net/mlx5/mlx5_rx.c               |  2 +-
 drivers/net/mlx5/mlx5_rx.h               |  1 +
 drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 16 ++++----
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h    |  6 +--
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h     |  6 +--
 drivers/net/mlx5/mlx5_trigger.c          |  4 +-
 drivers/net/sfc/sfc_dp.c                 | 14 +------
 lib/ethdev/rte_ethdev.c                  |  5 +++
 lib/ethdev/rte_flow.c                    | 29 +++++++++++++++
 lib/ethdev/rte_flow.h                    | 18 ++++++++-
 lib/ethdev/rte_flow_driver.h             |  6 +++
 lib/ethdev/version.map                   |  1 +
 16 files changed, 130 insertions(+), 41 deletions(-)

diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
index f9df5f69ef..5aa69ed545 100644
--- a/app/test-pmd/util.c
+++ b/app/test-pmd/util.c
@@ -88,18 +88,20 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 	char print_buf[MAX_STRING_LEN];
 	size_t buf_size = MAX_STRING_LEN;
 	size_t cur_len = 0;
+	uint64_t restore_info_dynflag;
 
 	if (!nb_pkts)
 		return;
+	restore_info_dynflag = rte_flow_restore_info_dynflag();
 	MKDUMPSTR(print_buf, buf_size, cur_len,
 		  "port %u/queue %u: %s %u packets\n", port_id, queue,
 		  is_rx ? "received" : "sent", (unsigned int) nb_pkts);
 	for (i = 0; i < nb_pkts; i++) {
-		int ret;
 		struct rte_flow_error error;
 		struct rte_flow_restore_info info = { 0, };
 
 		mb = pkts[i];
+		ol_flags = mb->ol_flags;
 		if (rxq_share > 0)
 			MKDUMPSTR(print_buf, buf_size, cur_len, "port %u, ",
 				  mb->port);
@@ -107,8 +109,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 		eth_type = RTE_BE_TO_CPU_16(eth_hdr->ether_type);
 		packet_type = mb->packet_type;
 		is_encapsulation = RTE_ETH_IS_TUNNEL_PKT(packet_type);
-		ret = rte_flow_get_restore_info(port_id, mb, &info, &error);
-		if (!ret) {
+		if ((ol_flags & restore_info_dynflag) != 0 &&
+				rte_flow_get_restore_info(port_id, mb, &info, &error) == 0) {
 			MKDUMPSTR(print_buf, buf_size, cur_len,
 				  "restore info:");
 			if (info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL) {
@@ -153,7 +155,6 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
 			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
 			  (int)mb->nb_segs);
-		ol_flags = mb->ol_flags;
 		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
 			MKDUMPSTR(print_buf, buf_size, cur_len,
 				  " - RSS hash=0x%x",
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index a75fa1b7f0..58fde3af22 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -2347,6 +2347,7 @@ const struct eth_dev_ops mlx5_dev_ops = {
 	.get_monitor_addr = mlx5_get_monitor_addr,
 	.count_aggr_ports = mlx5_count_aggr_ports,
 	.map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
+	.rx_metadata_negotiate = mlx5_flow_rx_metadata_negotiate,
 };
 
 /* Available operations from secondary process. */
@@ -2372,6 +2373,7 @@ const struct eth_dev_ops mlx5_dev_sec_ops = {
 	.get_module_eeprom = mlx5_get_module_eeprom,
 	.count_aggr_ports = mlx5_count_aggr_ports,
 	.map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
+	.rx_metadata_negotiate = mlx5_flow_rx_metadata_negotiate,
 };
 
 /* Available operations in flow isolated mode. */
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 021049ad2b..bf464613f0 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1745,6 +1745,7 @@ struct mlx5_priv {
 	unsigned int lb_used:1; /* Loopback queue is referred to. */
 	uint32_t mark_enabled:1; /* If mark action is enabled on rxqs. */
 	uint32_t num_lag_ports:4; /* Number of ports can be bonded. */
+	uint32_t tunnel_enabled:1; /* If tunnel offloading is enabled on rxqs. */
 	uint16_t domain_id; /* Switch domain identifier. */
 	uint16_t vport_id; /* Associated VF vport index (if any). */
 	uint32_t vport_meta_tag; /* Used for vport index match ove VF LAG. */
@@ -2154,7 +2155,9 @@ int mlx5_flow_query_counter(struct rte_eth_dev *dev, struct rte_flow *flow,
 int mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev, struct rte_flow *flow,
 		FILE *file, struct rte_flow_error *error);
 #endif
-void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
+int mlx5_flow_rx_metadata_negotiate(struct rte_eth_dev *dev,
+	uint64_t *features);
+void mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev);
 int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
 			uint32_t nb_contexts, struct rte_flow_error *error);
 int mlx5_validate_action_ct(struct rte_eth_dev *dev,
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index eb1d7a6be2..19e567401b 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1796,6 +1796,38 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
 	priv->sh->shared_mark_enabled = 0;
 }
 
+static uint64_t mlx5_restore_info_dynflag;
+
+int
+mlx5_flow_rx_metadata_negotiate(struct rte_eth_dev *dev, uint64_t *features)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	uint64_t supported = 0;
+
+	if (!is_tunnel_offload_active(dev)) {
+		supported |= RTE_ETH_RX_METADATA_USER_FLAG;
+		supported |= RTE_ETH_RX_METADATA_USER_MARK;
+		if ((*features & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0) {
+			DRV_LOG(DEBUG,
+				"tunnel offload was not activated, consider setting dv_xmeta_en=%d",
+				MLX5_XMETA_MODE_MISS_INFO);
+		}
+	} else {
+		supported |= RTE_ETH_RX_METADATA_TUNNEL_ID;
+		if ((*features & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0 &&
+				mlx5_restore_info_dynflag == 0)
+			mlx5_restore_info_dynflag = rte_flow_restore_info_dynflag();
+	}
+
+	if (((*features & supported) & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0)
+		priv->tunnel_enabled = 1;
+	else
+		priv->tunnel_enabled = 0;
+
+	*features &= supported;
+	return 0;
+}
+
 /**
  * Set the Rx queue dynamic metadata (mask and offset) for a flow
  *
@@ -1803,11 +1835,15 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
  *   Pointer to the Ethernet device structure.
  */
 void
-mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
+mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
+	uint64_t mark_flag = RTE_MBUF_F_RX_FDIR_ID;
 	unsigned int i;
 
+	if (priv->tunnel_enabled)
+		mark_flag |= mlx5_restore_info_dynflag;
+
 	for (i = 0; i != priv->rxqs_n; ++i) {
 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
 		struct mlx5_rxq_data *data;
@@ -1826,6 +1862,7 @@ mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
 			data->flow_meta_offset = rte_flow_dynf_metadata_offs;
 			data->flow_meta_port_mask = priv->sh->dv_meta_mask;
 		}
+		data->mark_flag = mark_flag;
 	}
 }
 
@@ -11560,12 +11597,10 @@ mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
 	uint64_t ol_flags = m->ol_flags;
 	const struct mlx5_flow_tbl_data_entry *tble;
 	const uint64_t mask = RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
+	struct mlx5_priv *priv = dev->data->dev_private;
 
-	if (!is_tunnel_offload_active(dev)) {
-		info->flags = 0;
-		return 0;
-	}
-
+	if (priv->tunnel_enabled == 0)
+		goto err;
 	if ((ol_flags & mask) != mask)
 		goto err;
 	tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index a2be523e9e..71c4638251 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -857,7 +857,7 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
 		if (MLX5_FLOW_MARK_IS_VALID(mark)) {
 			pkt->ol_flags |= RTE_MBUF_F_RX_FDIR;
 			if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
-				pkt->ol_flags |= RTE_MBUF_F_RX_FDIR_ID;
+				pkt->ol_flags |= rxq->mark_flag;
 				pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
 			}
 		}
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 52c35c83f8..3514edd84e 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -136,6 +136,7 @@ struct mlx5_rxq_data {
 	struct mlx5_uar_data uar_data; /* CQ doorbell. */
 	uint32_t cqn; /* CQ number. */
 	uint8_t cq_arm_sn; /* CQ arm seq number. */
+	uint64_t mark_flag; /* ol_flags to set with marks. */
 	uint32_t tunnel; /* Tunnel information. */
 	int timestamp_offset; /* Dynamic mbuf field for timestamp. */
 	uint64_t timestamp_rx_flag; /* Dynamic mbuf flag for timestamp. */
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
index 14ffff26f4..4d0d05c376 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
@@ -296,15 +296,15 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 				const __vector unsigned char fdir_all_flags =
 					(__vector unsigned char)
 					(__vector unsigned int){
-					RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID,
-					RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID,
-					RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID,
-					RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID};
+					RTE_MBUF_F_RX_FDIR | rxq->mark_flag,
+					RTE_MBUF_F_RX_FDIR | rxq->mark_flag,
+					RTE_MBUF_F_RX_FDIR | rxq->mark_flag,
+					RTE_MBUF_F_RX_FDIR | rxq->mark_flag};
 				__vector unsigned char fdir_id_flags =
 					(__vector unsigned char)
 					(__vector unsigned int){
-					RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID,
-					RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID};
+					rxq->mark_flag, rxq->mark_flag,
+					rxq->mark_flag, rxq->mark_flag};
 				/* Extract flow_tag field. */
 				__vector unsigned char ftag0 = vec_perm(mcqe1,
 							zero, flow_mark_shuf);
@@ -632,8 +632,8 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
 			RTE_MBUF_F_RX_FDIR, RTE_MBUF_F_RX_FDIR};
 		__vector unsigned char fdir_id_flags =
 			(__vector unsigned char)(__vector unsigned int){
-			RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID,
-			RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID};
+			rxq->mark_flag, rxq->mark_flag,
+			rxq->mark_flag, rxq->mark_flag};
 		__vector unsigned char flow_tag, invalid_mask;
 
 		flow_tag = (__vector unsigned char)
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index 75e8ed7e5a..91c85bec6d 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -231,9 +231,9 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 					vdupq_n_u32(RTE_MBUF_F_RX_FDIR);
 				const uint32x4_t fdir_all_flags =
 					vdupq_n_u32(RTE_MBUF_F_RX_FDIR |
-						    RTE_MBUF_F_RX_FDIR_ID);
+						    rxq->mark_flag);
 				uint32x4_t fdir_id_flags =
-					vdupq_n_u32(RTE_MBUF_F_RX_FDIR_ID);
+					vdupq_n_u32(rxq->mark_flag);
 				uint32x4_t invalid_mask, ftag;
 
 				__asm__ volatile
@@ -446,7 +446,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
 	if (rxq->mark) {
 		const uint32x4_t ft_def = vdupq_n_u32(MLX5_FLOW_MARK_DEFAULT);
 		const uint32x4_t fdir_flags = vdupq_n_u32(RTE_MBUF_F_RX_FDIR);
-		uint32x4_t fdir_id_flags = vdupq_n_u32(RTE_MBUF_F_RX_FDIR_ID);
+		uint32x4_t fdir_id_flags = vdupq_n_u32(rxq->mark_flag);
 		uint32x4_t invalid_mask;
 
 		/* Check if flow tag is non-zero then set RTE_MBUF_F_RX_FDIR. */
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index b282f8b8e6..0766952255 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -214,9 +214,9 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 					_mm_set1_epi32(RTE_MBUF_F_RX_FDIR);
 				const __m128i fdir_all_flags =
 					_mm_set1_epi32(RTE_MBUF_F_RX_FDIR |
-						       RTE_MBUF_F_RX_FDIR_ID);
+						       rxq->mark_flag);
 				__m128i fdir_id_flags =
-					_mm_set1_epi32(RTE_MBUF_F_RX_FDIR_ID);
+					_mm_set1_epi32(rxq->mark_flag);
 
 				/* Extract flow_tag field. */
 				__m128i ftag0 =
@@ -442,7 +442,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
 	if (rxq->mark) {
 		const __m128i pinfo_ft_mask = _mm_set1_epi32(0xffffff00);
 		const __m128i fdir_flags = _mm_set1_epi32(RTE_MBUF_F_RX_FDIR);
-		__m128i fdir_id_flags = _mm_set1_epi32(RTE_MBUF_F_RX_FDIR_ID);
+		__m128i fdir_id_flags = _mm_set1_epi32(rxq->mark_flag);
 		__m128i flow_tag, invalid_mask;
 
 		flow_tag = _mm_and_si128(pinfo, pinfo_ft_mask);
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index bbaa7d2aa0..7bdb897612 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -1282,8 +1282,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 			dev->data->port_id);
 		goto error;
 	}
-	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
-	mlx5_flow_rxq_dynf_metadata_set(dev);
+	/* Set dynamic fields and flags into Rx queues. */
+	mlx5_flow_rxq_dynf_set(dev);
 	/* Set flags and context to convert Rx timestamps. */
 	mlx5_rxq_timestamp_set(dev);
 	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
diff --git a/drivers/net/sfc/sfc_dp.c b/drivers/net/sfc/sfc_dp.c
index 9f2093b353..2b0a1d749d 100644
--- a/drivers/net/sfc/sfc_dp.c
+++ b/drivers/net/sfc/sfc_dp.c
@@ -11,6 +11,7 @@
 #include <string.h>
 #include <errno.h>
 
+#include <rte_ethdev.h>
 #include <rte_log.h>
 #include <rte_mbuf_dyn.h>
 
@@ -135,12 +136,8 @@ sfc_dp_ft_ctx_id_register(void)
 		.size = sizeof(uint8_t),
 		.align = __alignof__(uint8_t),
 	};
-	static const struct rte_mbuf_dynflag ft_ctx_id_valid = {
-		.name = "rte_net_sfc_dynflag_ft_ctx_id_valid",
-	};
 
 	int field_offset;
-	int flag;
 
 	SFC_GENERIC_LOG(INFO, "%s() entry", __func__);
 
@@ -156,15 +153,8 @@ sfc_dp_ft_ctx_id_register(void)
 		return -1;
 	}
 
-	flag = rte_mbuf_dynflag_register(&ft_ctx_id_valid);
-	if (flag < 0) {
-		SFC_GENERIC_LOG(ERR, "%s() failed to register ft_ctx_id dynflag",
-				__func__);
-		return -1;
-	}
-
+	sfc_dp_ft_ctx_id_valid = rte_flow_restore_info_dynflag();
 	sfc_dp_ft_ctx_id_offset = field_offset;
-	sfc_dp_ft_ctx_id_valid = UINT64_C(1) << flag;
 
 	SFC_GENERIC_LOG(INFO, "%s() done", __func__);
 
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 7317015895..2884bc4f45 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -30,6 +30,7 @@
 #include "rte_ethdev.h"
 #include "rte_ethdev_trace_fp.h"
 #include "ethdev_driver.h"
+#include "rte_flow_driver.h"
 #include "ethdev_profile.h"
 #include "ethdev_private.h"
 #include "ethdev_trace.h"
@@ -6441,6 +6442,10 @@ rte_eth_rx_metadata_negotiate(uint16_t port_id, uint64_t *features)
 		return -EINVAL;
 	}
 
+	if ((*features & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0 &&
+			rte_flow_restore_info_dynflag_register(NULL) < 0)
+		*features &= ~RTE_ETH_RX_METADATA_TUNNEL_ID;
+
 	if (*dev->dev_ops->rx_metadata_negotiate == NULL)
 		return -ENOTSUP;
 	ret = eth_err(port_id,
diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c
index d41963b42e..5731b90642 100644
--- a/lib/ethdev/rte_flow.c
+++ b/lib/ethdev/rte_flow.c
@@ -1441,6 +1441,35 @@ rte_flow_get_restore_info(uint16_t port_id,
 				  NULL, rte_strerror(ENOTSUP));
 }
 
+static struct {
+	const struct rte_mbuf_dynflag desc;
+	uint64_t value;
+} flow_restore_info_dynflag = {
+	.desc = { .name = "RTE_MBUF_F_RX_RESTORE_INFO", },
+};
+
+uint64_t
+rte_flow_restore_info_dynflag(void)
+{
+	return flow_restore_info_dynflag.value;
+}
+
+int
+rte_flow_restore_info_dynflag_register(uint64_t *flag)
+{
+	if (flow_restore_info_dynflag.value == 0) {
+		int offset = rte_mbuf_dynflag_register(&flow_restore_info_dynflag.desc);
+
+		if (offset < 0)
+			return -1;
+		flow_restore_info_dynflag.value = RTE_BIT64(offset);
+	}
+	if (*flag)
+		*flag = rte_flow_restore_info_dynflag();
+
+	return 0;
+}
+
 int
 rte_flow_tunnel_action_decap_release(uint16_t port_id,
 				     struct rte_flow_action *actions,
diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
index dec454275f..261d95378b 100644
--- a/lib/ethdev/rte_flow.h
+++ b/lib/ethdev/rte_flow.h
@@ -5128,7 +5128,23 @@ rte_flow_tunnel_match(uint16_t port_id,
 		      struct rte_flow_error *error);
 
 /**
- * Populate the current packet processing state, if exists, for the given mbuf.
+ * On reception of a mbuf from HW, a call to rte_flow_get_restore_info() may be
+ * required to retrieve some metadata.
+ * This function returns the associated mbuf ol_flags.
+ *
+ * Note: the dynamic flag is registered during a call to
+ * rte_eth_rx_metadata_negotiate() with RTE_ETH_RX_METADATA_TUNNEL_ID.
+ *
+ * @return
+ *   The offload flag indicating rte_flow_get_restore_info() must be called.
+ */
+__rte_experimental
+uint64_t
+rte_flow_restore_info_dynflag(void);
+
+/**
+ * If a mbuf contains the rte_flow_restore_info_dynflag() flag in ol_flags,
+ * populate the current packet processing state.
  *
  * One should negotiate tunnel metadata delivery from the NIC to the HW.
  * @see rte_eth_rx_metadata_negotiate()
diff --git a/lib/ethdev/rte_flow_driver.h b/lib/ethdev/rte_flow_driver.h
index 356b60f523..a28448fc3b 100644
--- a/lib/ethdev/rte_flow_driver.h
+++ b/lib/ethdev/rte_flow_driver.h
@@ -376,6 +376,12 @@ struct rte_flow_ops {
 const struct rte_flow_ops *
 rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error);
 
+/**
+ * Register mbuf dynamic flag for rte_flow_get_restore_info.
+ */
+int
+rte_flow_restore_info_dynflag_register(uint64_t *flag);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 1a33d72668..0031be32bf 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -311,6 +311,7 @@ EXPERIMENTAL {
 	rte_flow_async_action_list_handle_destroy;
 	rte_flow_async_action_list_handle_query_update;
 	rte_flow_async_actions_update;
+	rte_flow_restore_info_dynflag;
 };
 
 INTERNAL {
-- 
2.40.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH v2] ethdev: advertise flow restore in mbuf
  2023-06-15 13:47 ` [RFC PATCH v2] " David Marchand
@ 2023-06-19 12:20   ` Andrew Rybchenko
  2023-06-19 13:57   ` Slava Ovsiienko
  2023-06-20 10:04   ` Ali Alnubani
  2 siblings, 0 replies; 25+ messages in thread
From: Andrew Rybchenko @ 2023-06-19 12:20 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: thomas, i.maximets, Aman Singh, Yuying Zhang, Matan Azrad,
	Viacheslav Ovsiienko, Ori Kam, Suanming Mou, David Christensen,
	Ruifeng Wang, Bruce Richardson, Konstantin Ananyev, Ferruh Yigit

On 6/15/23 16:47, David Marchand wrote:
> As reported by Ilya [1], unconditionally calling
> rte_flow_get_restore_info() impacts an application performance for drivers
> that do not provide this ops.
> It could also impact processing of packets that require no call to
> rte_flow_get_restore_info() at all.
> 
> Register a dynamic mbuf flag when an application negotiates tunnel
> metadata delivery (calling rte_eth_rx_metadata_negotiate() with
> RTE_ETH_RX_METADATA_TUNNEL_ID).
> 
> Drivers then advertise that metadata can be extracted by setting this
> dynamic flag in each mbuf.
> 
> The application then calls rte_flow_get_restore_info() only when required.
> 
> Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-7f659bfa40de@ovn.org/
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
> Changes since RFC v1:
> - rebased,
> - updated vectorized datapath functions for net/mlx5,
> - moved dynamic flag register to rte_eth_rx_metadata_negotiate() and
>    hid rte_flow_restore_info_dynflag_register() into ethdev internals,

For ethdev and net/sfc:
Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>



^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [RFC PATCH v2] ethdev: advertise flow restore in mbuf
  2023-06-15 13:47 ` [RFC PATCH v2] " David Marchand
  2023-06-19 12:20   ` Andrew Rybchenko
@ 2023-06-19 13:57   ` Slava Ovsiienko
  2023-06-20 10:04   ` Ali Alnubani
  2 siblings, 0 replies; 25+ messages in thread
From: Slava Ovsiienko @ 2023-06-19 13:57 UTC (permalink / raw)
  To: David Marchand, dev, Ali Alnubani
  Cc: NBU-Contact-Thomas Monjalon (EXTERNAL),
	i.maximets, Aman Singh, Yuying Zhang, Matan Azrad, Ori Kam,
	Suanming Mou, David Christensen, Ruifeng Wang, Bruce Richardson,
	Konstantin Ananyev, Andrew Rybchenko, Ferruh Yigit

Hi,

My primary concern is the vectorized rx_burst() performance impact.
@Ali Alnubani, could we conduct a performance check, at least with the SSE (x86) and Neon (BF2/3) archs?

With best regards,
Slava

> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Thursday, June 15, 2023 4:47 PM
> To: dev@dpdk.org
> Cc: NBU-Contact-Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>;
> i.maximets@ovn.org; Aman Singh <aman.deep.singh@intel.com>; Yuying
> Zhang <yuying.zhang@intel.com>; Matan Azrad <matan@nvidia.com>; Slava
> Ovsiienko <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; Suanming
> Mou <suanmingm@nvidia.com>; David Christensen
> <drc@linux.vnet.ibm.com>; Ruifeng Wang <ruifeng.wang@arm.com>; Bruce
> Richardson <bruce.richardson@intel.com>; Konstantin Ananyev
> <konstantin.v.ananyev@yandex.ru>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>; Ferruh Yigit <ferruh.yigit@amd.com>
> Subject: [RFC PATCH v2] ethdev: advertise flow restore in mbuf
> 
> As reported by Ilya [1], unconditionally calling
> rte_flow_get_restore_info() impacts an application performance for drivers
> that do not provide this ops.
> It could also impact processing of packets that require no call to
> rte_flow_get_restore_info() at all.
> 
> Register a dynamic mbuf flag when an application negotiates tunnel metadata
> delivery (calling rte_eth_rx_metadata_negotiate() with
> RTE_ETH_RX_METADATA_TUNNEL_ID).
> 
> Drivers then advertise that metadata can be extracted by setting this dynamic
> flag in each mbuf.
> 
> The application then calls rte_flow_get_restore_info() only when required.
> 
> Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-
> 7f659bfa40de@ovn.org/
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
> Changes since RFC v1:
> - rebased,
> - updated vectorized datapath functions for net/mlx5,
> - moved dynamic flag register to rte_eth_rx_metadata_negotiate() and
>   hid rte_flow_restore_info_dynflag_register() into ethdev internals,
> 
> ---
>  app/test-pmd/util.c                      |  9 +++--
>  drivers/net/mlx5/mlx5.c                  |  2 +
>  drivers/net/mlx5/mlx5.h                  |  5 ++-
>  drivers/net/mlx5/mlx5_flow.c             | 47 +++++++++++++++++++++---
>  drivers/net/mlx5/mlx5_rx.c               |  2 +-
>  drivers/net/mlx5/mlx5_rx.h               |  1 +
>  drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 16 ++++----
>  drivers/net/mlx5/mlx5_rxtx_vec_neon.h    |  6 +--
>  drivers/net/mlx5/mlx5_rxtx_vec_sse.h     |  6 +--
>  drivers/net/mlx5/mlx5_trigger.c          |  4 +-
>  drivers/net/sfc/sfc_dp.c                 | 14 +------
>  lib/ethdev/rte_ethdev.c                  |  5 +++
>  lib/ethdev/rte_flow.c                    | 29 +++++++++++++++
>  lib/ethdev/rte_flow.h                    | 18 ++++++++-
>  lib/ethdev/rte_flow_driver.h             |  6 +++
>  lib/ethdev/version.map                   |  1 +
>  16 files changed, 130 insertions(+), 41 deletions(-)
> 
> diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c index
> f9df5f69ef..5aa69ed545 100644
> --- a/app/test-pmd/util.c
> +++ b/app/test-pmd/util.c
> @@ -88,18 +88,20 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> struct rte_mbuf *pkts[],
>  	char print_buf[MAX_STRING_LEN];
>  	size_t buf_size = MAX_STRING_LEN;
>  	size_t cur_len = 0;
> +	uint64_t restore_info_dynflag;
> 
>  	if (!nb_pkts)
>  		return;
> +	restore_info_dynflag = rte_flow_restore_info_dynflag();
>  	MKDUMPSTR(print_buf, buf_size, cur_len,
>  		  "port %u/queue %u: %s %u packets\n", port_id, queue,
>  		  is_rx ? "received" : "sent", (unsigned int) nb_pkts);
>  	for (i = 0; i < nb_pkts; i++) {
> -		int ret;
>  		struct rte_flow_error error;
>  		struct rte_flow_restore_info info = { 0, };
> 
>  		mb = pkts[i];
> +		ol_flags = mb->ol_flags;
>  		if (rxq_share > 0)
>  			MKDUMPSTR(print_buf, buf_size, cur_len, "port %u, ",
>  				  mb->port);
> @@ -107,8 +109,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> struct rte_mbuf *pkts[],
>  		eth_type = RTE_BE_TO_CPU_16(eth_hdr->ether_type);
>  		packet_type = mb->packet_type;
>  		is_encapsulation = RTE_ETH_IS_TUNNEL_PKT(packet_type);
> -		ret = rte_flow_get_restore_info(port_id, mb, &info, &error);
> -		if (!ret) {
> +		if ((ol_flags & restore_info_dynflag) != 0 &&
> +				rte_flow_get_restore_info(port_id, mb, &info,
> &error) == 0) {
>  			MKDUMPSTR(print_buf, buf_size, cur_len,
>  				  "restore info:");
>  			if (info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL) {
> @@ -153,7 +155,6 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> struct rte_mbuf *pkts[],
>  			  " - pool=%s - type=0x%04x - length=%u -
> nb_segs=%d",
>  			  mb->pool->name, eth_type, (unsigned int) mb-
> >pkt_len,
>  			  (int)mb->nb_segs);
> -		ol_flags = mb->ol_flags;
>  		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
>  			MKDUMPSTR(print_buf, buf_size, cur_len,
>  				  " - RSS hash=0x%x",
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> a75fa1b7f0..58fde3af22 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -2347,6 +2347,7 @@ const struct eth_dev_ops mlx5_dev_ops = {
>  	.get_monitor_addr = mlx5_get_monitor_addr,
>  	.count_aggr_ports = mlx5_count_aggr_ports,
>  	.map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
> +	.rx_metadata_negotiate = mlx5_flow_rx_metadata_negotiate,
>  };
> 
>  /* Available operations from secondary process. */ @@ -2372,6 +2373,7 @@
> const struct eth_dev_ops mlx5_dev_sec_ops = {
>  	.get_module_eeprom = mlx5_get_module_eeprom,
>  	.count_aggr_ports = mlx5_count_aggr_ports,
>  	.map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
> +	.rx_metadata_negotiate = mlx5_flow_rx_metadata_negotiate,
>  };
> 
>  /* Available operations in flow isolated mode. */ diff --git
> a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 021049ad2b..bf464613f0 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -1745,6 +1745,7 @@ struct mlx5_priv {
>  	unsigned int lb_used:1; /* Loopback queue is referred to. */
>  	uint32_t mark_enabled:1; /* If mark action is enabled on rxqs. */
>  	uint32_t num_lag_ports:4; /* Number of ports can be bonded. */
> +	uint32_t tunnel_enabled:1; /* If tunnel offloading is enabled on rxqs.
> +*/
>  	uint16_t domain_id; /* Switch domain identifier. */
>  	uint16_t vport_id; /* Associated VF vport index (if any). */
>  	uint32_t vport_meta_tag; /* Used for vport index match ove VF LAG.
> */ @@ -2154,7 +2155,9 @@ int mlx5_flow_query_counter(struct rte_eth_dev
> *dev, struct rte_flow *flow,  int mlx5_flow_dev_dump_ipool(struct
> rte_eth_dev *dev, struct rte_flow *flow,
>  		FILE *file, struct rte_flow_error *error);  #endif -void
> mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
> +int mlx5_flow_rx_metadata_negotiate(struct rte_eth_dev *dev,
> +	uint64_t *features);
> +void mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev);
>  int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
>  			uint32_t nb_contexts, struct rte_flow_error *error);
> int mlx5_validate_action_ct(struct rte_eth_dev *dev, diff --git
> a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index
> eb1d7a6be2..19e567401b 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -1796,6 +1796,38 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
>  	priv->sh->shared_mark_enabled = 0;
>  }
> 
> +static uint64_t mlx5_restore_info_dynflag;
> +
> +int
> +mlx5_flow_rx_metadata_negotiate(struct rte_eth_dev *dev, uint64_t
> +*features) {
> +	struct mlx5_priv *priv = dev->data->dev_private;
> +	uint64_t supported = 0;
> +
> +	if (!is_tunnel_offload_active(dev)) {
> +		supported |= RTE_ETH_RX_METADATA_USER_FLAG;
> +		supported |= RTE_ETH_RX_METADATA_USER_MARK;
> +		if ((*features & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0) {
> +			DRV_LOG(DEBUG,
> +				"tunnel offload was not activated, consider
> setting dv_xmeta_en=%d",
> +				MLX5_XMETA_MODE_MISS_INFO);
> +		}
> +	} else {
> +		supported |= RTE_ETH_RX_METADATA_TUNNEL_ID;
> +		if ((*features & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0 &&
> +				mlx5_restore_info_dynflag == 0)
> +			mlx5_restore_info_dynflag =
> rte_flow_restore_info_dynflag();
> +	}
> +
> +	if (((*features & supported) & RTE_ETH_RX_METADATA_TUNNEL_ID)
> != 0)
> +		priv->tunnel_enabled = 1;
> +	else
> +		priv->tunnel_enabled = 0;
> +
> +	*features &= supported;
> +	return 0;
> +}
> +
>  /**
>   * Set the Rx queue dynamic metadata (mask and offset) for a flow
>   *
> @@ -1803,11 +1835,15 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
>   *   Pointer to the Ethernet device structure.
>   */
>  void
> -mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
> +mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev)
>  {
>  	struct mlx5_priv *priv = dev->data->dev_private;
> +	uint64_t mark_flag = RTE_MBUF_F_RX_FDIR_ID;
>  	unsigned int i;
> 
> +	if (priv->tunnel_enabled)
> +		mark_flag |= mlx5_restore_info_dynflag;
> +
>  	for (i = 0; i != priv->rxqs_n; ++i) {
>  		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
>  		struct mlx5_rxq_data *data;
> @@ -1826,6 +1862,7 @@ mlx5_flow_rxq_dynf_metadata_set(struct
> rte_eth_dev *dev)
>  			data->flow_meta_offset =
> rte_flow_dynf_metadata_offs;
>  			data->flow_meta_port_mask = priv->sh-
> >dv_meta_mask;
>  		}
> +		data->mark_flag = mark_flag;
>  	}
>  }
> 
> @@ -11560,12 +11597,10 @@ mlx5_flow_tunnel_get_restore_info(struct
> rte_eth_dev *dev,
>  	uint64_t ol_flags = m->ol_flags;
>  	const struct mlx5_flow_tbl_data_entry *tble;
>  	const uint64_t mask = RTE_MBUF_F_RX_FDIR |
> RTE_MBUF_F_RX_FDIR_ID;
> +	struct mlx5_priv *priv = dev->data->dev_private;
> 
> -	if (!is_tunnel_offload_active(dev)) {
> -		info->flags = 0;
> -		return 0;
> -	}
> -
> +	if (priv->tunnel_enabled == 0)
> +		goto err;
>  	if ((ol_flags & mask) != mask)
>  		goto err;
>  	tble = tunnel_mark_decode(dev, m->hash.fdir.hi); diff --git
> a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c index
> a2be523e9e..71c4638251 100644
> --- a/drivers/net/mlx5/mlx5_rx.c
> +++ b/drivers/net/mlx5/mlx5_rx.c
> @@ -857,7 +857,7 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct
> rte_mbuf *pkt,
>  		if (MLX5_FLOW_MARK_IS_VALID(mark)) {
>  			pkt->ol_flags |= RTE_MBUF_F_RX_FDIR;
>  			if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
> -				pkt->ol_flags |= RTE_MBUF_F_RX_FDIR_ID;
> +				pkt->ol_flags |= rxq->mark_flag;
>  				pkt->hash.fdir.hi =
> mlx5_flow_mark_get(mark);
>  			}
>  		}
> diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h index
> 52c35c83f8..3514edd84e 100644
> --- a/drivers/net/mlx5/mlx5_rx.h
> +++ b/drivers/net/mlx5/mlx5_rx.h
> @@ -136,6 +136,7 @@ struct mlx5_rxq_data {
>  	struct mlx5_uar_data uar_data; /* CQ doorbell. */
>  	uint32_t cqn; /* CQ number. */
>  	uint8_t cq_arm_sn; /* CQ arm seq number. */
> +	uint64_t mark_flag; /* ol_flags to set with marks. */
>  	uint32_t tunnel; /* Tunnel information. */
>  	int timestamp_offset; /* Dynamic mbuf field for timestamp. */
>  	uint64_t timestamp_rx_flag; /* Dynamic mbuf flag for timestamp. */
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
> b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
> index 14ffff26f4..4d0d05c376 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
> @@ -296,15 +296,15 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
>  				const __vector unsigned char fdir_all_flags =
>  					(__vector unsigned char)
>  					(__vector unsigned int){
> -					RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID,
> -					RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID,
> -					RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID,
> -					RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID};
> +					RTE_MBUF_F_RX_FDIR | rxq->mark_flag,
> +					RTE_MBUF_F_RX_FDIR | rxq->mark_flag,
> +					RTE_MBUF_F_RX_FDIR | rxq->mark_flag,
> +					RTE_MBUF_F_RX_FDIR | rxq->mark_flag};
>  				__vector unsigned char fdir_id_flags =
>  					(__vector unsigned char)
>  					(__vector unsigned int){
> -					RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID,
> -					RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID};
> +					rxq->mark_flag, rxq->mark_flag,
> +					rxq->mark_flag, rxq->mark_flag};
>  				/* Extract flow_tag field. */
>  				__vector unsigned char ftag0 = vec_perm(mcqe1,
>  							zero, flow_mark_shuf);
> @@ -632,8 +632,8 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
>  			RTE_MBUF_F_RX_FDIR, RTE_MBUF_F_RX_FDIR};
>  		__vector unsigned char fdir_id_flags =
>  			(__vector unsigned char)(__vector unsigned int){
> -			RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID,
> -			RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID};
> +			rxq->mark_flag, rxq->mark_flag,
> +			rxq->mark_flag, rxq->mark_flag};
>  		__vector unsigned char flow_tag, invalid_mask;
> 
>  		flow_tag = (__vector unsigned char)
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> index 75e8ed7e5a..91c85bec6d 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> @@ -231,9 +231,9 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
>  					vdupq_n_u32(RTE_MBUF_F_RX_FDIR);
>  				const uint32x4_t fdir_all_flags =
>  					vdupq_n_u32(RTE_MBUF_F_RX_FDIR |
> -						    RTE_MBUF_F_RX_FDIR_ID);
> +						    rxq->mark_flag);
>  				uint32x4_t fdir_id_flags =
> -					vdupq_n_u32(RTE_MBUF_F_RX_FDIR_ID);
> +					vdupq_n_u32(rxq->mark_flag);
>  				uint32x4_t invalid_mask, ftag;
> 
>  				__asm__ volatile
> @@ -446,7 +446,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
>  	if (rxq->mark) {
>  		const uint32x4_t ft_def = vdupq_n_u32(MLX5_FLOW_MARK_DEFAULT);
>  		const uint32x4_t fdir_flags = vdupq_n_u32(RTE_MBUF_F_RX_FDIR);
> -		uint32x4_t fdir_id_flags = vdupq_n_u32(RTE_MBUF_F_RX_FDIR_ID);
> +		uint32x4_t fdir_id_flags = vdupq_n_u32(rxq->mark_flag);
>  		uint32x4_t invalid_mask;
> 
>  		/* Check if flow tag is non-zero then set RTE_MBUF_F_RX_FDIR. */
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> index b282f8b8e6..0766952255 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> @@ -214,9 +214,9 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
>  					_mm_set1_epi32(RTE_MBUF_F_RX_FDIR);
>  				const __m128i fdir_all_flags =
>  					_mm_set1_epi32(RTE_MBUF_F_RX_FDIR |
> -						       RTE_MBUF_F_RX_FDIR_ID);
> +						       rxq->mark_flag);
>  				__m128i fdir_id_flags =
> -					_mm_set1_epi32(RTE_MBUF_F_RX_FDIR_ID);
> +					_mm_set1_epi32(rxq->mark_flag);
> 
>  				/* Extract flow_tag field. */
>  				__m128i ftag0 =
> @@ -442,7 +442,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
>  	if (rxq->mark) {
>  		const __m128i pinfo_ft_mask = _mm_set1_epi32(0xffffff00);
>  		const __m128i fdir_flags = _mm_set1_epi32(RTE_MBUF_F_RX_FDIR);
> -		__m128i fdir_id_flags = _mm_set1_epi32(RTE_MBUF_F_RX_FDIR_ID);
> +		__m128i fdir_id_flags = _mm_set1_epi32(rxq->mark_flag);
>  		__m128i flow_tag, invalid_mask;
> 
>  		flow_tag = _mm_and_si128(pinfo, pinfo_ft_mask);
> diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
> index bbaa7d2aa0..7bdb897612 100644
> --- a/drivers/net/mlx5/mlx5_trigger.c
> +++ b/drivers/net/mlx5/mlx5_trigger.c
> @@ -1282,8 +1282,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
>  			dev->data->port_id);
>  		goto error;
>  	}
> -	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
> -	mlx5_flow_rxq_dynf_metadata_set(dev);
> +	/* Set dynamic fields and flags into Rx queues. */
> +	mlx5_flow_rxq_dynf_set(dev);
>  	/* Set flags and context to convert Rx timestamps. */
>  	mlx5_rxq_timestamp_set(dev);
>  	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
> diff --git a/drivers/net/sfc/sfc_dp.c b/drivers/net/sfc/sfc_dp.c
> index 9f2093b353..2b0a1d749d 100644
> --- a/drivers/net/sfc/sfc_dp.c
> +++ b/drivers/net/sfc/sfc_dp.c
> @@ -11,6 +11,7 @@
>  #include <string.h>
>  #include <errno.h>
> 
> +#include <rte_ethdev.h>
>  #include <rte_log.h>
>  #include <rte_mbuf_dyn.h>
> 
> @@ -135,12 +136,8 @@ sfc_dp_ft_ctx_id_register(void)
>  		.size = sizeof(uint8_t),
>  		.align = __alignof__(uint8_t),
>  	};
> -	static const struct rte_mbuf_dynflag ft_ctx_id_valid = {
> -		.name = "rte_net_sfc_dynflag_ft_ctx_id_valid",
> -	};
> 
>  	int field_offset;
> -	int flag;
> 
>  	SFC_GENERIC_LOG(INFO, "%s() entry", __func__);
> 
> @@ -156,15 +153,8 @@ sfc_dp_ft_ctx_id_register(void)
>  		return -1;
>  	}
> 
> -	flag = rte_mbuf_dynflag_register(&ft_ctx_id_valid);
> -	if (flag < 0) {
> -		SFC_GENERIC_LOG(ERR, "%s() failed to register ft_ctx_id dynflag",
> -				__func__);
> -		return -1;
> -	}
> -
> +	sfc_dp_ft_ctx_id_valid = rte_flow_restore_info_dynflag();
>  	sfc_dp_ft_ctx_id_offset = field_offset;
> -	sfc_dp_ft_ctx_id_valid = UINT64_C(1) << flag;
> 
>  	SFC_GENERIC_LOG(INFO, "%s() done", __func__);
> 
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index 7317015895..2884bc4f45 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -30,6 +30,7 @@
>  #include "rte_ethdev.h"
>  #include "rte_ethdev_trace_fp.h"
>  #include "ethdev_driver.h"
> +#include "rte_flow_driver.h"
>  #include "ethdev_profile.h"
>  #include "ethdev_private.h"
>  #include "ethdev_trace.h"
> @@ -6441,6 +6442,10 @@ rte_eth_rx_metadata_negotiate(uint16_t port_id, uint64_t *features)
>  		return -EINVAL;
>  	}
> 
> +	if ((*features & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0 &&
> +			rte_flow_restore_info_dynflag_register(NULL) < 0)
> +		*features &= ~RTE_ETH_RX_METADATA_TUNNEL_ID;
> +
>  	if (*dev->dev_ops->rx_metadata_negotiate == NULL)
>  		return -ENOTSUP;
>  	ret = eth_err(port_id,
> diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c
> index d41963b42e..5731b90642 100644
> --- a/lib/ethdev/rte_flow.c
> +++ b/lib/ethdev/rte_flow.c
> @@ -1441,6 +1441,35 @@ rte_flow_get_restore_info(uint16_t port_id,
>  				  NULL, rte_strerror(ENOTSUP));
>  }
> 
> +static struct {
> +	const struct rte_mbuf_dynflag desc;
> +	uint64_t value;
> +} flow_restore_info_dynflag = {
> +	.desc = { .name = "RTE_MBUF_F_RX_RESTORE_INFO", },
> +};
> +
> +uint64_t
> +rte_flow_restore_info_dynflag(void)
> +{
> +	return flow_restore_info_dynflag.value;
> +}
> +
> +int
> +rte_flow_restore_info_dynflag_register(uint64_t *flag)
> +{
> +	if (flow_restore_info_dynflag.value == 0) {
> +		int offset = rte_mbuf_dynflag_register(&flow_restore_info_dynflag.desc);
> +
> +		if (offset < 0)
> +			return -1;
> +		flow_restore_info_dynflag.value = RTE_BIT64(offset);
> +	}
> +	if (*flag)
> +		*flag = rte_flow_restore_info_dynflag();
> +
> +	return 0;
> +}
> +
>  int
>  rte_flow_tunnel_action_decap_release(uint16_t port_id,
>  				     struct rte_flow_action *actions,
> diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
> index dec454275f..261d95378b 100644
> --- a/lib/ethdev/rte_flow.h
> +++ b/lib/ethdev/rte_flow.h
> @@ -5128,7 +5128,23 @@ rte_flow_tunnel_match(uint16_t port_id,
>  		      struct rte_flow_error *error);
> 
>  /**
> - * Populate the current packet processing state, if exists, for the given mbuf.
> + * On reception of a mbuf from HW, a call to rte_flow_get_restore_info() may be
> + * required to retrieve some metadata.
> + * This function returns the associated mbuf ol_flags.
> + *
> + * Note: the dynamic flag is registered during a call to
> + * rte_eth_rx_metadata_negotiate() with RTE_ETH_RX_METADATA_TUNNEL_ID.
> + *
> + * @return
> + *   The offload flag indicating rte_flow_get_restore_info() must be called.
> + */
> +__rte_experimental
> +uint64_t
> +rte_flow_restore_info_dynflag(void);
> +
> +/**
> + * If a mbuf contains the rte_flow_restore_info_dynflag() flag in ol_flags,
> + * populate the current packet processing state.
>   *
>   * One should negotiate tunnel metadata delivery from the NIC to the HW.
>   * @see rte_eth_rx_metadata_negotiate()
> diff --git a/lib/ethdev/rte_flow_driver.h b/lib/ethdev/rte_flow_driver.h
> index 356b60f523..a28448fc3b 100644
> --- a/lib/ethdev/rte_flow_driver.h
> +++ b/lib/ethdev/rte_flow_driver.h
> @@ -376,6 +376,12 @@ struct rte_flow_ops {
>  const struct rte_flow_ops *
>  rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error);
> 
> +/**
> + * Register mbuf dynamic flag for rte_flow_get_restore_info.
> + */
> +int
> +rte_flow_restore_info_dynflag_register(uint64_t *flag);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
> index 1a33d72668..0031be32bf 100644
> --- a/lib/ethdev/version.map
> +++ b/lib/ethdev/version.map
> @@ -311,6 +311,7 @@ EXPERIMENTAL {
>  	rte_flow_async_action_list_handle_destroy;
>  	rte_flow_async_action_list_handle_query_update;
>  	rte_flow_async_actions_update;
> +	rte_flow_restore_info_dynflag;
>  };
> 
>  INTERNAL {
> --
> 2.40.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [RFC PATCH v2] ethdev: advertise flow restore in mbuf
  2023-06-15 13:47 ` [RFC PATCH v2] " David Marchand
  2023-06-19 12:20   ` Andrew Rybchenko
  2023-06-19 13:57   ` Slava Ovsiienko
@ 2023-06-20 10:04   ` Ali Alnubani
  2 siblings, 0 replies; 25+ messages in thread
From: Ali Alnubani @ 2023-06-20 10:04 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: NBU-Contact-Thomas Monjalon (EXTERNAL),
	i.maximets, Aman Singh, Yuying Zhang, Matan Azrad,
	Slava Ovsiienko, Ori Kam, Suanming Mou, David Christensen,
	Ruifeng Wang, Bruce Richardson, Konstantin Ananyev,
	Andrew Rybchenko, Ferruh Yigit, Raslan Darawsheh

> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Thursday, June 15, 2023 4:47 PM
> To: dev@dpdk.org
> Cc: NBU-Contact-Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>;
> i.maximets@ovn.org; Aman Singh <aman.deep.singh@intel.com>; Yuying
> Zhang <yuying.zhang@intel.com>; Matan Azrad <matan@nvidia.com>; Slava
> Ovsiienko <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>;
> Suanming Mou <suanmingm@nvidia.com>; David Christensen
> <drc@linux.vnet.ibm.com>; Ruifeng Wang <ruifeng.wang@arm.com>; Bruce
> Richardson <bruce.richardson@intel.com>; Konstantin Ananyev
> <konstantin.v.ananyev@yandex.ru>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>; Ferruh Yigit <ferruh.yigit@amd.com>
> Subject: [RFC PATCH v2] ethdev: advertise flow restore in mbuf
> 
> As reported by Ilya [1], unconditionally calling
> rte_flow_get_restore_info() impacts an application performance for drivers
> that do not provide this ops.
> It could also impact processing of packets that require no call to
> rte_flow_get_restore_info() at all.
> 
> Register a dynamic mbuf flag when an application negotiates tunnel
> metadata delivery (calling rte_eth_rx_metadata_negotiate() with
> RTE_ETH_RX_METADATA_TUNNEL_ID).
> 
> Drivers then advertise that metadata can be extracted by setting this
> dynamic flag in each mbuf.
> 
> The application then calls rte_flow_get_restore_info() only when required.
> 
> Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-
> 7f659bfa40de@ovn.org/
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---

Hello David,

App segfaults at startup with this patch applied on main:

$ ./build/app/dpdk-testpmd -a 0000:86:00.0 -- -i

#0  0x0000000000a11133 in rte_flow_restore_info_dynflag_register (flag=0x0) at ../lib/ethdev/rte_flow.c:1467
#1  0x00000000009f6f7f in rte_eth_rx_metadata_negotiate (port_id=0, features=0x7fffffffe280)
    at ../lib/ethdev/rte_ethdev.c:6446
#2  0x00000000004eb003 in eth_rx_metadata_negotiate_mp (port_id=0) at ../app/test-pmd/testpmd.c:572
#3  0x00000000004eca7d in init_config_port_offloads (pid=0, socket_id=1) at ../app/test-pmd/testpmd.c:1627
#4  0x00000000004ecf0d in init_config () at ../app/test-pmd/testpmd.c:1737
#5  0x00000000004f3b15 in main (argc=2, argv=0x7fffffffe5e0) at ../app/test-pmd/testpmd.c:4592

Device: ConnectX-6 Dx
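
Frame #0 shows flag=0x0, so the crash looks like the unconditional *flag
dereference in rte_flow_restore_info_dynflag_register():
rte_eth_rx_metadata_negotiate() passes a NULL flag pointer. Just as an
illustration (not a proposed fix), a NULL check there avoids it:

	if (flag != NULL)
		*flag = rte_flow_restore_info_dynflag();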

Regards,
Ali

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFC PATCH v3] ethdev: advertise flow restore in mbuf
  2023-05-05 10:31 [RFC PATCH] ethdev: advertise flow restore in mbuf David Marchand
                   ` (3 preceding siblings ...)
  2023-06-15 13:47 ` [RFC PATCH v2] " David Marchand
@ 2023-06-20 11:10 ` David Marchand
  2023-06-20 16:43   ` Slava Ovsiienko
  2023-06-21  7:47   ` Ali Alnubani
  2023-06-21 14:43 ` [PATCH v4] " David Marchand
  5 siblings, 2 replies; 25+ messages in thread
From: David Marchand @ 2023-06-20 11:10 UTC (permalink / raw)
  To: dev
  Cc: thomas, i.maximets, alialnu, Aman Singh, Yuying Zhang,
	Matan Azrad, Viacheslav Ovsiienko, Ori Kam, Suanming Mou,
	David Christensen, Ruifeng Wang, Bruce Richardson,
	Konstantin Ananyev, Andrew Rybchenko, Ferruh Yigit

As reported by Ilya [1], unconditionally calling
rte_flow_get_restore_info() impacts application performance for drivers
that do not provide this op.
It could also impact processing of packets that require no call to
rte_flow_get_restore_info() at all.

Register a dynamic mbuf flag when an application negotiates tunnel
metadata delivery (calling rte_eth_rx_metadata_negotiate() with
RTE_ETH_RX_METADATA_TUNNEL_ID).

Drivers then advertise that metadata can be extracted by setting this
dynamic flag in each mbuf.

The application then calls rte_flow_get_restore_info() only when required.
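
For illustration, the application is expected to do something like the
following sketch (mirroring the testpmd update below; port_id, pkts and
nb_rx come from the usual Rx burst context, variable names are examples
only):

	uint64_t features = RTE_ETH_RX_METADATA_TUNNEL_ID;
	uint64_t restore_flag = 0;
	uint16_t i;

	/* Negotiate tunnel metadata delivery early, before configuring the port. */
	if (rte_eth_rx_metadata_negotiate(port_id, &features) == 0 &&
			(features & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0)
		restore_flag = rte_flow_restore_info_dynflag();

	/* Rx path: query restore info only for mbufs advertising it. */
	for (i = 0; i < nb_rx; i++) {
		struct rte_flow_restore_info info = { 0, };
		struct rte_flow_error error;
		struct rte_mbuf *m = pkts[i];

		if ((m->ol_flags & restore_flag) != 0 &&
				rte_flow_get_restore_info(port_id, m, &info, &error) == 0) {
			/* Tunnel metadata is available in info. */
		}
	}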

Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-7f659bfa40de@ovn.org/
Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since RFC v2:
- fixed crash introduced in v2 and removed unneeded argument to 
  rte_flow_restore_info_dynflag_register(),

Changes since RFC v1:
- rebased,
- updated vectorized datapath functions for net/mlx5,
- moved dynamic flag register to rte_eth_rx_metadata_negotiate() and
  hid rte_flow_restore_info_dynflag_register() into ethdev internals,

---
 app/test-pmd/util.c                      |  9 +++--
 drivers/net/mlx5/mlx5.c                  |  2 +
 drivers/net/mlx5/mlx5.h                  |  5 ++-
 drivers/net/mlx5/mlx5_flow.c             | 47 +++++++++++++++++++++---
 drivers/net/mlx5/mlx5_rx.c               |  2 +-
 drivers/net/mlx5/mlx5_rx.h               |  1 +
 drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 16 ++++----
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h    |  6 +--
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h     |  6 +--
 drivers/net/mlx5/mlx5_trigger.c          |  4 +-
 drivers/net/sfc/sfc_dp.c                 | 14 +------
 lib/ethdev/rte_ethdev.c                  |  5 +++
 lib/ethdev/rte_flow.c                    | 27 ++++++++++++++
 lib/ethdev/rte_flow.h                    | 18 ++++++++-
 lib/ethdev/rte_flow_driver.h             |  6 +++
 lib/ethdev/version.map                   |  1 +
 16 files changed, 128 insertions(+), 41 deletions(-)

diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
index f9df5f69ef..5aa69ed545 100644
--- a/app/test-pmd/util.c
+++ b/app/test-pmd/util.c
@@ -88,18 +88,20 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 	char print_buf[MAX_STRING_LEN];
 	size_t buf_size = MAX_STRING_LEN;
 	size_t cur_len = 0;
+	uint64_t restore_info_dynflag;
 
 	if (!nb_pkts)
 		return;
+	restore_info_dynflag = rte_flow_restore_info_dynflag();
 	MKDUMPSTR(print_buf, buf_size, cur_len,
 		  "port %u/queue %u: %s %u packets\n", port_id, queue,
 		  is_rx ? "received" : "sent", (unsigned int) nb_pkts);
 	for (i = 0; i < nb_pkts; i++) {
-		int ret;
 		struct rte_flow_error error;
 		struct rte_flow_restore_info info = { 0, };
 
 		mb = pkts[i];
+		ol_flags = mb->ol_flags;
 		if (rxq_share > 0)
 			MKDUMPSTR(print_buf, buf_size, cur_len, "port %u, ",
 				  mb->port);
@@ -107,8 +109,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 		eth_type = RTE_BE_TO_CPU_16(eth_hdr->ether_type);
 		packet_type = mb->packet_type;
 		is_encapsulation = RTE_ETH_IS_TUNNEL_PKT(packet_type);
-		ret = rte_flow_get_restore_info(port_id, mb, &info, &error);
-		if (!ret) {
+		if ((ol_flags & restore_info_dynflag) != 0 &&
+				rte_flow_get_restore_info(port_id, mb, &info, &error) == 0) {
 			MKDUMPSTR(print_buf, buf_size, cur_len,
 				  "restore info:");
 			if (info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL) {
@@ -153,7 +155,6 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
 			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
 			  (int)mb->nb_segs);
-		ol_flags = mb->ol_flags;
 		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
 			MKDUMPSTR(print_buf, buf_size, cur_len,
 				  " - RSS hash=0x%x",
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index a75fa1b7f0..58fde3af22 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -2347,6 +2347,7 @@ const struct eth_dev_ops mlx5_dev_ops = {
 	.get_monitor_addr = mlx5_get_monitor_addr,
 	.count_aggr_ports = mlx5_count_aggr_ports,
 	.map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
+	.rx_metadata_negotiate = mlx5_flow_rx_metadata_negotiate,
 };
 
 /* Available operations from secondary process. */
@@ -2372,6 +2373,7 @@ const struct eth_dev_ops mlx5_dev_sec_ops = {
 	.get_module_eeprom = mlx5_get_module_eeprom,
 	.count_aggr_ports = mlx5_count_aggr_ports,
 	.map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
+	.rx_metadata_negotiate = mlx5_flow_rx_metadata_negotiate,
 };
 
 /* Available operations in flow isolated mode. */
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 021049ad2b..bf464613f0 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1745,6 +1745,7 @@ struct mlx5_priv {
 	unsigned int lb_used:1; /* Loopback queue is referred to. */
 	uint32_t mark_enabled:1; /* If mark action is enabled on rxqs. */
 	uint32_t num_lag_ports:4; /* Number of ports can be bonded. */
+	uint32_t tunnel_enabled:1; /* If tunnel offloading is enabled on rxqs. */
 	uint16_t domain_id; /* Switch domain identifier. */
 	uint16_t vport_id; /* Associated VF vport index (if any). */
 	uint32_t vport_meta_tag; /* Used for vport index match ove VF LAG. */
@@ -2154,7 +2155,9 @@ int mlx5_flow_query_counter(struct rte_eth_dev *dev, struct rte_flow *flow,
 int mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev, struct rte_flow *flow,
 		FILE *file, struct rte_flow_error *error);
 #endif
-void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
+int mlx5_flow_rx_metadata_negotiate(struct rte_eth_dev *dev,
+	uint64_t *features);
+void mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev);
 int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
 			uint32_t nb_contexts, struct rte_flow_error *error);
 int mlx5_validate_action_ct(struct rte_eth_dev *dev,
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index eb1d7a6be2..19e567401b 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1796,6 +1796,38 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
 	priv->sh->shared_mark_enabled = 0;
 }
 
+static uint64_t mlx5_restore_info_dynflag;
+
+int
+mlx5_flow_rx_metadata_negotiate(struct rte_eth_dev *dev, uint64_t *features)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	uint64_t supported = 0;
+
+	if (!is_tunnel_offload_active(dev)) {
+		supported |= RTE_ETH_RX_METADATA_USER_FLAG;
+		supported |= RTE_ETH_RX_METADATA_USER_MARK;
+		if ((*features & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0) {
+			DRV_LOG(DEBUG,
+				"tunnel offload was not activated, consider setting dv_xmeta_en=%d",
+				MLX5_XMETA_MODE_MISS_INFO);
+		}
+	} else {
+		supported |= RTE_ETH_RX_METADATA_TUNNEL_ID;
+		if ((*features & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0 &&
+				mlx5_restore_info_dynflag == 0)
+			mlx5_restore_info_dynflag = rte_flow_restore_info_dynflag();
+	}
+
+	if (((*features & supported) & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0)
+		priv->tunnel_enabled = 1;
+	else
+		priv->tunnel_enabled = 0;
+
+	*features &= supported;
+	return 0;
+}
+
 /**
  * Set the Rx queue dynamic metadata (mask and offset) for a flow
  *
@@ -1803,11 +1835,15 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
  *   Pointer to the Ethernet device structure.
  */
 void
-mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
+mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
+	uint64_t mark_flag = RTE_MBUF_F_RX_FDIR_ID;
 	unsigned int i;
 
+	if (priv->tunnel_enabled)
+		mark_flag |= mlx5_restore_info_dynflag;
+
 	for (i = 0; i != priv->rxqs_n; ++i) {
 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
 		struct mlx5_rxq_data *data;
@@ -1826,6 +1862,7 @@ mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
 			data->flow_meta_offset = rte_flow_dynf_metadata_offs;
 			data->flow_meta_port_mask = priv->sh->dv_meta_mask;
 		}
+		data->mark_flag = mark_flag;
 	}
 }
 
@@ -11560,12 +11597,10 @@ mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
 	uint64_t ol_flags = m->ol_flags;
 	const struct mlx5_flow_tbl_data_entry *tble;
 	const uint64_t mask = RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
+	struct mlx5_priv *priv = dev->data->dev_private;
 
-	if (!is_tunnel_offload_active(dev)) {
-		info->flags = 0;
-		return 0;
-	}
-
+	if (priv->tunnel_enabled == 0)
+		goto err;
 	if ((ol_flags & mask) != mask)
 		goto err;
 	tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index a2be523e9e..71c4638251 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -857,7 +857,7 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
 		if (MLX5_FLOW_MARK_IS_VALID(mark)) {
 			pkt->ol_flags |= RTE_MBUF_F_RX_FDIR;
 			if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
-				pkt->ol_flags |= RTE_MBUF_F_RX_FDIR_ID;
+				pkt->ol_flags |= rxq->mark_flag;
 				pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
 			}
 		}
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 52c35c83f8..3514edd84e 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -136,6 +136,7 @@ struct mlx5_rxq_data {
 	struct mlx5_uar_data uar_data; /* CQ doorbell. */
 	uint32_t cqn; /* CQ number. */
 	uint8_t cq_arm_sn; /* CQ arm seq number. */
+	uint64_t mark_flag; /* ol_flags to set with marks. */
 	uint32_t tunnel; /* Tunnel information. */
 	int timestamp_offset; /* Dynamic mbuf field for timestamp. */
 	uint64_t timestamp_rx_flag; /* Dynamic mbuf flag for timestamp. */
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
index 14ffff26f4..4d0d05c376 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
@@ -296,15 +296,15 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 				const __vector unsigned char fdir_all_flags =
 					(__vector unsigned char)
 					(__vector unsigned int){
-					RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID,
-					RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID,
-					RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID,
-					RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID};
+					RTE_MBUF_F_RX_FDIR | rxq->mark_flag,
+					RTE_MBUF_F_RX_FDIR | rxq->mark_flag,
+					RTE_MBUF_F_RX_FDIR | rxq->mark_flag,
+					RTE_MBUF_F_RX_FDIR | rxq->mark_flag};
 				__vector unsigned char fdir_id_flags =
 					(__vector unsigned char)
 					(__vector unsigned int){
-					RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID,
-					RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID};
+					rxq->mark_flag, rxq->mark_flag,
+					rxq->mark_flag, rxq->mark_flag};
 				/* Extract flow_tag field. */
 				__vector unsigned char ftag0 = vec_perm(mcqe1,
 							zero, flow_mark_shuf);
@@ -632,8 +632,8 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
 			RTE_MBUF_F_RX_FDIR, RTE_MBUF_F_RX_FDIR};
 		__vector unsigned char fdir_id_flags =
 			(__vector unsigned char)(__vector unsigned int){
-			RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID,
-			RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID};
+			rxq->mark_flag, rxq->mark_flag,
+			rxq->mark_flag, rxq->mark_flag};
 		__vector unsigned char flow_tag, invalid_mask;
 
 		flow_tag = (__vector unsigned char)
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index 75e8ed7e5a..91c85bec6d 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -231,9 +231,9 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 					vdupq_n_u32(RTE_MBUF_F_RX_FDIR);
 				const uint32x4_t fdir_all_flags =
 					vdupq_n_u32(RTE_MBUF_F_RX_FDIR |
-						    RTE_MBUF_F_RX_FDIR_ID);
+						    rxq->mark_flag);
 				uint32x4_t fdir_id_flags =
-					vdupq_n_u32(RTE_MBUF_F_RX_FDIR_ID);
+					vdupq_n_u32(rxq->mark_flag);
 				uint32x4_t invalid_mask, ftag;
 
 				__asm__ volatile
@@ -446,7 +446,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
 	if (rxq->mark) {
 		const uint32x4_t ft_def = vdupq_n_u32(MLX5_FLOW_MARK_DEFAULT);
 		const uint32x4_t fdir_flags = vdupq_n_u32(RTE_MBUF_F_RX_FDIR);
-		uint32x4_t fdir_id_flags = vdupq_n_u32(RTE_MBUF_F_RX_FDIR_ID);
+		uint32x4_t fdir_id_flags = vdupq_n_u32(rxq->mark_flag);
 		uint32x4_t invalid_mask;
 
 		/* Check if flow tag is non-zero then set RTE_MBUF_F_RX_FDIR. */
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index b282f8b8e6..0766952255 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -214,9 +214,9 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 					_mm_set1_epi32(RTE_MBUF_F_RX_FDIR);
 				const __m128i fdir_all_flags =
 					_mm_set1_epi32(RTE_MBUF_F_RX_FDIR |
-						       RTE_MBUF_F_RX_FDIR_ID);
+						       rxq->mark_flag);
 				__m128i fdir_id_flags =
-					_mm_set1_epi32(RTE_MBUF_F_RX_FDIR_ID);
+					_mm_set1_epi32(rxq->mark_flag);
 
 				/* Extract flow_tag field. */
 				__m128i ftag0 =
@@ -442,7 +442,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
 	if (rxq->mark) {
 		const __m128i pinfo_ft_mask = _mm_set1_epi32(0xffffff00);
 		const __m128i fdir_flags = _mm_set1_epi32(RTE_MBUF_F_RX_FDIR);
-		__m128i fdir_id_flags = _mm_set1_epi32(RTE_MBUF_F_RX_FDIR_ID);
+		__m128i fdir_id_flags = _mm_set1_epi32(rxq->mark_flag);
 		__m128i flow_tag, invalid_mask;
 
 		flow_tag = _mm_and_si128(pinfo, pinfo_ft_mask);
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index bbaa7d2aa0..7bdb897612 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -1282,8 +1282,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 			dev->data->port_id);
 		goto error;
 	}
-	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
-	mlx5_flow_rxq_dynf_metadata_set(dev);
+	/* Set dynamic fields and flags into Rx queues. */
+	mlx5_flow_rxq_dynf_set(dev);
 	/* Set flags and context to convert Rx timestamps. */
 	mlx5_rxq_timestamp_set(dev);
 	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
diff --git a/drivers/net/sfc/sfc_dp.c b/drivers/net/sfc/sfc_dp.c
index 9f2093b353..2b0a1d749d 100644
--- a/drivers/net/sfc/sfc_dp.c
+++ b/drivers/net/sfc/sfc_dp.c
@@ -11,6 +11,7 @@
 #include <string.h>
 #include <errno.h>
 
+#include <rte_ethdev.h>
 #include <rte_log.h>
 #include <rte_mbuf_dyn.h>
 
@@ -135,12 +136,8 @@ sfc_dp_ft_ctx_id_register(void)
 		.size = sizeof(uint8_t),
 		.align = __alignof__(uint8_t),
 	};
-	static const struct rte_mbuf_dynflag ft_ctx_id_valid = {
-		.name = "rte_net_sfc_dynflag_ft_ctx_id_valid",
-	};
 
 	int field_offset;
-	int flag;
 
 	SFC_GENERIC_LOG(INFO, "%s() entry", __func__);
 
@@ -156,15 +153,8 @@ sfc_dp_ft_ctx_id_register(void)
 		return -1;
 	}
 
-	flag = rte_mbuf_dynflag_register(&ft_ctx_id_valid);
-	if (flag < 0) {
-		SFC_GENERIC_LOG(ERR, "%s() failed to register ft_ctx_id dynflag",
-				__func__);
-		return -1;
-	}
-
+	sfc_dp_ft_ctx_id_valid = rte_flow_restore_info_dynflag();
 	sfc_dp_ft_ctx_id_offset = field_offset;
-	sfc_dp_ft_ctx_id_valid = UINT64_C(1) << flag;
 
 	SFC_GENERIC_LOG(INFO, "%s() done", __func__);
 
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 7317015895..c8202a37ef 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -30,6 +30,7 @@
 #include "rte_ethdev.h"
 #include "rte_ethdev_trace_fp.h"
 #include "ethdev_driver.h"
+#include "rte_flow_driver.h"
 #include "ethdev_profile.h"
 #include "ethdev_private.h"
 #include "ethdev_trace.h"
@@ -6441,6 +6442,10 @@ rte_eth_rx_metadata_negotiate(uint16_t port_id, uint64_t *features)
 		return -EINVAL;
 	}
 
+	if ((*features & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0 &&
+			rte_flow_restore_info_dynflag_register() < 0)
+		*features &= ~RTE_ETH_RX_METADATA_TUNNEL_ID;
+
 	if (*dev->dev_ops->rx_metadata_negotiate == NULL)
 		return -ENOTSUP;
 	ret = eth_err(port_id,
diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c
index d41963b42e..271d854f78 100644
--- a/lib/ethdev/rte_flow.c
+++ b/lib/ethdev/rte_flow.c
@@ -1441,6 +1441,33 @@ rte_flow_get_restore_info(uint16_t port_id,
 				  NULL, rte_strerror(ENOTSUP));
 }
 
+static struct {
+	const struct rte_mbuf_dynflag desc;
+	uint64_t value;
+} flow_restore_info_dynflag = {
+	.desc = { .name = "RTE_MBUF_F_RX_RESTORE_INFO", },
+};
+
+uint64_t
+rte_flow_restore_info_dynflag(void)
+{
+	return flow_restore_info_dynflag.value;
+}
+
+int
+rte_flow_restore_info_dynflag_register(void)
+{
+	if (flow_restore_info_dynflag.value == 0) {
+		int offset = rte_mbuf_dynflag_register(&flow_restore_info_dynflag.desc);
+
+		if (offset < 0)
+			return -1;
+		flow_restore_info_dynflag.value = RTE_BIT64(offset);
+	}
+
+	return 0;
+}
+
 int
 rte_flow_tunnel_action_decap_release(uint16_t port_id,
 				     struct rte_flow_action *actions,
diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
index dec454275f..261d95378b 100644
--- a/lib/ethdev/rte_flow.h
+++ b/lib/ethdev/rte_flow.h
@@ -5128,7 +5128,23 @@ rte_flow_tunnel_match(uint16_t port_id,
 		      struct rte_flow_error *error);
 
 /**
- * Populate the current packet processing state, if exists, for the given mbuf.
+ * On reception of a mbuf from HW, a call to rte_flow_get_restore_info() may be
+ * required to retrieve some metadata.
+ * This function returns the associated mbuf ol_flags.
+ *
+ * Note: the dynamic flag is registered during a call to
+ * rte_eth_rx_metadata_negotiate() with RTE_ETH_RX_METADATA_TUNNEL_ID.
+ *
+ * @return
+ *   The offload flag indicating rte_flow_get_restore_info() must be called.
+ */
+__rte_experimental
+uint64_t
+rte_flow_restore_info_dynflag(void);
+
+/**
+ * If a mbuf contains the rte_flow_restore_info_dynflag() flag in ol_flags,
+ * populate the current packet processing state.
  *
  * One should negotiate tunnel metadata delivery from the NIC to the HW.
  * @see rte_eth_rx_metadata_negotiate()
diff --git a/lib/ethdev/rte_flow_driver.h b/lib/ethdev/rte_flow_driver.h
index 356b60f523..f9fb01b8a2 100644
--- a/lib/ethdev/rte_flow_driver.h
+++ b/lib/ethdev/rte_flow_driver.h
@@ -376,6 +376,12 @@ struct rte_flow_ops {
 const struct rte_flow_ops *
 rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error);
 
+/**
+ * Register mbuf dynamic flag for rte_flow_get_restore_info.
+ */
+int
+rte_flow_restore_info_dynflag_register(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 1a33d72668..0031be32bf 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -311,6 +311,7 @@ EXPERIMENTAL {
 	rte_flow_async_action_list_handle_destroy;
 	rte_flow_async_action_list_handle_query_update;
 	rte_flow_async_actions_update;
+	rte_flow_restore_info_dynflag;
 };
 
 INTERNAL {
-- 
2.40.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [RFC PATCH v3] ethdev: advertise flow restore in mbuf
  2023-06-20 11:10 ` [RFC PATCH v3] " David Marchand
@ 2023-06-20 16:43   ` Slava Ovsiienko
  2023-06-21  7:43     ` David Marchand
  2023-06-21 12:53     ` Ori Kam
  2023-06-21  7:47   ` Ali Alnubani
  1 sibling, 2 replies; 25+ messages in thread
From: Slava Ovsiienko @ 2023-06-20 16:43 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: NBU-Contact-Thomas Monjalon (EXTERNAL),
	i.maximets, Ali Alnubani, Aman Singh, Yuying Zhang, Matan Azrad,
	Ori Kam, Suanming Mou, David Christensen, Ruifeng Wang,
	Bruce Richardson, Konstantin Ananyev, Andrew Rybchenko,
	Ferruh Yigit

> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Tuesday, June 20, 2023 2:10 PM
> To: dev@dpdk.org
> Cc: NBU-Contact-Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>;
> i.maximets@ovn.org; Ali Alnubani <alialnu@nvidia.com>; Aman Singh
> <aman.deep.singh@intel.com>; Yuying Zhang <yuying.zhang@intel.com>;
> Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; Suanming Mou
> <suanmingm@nvidia.com>; David Christensen <drc@linux.vnet.ibm.com>;
> Ruifeng Wang <ruifeng.wang@arm.com>; Bruce Richardson
> <bruce.richardson@intel.com>; Konstantin Ananyev
> <konstantin.v.ananyev@yandex.ru>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>; Ferruh Yigit <ferruh.yigit@amd.com>
> Subject: [RFC PATCH v3] ethdev: advertise flow restore in mbuf
> 
> As reported by Ilya [1], unconditionally calling
> rte_flow_get_restore_info() impacts an application performance for drivers
> that do not provide this ops.
> It could also impact processing of packets that require no call to
> rte_flow_get_restore_info() at all.
> 
> Register a dynamic mbuf flag when an application negotiates tunnel metadata
> delivery (calling rte_eth_rx_metadata_negotiate() with
> RTE_ETH_RX_METADATA_TUNNEL_ID).
> 
> Drivers then advertise that metadata can be extracted by setting this dynamic
> flag in each mbuf.
> 
> The application then calls rte_flow_get_restore_info() only when required.
> 
> Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-
> 7f659bfa40de@ovn.org/
> Signed-off-by: David Marchand <david.marchand@redhat.com>

We did not notice any performance degradation, so:
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>


> ---
> Changes since RFC v2:
> - fixed crash introduced in v2 and removed unneeded argument to
>   rte_flow_restore_info_dynflag_register(),
> 
> Changes since RFC v1:
> - rebased,
> - updated vectorized datapath functions for net/mlx5,
> - moved dynamic flag register to rte_eth_rx_metadata_negotiate() and
>   hid rte_flow_restore_info_dynflag_register() into ethdev internals,
> 
> ---
>  app/test-pmd/util.c                      |  9 +++--
>  drivers/net/mlx5/mlx5.c                  |  2 +
>  drivers/net/mlx5/mlx5.h                  |  5 ++-
>  drivers/net/mlx5/mlx5_flow.c             | 47 +++++++++++++++++++++---
>  drivers/net/mlx5/mlx5_rx.c               |  2 +-
>  drivers/net/mlx5/mlx5_rx.h               |  1 +
>  drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 16 ++++----
>  drivers/net/mlx5/mlx5_rxtx_vec_neon.h    |  6 +--
>  drivers/net/mlx5/mlx5_rxtx_vec_sse.h     |  6 +--
>  drivers/net/mlx5/mlx5_trigger.c          |  4 +-
>  drivers/net/sfc/sfc_dp.c                 | 14 +------
>  lib/ethdev/rte_ethdev.c                  |  5 +++
>  lib/ethdev/rte_flow.c                    | 27 ++++++++++++++
>  lib/ethdev/rte_flow.h                    | 18 ++++++++-
>  lib/ethdev/rte_flow_driver.h             |  6 +++
>  lib/ethdev/version.map                   |  1 +
>  16 files changed, 128 insertions(+), 41 deletions(-)
> 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH v3] ethdev: advertise flow restore in mbuf
  2023-06-20 16:43   ` Slava Ovsiienko
@ 2023-06-21  7:43     ` David Marchand
  2023-06-21 12:53     ` Ori Kam
  1 sibling, 0 replies; 25+ messages in thread
From: David Marchand @ 2023-06-21  7:43 UTC (permalink / raw)
  To: Slava Ovsiienko, Ali Alnubani
  Cc: dev, NBU-Contact-Thomas Monjalon (EXTERNAL),
	i.maximets, Aman Singh, Yuying Zhang, Matan Azrad, Ori Kam,
	Suanming Mou, David Christensen, Ruifeng Wang, Bruce Richardson,
	Konstantin Ananyev, Andrew Rybchenko, Ferruh Yigit

Hello,

On Tue, Jun 20, 2023 at 6:44 PM Slava Ovsiienko <viacheslavo@nvidia.com> wrote:
> > As reported by Ilya [1], unconditionally calling
> > rte_flow_get_restore_info() impacts an application performance for drivers
> > that do not provide this ops.
> > It could also impact processing of packets that require no call to
> > rte_flow_get_restore_info() at all.
> >
> > Register a dynamic mbuf flag when an application negotiates tunnel metadata
> > delivery (calling rte_eth_rx_metadata_negotiate() with
> > RTE_ETH_RX_METADATA_TUNNEL_ID).
> >
> > Drivers then advertise that metadata can be extracted by setting this dynamic
> > flag in each mbuf.
> >
> > The application then calls rte_flow_get_restore_info() only when required.
> >
> > Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-
> > 7f659bfa40de@ovn.org/
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
>
> We did not notice the degradation, so:
> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

Cool, thanks for testing!
Should I add a Tested-by: tag from Ali? I guess he was the one who
did the testing.

I'll post a non-RFC patch for the CI to pass on it.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [RFC PATCH v3] ethdev: advertise flow restore in mbuf
  2023-06-20 11:10 ` [RFC PATCH v3] " David Marchand
  2023-06-20 16:43   ` Slava Ovsiienko
@ 2023-06-21  7:47   ` Ali Alnubani
  1 sibling, 0 replies; 25+ messages in thread
From: Ali Alnubani @ 2023-06-21  7:47 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: NBU-Contact-Thomas Monjalon (EXTERNAL),
	i.maximets, Aman Singh, Yuying Zhang, Matan Azrad,
	Slava Ovsiienko, Ori Kam, Suanming Mou, David Christensen,
	Ruifeng Wang, Bruce Richardson, Konstantin Ananyev,
	Andrew Rybchenko, Ferruh Yigit

> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Tuesday, June 20, 2023 2:10 PM
> To: dev@dpdk.org
> Cc: NBU-Contact-Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>;
> i.maximets@ovn.org; Ali Alnubani <alialnu@nvidia.com>; Aman Singh
> <aman.deep.singh@intel.com>; Yuying Zhang <yuying.zhang@intel.com>;
> Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; Suanming Mou
> <suanmingm@nvidia.com>; David Christensen <drc@linux.vnet.ibm.com>;
> Ruifeng Wang <ruifeng.wang@arm.com>; Bruce Richardson
> <bruce.richardson@intel.com>; Konstantin Ananyev
> <konstantin.v.ananyev@yandex.ru>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>; Ferruh Yigit <ferruh.yigit@amd.com>
> Subject: [RFC PATCH v3] ethdev: advertise flow restore in mbuf
> 
> As reported by Ilya [1], unconditionally calling
> rte_flow_get_restore_info() impacts an application performance for drivers
> that do not provide this ops.
> It could also impact processing of packets that require no call to
> rte_flow_get_restore_info() at all.
> 
> Register a dynamic mbuf flag when an application negotiates tunnel
> metadata delivery (calling rte_eth_rx_metadata_negotiate() with
> RTE_ETH_RX_METADATA_TUNNEL_ID).
> 
> Drivers then advertise that metadata can be extracted by setting this
> dynamic flag in each mbuf.
> 
> The application then calls rte_flow_get_restore_info() only when required.
> 
> Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-
> 7f659bfa40de@ovn.org/
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---

Didn't see a degradation in packet forwarding performance on ConnectX-6 Dx and BlueField-2.

Tested-by: Ali Alnubani <alialnu@nvidia.com>

Thanks

^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [RFC PATCH v3] ethdev: advertise flow restore in mbuf
  2023-06-20 16:43   ` Slava Ovsiienko
  2023-06-21  7:43     ` David Marchand
@ 2023-06-21 12:53     ` Ori Kam
  1 sibling, 0 replies; 25+ messages in thread
From: Ori Kam @ 2023-06-21 12:53 UTC (permalink / raw)
  To: Slava Ovsiienko, David Marchand, dev
  Cc: NBU-Contact-Thomas Monjalon (EXTERNAL),
	i.maximets, Ali Alnubani, Aman Singh, Yuying Zhang, Matan Azrad,
	Suanming Mou, David Christensen, Ruifeng Wang, Bruce Richardson,
	Konstantin Ananyev, Andrew Rybchenko, Ferruh Yigit



> -----Original Message-----
> From: Slava Ovsiienko <viacheslavo@nvidia.com>
> Sent: Tuesday, June 20, 2023 7:44 PM
> 
> > -----Original Message-----
> > From: David Marchand <david.marchand@redhat.com>
> > Sent: Tuesday, June 20, 2023 2:10 PM
> > To: dev@dpdk.org
> > Cc: NBU-Contact-Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>;
> > i.maximets@ovn.org; Ali Alnubani <alialnu@nvidia.com>; Aman Singh
> > <aman.deep.singh@intel.com>; Yuying Zhang <yuying.zhang@intel.com>;
> > Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> > <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; Suanming Mou
> > <suanmingm@nvidia.com>; David Christensen <drc@linux.vnet.ibm.com>;
> > Ruifeng Wang <ruifeng.wang@arm.com>; Bruce Richardson
> > <bruce.richardson@intel.com>; Konstantin Ananyev
> > <konstantin.v.ananyev@yandex.ru>; Andrew Rybchenko
> > <andrew.rybchenko@oktetlabs.ru>; Ferruh Yigit <ferruh.yigit@amd.com>
> > Subject: [RFC PATCH v3] ethdev: advertise flow restore in mbuf
> >
> > As reported by Ilya [1], unconditionally calling
> > rte_flow_get_restore_info() impacts an application performance for drivers
> > that do not provide this ops.
> > It could also impact processing of packets that require no call to
> > rte_flow_get_restore_info() at all.
> >
> > Register a dynamic mbuf flag when an application negotiates tunnel
> metadata
> > delivery (calling rte_eth_rx_metadata_negotiate() with
> > RTE_ETH_RX_METADATA_TUNNEL_ID).
> >
> > Drivers then advertise that metadata can be extracted by setting this
> dynamic
> > flag in each mbuf.
> >
> > The application then calls rte_flow_get_restore_info() only when required.
> >
> > Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-
> > 7f659bfa40de@ovn.org/
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> 
> We did not notice the degradation, so:
> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

Acked-by: Ori Kam <orika@nvidia.com>


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH v4] ethdev: advertise flow restore in mbuf
  2023-05-05 10:31 [RFC PATCH] ethdev: advertise flow restore in mbuf David Marchand
                   ` (4 preceding siblings ...)
  2023-06-20 11:10 ` [RFC PATCH v3] " David Marchand
@ 2023-06-21 14:43 ` David Marchand
  2023-06-21 18:52   ` Ferruh Yigit
  2023-07-31 20:41   ` Ilya Maximets
  5 siblings, 2 replies; 25+ messages in thread
From: David Marchand @ 2023-06-21 14:43 UTC (permalink / raw)
  To: dev
  Cc: thomas, i.maximets, alialnu, Andrew Rybchenko,
	Viacheslav Ovsiienko, Ori Kam, Aman Singh, Yuying Zhang,
	Matan Azrad, Suanming Mou, David Christensen, Ruifeng Wang,
	Bruce Richardson, Konstantin Ananyev, Ferruh Yigit

As reported by Ilya [1], unconditionally calling
rte_flow_get_restore_info() impacts application performance for drivers
that do not provide this op.
It could also impact processing of packets that require no call to
rte_flow_get_restore_info() at all.

Register a dynamic mbuf flag when an application negotiates tunnel
metadata delivery (calling rte_eth_rx_metadata_negotiate() with
RTE_ETH_RX_METADATA_TUNNEL_ID).

Drivers then advertise that metadata can be extracted by setting this
dynamic flag in each mbuf.

The application then calls rte_flow_get_restore_info() only when required.
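
For illustration, on the driver side this boils down to OR-ing the
negotiated dynamic flag into ol_flags for packets that carry tunnel
offload metadata. The mlx5 and sfc hunks below are the real
implementations; this is only a simplified sketch with a made-up
pkt_has_tunnel_metadata condition:

	/* Resolved once RTE_ETH_RX_METADATA_TUNNEL_ID has been negotiated. */
	uint64_t restore_flag = rte_flow_restore_info_dynflag();

	/* Rx burst: advertise that rte_flow_get_restore_info() has data. */
	if (pkt_has_tunnel_metadata)
		mbuf->ol_flags |= restore_flag;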

Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-7f659bfa40de@ovn.org/
Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Tested-by: Ali Alnubani <alialnu@nvidia.com>
Acked-by: Ori Kam <orika@nvidia.com>
---
Changes since RFC v3:
- rebased on next-net,
- sending as non RFC for CIs that skip RFC patches,

Changes since RFC v2:
- fixed crash introduced in v2 and removed unneeded argument to 
  rte_flow_restore_info_dynflag_register(),

Changes since RFC v1:
- rebased,
- updated vectorized datapath functions for net/mlx5,
- moved dynamic flag register to rte_eth_rx_metadata_negotiate() and
  hid rte_flow_restore_info_dynflag_register() into ethdev internals,

---
 app/test-pmd/util.c                      |  9 +++--
 drivers/net/mlx5/mlx5.c                  |  2 +
 drivers/net/mlx5/mlx5.h                  |  5 ++-
 drivers/net/mlx5/mlx5_flow.c             | 47 +++++++++++++++++++++---
 drivers/net/mlx5/mlx5_rx.c               |  2 +-
 drivers/net/mlx5/mlx5_rx.h               |  1 +
 drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 16 ++++----
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h    |  6 +--
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h     |  6 +--
 drivers/net/mlx5/mlx5_trigger.c          |  4 +-
 drivers/net/sfc/sfc_dp.c                 | 14 +------
 lib/ethdev/rte_ethdev.c                  |  5 +++
 lib/ethdev/rte_flow.c                    | 27 ++++++++++++++
 lib/ethdev/rte_flow.h                    | 18 ++++++++-
 lib/ethdev/rte_flow_driver.h             |  6 +++
 lib/ethdev/version.map                   |  1 +
 16 files changed, 128 insertions(+), 41 deletions(-)

diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
index f9df5f69ef..5aa69ed545 100644
--- a/app/test-pmd/util.c
+++ b/app/test-pmd/util.c
@@ -88,18 +88,20 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 	char print_buf[MAX_STRING_LEN];
 	size_t buf_size = MAX_STRING_LEN;
 	size_t cur_len = 0;
+	uint64_t restore_info_dynflag;
 
 	if (!nb_pkts)
 		return;
+	restore_info_dynflag = rte_flow_restore_info_dynflag();
 	MKDUMPSTR(print_buf, buf_size, cur_len,
 		  "port %u/queue %u: %s %u packets\n", port_id, queue,
 		  is_rx ? "received" : "sent", (unsigned int) nb_pkts);
 	for (i = 0; i < nb_pkts; i++) {
-		int ret;
 		struct rte_flow_error error;
 		struct rte_flow_restore_info info = { 0, };
 
 		mb = pkts[i];
+		ol_flags = mb->ol_flags;
 		if (rxq_share > 0)
 			MKDUMPSTR(print_buf, buf_size, cur_len, "port %u, ",
 				  mb->port);
@@ -107,8 +109,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 		eth_type = RTE_BE_TO_CPU_16(eth_hdr->ether_type);
 		packet_type = mb->packet_type;
 		is_encapsulation = RTE_ETH_IS_TUNNEL_PKT(packet_type);
-		ret = rte_flow_get_restore_info(port_id, mb, &info, &error);
-		if (!ret) {
+		if ((ol_flags & restore_info_dynflag) != 0 &&
+				rte_flow_get_restore_info(port_id, mb, &info, &error) == 0) {
 			MKDUMPSTR(print_buf, buf_size, cur_len,
 				  "restore info:");
 			if (info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL) {
@@ -153,7 +155,6 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
 			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
 			  (int)mb->nb_segs);
-		ol_flags = mb->ol_flags;
 		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
 			MKDUMPSTR(print_buf, buf_size, cur_len,
 				  " - RSS hash=0x%x",
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index a75fa1b7f0..58fde3af22 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -2347,6 +2347,7 @@ const struct eth_dev_ops mlx5_dev_ops = {
 	.get_monitor_addr = mlx5_get_monitor_addr,
 	.count_aggr_ports = mlx5_count_aggr_ports,
 	.map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
+	.rx_metadata_negotiate = mlx5_flow_rx_metadata_negotiate,
 };
 
 /* Available operations from secondary process. */
@@ -2372,6 +2373,7 @@ const struct eth_dev_ops mlx5_dev_sec_ops = {
 	.get_module_eeprom = mlx5_get_module_eeprom,
 	.count_aggr_ports = mlx5_count_aggr_ports,
 	.map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
+	.rx_metadata_negotiate = mlx5_flow_rx_metadata_negotiate,
 };
 
 /* Available operations in flow isolated mode. */
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 021049ad2b..bf464613f0 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1745,6 +1745,7 @@ struct mlx5_priv {
 	unsigned int lb_used:1; /* Loopback queue is referred to. */
 	uint32_t mark_enabled:1; /* If mark action is enabled on rxqs. */
 	uint32_t num_lag_ports:4; /* Number of ports can be bonded. */
+	uint32_t tunnel_enabled:1; /* If tunnel offloading is enabled on rxqs. */
 	uint16_t domain_id; /* Switch domain identifier. */
 	uint16_t vport_id; /* Associated VF vport index (if any). */
 	uint32_t vport_meta_tag; /* Used for vport index match ove VF LAG. */
@@ -2154,7 +2155,9 @@ int mlx5_flow_query_counter(struct rte_eth_dev *dev, struct rte_flow *flow,
 int mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev, struct rte_flow *flow,
 		FILE *file, struct rte_flow_error *error);
 #endif
-void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
+int mlx5_flow_rx_metadata_negotiate(struct rte_eth_dev *dev,
+	uint64_t *features);
+void mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev);
 int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
 			uint32_t nb_contexts, struct rte_flow_error *error);
 int mlx5_validate_action_ct(struct rte_eth_dev *dev,
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index eb1d7a6be2..19e567401b 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1796,6 +1796,38 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
 	priv->sh->shared_mark_enabled = 0;
 }
 
+static uint64_t mlx5_restore_info_dynflag;
+
+int
+mlx5_flow_rx_metadata_negotiate(struct rte_eth_dev *dev, uint64_t *features)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	uint64_t supported = 0;
+
+	if (!is_tunnel_offload_active(dev)) {
+		supported |= RTE_ETH_RX_METADATA_USER_FLAG;
+		supported |= RTE_ETH_RX_METADATA_USER_MARK;
+		if ((*features & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0) {
+			DRV_LOG(DEBUG,
+				"tunnel offload was not activated, consider setting dv_xmeta_en=%d",
+				MLX5_XMETA_MODE_MISS_INFO);
+		}
+	} else {
+		supported |= RTE_ETH_RX_METADATA_TUNNEL_ID;
+		if ((*features & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0 &&
+				mlx5_restore_info_dynflag == 0)
+			mlx5_restore_info_dynflag = rte_flow_restore_info_dynflag();
+	}
+
+	if (((*features & supported) & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0)
+		priv->tunnel_enabled = 1;
+	else
+		priv->tunnel_enabled = 0;
+
+	*features &= supported;
+	return 0;
+}
+
 /**
  * Set the Rx queue dynamic metadata (mask and offset) for a flow
  *
@@ -1803,11 +1835,15 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
  *   Pointer to the Ethernet device structure.
  */
 void
-mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
+mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
+	uint64_t mark_flag = RTE_MBUF_F_RX_FDIR_ID;
 	unsigned int i;
 
+	if (priv->tunnel_enabled)
+		mark_flag |= mlx5_restore_info_dynflag;
+
 	for (i = 0; i != priv->rxqs_n; ++i) {
 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
 		struct mlx5_rxq_data *data;
@@ -1826,6 +1862,7 @@ mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
 			data->flow_meta_offset = rte_flow_dynf_metadata_offs;
 			data->flow_meta_port_mask = priv->sh->dv_meta_mask;
 		}
+		data->mark_flag = mark_flag;
 	}
 }
 
@@ -11560,12 +11597,10 @@ mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
 	uint64_t ol_flags = m->ol_flags;
 	const struct mlx5_flow_tbl_data_entry *tble;
 	const uint64_t mask = RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
+	struct mlx5_priv *priv = dev->data->dev_private;
 
-	if (!is_tunnel_offload_active(dev)) {
-		info->flags = 0;
-		return 0;
-	}
-
+	if (priv->tunnel_enabled == 0)
+		goto err;
 	if ((ol_flags & mask) != mask)
 		goto err;
 	tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index a2be523e9e..71c4638251 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -857,7 +857,7 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
 		if (MLX5_FLOW_MARK_IS_VALID(mark)) {
 			pkt->ol_flags |= RTE_MBUF_F_RX_FDIR;
 			if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
-				pkt->ol_flags |= RTE_MBUF_F_RX_FDIR_ID;
+				pkt->ol_flags |= rxq->mark_flag;
 				pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
 			}
 		}
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 52c35c83f8..3514edd84e 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -136,6 +136,7 @@ struct mlx5_rxq_data {
 	struct mlx5_uar_data uar_data; /* CQ doorbell. */
 	uint32_t cqn; /* CQ number. */
 	uint8_t cq_arm_sn; /* CQ arm seq number. */
+	uint64_t mark_flag; /* ol_flags to set with marks. */
 	uint32_t tunnel; /* Tunnel information. */
 	int timestamp_offset; /* Dynamic mbuf field for timestamp. */
 	uint64_t timestamp_rx_flag; /* Dynamic mbuf flag for timestamp. */
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
index 14ffff26f4..4d0d05c376 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
@@ -296,15 +296,15 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 				const __vector unsigned char fdir_all_flags =
 					(__vector unsigned char)
 					(__vector unsigned int){
-					RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID,
-					RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID,
-					RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID,
-					RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID};
+					RTE_MBUF_F_RX_FDIR | rxq->mark_flag,
+					RTE_MBUF_F_RX_FDIR | rxq->mark_flag,
+					RTE_MBUF_F_RX_FDIR | rxq->mark_flag,
+					RTE_MBUF_F_RX_FDIR | rxq->mark_flag};
 				__vector unsigned char fdir_id_flags =
 					(__vector unsigned char)
 					(__vector unsigned int){
-					RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID,
-					RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID};
+					rxq->mark_flag, rxq->mark_flag,
+					rxq->mark_flag, rxq->mark_flag};
 				/* Extract flow_tag field. */
 				__vector unsigned char ftag0 = vec_perm(mcqe1,
 							zero, flow_mark_shuf);
@@ -632,8 +632,8 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
 			RTE_MBUF_F_RX_FDIR, RTE_MBUF_F_RX_FDIR};
 		__vector unsigned char fdir_id_flags =
 			(__vector unsigned char)(__vector unsigned int){
-			RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID,
-			RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID};
+			rxq->mark_flag, rxq->mark_flag,
+			rxq->mark_flag, rxq->mark_flag};
 		__vector unsigned char flow_tag, invalid_mask;
 
 		flow_tag = (__vector unsigned char)
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index 75e8ed7e5a..91c85bec6d 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -231,9 +231,9 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 					vdupq_n_u32(RTE_MBUF_F_RX_FDIR);
 				const uint32x4_t fdir_all_flags =
 					vdupq_n_u32(RTE_MBUF_F_RX_FDIR |
-						    RTE_MBUF_F_RX_FDIR_ID);
+						    rxq->mark_flag);
 				uint32x4_t fdir_id_flags =
-					vdupq_n_u32(RTE_MBUF_F_RX_FDIR_ID);
+					vdupq_n_u32(rxq->mark_flag);
 				uint32x4_t invalid_mask, ftag;
 
 				__asm__ volatile
@@ -446,7 +446,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
 	if (rxq->mark) {
 		const uint32x4_t ft_def = vdupq_n_u32(MLX5_FLOW_MARK_DEFAULT);
 		const uint32x4_t fdir_flags = vdupq_n_u32(RTE_MBUF_F_RX_FDIR);
-		uint32x4_t fdir_id_flags = vdupq_n_u32(RTE_MBUF_F_RX_FDIR_ID);
+		uint32x4_t fdir_id_flags = vdupq_n_u32(rxq->mark_flag);
 		uint32x4_t invalid_mask;
 
 		/* Check if flow tag is non-zero then set RTE_MBUF_F_RX_FDIR. */
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index b282f8b8e6..0766952255 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -214,9 +214,9 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 					_mm_set1_epi32(RTE_MBUF_F_RX_FDIR);
 				const __m128i fdir_all_flags =
 					_mm_set1_epi32(RTE_MBUF_F_RX_FDIR |
-						       RTE_MBUF_F_RX_FDIR_ID);
+						       rxq->mark_flag);
 				__m128i fdir_id_flags =
-					_mm_set1_epi32(RTE_MBUF_F_RX_FDIR_ID);
+					_mm_set1_epi32(rxq->mark_flag);
 
 				/* Extract flow_tag field. */
 				__m128i ftag0 =
@@ -442,7 +442,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
 	if (rxq->mark) {
 		const __m128i pinfo_ft_mask = _mm_set1_epi32(0xffffff00);
 		const __m128i fdir_flags = _mm_set1_epi32(RTE_MBUF_F_RX_FDIR);
-		__m128i fdir_id_flags = _mm_set1_epi32(RTE_MBUF_F_RX_FDIR_ID);
+		__m128i fdir_id_flags = _mm_set1_epi32(rxq->mark_flag);
 		__m128i flow_tag, invalid_mask;
 
 		flow_tag = _mm_and_si128(pinfo, pinfo_ft_mask);
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index bbaa7d2aa0..7bdb897612 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -1282,8 +1282,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 			dev->data->port_id);
 		goto error;
 	}
-	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
-	mlx5_flow_rxq_dynf_metadata_set(dev);
+	/* Set dynamic fields and flags into Rx queues. */
+	mlx5_flow_rxq_dynf_set(dev);
 	/* Set flags and context to convert Rx timestamps. */
 	mlx5_rxq_timestamp_set(dev);
 	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
diff --git a/drivers/net/sfc/sfc_dp.c b/drivers/net/sfc/sfc_dp.c
index 9f2093b353..2b0a1d749d 100644
--- a/drivers/net/sfc/sfc_dp.c
+++ b/drivers/net/sfc/sfc_dp.c
@@ -11,6 +11,7 @@
 #include <string.h>
 #include <errno.h>
 
+#include <rte_ethdev.h>
 #include <rte_log.h>
 #include <rte_mbuf_dyn.h>
 
@@ -135,12 +136,8 @@ sfc_dp_ft_ctx_id_register(void)
 		.size = sizeof(uint8_t),
 		.align = __alignof__(uint8_t),
 	};
-	static const struct rte_mbuf_dynflag ft_ctx_id_valid = {
-		.name = "rte_net_sfc_dynflag_ft_ctx_id_valid",
-	};
 
 	int field_offset;
-	int flag;
 
 	SFC_GENERIC_LOG(INFO, "%s() entry", __func__);
 
@@ -156,15 +153,8 @@ sfc_dp_ft_ctx_id_register(void)
 		return -1;
 	}
 
-	flag = rte_mbuf_dynflag_register(&ft_ctx_id_valid);
-	if (flag < 0) {
-		SFC_GENERIC_LOG(ERR, "%s() failed to register ft_ctx_id dynflag",
-				__func__);
-		return -1;
-	}
-
+	sfc_dp_ft_ctx_id_valid = rte_flow_restore_info_dynflag();
 	sfc_dp_ft_ctx_id_offset = field_offset;
-	sfc_dp_ft_ctx_id_valid = UINT64_C(1) << flag;
 
 	SFC_GENERIC_LOG(INFO, "%s() done", __func__);
 
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 5f1a656420..2ee0f63321 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -30,6 +30,7 @@
 #include "rte_ethdev.h"
 #include "rte_ethdev_trace_fp.h"
 #include "ethdev_driver.h"
+#include "rte_flow_driver.h"
 #include "ethdev_profile.h"
 #include "ethdev_private.h"
 #include "ethdev_trace.h"
@@ -6502,6 +6503,10 @@ rte_eth_rx_metadata_negotiate(uint16_t port_id, uint64_t *features)
 		return -EINVAL;
 	}
 
+	if ((*features & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0 &&
+			rte_flow_restore_info_dynflag_register() < 0)
+		*features &= ~RTE_ETH_RX_METADATA_TUNNEL_ID;
+
 	if (*dev->dev_ops->rx_metadata_negotiate == NULL)
 		return -ENOTSUP;
 	ret = eth_err(port_id,
diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c
index d41963b42e..271d854f78 100644
--- a/lib/ethdev/rte_flow.c
+++ b/lib/ethdev/rte_flow.c
@@ -1441,6 +1441,33 @@ rte_flow_get_restore_info(uint16_t port_id,
 				  NULL, rte_strerror(ENOTSUP));
 }
 
+static struct {
+	const struct rte_mbuf_dynflag desc;
+	uint64_t value;
+} flow_restore_info_dynflag = {
+	.desc = { .name = "RTE_MBUF_F_RX_RESTORE_INFO", },
+};
+
+uint64_t
+rte_flow_restore_info_dynflag(void)
+{
+	return flow_restore_info_dynflag.value;
+}
+
+int
+rte_flow_restore_info_dynflag_register(void)
+{
+	if (flow_restore_info_dynflag.value == 0) {
+		int offset = rte_mbuf_dynflag_register(&flow_restore_info_dynflag.desc);
+
+		if (offset < 0)
+			return -1;
+		flow_restore_info_dynflag.value = RTE_BIT64(offset);
+	}
+
+	return 0;
+}
+
 int
 rte_flow_tunnel_action_decap_release(uint16_t port_id,
 				     struct rte_flow_action *actions,
diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
index dec454275f..261d95378b 100644
--- a/lib/ethdev/rte_flow.h
+++ b/lib/ethdev/rte_flow.h
@@ -5128,7 +5128,23 @@ rte_flow_tunnel_match(uint16_t port_id,
 		      struct rte_flow_error *error);
 
 /**
- * Populate the current packet processing state, if exists, for the given mbuf.
+ * On reception of a mbuf from HW, a call to rte_flow_get_restore_info() may be
+ * required to retrieve some metadata.
+ * This function returns the associated mbuf ol_flags.
+ *
+ * Note: the dynamic flag is registered during a call to
+ * rte_eth_rx_metadata_negotiate() with RTE_ETH_RX_METADATA_TUNNEL_ID.
+ *
+ * @return
+ *   The offload flag indicating rte_flow_get_restore_info() must be called.
+ */
+__rte_experimental
+uint64_t
+rte_flow_restore_info_dynflag(void);
+
+/**
+ * If a mbuf contains the rte_flow_restore_info_dynflag() flag in ol_flags,
+ * populate the current packet processing state.
  *
  * One should negotiate tunnel metadata delivery from the NIC to the HW.
  * @see rte_eth_rx_metadata_negotiate()
diff --git a/lib/ethdev/rte_flow_driver.h b/lib/ethdev/rte_flow_driver.h
index 356b60f523..f9fb01b8a2 100644
--- a/lib/ethdev/rte_flow_driver.h
+++ b/lib/ethdev/rte_flow_driver.h
@@ -376,6 +376,12 @@ struct rte_flow_ops {
 const struct rte_flow_ops *
 rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error);
 
+/**
+ * Register mbuf dynamic flag for rte_flow_get_restore_info.
+ */
+int
+rte_flow_restore_info_dynflag_register(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 69c9f09a82..fc492ee839 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -311,6 +311,7 @@ EXPERIMENTAL {
 	rte_flow_async_action_list_handle_destroy;
 	rte_flow_async_action_list_handle_query_update;
 	rte_flow_async_actions_update;
+	rte_flow_restore_info_dynflag;
 };
 
 INTERNAL {
-- 
2.40.1


^ permalink raw reply	[flat|nested] 25+ messages in thread
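
For reference, here is a minimal application-side sketch of how the new flag is
meant to be consumed on the Rx path, modelled on the test-pmd change in this
patch; the burst size, the rx_poll_once() name and the processing comments are
illustrative placeholders, not part of the patch:

/* Minimal Rx-path sketch (not verbatim from the patch): resolve the
 * dynamic flag once, then call rte_flow_get_restore_info() only for
 * mbufs that carry it. */
#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_mbuf.h>

static void
rx_poll_once(uint16_t port_id, uint16_t queue_id)
{
        const uint64_t restore_flag = rte_flow_restore_info_dynflag();
        struct rte_mbuf *pkts[32];
        uint16_t nb, i;

        nb = rte_eth_rx_burst(port_id, queue_id, pkts, RTE_DIM(pkts));
        for (i = 0; i < nb; i++) {
                struct rte_flow_restore_info info = { 0, };
                struct rte_flow_error error;

                if ((pkts[i]->ol_flags & restore_flag) != 0 &&
                    rte_flow_get_restore_info(port_id, pkts[i],
                                              &info, &error) == 0) {
                        /* Tunnel offload metadata is available in info. */
                }
                /* Regular packet processing and freeing go here. */
        }
}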

* Re: [PATCH v4] ethdev: advertise flow restore in mbuf
  2023-06-21 14:43 ` [PATCH v4] " David Marchand
@ 2023-06-21 18:52   ` Ferruh Yigit
  2023-07-31 20:41   ` Ilya Maximets
  1 sibling, 0 replies; 25+ messages in thread
From: Ferruh Yigit @ 2023-06-21 18:52 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: thomas, i.maximets, alialnu, Andrew Rybchenko,
	Viacheslav Ovsiienko, Ori Kam, Aman Singh, Yuying Zhang,
	Matan Azrad, Suanming Mou, David Christensen, Ruifeng Wang,
	Bruce Richardson, Konstantin Ananyev

On 6/21/2023 3:43 PM, David Marchand wrote:
> As reported by Ilya [1], unconditionally calling
> rte_flow_get_restore_info() impacts an application performance for drivers
> that do not provide this ops.
> It could also impact processing of packets that require no call to
> rte_flow_get_restore_info() at all.
> 
> Register a dynamic mbuf flag when an application negotiates tunnel
> metadata delivery (calling rte_eth_rx_metadata_negotiate() with
> RTE_ETH_RX_METADATA_TUNNEL_ID).
> 
> Drivers then advertise that metadata can be extracted by setting this
> dynamic flag in each mbuf.
> 
> The application then calls rte_flow_get_restore_info() only when required.
> 
> Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-7f659bfa40de@ovn.org/
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> Tested-by: Ali Alnubani <alialnu@nvidia.com>
> Acked-by: Ori Kam <orika@nvidia.com>
>

Applied to dpdk-next-net/main, thanks.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4] ethdev: advertise flow restore in mbuf
  2023-06-21 14:43 ` [PATCH v4] " David Marchand
  2023-06-21 18:52   ` Ferruh Yigit
@ 2023-07-31 20:41   ` Ilya Maximets
  2023-09-26  9:17     ` David Marchand
  1 sibling, 1 reply; 25+ messages in thread
From: Ilya Maximets @ 2023-07-31 20:41 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: i.maximets, thomas, alialnu, Andrew Rybchenko,
	Viacheslav Ovsiienko, Ori Kam, Aman Singh, Yuying Zhang,
	Matan Azrad, Suanming Mou, David Christensen, Ruifeng Wang,
	Bruce Richardson, Konstantin Ananyev, Ferruh Yigit

On 6/21/23 16:43, David Marchand wrote:
> As reported by Ilya [1], unconditionally calling
> rte_flow_get_restore_info() impacts an application performance for drivers
> that do not provide this ops.
> It could also impact processing of packets that require no call to
> rte_flow_get_restore_info() at all.
> 
> Register a dynamic mbuf flag when an application negotiates tunnel
> metadata delivery (calling rte_eth_rx_metadata_negotiate() with
> RTE_ETH_RX_METADATA_TUNNEL_ID).
> 
> Drivers then advertise that metadata can be extracted by setting this
> dynamic flag in each mbuf.
> 
> The application then calls rte_flow_get_restore_info() only when required.
> 
> Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-7f659bfa40de@ovn.org/
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> Tested-by: Ali Alnubani <alialnu@nvidia.com>
> Acked-by: Ori Kam <orika@nvidia.com>
> ---
> Changes since RFC v3:
> - rebased on next-net,
> - sending as non RFC for CIs that skip RFC patches,
> 
> Changes since RFC v2:
> - fixed crash introduced in v2 and removed unneeded argument to 
>   rte_flow_restore_info_dynflag_register(),
> 
> Changes since RFC v1:
> - rebased,
> - updated vectorized datapath functions for net/mlx5,
> - moved dynamic flag register to rte_eth_rx_metadata_negotiate() and
>   hid rte_flow_restore_info_dynflag_register() into ethdev internals,
> 
> ---
>  app/test-pmd/util.c                      |  9 +++--
>  drivers/net/mlx5/mlx5.c                  |  2 +
>  drivers/net/mlx5/mlx5.h                  |  5 ++-
>  drivers/net/mlx5/mlx5_flow.c             | 47 +++++++++++++++++++++---
>  drivers/net/mlx5/mlx5_rx.c               |  2 +-
>  drivers/net/mlx5/mlx5_rx.h               |  1 +
>  drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 16 ++++----
>  drivers/net/mlx5/mlx5_rxtx_vec_neon.h    |  6 +--
>  drivers/net/mlx5/mlx5_rxtx_vec_sse.h     |  6 +--
>  drivers/net/mlx5/mlx5_trigger.c          |  4 +-
>  drivers/net/sfc/sfc_dp.c                 | 14 +------
>  lib/ethdev/rte_ethdev.c                  |  5 +++
>  lib/ethdev/rte_flow.c                    | 27 ++++++++++++++
>  lib/ethdev/rte_flow.h                    | 18 ++++++++-
>  lib/ethdev/rte_flow_driver.h             |  6 +++
>  lib/ethdev/version.map                   |  1 +
>  16 files changed, 128 insertions(+), 41 deletions(-)

<snip>

> diff --git a/lib/ethdev/rte_flow_driver.h b/lib/ethdev/rte_flow_driver.h
> index 356b60f523..f9fb01b8a2 100644
> --- a/lib/ethdev/rte_flow_driver.h
> +++ b/lib/ethdev/rte_flow_driver.h
> @@ -376,6 +376,12 @@ struct rte_flow_ops {
>  const struct rte_flow_ops *
>  rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error);
>  
> +/**
> + * Register mbuf dynamic flag for rte_flow_get_restore_info.
> + */
> +int
> +rte_flow_restore_info_dynflag_register(void);
> +

Hi, David, others.

Is there a reason to not expose this function to the application?

The point is that an application will likely want to know the value
of the flag before creating any devices, i.e. request it once and use
it for all devices later without performing a call into an external
library (DPDK).  In the current implementation, the application needs
to open some device first; only then does the result of
rte_flow_restore_info_dynflag() become meaningful.

There is no need to require the application to call this function;
it can still be called from the Rx negotiation API.  But it would be
nice if the application could know the flag beforehand, i.e. had
control over when the flag actually becomes visible.

Alternatively, _register() could also be called right from
rte_flow_restore_info_dynflag() at a slight performance cost.
That shouldn't matter much though, since drivers do not seem to
call it from performance-sensitive code.

Thoughts?

Best regards, Ilya Maximets.

^ permalink raw reply	[flat|nested] 25+ messages in thread
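
For illustration, a minimal sketch of the init-time flow requested above, under
the assumption that rte_flow_restore_info_dynflag_register() is exported to
applications (in this version of the patch it is only declared in
rte_flow_driver.h for ethdev-internal use); early_init() and restore_flag are
made-up names:

#include <rte_flow.h>
#include <rte_flow_driver.h> /* rte_flow_restore_info_dynflag_register() */

static uint64_t restore_flag; /* learned once, reused for every port later */

static void
early_init(void)
{
        /* Must run after rte_eal_init(), since registering a dynamic flag
         * needs EAL memory; no ethdev port has to exist at this point. */
        if (rte_flow_restore_info_dynflag_register() == 0)
                restore_flag = rte_flow_restore_info_dynflag();
}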

* Re: [PATCH v4] ethdev: advertise flow restore in mbuf
  2023-07-31 20:41   ` Ilya Maximets
@ 2023-09-26  9:17     ` David Marchand
  2023-09-26 19:49       ` Ilya Maximets
  0 siblings, 1 reply; 25+ messages in thread
From: David Marchand @ 2023-09-26  9:17 UTC (permalink / raw)
  To: Ilya Maximets
  Cc: dev, thomas, alialnu, Andrew Rybchenko, Viacheslav Ovsiienko,
	Ori Kam, Aman Singh, Yuying Zhang, Matan Azrad, Suanming Mou,
	David Christensen, Ruifeng Wang, Bruce Richardson,
	Konstantin Ananyev, Ferruh Yigit

Hello Ilya,

On Mon, Jul 31, 2023 at 10:40 PM Ilya Maximets <i.maximets@ovn.org> wrote:
> On 6/21/23 16:43, David Marchand wrote:
> > As reported by Ilya [1], unconditionally calling
> > rte_flow_get_restore_info() impacts an application performance for drivers
> > that do not provide this ops.
> > It could also impact processing of packets that require no call to
> > rte_flow_get_restore_info() at all.
> >
> > Register a dynamic mbuf flag when an application negotiates tunnel
> > metadata delivery (calling rte_eth_rx_metadata_negotiate() with
> > RTE_ETH_RX_METADATA_TUNNEL_ID).
> >
> > Drivers then advertise that metadata can be extracted by setting this
> > dynamic flag in each mbuf.
> >
> > The application then calls rte_flow_get_restore_info() only when required.
> >
> > Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-7f659bfa40de@ovn.org/
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
> > Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> > Tested-by: Ali Alnubani <alialnu@nvidia.com>
> > Acked-by: Ori Kam <orika@nvidia.com>
> > ---
> > Changes since RFC v3:
> > - rebased on next-net,
> > - sending as non RFC for CIs that skip RFC patches,
> >
> > Changes since RFC v2:
> > - fixed crash introduced in v2 and removed unneeded argument to
> >   rte_flow_restore_info_dynflag_register(),
> >
> > Changes since RFC v1:
> > - rebased,
> > - updated vectorized datapath functions for net/mlx5,
> > - moved dynamic flag register to rte_eth_rx_metadata_negotiate() and
> >   hid rte_flow_restore_info_dynflag_register() into ethdev internals,
> >
> > ---
> >  app/test-pmd/util.c                      |  9 +++--
> >  drivers/net/mlx5/mlx5.c                  |  2 +
> >  drivers/net/mlx5/mlx5.h                  |  5 ++-
> >  drivers/net/mlx5/mlx5_flow.c             | 47 +++++++++++++++++++++---
> >  drivers/net/mlx5/mlx5_rx.c               |  2 +-
> >  drivers/net/mlx5/mlx5_rx.h               |  1 +
> >  drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 16 ++++----
> >  drivers/net/mlx5/mlx5_rxtx_vec_neon.h    |  6 +--
> >  drivers/net/mlx5/mlx5_rxtx_vec_sse.h     |  6 +--
> >  drivers/net/mlx5/mlx5_trigger.c          |  4 +-
> >  drivers/net/sfc/sfc_dp.c                 | 14 +------
> >  lib/ethdev/rte_ethdev.c                  |  5 +++
> >  lib/ethdev/rte_flow.c                    | 27 ++++++++++++++
> >  lib/ethdev/rte_flow.h                    | 18 ++++++++-
> >  lib/ethdev/rte_flow_driver.h             |  6 +++
> >  lib/ethdev/version.map                   |  1 +
> >  16 files changed, 128 insertions(+), 41 deletions(-)
>
> <snip>
>
> > diff --git a/lib/ethdev/rte_flow_driver.h b/lib/ethdev/rte_flow_driver.h
> > index 356b60f523..f9fb01b8a2 100644
> > --- a/lib/ethdev/rte_flow_driver.h
> > +++ b/lib/ethdev/rte_flow_driver.h
> > @@ -376,6 +376,12 @@ struct rte_flow_ops {
> >  const struct rte_flow_ops *
> >  rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error);
> >
> > +/**
> > + * Register mbuf dynamic flag for rte_flow_get_restore_info.
> > + */
> > +int
> > +rte_flow_restore_info_dynflag_register(void);
> > +
>
> Hi, David, others.
>
> Is there a reason to not expose this function to the application?
>
> The point is that an application will likely want to know the value
> of the flag before creating any devices, i.e. request it once and use
> it for all devices later without performing a call into an external
> library (DPDK).  In the current implementation, the application needs
> to open some device first; only then does the result of
> rte_flow_restore_info_dynflag() become meaningful.
>
> There is no need to require the application to call this function;
> it can still be called from the Rx negotiation API.  But it would be
> nice if the application could know the flag beforehand, i.e. had
> control over when the flag actually becomes visible.

DPDK tries to register flags only when needed, as there is not a lot
of space left for dynamic flags.
Some drivers take some of that space and applications want their share too.

DPDK can export the _register function for applications to call,
regardless of which driver will be used later.

Yet, I want to understand why it matters in the OVS context.
Is it not enough to resolve the flag (by calling
rte_flow_restore_info_dynflag()) once rte_eth_rx_metadata_negotiate()
has been called for tunnel metadata?
Do you want to avoid an atomic store/load between the OVS main thread
and the PMD threads?


-- 
David Marchand


^ permalink raw reply	[flat|nested] 25+ messages in thread
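
For illustration, a hypothetical sketch of the alternative suggested above:
resolve the flag once on the main (configuration) thread right after
negotiating tunnel metadata, then publish it to the polling threads.  This is
not OVS code; negotiate_and_cache(), needs_restore_info() and
cached_restore_flag are made-up names:

#include <stdbool.h>
#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_mbuf.h>

static uint64_t cached_restore_flag; /* written by the main thread only */

/* Main/configuration thread, early in port setup (before the port is
 * configured and before polling starts). */
static void
negotiate_and_cache(uint16_t port_id)
{
        uint64_t features = RTE_ETH_RX_METADATA_TUNNEL_ID;

        if (rte_eth_rx_metadata_negotiate(port_id, &features) == 0 &&
            (features & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0)
                __atomic_store_n(&cached_restore_flag,
                                 rte_flow_restore_info_dynflag(),
                                 __ATOMIC_RELEASE);
}

/* Polling (PMD) thread, per packet. */
static inline bool
needs_restore_info(const struct rte_mbuf *m)
{
        uint64_t flag = __atomic_load_n(&cached_restore_flag, __ATOMIC_RELAXED);

        return flag != 0 && (m->ol_flags & flag) != 0;
}

With that, the per-packet cost boils down to one relaxed load plus a flag test,
which is the atomic store/load trade-off raised in the question above.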

* Re: [PATCH v4] ethdev: advertise flow restore in mbuf
  2023-09-26  9:17     ` David Marchand
@ 2023-09-26 19:49       ` Ilya Maximets
  0 siblings, 0 replies; 25+ messages in thread
From: Ilya Maximets @ 2023-09-26 19:49 UTC (permalink / raw)
  To: David Marchand
  Cc: i.maximets, dev, thomas, alialnu, Andrew Rybchenko,
	Viacheslav Ovsiienko, Ori Kam, Aman Singh, Yuying Zhang,
	Matan Azrad, Suanming Mou, David Christensen, Ruifeng Wang,
	Bruce Richardson, Konstantin Ananyev, Ferruh Yigit

On 9/26/23 11:17, David Marchand wrote:
> Hello Ilya,
> 
> On Mon, Jul 31, 2023 at 10:40 PM Ilya Maximets <i.maximets@ovn.org> wrote:
>> On 6/21/23 16:43, David Marchand wrote:
>>> As reported by Ilya [1], unconditionally calling
>>> rte_flow_get_restore_info() impacts an application performance for drivers
>>> that do not provide this ops.
>>> It could also impact processing of packets that require no call to
>>> rte_flow_get_restore_info() at all.
>>>
>>> Register a dynamic mbuf flag when an application negotiates tunnel
>>> metadata delivery (calling rte_eth_rx_metadata_negotiate() with
>>> RTE_ETH_RX_METADATA_TUNNEL_ID).
>>>
>>> Drivers then advertise that metadata can be extracted by setting this
>>> dynamic flag in each mbuf.
>>>
>>> The application then calls rte_flow_get_restore_info() only when required.
>>>
>>> Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-7f659bfa40de@ovn.org/
>>> Signed-off-by: David Marchand <david.marchand@redhat.com>
>>> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
>>> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
>>> Tested-by: Ali Alnubani <alialnu@nvidia.com>
>>> Acked-by: Ori Kam <orika@nvidia.com>
>>> ---
>>> Changes since RFC v3:
>>> - rebased on next-net,
>>> - sending as non RFC for CIs that skip RFC patches,
>>>
>>> Changes since RFC v2:
>>> - fixed crash introduced in v2 and removed unneeded argument to
>>>   rte_flow_restore_info_dynflag_register(),
>>>
>>> Changes since RFC v1:
>>> - rebased,
>>> - updated vectorized datapath functions for net/mlx5,
>>> - moved dynamic flag register to rte_eth_rx_metadata_negotiate() and
>>>   hid rte_flow_restore_info_dynflag_register() into ethdev internals,
>>>
>>> ---
>>>  app/test-pmd/util.c                      |  9 +++--
>>>  drivers/net/mlx5/mlx5.c                  |  2 +
>>>  drivers/net/mlx5/mlx5.h                  |  5 ++-
>>>  drivers/net/mlx5/mlx5_flow.c             | 47 +++++++++++++++++++++---
>>>  drivers/net/mlx5/mlx5_rx.c               |  2 +-
>>>  drivers/net/mlx5/mlx5_rx.h               |  1 +
>>>  drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 16 ++++----
>>>  drivers/net/mlx5/mlx5_rxtx_vec_neon.h    |  6 +--
>>>  drivers/net/mlx5/mlx5_rxtx_vec_sse.h     |  6 +--
>>>  drivers/net/mlx5/mlx5_trigger.c          |  4 +-
>>>  drivers/net/sfc/sfc_dp.c                 | 14 +------
>>>  lib/ethdev/rte_ethdev.c                  |  5 +++
>>>  lib/ethdev/rte_flow.c                    | 27 ++++++++++++++
>>>  lib/ethdev/rte_flow.h                    | 18 ++++++++-
>>>  lib/ethdev/rte_flow_driver.h             |  6 +++
>>>  lib/ethdev/version.map                   |  1 +
>>>  16 files changed, 128 insertions(+), 41 deletions(-)
>>
>> <snip>
>>
>>> diff --git a/lib/ethdev/rte_flow_driver.h b/lib/ethdev/rte_flow_driver.h
>>> index 356b60f523..f9fb01b8a2 100644
>>> --- a/lib/ethdev/rte_flow_driver.h
>>> +++ b/lib/ethdev/rte_flow_driver.h
>>> @@ -376,6 +376,12 @@ struct rte_flow_ops {
>>>  const struct rte_flow_ops *
>>>  rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error);
>>>
>>> +/**
>>> + * Register mbuf dynamic flag for rte_flow_get_restore_info.
>>> + */
>>> +int
>>> +rte_flow_restore_info_dynflag_register(void);
>>> +
>>
>> Hi, David, others.
>>
>> Is there a reason to not expose this function to the application?
>>
>> The point is that an application will likely want to know the value
>> of the flag before creating any devices, i.e. request it once and use
>> it for all devices later without performing a call into an external
>> library (DPDK).  In the current implementation, the application needs
>> to open some device first; only then does the result of
>> rte_flow_restore_info_dynflag() become meaningful.
>>
>> There is no need to require the application to call this function;
>> it can still be called from the Rx negotiation API.  But it would be
>> nice if the application could know the flag beforehand, i.e. had
>> control over when the flag actually becomes visible.
> 
> DPDK tries to register flags only when needed, as there is not a lot
> of space left for dynamic flags.
> Some drivers take some of that space and applications want their share too.
> 
> DPDK can export the _register function for applications to call,
> regardless of which driver will be used later.
> 
> Yet, I want to understand why it matters in the OVS context.
> Is it not enough to resolve the flag (by calling
> rte_flow_restore_info_dynflag()) once rte_eth_rx_metadata_negotiate()
> has been called for tunnel metadata?
> Do you want to avoid an atomic store/load between the OVS main thread
> and the PMD threads?

Yes, something like this.  I do not have a solid implementation idea
for that though.  I replied to your OVS patch.

Best regards, Ilya Maximets.


^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2023-09-26 19:48 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-05 10:31 [RFC PATCH] ethdev: advertise flow restore in mbuf David Marchand
2023-05-24 12:57 ` David Marchand
2023-05-24 16:00 ` Ori Kam
2023-05-24 18:44   ` David Marchand
2023-06-01  8:48     ` David Marchand
2023-06-01  9:31       ` Ori Kam
2023-06-01  9:43         ` David Marchand
2023-06-01 10:02           ` Ori Kam
2023-06-01 10:03             ` David Marchand
2023-06-14 16:46 ` Slava Ovsiienko
2023-06-15  8:00   ` David Marchand
2023-06-15 13:47 ` [RFC PATCH v2] " David Marchand
2023-06-19 12:20   ` Andrew Rybchenko
2023-06-19 13:57   ` Slava Ovsiienko
2023-06-20 10:04   ` Ali Alnubani
2023-06-20 11:10 ` [RFC PATCH v3] " David Marchand
2023-06-20 16:43   ` Slava Ovsiienko
2023-06-21  7:43     ` David Marchand
2023-06-21 12:53     ` Ori Kam
2023-06-21  7:47   ` Ali Alnubani
2023-06-21 14:43 ` [PATCH v4] " David Marchand
2023-06-21 18:52   ` Ferruh Yigit
2023-07-31 20:41   ` Ilya Maximets
2023-09-26  9:17     ` David Marchand
2023-09-26 19:49       ` Ilya Maximets

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).