DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH] mbuf: add mbuf physical address field to dynamic field
@ 2022-06-30 16:25 Shijith Thotton
  2022-06-30 16:45 ` Stephen Hemminger
  2022-06-30 16:55 ` Bruce Richardson
  0 siblings, 2 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-06-30 16:25 UTC (permalink / raw)
  To: jerinj; +Cc: thomas, Shijith Thotton, dev, Olivier Matz

If all devices are configured to run in IOVA mode as VA, physical
address field of mbuf (buf_iova) won't be used. In such cases, buf_iova
space is free to use as a dynamic field. So a new dynamic field member
(dynfield2) is added in mbuf structure to make use of that space.

A new mbuf flag RTE_MBUF_F_DYNFIELD2 is introduced to help identify the
mbuf that can use dynfield2.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 lib/mbuf/rte_mbuf.c      |  8 ++++++++
 lib/mbuf/rte_mbuf.h      | 16 +++++++++++++---
 lib/mbuf/rte_mbuf_core.h | 29 ++++++++++++++++++++++-------
 lib/mbuf/rte_mbuf_dyn.c  |  3 +++
 4 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/lib/mbuf/rte_mbuf.c b/lib/mbuf/rte_mbuf.c
index a2307cebe6..718b4505c4 100644
--- a/lib/mbuf/rte_mbuf.c
+++ b/lib/mbuf/rte_mbuf.c
@@ -101,6 +101,10 @@ rte_pktmbuf_init(struct rte_mempool *mp,
 	m->port = RTE_MBUF_PORT_INVALID;
 	rte_mbuf_refcnt_set(m, 1);
 	m->next = NULL;
+
+	/* enable dynfield2 if IOVA mode is VA */
+	if (rte_eal_iova_mode() == RTE_IOVA_VA)
+		m->ol_flags = RTE_MBUF_F_DYNFIELD2;
 }
 
 /*
@@ -206,6 +210,10 @@ __rte_pktmbuf_init_extmem(struct rte_mempool *mp,
 	rte_mbuf_refcnt_set(m, 1);
 	m->next = NULL;
 
+	/* enable dynfield2 if IOVA mode is VA */
+	if (rte_eal_iova_mode() == RTE_IOVA_VA)
+		m->ol_flags |= RTE_MBUF_F_DYNFIELD2;
+
 	/* init external buffer shared info items */
 	shinfo = RTE_PTR_ADD(m, mbuf_size);
 	m->shinfo = shinfo;
diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h
index 9811e8c760..59485f04ed 100644
--- a/lib/mbuf/rte_mbuf.h
+++ b/lib/mbuf/rte_mbuf.h
@@ -1056,9 +1056,11 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
 	RTE_ASSERT(shinfo->free_cb != NULL);
 
 	m->buf_addr = buf_addr;
-	m->buf_iova = buf_iova;
 	m->buf_len = buf_len;
 
+	if (!RTE_MBUF_HAS_DYNFIELD2(m))
+		m->buf_iova = buf_iova;
+
 	m->data_len = 0;
 	m->data_off = 0;
 
@@ -1087,6 +1089,10 @@ static inline void
 rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
 {
 	memcpy(&mdst->dynfield1, msrc->dynfield1, sizeof(mdst->dynfield1));
+
+	if (RTE_MBUF_HAS_DYNFIELD2(mdst))
+		memcpy(&mdst->dynfield2, &msrc->dynfield2,
+		       sizeof(mdst->dynfield2));
 }
 
 /* internal */
@@ -1143,10 +1149,12 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
 
 	mi->data_off = m->data_off;
 	mi->data_len = m->data_len;
-	mi->buf_iova = m->buf_iova;
 	mi->buf_addr = m->buf_addr;
 	mi->buf_len = m->buf_len;
 
+	if (!RTE_MBUF_HAS_DYNFIELD2(mi))
+		mi->buf_iova = m->buf_iova;
+
 	mi->next = NULL;
 	mi->pkt_len = mi->data_len;
 	mi->nb_segs = 1;
@@ -1245,11 +1253,13 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
 
 	m->priv_size = priv_size;
 	m->buf_addr = (char *)m + mbuf_size;
-	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
 	m->buf_len = (uint16_t)buf_len;
 	rte_pktmbuf_reset_headroom(m);
 	m->data_len = 0;
 	m->ol_flags = 0;
+
+	if (!RTE_MBUF_HAS_DYNFIELD2(m))
+		m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
 }
 
 /**
diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index 3d6ddd6773..a549e36464 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -504,6 +504,8 @@ extern "C" {
 #define RTE_MBUF_F_INDIRECT    (1ULL << 62) /**< Indirect attached mbuf */
 #define IND_ATTACHED_MBUF RTE_DEPRECATED(IND_ATTACHED_MBUF) RTE_MBUF_F_INDIRECT
 
+#define RTE_MBUF_F_DYNFIELD2	(1ULL << 63) /**< dynfield2 mbuf field enabled */
+
 /** Alignment constraint of mbuf private area. */
 #define RTE_MBUF_PRIV_ALIGN 8
 
@@ -579,13 +581,18 @@ struct rte_mbuf {
 	RTE_MARKER cacheline0;
 
 	void *buf_addr;           /**< Virtual address of segment buffer. */
-	/**
-	 * Physical address of segment buffer.
-	 * Force alignment to 8-bytes, so as to ensure we have the exact
-	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
-	 * working on vector drivers easier.
-	 */
-	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
+	RTE_STD_C11
+	union {
+		/**
+		 * Physical address of segment buffer if IOVA mode is not VA.
+		 * Force alignment to 8-bytes, so as to ensure we have the exact
+		 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
+		 * working on vector drivers easier.
+		 */
+		rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
+		/* Reserved for dynamic field if IOVA mode is VA. */
+		uint64_t dynfield2;
+	};
 
 	/* next 8 bytes are initialised on RX descriptor rearm */
 	RTE_MARKER64 rearm_data;
@@ -803,6 +810,14 @@ struct rte_mbuf_ext_shared_info {
 #define RTE_MBUF_DIRECT(mb) \
 	(!((mb)->ol_flags & (RTE_MBUF_F_INDIRECT | RTE_MBUF_F_EXTERNAL)))
 
+/**
+ *
+ * Returns TRUE if given mbuf has dynfield2 field enabled, or FALSE otherwise.
+ *
+ * dynfield2 field can be enabled if IOVA mode is configured as VA.
+ */
+#define RTE_MBUF_HAS_DYNFIELD2(mb) (!!((mb)->ol_flags & RTE_MBUF_F_DYNFIELD2))
+
 /** Uninitialized or unspecified port. */
 #define RTE_MBUF_PORT_INVALID UINT16_MAX
 /** For backwards compatibility. */
diff --git a/lib/mbuf/rte_mbuf_dyn.c b/lib/mbuf/rte_mbuf_dyn.c
index 4ae79383b5..7bfe50e0e2 100644
--- a/lib/mbuf/rte_mbuf_dyn.c
+++ b/lib/mbuf/rte_mbuf_dyn.c
@@ -127,7 +127,10 @@ init_shared_mem(void)
 		 * rte_mbuf_dynfield_copy().
 		 */
 		memset(shm, 0, sizeof(*shm));
+
 		mark_free(dynfield1);
+		if (rte_eal_iova_mode() == RTE_IOVA_VA)
+			mark_free(dynfield2);
 
 		/* init free_flags */
 		for (mask = RTE_MBUF_F_FIRST_FREE; mask <= RTE_MBUF_F_LAST_FREE; mask <<= 1)
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH] mbuf: add mbuf physical address field to dynamic field
  2022-06-30 16:25 [PATCH] mbuf: add mbuf physical address field to dynamic field Shijith Thotton
@ 2022-06-30 16:45 ` Stephen Hemminger
  2022-07-01 12:16   ` Shijith Thotton
  2022-07-01 12:24   ` Shijith Thotton
  2022-06-30 16:55 ` Bruce Richardson
  1 sibling, 2 replies; 88+ messages in thread
From: Stephen Hemminger @ 2022-06-30 16:45 UTC (permalink / raw)
  To: Shijith Thotton; +Cc: jerinj, thomas, dev, Olivier Matz

On Thu, 30 Jun 2022 21:55:16 +0530
Shijith Thotton <sthotton@marvell.com> wrote:

> If all devices are configured to run in IOVA mode as VA, physical
> address field of mbuf (buf_iova) won't be used. In such cases, buf_iova
> space is free to use as a dynamic field. So a new dynamic field member
> (dynfield2) is added in mbuf structure to make use of that space.
> 
> A new mbuf flag RTE_MBUF_F_DYNFIELD2 is introduced to help identify the
> mbuf that can use dynfield2.
> 
> Signed-off-by: Shijith Thotton <sthotton@marvell.com>

This seems like a complex and potentially error prone way to do this.
What is the use case? How much of a performance gain?


^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH] mbuf: add mbuf physical address field to dynamic field
  2022-06-30 16:25 [PATCH] mbuf: add mbuf physical address field to dynamic field Shijith Thotton
  2022-06-30 16:45 ` Stephen Hemminger
@ 2022-06-30 16:55 ` Bruce Richardson
  2022-07-01  9:48   ` Olivier Matz
  1 sibling, 1 reply; 88+ messages in thread
From: Bruce Richardson @ 2022-06-30 16:55 UTC (permalink / raw)
  To: Shijith Thotton; +Cc: jerinj, thomas, dev, Olivier Matz

On Thu, Jun 30, 2022 at 09:55:16PM +0530, Shijith Thotton wrote:
> If all devices are configured to run in IOVA mode as VA, physical
> address field of mbuf (buf_iova) won't be used. In such cases, buf_iova
> space is free to use as a dynamic field. So a new dynamic field member
> (dynfield2) is added in mbuf structure to make use of that space.
> 
> A new mbuf flag RTE_MBUF_F_DYNFIELD2 is introduced to help identify the
> mbuf that can use dynfield2.
> 
> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> ---
I disagree with this patch. The mbuf should always record the iova of the
buffer directly, rather than forcing the drivers to query the EAL mode.
This will likely also break all vector drivers right now, as they are
sensitive to the mbuf layout and the position of the IOVA address in the
buffer.

/Bruce

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH] mbuf: add mbuf physical address field to dynamic field
  2022-06-30 16:55 ` Bruce Richardson
@ 2022-07-01  9:48   ` Olivier Matz
  2022-07-01 11:53     ` Slava Ovsiienko
  2022-07-01 12:01     ` [EXT] " Shijith Thotton
  0 siblings, 2 replies; 88+ messages in thread
From: Olivier Matz @ 2022-07-01  9:48 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: Shijith Thotton, jerinj, thomas, dev

Hi,

On Thu, Jun 30, 2022 at 05:55:21PM +0100, Bruce Richardson wrote:
> On Thu, Jun 30, 2022 at 09:55:16PM +0530, Shijith Thotton wrote:
> > If all devices are configured to run in IOVA mode as VA, physical
> > address field of mbuf (buf_iova) won't be used. In such cases, buf_iova
> > space is free to use as a dynamic field. So a new dynamic field member
> > (dynfield2) is added in mbuf structure to make use of that space.
> > 
> > A new mbuf flag RTE_MBUF_F_DYNFIELD2 is introduced to help identify the
> > mbuf that can use dynfield2.
> > 
> > Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> > ---
> I disagree with this patch. The mbuf should always record the iova of the
> buffer directly, rather than forcing the drivers to query the EAL mode.
> This will likely also break all vector drivers right now, as they are
> sensitive to the mbuf layout and the position of the IOVA address in the
> buffer.

I have the same opinion as Stephen and Bruce. This field is widely used
in DPDK, I don't think it is a good idea to disable it if some conditions
are met.

^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [PATCH] mbuf: add mbuf physical address field to dynamic field
  2022-07-01  9:48   ` Olivier Matz
@ 2022-07-01 11:53     ` Slava Ovsiienko
  2022-07-01 12:01     ` [EXT] " Shijith Thotton
  1 sibling, 0 replies; 88+ messages in thread
From: Slava Ovsiienko @ 2022-07-01 11:53 UTC (permalink / raw)
  To: Olivier Matz, Bruce Richardson
  Cc: Shijith Thotton, jerinj, NBU-Contact-Thomas Monjalon (EXTERNAL), dev

Hi,

Just to note, some PMDs do not use the physical address field at all.
As an example, the mlx5 PMD (and it is far from being the only one)
could take advantage of this patch. Nonetheless, I tend to agree -
for the whole DPDK framework it looks risky. I had similar thoughts
about removing the iova field, but I did not dare to propose it 😊

With best regards,
Slava

> -----Original Message-----
> From: Olivier Matz <olivier.matz@6wind.com>
> Sent: Friday, July 1, 2022 12:49
> To: Bruce Richardson <bruce.richardson@intel.com>
> Cc: Shijith Thotton <sthotton@marvell.com>; jerinj@marvell.com; NBU-
> Contact-Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>; dev@dpdk.org
> Subject: Re: [PATCH] mbuf: add mbuf physical address field to dynamic
> field
> 
> Hi,
> 
> On Thu, Jun 30, 2022 at 05:55:21PM +0100, Bruce Richardson wrote:
> > On Thu, Jun 30, 2022 at 09:55:16PM +0530, Shijith Thotton wrote:
> > > If all devices are configured to run in IOVA mode as VA, physical
> > > address field of mbuf (buf_iova) won't be used. In such cases,
> > > buf_iova space is free to use as a dynamic field. So a new dynamic
> > > field member
> > > (dynfield2) is added in mbuf structure to make use of that space.
> > >
> > > A new mbuf flag RTE_MBUF_F_DYNFIELD2 is introduced to help identify
> > > the mbuf that can use dynfield2.
> > >
> > > Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> > > ---
> > I disagree with this patch. The mbuf should always record the iova of
> > the buffer directly, rather than forcing the drivers to query the EAL
> mode.
> > This will likely also break all vector drivers right now, as they are
> > sensitive to the mbuf layout and the position of the IOVA address in
> > the buffer.
> 
> I have the same opinion than Stephen and Bruce. This field is widely
> used in DPDK, I don't think it is a good idea to disable it if some
> conditions are met.

^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [EXT] Re: [PATCH] mbuf: add mbuf physical address field to dynamic field
  2022-07-01  9:48   ` Olivier Matz
  2022-07-01 11:53     ` Slava Ovsiienko
@ 2022-07-01 12:01     ` Shijith Thotton
  1 sibling, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-07-01 12:01 UTC (permalink / raw)
  To: Olivier Matz, Bruce Richardson; +Cc: Jerin Jacob Kollanukkaran, thomas, dev

>
>On Thu, Jun 30, 2022 at 05:55:21PM +0100, Bruce Richardson wrote:
>> On Thu, Jun 30, 2022 at 09:55:16PM +0530, Shijith Thotton wrote:
>> > If all devices are configured to run in IOVA mode as VA, physical
>> > address field of mbuf (buf_iova) won't be used. In such cases, buf_iova
>> > space is free to use as a dynamic field. So a new dynamic field member
>> > (dynfield2) is added in mbuf structure to make use of that space.
>> >
>> > A new mbuf flag RTE_MBUF_F_DYNFIELD2 is introduced to help identify the
>> > mbuf that can use dynfield2.
>> >
>> > Signed-off-by: Shijith Thotton <sthotton@marvell.com>
>> > ---
>> I disagree with this patch. The mbuf should always record the iova of the
>> buffer directly, rather than forcing the drivers to query the EAL mode.
>> This will likely also break all vector drivers right now, as they are
>> sensitive to the mbuf layout and the position of the IOVA address in the
>> buffer.
>
 
Hi Bruce,

The IOVA check should have been bus-specific instead of EAL-wide. The bus IOVA
mode will be VA only if all devices on the bus have the flag
RTE_PCI_DRV_NEED_IOVA_AS_VA. That was our thought process, but the wrong API was
used for the check. A bus-specific check would have avoided the issue you
mentioned above.

>I have the same opinion than Stephen and Bruce. This field is widely used
>in DPDK, I don't think it is a good idea to disable it if some conditions
>are met.

Hi Olivier, 

I was under the assumption that buf_iova would not be used directly by the
application (only through wrappers), so that the wrappers could check ol_flags
before setting buf_iova.

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH] mbuf: add mbuf physical address field to dynamic field
  2022-06-30 16:45 ` Stephen Hemminger
@ 2022-07-01 12:16   ` Shijith Thotton
  2022-07-01 12:24   ` Shijith Thotton
  1 sibling, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-07-01 12:16 UTC (permalink / raw)
  To: stephen; +Cc: dev, jerinj, olivier.matz, sthotton, thomas



^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH] mbuf: add mbuf physical address field to dynamic field
  2022-06-30 16:45 ` Stephen Hemminger
  2022-07-01 12:16   ` Shijith Thotton
@ 2022-07-01 12:24   ` Shijith Thotton
  2022-07-03  7:31     ` Morten Brørup
  2022-08-30 13:07     ` [PATCH] mbuf: add mbuf physical address field to dynamic field Ferruh Yigit
  1 sibling, 2 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-07-01 12:24 UTC (permalink / raw)
  To: stephen; +Cc: dev, jerinj, olivier.matz, sthotton, thomas

>> If all devices are configured to run in IOVA mode as VA, physical
>> address field of mbuf (buf_iova) won't be used. In such cases, buf_iova
>> space is free to use as a dynamic field. So a new dynamic field member
>> (dynfield2) is added in mbuf structure to make use of that space.
>>
>> A new mbuf flag RTE_MBUF_F_DYNFIELD2 is introduced to help identify the
>> mbuf that can use dynfield2.
>>
>> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
>
> This seems like a complex and potentially error prone way to do this.
> What is the use case?
>

PCI drivers with the flag RTE_PCI_DRV_NEED_IOVA_AS_VA only work in IOVA mode as
VA. The buf_iova field of mbuf is not used by those PMDs and can be used as a
dynamic area to save space.

> How much of a performance gain?

No change in performance.

^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [PATCH] mbuf: add mbuf physical address field to dynamic field
  2022-07-01 12:24   ` Shijith Thotton
@ 2022-07-03  7:31     ` Morten Brørup
  2022-07-04 14:00       ` Bruce Richardson
  2022-08-30 13:07     ` [PATCH] mbuf: add mbuf physical address field to dynamic field Ferruh Yigit
  1 sibling, 1 reply; 88+ messages in thread
From: Morten Brørup @ 2022-07-03  7:31 UTC (permalink / raw)
  To: Shijith Thotton, stephen, Bruce Richardson, olivier.matz
  Cc: dev, jerinj, thomas, Honnappa Nagarahalli

> From: Shijith Thotton [mailto:sthotton@marvell.com]
> Sent: Friday, 1 July 2022 14.25
> 
> >> If all devices are configured to run in IOVA mode as VA, physical
> >> address field of mbuf (buf_iova) won't be used.

Will some of the hardware vendors please comment on this: Has IOVA VA mode become common over time, or is it still an exotic bleeding edge feature?

If it has become common, we should let DPDK evolve accordingly, and consider PA (non-VA) mode legacy, treating it as such. Don't get stuck in the past.

> >> In such cases,
> buf_iova
> >> space is free to use as a dynamic field. So a new dynamic field
> member
> >> (dynfield2) is added in mbuf structure to make use of that space.
> >>
> >> A new mbuf flag RTE_MBUF_F_DYNFIELD2 is introduced to help identify
> the
> >> mbuf that can use dynfield2.
> >>
> >> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> >
> > This seems like a complex and potentially error prone way to do this.

Perhaps this optimization should be a compile time option instead?

> > What is the use case?
> >
> 
> PCI drivers with the flag RTE_PCI_DRV_NEED_IOVA_AS_VA only works in
> IOVA mode as
> VA. buf_iova field of mbuf is not used by those PMDs and can be used as
> a
> dynamic area to save space.
> 
> > How much of a performance gain?
> 
> No change in performance.

Freeing up 8 bytes in the first mbuf cache line is a major improvement!

This could provide a significant performance gain for some applications, by moving private/dynamic mbuf fields from the second to the first cache line, thus avoiding writes to the second cache line in the application's first pipeline stage.



^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH] mbuf: add mbuf physical address field to dynamic field
  2022-07-03  7:31     ` Morten Brørup
@ 2022-07-04 14:00       ` Bruce Richardson
  2022-08-03 15:34         ` [EXT] " Shijith Thotton
  0 siblings, 1 reply; 88+ messages in thread
From: Bruce Richardson @ 2022-07-04 14:00 UTC (permalink / raw)
  To: Morten Brørup
  Cc: Shijith Thotton, stephen, olivier.matz, dev, jerinj, thomas,
	Honnappa Nagarahalli

On Sun, Jul 03, 2022 at 09:31:01AM +0200, Morten Brørup wrote:
> > From: Shijith Thotton [mailto:sthotton@marvell.com]
> > Sent: Friday, 1 July 2022 14.25
> > 
> > >> If all devices are configured to run in IOVA mode as VA, physical
> > >> address field of mbuf (buf_iova) won't be used.
> 
> Will some of the hardware vendors please comment on this: Has IOVA VA mode become common over time, or is it still an exotic bleeding edge feature?
> 
> If it has become common, we should let DPDK evolve accordingly, and consider PA (non-VA) mode legacy, treating it as such. Don't get stuck in the past.
> 

IOVA as VA mode is indeed common and we are constantly encouraging users to
switch to using vfio to try and take advantage of this.

However, in my experience IOVA as PA is still very, very common too. We
cannot drop support for this mode just yet, unfortunately.

> > >> In such cases,
> > buf_iova
> > >> space is free to use as a dynamic field. So a new dynamic field
> > member
> > >> (dynfield2) is added in mbuf structure to make use of that space.
> > >>
> > >> A new mbuf flag RTE_MBUF_F_DYNFIELD2 is introduced to help identify
> > the
> > >> mbuf that can use dynfield2.
> > >>
> > >> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> > >
> > > This seems like a complex and potentially error prone way to do this.
> 
> Perhaps this optimization should be a compile time option instead?
>

It could indeed be, and probably could be done very safely, in that we could
mark as disabled all drivers when the mode is enabled for a build.
Thereafter, drivers could be marked as VA-only safe as they are updated as
necessary, i.e. use the build system to enforce that only drivers known to
work with the mode are built when the mode is enabled.

That said, verifying all drivers to work with this mode is a decent effort.
Do we have indications of the perf benefit we would get from doing this for
some real-world app?

/Bruce

^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [EXT] Re: [PATCH] mbuf: add mbuf physical address field to dynamic field
  2022-07-04 14:00       ` Bruce Richardson
@ 2022-08-03 15:34         ` Shijith Thotton
  2022-08-29 15:16           ` [PATCH v1 0/4] mbuf dynamic field expansion Shijith Thotton
                             ` (4 more replies)
  0 siblings, 5 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-08-03 15:34 UTC (permalink / raw)
  To: Bruce Richardson, Morten Brørup
  Cc: stephen, olivier.matz, dev, Jerin Jacob Kollanukkaran, thomas,
	Honnappa Nagarahalli

Hi Bruce,

>> > >> If all devices are configured to run in IOVA mode as VA, physical
>> > >> address field of mbuf (buf_iova) won't be used.
>>
>> Will some of the hardware vendors please comment on this: Has IOVA VA mode
>become common over time, or is it still an exotic bleeding edge feature?
>>
>> If it has become common, we should let DPDK evolve accordingly, and consider
>PA (non-VA) mode legacy, treating it as such. Don't get stuck in the past.
>>
>
>IOVA as VA mode is indeed common and we are constantly encouraging users to
>switch to using vfio to try and take advantage of this.
>
>However, in my experience IOVA as PA is still very, very common too. We
>cannot drop support for this mode just yet, unfortunately.
>
>> > >> In such cases,
>> > buf_iova
>> > >> space is free to use as a dynamic field. So a new dynamic field
>> > member
>> > >> (dynfield2) is added in mbuf structure to make use of that space.
>> > >>
>> > >> A new mbuf flag RTE_MBUF_F_DYNFIELD2 is introduced to help identify
>> > the
>> > >> mbuf that can use dynfield2.
>> > >>
>> > >> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
>> > >
>> > > This seems like a complex and potentially error prone way to do this.
>>
>> Perhaps this optimization should be a compile time option instead?
>>
>
>It could indeed be, and probably could be done very safely, in that we could
>mark as disabled all drivers when the mode is enabled for a build.
>Thereafter, drivers could be marked as VA-only safe as they are updated as
>necessary, i.e. use the build system to enforce that only drivers known to
>work with the mode are built when the mode is enabled.
>

I will prepare a patch to enable VA-only build.

>That said, verifying all drivers to work with this mode is a decent effort.
>Do we have indications of the perf benefit we would get from doing this for
>some real-world app?
>
 


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v1 0/4] mbuf dynamic field expansion
  2022-08-03 15:34         ` [EXT] " Shijith Thotton
@ 2022-08-29 15:16           ` Shijith Thotton
  2022-09-07 13:43             ` [PATCH v2 0/5] " Shijith Thotton
                               ` (5 more replies)
  2022-08-29 15:16           ` [PATCH v1 1/4] build: add meson option to configure IOVA mode " Shijith Thotton
                             ` (3 subsequent siblings)
  4 siblings, 6 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-08-29 15:16 UTC (permalink / raw)
  To: dev
  Cc: pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen, thomas

This is a continuation of the discussions[1] to add the mbuf physical address field to the dynamic
field area. The previous version added the PA field to the dynamic field area based on the EAL IOVA
mode option. It was deemed unsafe, as some components could still use the PA field without checking
the IOVA mode and there are drivers which need PA to work. One suggestion was to make the IOVA mode
check at compile time so that drivers which need PA can be disabled during the build. This series
adds this new meson build option. The second patch adds the mbuf PA field to the dynamic field area
on such builds. The last two patches enable the Marvell cnxk PMDs and software PMDs in IOVA as VA
builds, as they work without the PA field.

1. https://inbox.dpdk.org/dev/57d2ab7fff672716d37ba4078e2e3bb2db126607.1656605763.git.sthotton@marvell.com/.

Shijith Thotton (4):
  build: add meson option to configure IOVA mode as VA
  mbuf: add second dynamic field member for VA only build
  drivers: mark Marvell cnxk PMDs work with IOVA as VA
  drivers: mark software PMDs work with IOVA as VA

 app/test-bbdev/test_bbdev_perf.c         |  2 +-
 app/test-crypto-perf/cperf_test_common.c |  5 +--
 app/test/test_bpf.c                      |  2 +-
 app/test/test_dmadev.c                   | 33 ++++++---------
 app/test/test_mbuf.c                     | 12 +++---
 app/test/test_pcapng.c                   |  2 +-
 config/arm/meson.build                   |  8 +++-
 config/meson.build                       |  3 ++
 drivers/common/cnxk/meson.build          |  1 +
 drivers/crypto/armv8/meson.build         |  1 +
 drivers/crypto/cnxk/cn10k_ipsec_la_ops.h |  4 +-
 drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  |  2 +-
 drivers/crypto/cnxk/meson.build          |  2 +
 drivers/crypto/ipsec_mb/meson.build      |  1 +
 drivers/crypto/null/meson.build          |  1 +
 drivers/crypto/openssl/meson.build       |  1 +
 drivers/dma/cnxk/meson.build             |  1 +
 drivers/dma/skeleton/meson.build         |  1 +
 drivers/event/cnxk/meson.build           |  1 +
 drivers/event/dsw/meson.build            |  1 +
 drivers/event/opdl/meson.build           |  1 +
 drivers/event/skeleton/meson.build       |  1 +
 drivers/event/sw/meson.build             |  1 +
 drivers/mempool/bucket/meson.build       |  1 +
 drivers/mempool/cnxk/meson.build         |  1 +
 drivers/mempool/ring/meson.build         |  1 +
 drivers/mempool/stack/meson.build        |  1 +
 drivers/meson.build                      |  6 +++
 drivers/net/af_packet/meson.build        |  1 +
 drivers/net/af_xdp/meson.build           |  2 +
 drivers/net/bonding/meson.build          |  1 +
 drivers/net/cnxk/cnxk_ethdev.h           |  1 -
 drivers/net/cnxk/meson.build             |  1 +
 drivers/net/failsafe/meson.build         |  1 +
 drivers/net/memif/meson.build            |  1 +
 drivers/net/null/meson.build             |  1 +
 drivers/net/pcap/meson.build             |  1 +
 drivers/net/ring/meson.build             |  1 +
 drivers/net/tap/meson.build              |  1 +
 drivers/raw/cnxk_bphy/meson.build        |  1 +
 drivers/raw/cnxk_gpio/meson.build        |  1 +
 drivers/raw/skeleton/meson.build         |  1 +
 lib/eal/include/rte_common.h             | 17 ++++++++
 lib/eal/linux/eal.c                      |  7 +++
 lib/mbuf/rte_mbuf.c                      |  8 ++--
 lib/mbuf/rte_mbuf.h                      | 18 +++++---
 lib/mbuf/rte_mbuf_core.h                 | 32 +++++++++++---
 lib/mbuf/rte_mbuf_dyn.c                  |  2 +
 lib/vhost/vhost.h                        |  2 +-
 lib/vhost/vhost_crypto.c                 | 54 ++++++++++++++++++------
 meson_options.txt                        |  2 +
 51 files changed, 187 insertions(+), 67 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v1 1/4] build: add meson option to configure IOVA mode as VA
  2022-08-03 15:34         ` [EXT] " Shijith Thotton
  2022-08-29 15:16           ` [PATCH v1 0/4] mbuf dynamic field expansion Shijith Thotton
@ 2022-08-29 15:16           ` Shijith Thotton
  2022-08-29 18:18             ` Morten Brørup
  2022-08-29 15:16           ` [PATCH v1 2/4] mbuf: add second dynamic field member for VA only build Shijith Thotton
                             ` (2 subsequent siblings)
  4 siblings, 1 reply; 88+ messages in thread
From: Shijith Thotton @ 2022-08-29 15:16 UTC (permalink / raw)
  To: dev
  Cc: pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen, thomas,
	Nicolas Chautru, Ciara Power, Konstantin Ananyev, Chengwen Feng,
	Kevin Laatz, Reshma Pattan, Maxime Coquelin, Chenbo Xia

IOVA mode in DPDK is either PA or VA. The new build option iova_as_va
configures the mode to VA at compile time and prevents setting it to PA
at runtime. For now, all drivers which are not always enabled are
disabled with this option. A supported driver can set the flag
pmd_iova_as_va in its build file to enable its build.

The mbuf structure holds the physical (PA) and virtual address (VA) of a
buffer. If IOVA mode is set to VA, PA is redundant as it is the same as
VA. So the PA field need not be updated and is marked invalid if the build
is configured to use only VA.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 app/test-bbdev/test_bbdev_perf.c         |  2 +-
 app/test-crypto-perf/cperf_test_common.c |  5 +--
 app/test/test_bpf.c                      |  2 +-
 app/test/test_dmadev.c                   | 33 ++++++---------
 app/test/test_mbuf.c                     | 12 +++---
 app/test/test_pcapng.c                   |  2 +-
 config/meson.build                       |  3 ++
 drivers/meson.build                      |  6 +++
 lib/eal/include/rte_common.h             | 17 ++++++++
 lib/eal/linux/eal.c                      |  7 +++
 lib/mbuf/rte_mbuf.c                      |  8 ++--
 lib/mbuf/rte_mbuf.h                      | 18 +++++---
 lib/mbuf/rte_mbuf_core.h                 | 10 +++++
 lib/vhost/vhost.h                        |  2 +-
 lib/vhost/vhost_crypto.c                 | 54 ++++++++++++++++++------
 meson_options.txt                        |  2 +
 16 files changed, 129 insertions(+), 54 deletions(-)

diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index 8fab52d821..f6aa25b67d 100644
--- a/app/test-bbdev/test_bbdev_perf.c
+++ b/app/test-bbdev/test_bbdev_perf.c
@@ -1001,7 +1001,7 @@ init_op_data_objs(struct rte_bbdev_op_data *bufs,
 					seg->length);
 				memcpy(data, seg->addr, seg->length);
 				m_head->buf_addr = data;
-				m_head->buf_iova = rte_malloc_virt2iova(data);
+				rte_mbuf_iova_set(m_head, rte_malloc_virt2iova(data));
 				m_head->data_off = 0;
 				m_head->data_len = seg->length;
 			} else {
diff --git a/app/test-crypto-perf/cperf_test_common.c b/app/test-crypto-perf/cperf_test_common.c
index 00aadc9a47..27646cd619 100644
--- a/app/test-crypto-perf/cperf_test_common.c
+++ b/app/test-crypto-perf/cperf_test_common.c
@@ -26,8 +26,7 @@ fill_single_seg_mbuf(struct rte_mbuf *m, struct rte_mempool *mp,
 	/* start of buffer is after mbuf structure and priv data */
 	m->priv_size = 0;
 	m->buf_addr = (char *)m + mbuf_hdr_size;
-	m->buf_iova = rte_mempool_virt2iova(obj) +
-		mbuf_offset + mbuf_hdr_size;
+	rte_mbuf_iova_set(m, rte_mempool_virt2iova(obj) + mbuf_offset + mbuf_hdr_size);
 	m->buf_len = segment_sz;
 	m->data_len = data_len;
 	m->pkt_len = data_len;
@@ -58,7 +57,7 @@ fill_multi_seg_mbuf(struct rte_mbuf *m, struct rte_mempool *mp,
 		/* start of buffer is after mbuf structure and priv data */
 		m->priv_size = 0;
 		m->buf_addr = (char *)m + mbuf_hdr_size;
-		m->buf_iova = next_seg_phys_addr;
+		rte_mbuf_iova_set(m, next_seg_phys_addr);
 		next_seg_phys_addr += mbuf_hdr_size + segment_sz;
 		m->buf_len = segment_sz;
 		m->data_len = data_len;
diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c
index 97f500809e..f5af5e8a3f 100644
--- a/app/test/test_bpf.c
+++ b/app/test/test_bpf.c
@@ -2600,7 +2600,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len,
 	uint8_t *db;
 
 	mb->buf_addr = buf;
-	mb->buf_iova = (uintptr_t)buf;
+	rte_mbuf_iova_set(mb, (uintptr_t)buf);
 	mb->buf_len = buf_len;
 	rte_mbuf_refcnt_set(mb, 1);
 
diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c
index 9e8e101f40..8306947eda 100644
--- a/app/test/test_dmadev.c
+++ b/app/test/test_dmadev.c
@@ -110,8 +110,8 @@ do_multi_copies(int16_t dev_id, uint16_t vchan,
 		for (j = 0; j < COPY_LEN/sizeof(uint64_t); j++)
 			src_data[j] = rte_rand();
 
-		if (rte_dma_copy(dev_id, vchan, srcs[i]->buf_iova + srcs[i]->data_off,
-				dsts[i]->buf_iova + dsts[i]->data_off, COPY_LEN, 0) != id_count++)
+		if (rte_dma_copy(dev_id, vchan, rte_pktmbuf_iova_offset(srcs[i], 0),
+				 rte_pktmbuf_iova_offset(dsts[i], 0), COPY_LEN, 0) != id_count++)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", i);
 	}
 	rte_dma_submit(dev_id, vchan);
@@ -317,9 +317,8 @@ test_failure_in_full_burst(int16_t dev_id, uint16_t vchan, bool fence,
 	rte_dma_stats_get(dev_id, vchan, &baseline); /* get a baseline set of stats */
 	for (i = 0; i < COMP_BURST_SZ; i++) {
 		int id = rte_dma_copy(dev_id, vchan,
-				(i == fail_idx ? 0 : (srcs[i]->buf_iova + srcs[i]->data_off)),
-				dsts[i]->buf_iova + dsts[i]->data_off,
-				COPY_LEN, OPT_FENCE(i));
+				      (i == fail_idx ? 0 : rte_pktmbuf_iova_offset(srcs[i], 0)),
+				      rte_pktmbuf_iova_offset(dsts[i], 0), COPY_LEN, OPT_FENCE(i));
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", i);
 		if (i == fail_idx)
@@ -407,9 +406,8 @@ test_individual_status_query_with_failure(int16_t dev_id, uint16_t vchan, bool f
 
 	for (j = 0; j < COMP_BURST_SZ; j++) {
 		int id = rte_dma_copy(dev_id, vchan,
-				(j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]->data_off)),
-				dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, OPT_FENCE(j));
+				      (j == fail_idx ? 0 : rte_pktmbuf_iova_offset(srcs[j], 0)),
+				      rte_pktmbuf_iova_offset(dsts[j], 0), COPY_LEN, OPT_FENCE(j));
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 		if (j == fail_idx)
@@ -470,9 +468,8 @@ test_single_item_status_query_with_failure(int16_t dev_id, uint16_t vchan,
 
 	for (j = 0; j < COMP_BURST_SZ; j++) {
 		int id = rte_dma_copy(dev_id, vchan,
-				(j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]->data_off)),
-				dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, 0);
+				      (j == fail_idx ? 0 : rte_pktmbuf_iova_offset(srcs[j], 0)),
+				      rte_pktmbuf_iova_offset(dsts[j], 0), COPY_LEN, 0);
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 		if (j == fail_idx)
@@ -529,15 +526,14 @@ test_multi_failure(int16_t dev_id, uint16_t vchan, struct rte_mbuf **srcs, struc
 
 	/* enqueue and gather completions in one go */
 	for (j = 0; j < COMP_BURST_SZ; j++) {
-		uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
+		uintptr_t src = rte_pktmbuf_iova_offset(srcs[j], 0);
 		/* set up for failure if the current index is anywhere is the fails array */
 		for (i = 0; i < num_fail; i++)
 			if (j == fail[i])
 				src = 0;
 
-		int id = rte_dma_copy(dev_id, vchan,
-				src, dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, 0);
+		int id = rte_dma_copy(dev_id, vchan, src, rte_pktmbuf_iova_offset(dsts[j], 0),
+				      COPY_LEN, 0);
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 	}
@@ -565,15 +561,14 @@ test_multi_failure(int16_t dev_id, uint16_t vchan, struct rte_mbuf **srcs, struc
 
 	/* enqueue and gather completions in bursts, but getting errors one at a time */
 	for (j = 0; j < COMP_BURST_SZ; j++) {
-		uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
+		uintptr_t src = rte_pktmbuf_iova_offset(srcs[j], 0);
 		/* set up for failure if the current index is anywhere is the fails array */
 		for (i = 0; i < num_fail; i++)
 			if (j == fail[i])
 				src = 0;
 
-		int id = rte_dma_copy(dev_id, vchan,
-				src, dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, 0);
+		int id = rte_dma_copy(dev_id, vchan, src, rte_pktmbuf_iova_offset(dsts[j], 0),
+				      COPY_LEN, 0);
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 	}
diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
index e09b2549ca..992b8c64ab 100644
--- a/app/test/test_mbuf.c
+++ b/app/test/test_mbuf.c
@@ -1232,11 +1232,13 @@ test_failing_mbuf_sanity_check(struct rte_mempool *pktmbuf_pool)
 		return -1;
 	}
 
-	badbuf = *buf;
-	badbuf.buf_iova = 0;
-	if (verify_mbuf_check_panics(&badbuf)) {
-		printf("Error with bad-physaddr mbuf test\n");
-		return -1;
+	if (!rte_is_iova_as_va_build()) {
+		badbuf = *buf;
+		rte_mbuf_iova_set(&badbuf, 0);
+		if (verify_mbuf_check_panics(&badbuf)) {
+			printf("Error with bad-physaddr mbuf test\n");
+			return -1;
+		}
 	}
 
 	badbuf = *buf;
diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c
index 320dacea34..abbf00f6da 100644
--- a/app/test/test_pcapng.c
+++ b/app/test/test_pcapng.c
@@ -40,7 +40,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len,
 	uint8_t *db;
 
 	mb->buf_addr = buf;
-	mb->buf_iova = (uintptr_t)buf;
+	rte_mbuf_iova_set(mb, (uintptr_t)buf);
 	mb->buf_len = buf_len;
 	rte_mbuf_refcnt_set(mb, 1);
 
diff --git a/config/meson.build b/config/meson.build
index 7f7b6c92fd..1ff1cd774b 100644
--- a/config/meson.build
+++ b/config/meson.build
@@ -309,6 +309,9 @@ endif
 if get_option('mbuf_refcnt_atomic')
     dpdk_conf.set('RTE_MBUF_REFCNT_ATOMIC', true)
 endif
+if get_option('iova_as_va')
+    dpdk_conf.set('RTE_IOVA_AS_VA', true)
+endif
 
 compile_time_cpuflags = []
 subdir(arch_subdir)
diff --git a/drivers/meson.build b/drivers/meson.build
index b22c2adda7..469e60f1fa 100644
--- a/drivers/meson.build
+++ b/drivers/meson.build
@@ -103,6 +103,7 @@ foreach subpath:subdirs
         ext_deps = []
         pkgconfig_extra_libs = []
         testpmd_sources = []
+        pmd_iova_as_va = false
 
         if not enable_drivers.contains(drv_path)
             build = false
@@ -120,6 +121,11 @@ foreach subpath:subdirs
             # pull in driver directory which should update all the local variables
             subdir(drv_path)
 
+            if dpdk_conf.has('RTE_IOVA_AS_VA') and not pmd_iova_as_va and not always_enable.contains(drv_path)
+                build = false
+                reason = 'driver does not support IOVA as VA mode'
+            endif
+
             # get dependency objs from strings
             shared_deps = ext_deps
             static_deps = ext_deps
diff --git a/lib/eal/include/rte_common.h b/lib/eal/include/rte_common.h
index a96cc2a138..0010ad7c7d 100644
--- a/lib/eal/include/rte_common.h
+++ b/lib/eal/include/rte_common.h
@@ -921,6 +921,23 @@ __rte_noreturn void
 rte_exit(int exit_code, const char *format, ...)
 	__rte_format_printf(2, 3);
 
+/**
+ * Check if build is configured to use IOVA as VA.
+ *
+ * @return
+ *   1 if true, 0 otherwise
+ *
+ */
+static inline int
+rte_is_iova_as_va_build(void)
+{
+#ifdef RTE_IOVA_AS_VA
+	return 1;
+#else
+	return 0;
+#endif
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 37d29643a5..a5bcbd7aa9 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -1127,6 +1127,13 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
+	if (rte_eal_iova_mode() == RTE_IOVA_PA && rte_is_iova_as_va_build()) {
+		rte_eal_init_alert(
+			"Cannot use IOVA as 'PA' since build is configured to use only 'VA'");
+		rte_errno = EINVAL;
+		return -1;
+	}
+
 	RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
 		rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");
 
diff --git a/lib/mbuf/rte_mbuf.c b/lib/mbuf/rte_mbuf.c
index a2307cebe6..7343307c57 100644
--- a/lib/mbuf/rte_mbuf.c
+++ b/lib/mbuf/rte_mbuf.c
@@ -89,7 +89,7 @@ rte_pktmbuf_init(struct rte_mempool *mp,
 	/* start of buffer is after mbuf structure and priv data */
 	m->priv_size = priv_size;
 	m->buf_addr = (char *)m + mbuf_size;
-	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
+	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
 	m->buf_len = (uint16_t)buf_len;
 
 	/* keep some headroom between start of buffer and data */
@@ -187,8 +187,8 @@ __rte_pktmbuf_init_extmem(struct rte_mempool *mp,
 	RTE_ASSERT(ctx->off + ext_mem->elt_size <= ext_mem->buf_len);
 
 	m->buf_addr = RTE_PTR_ADD(ext_mem->buf_ptr, ctx->off);
-	m->buf_iova = ext_mem->buf_iova == RTE_BAD_IOVA ?
-		      RTE_BAD_IOVA : (ext_mem->buf_iova + ctx->off);
+	rte_mbuf_iova_set(m, ext_mem->buf_iova == RTE_BAD_IOVA ? RTE_BAD_IOVA :
+								 (ext_mem->buf_iova + ctx->off));
 
 	ctx->off += ext_mem->elt_size;
 	if (ctx->off + ext_mem->elt_size > ext_mem->buf_len) {
@@ -388,7 +388,7 @@ int rte_mbuf_check(const struct rte_mbuf *m, int is_header,
 		*reason = "bad mbuf pool";
 		return -1;
 	}
-	if (m->buf_iova == 0) {
+	if (m->buf_iova == 0 && !rte_is_iova_as_va_build()) {
 		*reason = "bad IO addr";
 		return -1;
 	}
diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h
index 9811e8c760..5c86579220 100644
--- a/lib/mbuf/rte_mbuf.h
+++ b/lib/mbuf/rte_mbuf.h
@@ -146,7 +146,7 @@ static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
 static inline rte_iova_t
 rte_mbuf_data_iova(const struct rte_mbuf *mb)
 {
-	return mb->buf_iova + mb->data_off;
+	return (rte_is_iova_as_va_build() ? (uint64_t)mb->buf_addr : mb->buf_iova) + mb->data_off;
 }
 
 /**
@@ -164,7 +164,8 @@ rte_mbuf_data_iova(const struct rte_mbuf *mb)
 static inline rte_iova_t
 rte_mbuf_data_iova_default(const struct rte_mbuf *mb)
 {
-	return mb->buf_iova + RTE_PKTMBUF_HEADROOM;
+	return (rte_is_iova_as_va_build() ? (uint64_t)mb->buf_addr : mb->buf_iova) +
+	       RTE_PKTMBUF_HEADROOM;
 }
 
 /**
@@ -469,6 +470,13 @@ rte_mbuf_ext_refcnt_update(struct rte_mbuf_ext_shared_info *shinfo,
 				 __ATOMIC_ACQ_REL);
 }
 
+static inline void
+rte_mbuf_iova_set(struct rte_mbuf *m, rte_iova_t iova)
+{
+	if (!rte_is_iova_as_va_build())
+		m->buf_iova = iova;
+}
+
 /** Mbuf prefetch */
 #define RTE_MBUF_PREFETCH_TO_FREE(m) do {       \
 	if ((m) != NULL)                        \
@@ -1056,7 +1064,7 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
 	RTE_ASSERT(shinfo->free_cb != NULL);
 
 	m->buf_addr = buf_addr;
-	m->buf_iova = buf_iova;
+	rte_mbuf_iova_set(m, buf_iova);
 	m->buf_len = buf_len;
 
 	m->data_len = 0;
@@ -1143,7 +1151,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
 
 	mi->data_off = m->data_off;
 	mi->data_len = m->data_len;
-	mi->buf_iova = m->buf_iova;
+	rte_mbuf_iova_set(mi, m->buf_iova);
 	mi->buf_addr = m->buf_addr;
 	mi->buf_len = m->buf_len;
 
@@ -1245,7 +1253,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
 
 	m->priv_size = priv_size;
 	m->buf_addr = (char *)m + mbuf_size;
-	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
+	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
 	m->buf_len = (uint16_t)buf_len;
 	rte_pktmbuf_reset_headroom(m);
 	m->data_len = 0;
diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index 3d6ddd6773..81cb07c2e4 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -581,6 +581,8 @@ struct rte_mbuf {
 	void *buf_addr;           /**< Virtual address of segment buffer. */
 	/**
 	 * Physical address of segment buffer.
+	 * This field is invalid if the build is configured to use only
+	 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is defined).
 	 * Force alignment to 8-bytes, so as to ensure we have the exact
 	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
 	 * working on vector drivers easier.
@@ -848,8 +850,12 @@ struct rte_mbuf_ext_shared_info {
  * @param o
  *   The offset into the data to calculate address from.
  */
+#ifdef RTE_IOVA_AS_VA
+#define rte_pktmbuf_iova_offset(m, o) rte_pktmbuf_mtod_offset(m, rte_iova_t, o)
+#else
 #define rte_pktmbuf_iova_offset(m, o) \
 	(rte_iova_t)((m)->buf_iova + (m)->data_off + (o))
+#endif
 
 /**
  * A macro that returns the IO address that points to the start of the
@@ -858,7 +864,11 @@ struct rte_mbuf_ext_shared_info {
  * @param m
  *   The packet mbuf.
  */
+#ifdef RTE_IOVA_AS_VA
+#define rte_pktmbuf_iova(m) rte_pktmbuf_mtod(m, rte_iova_t)
+#else
 #define rte_pktmbuf_iova(m) rte_pktmbuf_iova_offset(m, 0)
+#endif
 
 #ifdef __cplusplus
 }
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 40fac3b7c6..e8f7c76c0c 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -962,7 +962,7 @@ restore_mbuf(struct rte_mbuf *m)
 		/* start of buffer is after mbuf structure and priv data */
 
 		m->buf_addr = (char *)m + mbuf_size;
-		m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
+		rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
 		m = m->next;
 	}
 }
diff --git a/lib/vhost/vhost_crypto.c b/lib/vhost/vhost_crypto.c
index 54946f46d9..56354152a6 100644
--- a/lib/vhost/vhost_crypto.c
+++ b/lib/vhost/vhost_crypto.c
@@ -823,11 +823,17 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 	switch (vcrypto->option) {
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
 		m_src->data_len = cipher->para.src_data_len;
-		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
-				cipher->para.src_data_len);
+		if (!rte_is_iova_as_va_build()) {
+			m_src->buf_iova =
+				gpa_to_hpa(vcrypto->dev, desc->addr, cipher->para.src_data_len);
+			if (unlikely(m_src->buf_iova == 0)) {
+				VC_LOG_ERR("zero_copy may fail due to cross page data");
+				ret = VIRTIO_CRYPTO_ERR;
+				goto error_exit;
+			}
+		}
 		m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO);
-		if (unlikely(m_src->buf_iova == 0 ||
-				m_src->buf_addr == NULL)) {
+		if (unlikely(m_src->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
@@ -867,10 +873,17 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 
 	switch (vcrypto->option) {
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
-		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
-				desc->addr, cipher->para.dst_data_len);
+		if (!rte_is_iova_as_va_build()) {
+			m_dst->buf_iova =
+				gpa_to_hpa(vcrypto->dev, desc->addr, cipher->para.dst_data_len);
+			if (unlikely(m_dst->buf_iova == 0)) {
+				VC_LOG_ERR("zero_copy may fail due to cross page data");
+				ret = VIRTIO_CRYPTO_ERR;
+				goto error_exit;
+			}
+		}
 		m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW);
-		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
+		if (unlikely(m_dst->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
@@ -980,11 +993,17 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
 		m_src->data_len = chain->para.src_data_len;
 		m_dst->data_len = chain->para.dst_data_len;
-
-		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
-				chain->para.src_data_len);
+		if (!rte_is_iova_as_va_build()) {
+			m_src->buf_iova =
+				gpa_to_hpa(vcrypto->dev, desc->addr, chain->para.src_data_len);
+			if (unlikely(m_src->buf_iova == 0)) {
+				VC_LOG_ERR("zero_copy may fail due to cross page data");
+				ret = VIRTIO_CRYPTO_ERR;
+				goto error_exit;
+			}
+		}
 		m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO);
-		if (unlikely(m_src->buf_iova == 0 || m_src->buf_addr == NULL)) {
+		if (unlikely(m_src->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
@@ -1024,10 +1043,17 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 
 	switch (vcrypto->option) {
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
-		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
-				desc->addr, chain->para.dst_data_len);
+		if (!rte_is_iova_as_va_build()) {
+			m_dst->buf_iova =
+				gpa_to_hpa(vcrypto->dev, desc->addr, chain->para.dst_data_len);
+			if (unlikely(m_dst->buf_iova == 0)) {
+				VC_LOG_ERR("zero_copy may fail due to cross page data");
+				ret = VIRTIO_CRYPTO_ERR;
+				goto error_exit;
+			}
+		}
 		m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW);
-		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
+		if (unlikely(m_dst->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
diff --git a/meson_options.txt b/meson_options.txt
index 7c220ad68d..f0fa6cf04c 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -44,6 +44,8 @@ option('platform', type: 'string', value: 'native', description:
        'Platform to build, either "native", "generic" or a SoC. Please refer to the Linux build guide for more information.')
 option('enable_trace_fp', type: 'boolean', value: false, description:
        'enable fast path trace points.')
+option('iova_as_va', type: 'boolean', value: false, description:
+       'Build which only supports IOVA as VA mode. Unsupported drivers are disabled.')
 option('tests', type: 'boolean', value: true, description:
        'build unit tests')
 option('use_hpet', type: 'boolean', value: false, description:
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v1 2/4] mbuf: add second dynamic field member for VA only build
  2022-08-03 15:34         ` [EXT] " Shijith Thotton
  2022-08-29 15:16           ` [PATCH v1 0/4] mbuf dynamic field expansion Shijith Thotton
  2022-08-29 15:16           ` [PATCH v1 1/4] build: add meson option to configure IOVA mode " Shijith Thotton
@ 2022-08-29 15:16           ` Shijith Thotton
  2022-08-29 18:32             ` Morten Brørup
  2022-08-29 15:16           ` [PATCH v1 3/4] drivers: mark Marvell cnxk PMDs work with IOVA as VA Shijith Thotton
  2022-08-29 15:16           ` [PATCH v1 4/4] drivers: mark software " Shijith Thotton
  4 siblings, 1 reply; 88+ messages in thread
From: Shijith Thotton @ 2022-08-29 15:16 UTC (permalink / raw)
  To: dev
  Cc: pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen, thomas

The mbuf physical address field is not used in builds which only use
VA. In such builds, its space is used to expand the dynamic field area.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 lib/mbuf/rte_mbuf_core.h | 26 +++++++++++++++++---------
 lib/mbuf/rte_mbuf_dyn.c  |  2 ++
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index 81cb07c2e4..98ce62fd6a 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -579,15 +579,23 @@ struct rte_mbuf {
 	RTE_MARKER cacheline0;
 
 	void *buf_addr;           /**< Virtual address of segment buffer. */
-	/**
-	 * Physical address of segment buffer.
-	 * This field is invalid if the build is configured to use only
-	 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is defined).
-	 * Force alignment to 8-bytes, so as to ensure we have the exact
-	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
-	 * working on vector drivers easier.
-	 */
-	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
+	RTE_STD_C11
+	union {
+		/**
+		 * Physical address of segment buffer.
+		 * This field is invalid if the build is configured to use only
+		 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is defined).
+		 * Force alignment to 8-bytes, so as to ensure we have the exact
+		 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
+		 * working on vector drivers easier.
+		 */
+		rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
+		/**
+		 * Reserved for dynamic field in builds where physical address
+		 * field is invalid.
+		 */
+		uint64_t dynfield2;
+	};
 
 	/* next 8 bytes are initialised on RX descriptor rearm */
 	RTE_MARKER64 rearm_data;
diff --git a/lib/mbuf/rte_mbuf_dyn.c b/lib/mbuf/rte_mbuf_dyn.c
index 4ae79383b5..0813d5fb34 100644
--- a/lib/mbuf/rte_mbuf_dyn.c
+++ b/lib/mbuf/rte_mbuf_dyn.c
@@ -128,6 +128,8 @@ init_shared_mem(void)
 		 */
 		memset(shm, 0, sizeof(*shm));
 		mark_free(dynfield1);
+		if (rte_is_iova_as_va_build())
+			mark_free(dynfield2);
 
 		/* init free_flags */
 		for (mask = RTE_MBUF_F_FIRST_FREE; mask <= RTE_MBUF_F_LAST_FREE; mask <<= 1)
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v1 3/4] drivers: mark Marvell cnxk PMDs work with IOVA as VA
  2022-08-03 15:34         ` [EXT] " Shijith Thotton
                             ` (2 preceding siblings ...)
  2022-08-29 15:16           ` [PATCH v1 2/4] mbuf: add second dynamic field member for VA only build Shijith Thotton
@ 2022-08-29 15:16           ` Shijith Thotton
  2022-08-29 15:16           ` [PATCH v1 4/4] drivers: mark software " Shijith Thotton
  4 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-08-29 15:16 UTC (permalink / raw)
  To: dev
  Cc: pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen, thomas,
	Jan Viktorin, Ruifeng Wang, Nithin Dabilpuram, Kiran Kumar K,
	Sunil Kumar Kori, Satha Rao, Ankur Dwivedi, Anoob Joseph,
	Tejasree Kondoj, Radha Mohan Chintakuntla, Veerasenareddy Burru,
	Ashwin Sekhar T K, Jakub Palider, Tomasz Duszynski

Enabled the flag pmd_iova_as_va in cnxk driver build files as they work
with IOVA as VA. Updated cn9k and cn10k SoC build configurations to
enable the IOVA as VA build by default.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 config/arm/meson.build                   | 8 ++++++--
 drivers/common/cnxk/meson.build          | 1 +
 drivers/crypto/cnxk/cn10k_ipsec_la_ops.h | 4 ++--
 drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  | 2 +-
 drivers/crypto/cnxk/meson.build          | 2 ++
 drivers/dma/cnxk/meson.build             | 1 +
 drivers/event/cnxk/meson.build           | 1 +
 drivers/mempool/cnxk/meson.build         | 1 +
 drivers/net/cnxk/cnxk_ethdev.h           | 1 -
 drivers/net/cnxk/meson.build             | 1 +
 drivers/raw/cnxk_bphy/meson.build        | 1 +
 drivers/raw/cnxk_gpio/meson.build        | 1 +
 12 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/config/arm/meson.build b/config/arm/meson.build
index 9f1636e0d5..a3a58185bf 100644
--- a/config/arm/meson.build
+++ b/config/arm/meson.build
@@ -294,7 +294,8 @@ soc_cn10k = {
     'flags': [
         ['RTE_MAX_LCORE', 24],
         ['RTE_MAX_NUMA_NODES', 1],
-        ['RTE_MEMPOOL_ALIGN', 128]
+        ['RTE_MEMPOOL_ALIGN', 128],
+        ['RTE_IOVA_AS_VA', true]
     ],
     'part_number': '0xd49',
     'extra_march_features': ['crypto'],
@@ -370,7 +371,10 @@ soc_cn9k = {
     'description': 'Marvell OCTEON 9',
     'implementer': '0x43',
     'part_number': '0xb2',
-    'numa': false
+    'numa': false,
+    'flags': [
+        ['RTE_IOVA_AS_VA', true]
+    ]
 }
 
 soc_stingray = {
diff --git a/drivers/common/cnxk/meson.build b/drivers/common/cnxk/meson.build
index 6f808271d1..d019cfa8d1 100644
--- a/drivers/common/cnxk/meson.build
+++ b/drivers/common/cnxk/meson.build
@@ -86,3 +86,4 @@ sources += files('cnxk_telemetry_bphy.c',
 )
 
 deps += ['bus_pci', 'net', 'telemetry']
+pmd_iova_as_va = true
diff --git a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
index 66cfe6ca98..16db14344d 100644
--- a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
+++ b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
@@ -85,7 +85,7 @@ process_outb_sa(struct roc_cpt_lf *lf, struct rte_crypto_op *cop,
 
 	/* Prepare CPT instruction */
 	inst->w4.u64 = inst_w4_u64 | rte_pktmbuf_pkt_len(m_src);
-	dptr = rte_pktmbuf_iova(m_src);
+	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
 	inst->dptr = dptr;
 	inst->rptr = dptr;
 
@@ -102,7 +102,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct cn10k_ipsec_sa *sa,
 
 	/* Prepare CPT instruction */
 	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
-	dptr = rte_pktmbuf_iova(m_src);
+	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
 	inst->dptr = dptr;
 	inst->rptr = dptr;
 
diff --git a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
index e469596756..8b68e4c728 100644
--- a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
+++ b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
@@ -99,7 +99,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct cn9k_ipsec_sa *sa,
 
 	/* Prepare CPT instruction */
 	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
-	inst->dptr = inst->rptr = rte_pktmbuf_iova(m_src);
+	inst->dptr = inst->rptr = rte_pktmbuf_mtod(m_src, uint64_t);
 	inst->w7.u64 = sa->inst.w7;
 }
 #endif /* __CN9K_IPSEC_LA_OPS_H__ */
diff --git a/drivers/crypto/cnxk/meson.build b/drivers/crypto/cnxk/meson.build
index 23a1cc3aac..764e7bb99a 100644
--- a/drivers/crypto/cnxk/meson.build
+++ b/drivers/crypto/cnxk/meson.build
@@ -31,3 +31,5 @@ if get_option('buildtype').contains('debug')
 else
     cflags += [ '-ULA_IPSEC_DEBUG' ]
 endif
+
+pmd_iova_as_va = true
diff --git a/drivers/dma/cnxk/meson.build b/drivers/dma/cnxk/meson.build
index d4be4ee860..ef0e3db109 100644
--- a/drivers/dma/cnxk/meson.build
+++ b/drivers/dma/cnxk/meson.build
@@ -3,3 +3,4 @@
 
 deps += ['bus_pci', 'common_cnxk', 'dmadev']
 sources = files('cnxk_dmadev.c')
+pmd_iova_as_va = true
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index b27bae7b12..650d0d4256 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -479,3 +479,4 @@ foreach flag: extra_flags
 endforeach
 
 deps += ['bus_pci', 'common_cnxk', 'net_cnxk', 'crypto_cnxk']
+pmd_iova_as_va = true
diff --git a/drivers/mempool/cnxk/meson.build b/drivers/mempool/cnxk/meson.build
index d5d1978569..a328176457 100644
--- a/drivers/mempool/cnxk/meson.build
+++ b/drivers/mempool/cnxk/meson.build
@@ -17,3 +17,4 @@ sources = files(
 )
 
 deps += ['eal', 'mbuf', 'kvargs', 'bus_pci', 'common_cnxk', 'mempool']
+pmd_iova_as_va = true
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 4cb7c9e90c..abf1e4215f 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -690,7 +690,6 @@ cnxk_pktmbuf_detach(struct rte_mbuf *m)
 
 	m->priv_size = priv_size;
 	m->buf_addr = (char *)m + mbuf_size;
-	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
 	m->buf_len = (uint16_t)buf_len;
 	rte_pktmbuf_reset_headroom(m);
 	m->data_len = 0;
diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
index f347e98fce..01489b3a36 100644
--- a/drivers/net/cnxk/meson.build
+++ b/drivers/net/cnxk/meson.build
@@ -194,3 +194,4 @@ foreach flag: extra_flags
 endforeach
 
 headers = files('rte_pmd_cnxk.h')
+pmd_iova_as_va = true
diff --git a/drivers/raw/cnxk_bphy/meson.build b/drivers/raw/cnxk_bphy/meson.build
index 14147feaf4..781ed63e05 100644
--- a/drivers/raw/cnxk_bphy/meson.build
+++ b/drivers/raw/cnxk_bphy/meson.build
@@ -10,3 +10,4 @@ sources = files(
         'cnxk_bphy_irq.c',
 )
 headers = files('rte_pmd_bphy.h')
+pmd_iova_as_va = true
diff --git a/drivers/raw/cnxk_gpio/meson.build b/drivers/raw/cnxk_gpio/meson.build
index a75a5b9084..f9aed173b6 100644
--- a/drivers/raw/cnxk_gpio/meson.build
+++ b/drivers/raw/cnxk_gpio/meson.build
@@ -9,3 +9,4 @@ sources = files(
         'cnxk_gpio_selftest.c',
 )
 headers = files('rte_pmd_cnxk_gpio.h')
+pmd_iova_as_va = true
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v1 4/4] drivers: mark software PMDs work with IOVA as VA
  2022-08-03 15:34         ` [EXT] " Shijith Thotton
                             ` (3 preceding siblings ...)
  2022-08-29 15:16           ` [PATCH v1 3/4] drivers: mark Marvell cnxk PMDs work with IOVA as VA Shijith Thotton
@ 2022-08-29 15:16           ` Shijith Thotton
  4 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-08-29 15:16 UTC (permalink / raw)
  To: dev
  Cc: pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen, thomas,
	Ruifeng Wang, Fan Zhang, Pablo de Lara, Chengwen Feng,
	Kevin Laatz, Mattias Rönnblom, Liang Ma, Peter Mccarthy,
	Harry van Haaren, Artem V. Andreev, Andrew Rybchenko,
	John W. Linville, Ciara Loftus, Qi Zhang, Chas Williams,
	Min Hu (Connor),
	Gaetan Rivet, Jakub Grajciar, Tetsuya Mukawa, Sachin Saxena,
	Hemant Agrawal

Enabled software PMDs in IOVA as VA build as they work with IOVA as VA.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 drivers/crypto/armv8/meson.build    | 1 +
 drivers/crypto/ipsec_mb/meson.build | 1 +
 drivers/crypto/null/meson.build     | 1 +
 drivers/crypto/openssl/meson.build  | 1 +
 drivers/dma/skeleton/meson.build    | 1 +
 drivers/event/dsw/meson.build       | 1 +
 drivers/event/opdl/meson.build      | 1 +
 drivers/event/skeleton/meson.build  | 1 +
 drivers/event/sw/meson.build        | 1 +
 drivers/mempool/bucket/meson.build  | 1 +
 drivers/mempool/ring/meson.build    | 1 +
 drivers/mempool/stack/meson.build   | 1 +
 drivers/net/af_packet/meson.build   | 1 +
 drivers/net/af_xdp/meson.build      | 2 ++
 drivers/net/bonding/meson.build     | 1 +
 drivers/net/failsafe/meson.build    | 1 +
 drivers/net/memif/meson.build       | 1 +
 drivers/net/null/meson.build        | 1 +
 drivers/net/pcap/meson.build        | 1 +
 drivers/net/ring/meson.build        | 1 +
 drivers/net/tap/meson.build         | 1 +
 drivers/raw/skeleton/meson.build    | 1 +
 22 files changed, 23 insertions(+)

diff --git a/drivers/crypto/armv8/meson.build b/drivers/crypto/armv8/meson.build
index 5effba8bbc..a2c9d69e3f 100644
--- a/drivers/crypto/armv8/meson.build
+++ b/drivers/crypto/armv8/meson.build
@@ -17,3 +17,4 @@ endif
 ext_deps += dep
 deps += ['bus_vdev']
 sources = files('rte_armv8_pmd.c', 'rte_armv8_pmd_ops.c')
+pmd_iova_as_va = true
diff --git a/drivers/crypto/ipsec_mb/meson.build b/drivers/crypto/ipsec_mb/meson.build
index a89b29d6c3..785440b593 100644
--- a/drivers/crypto/ipsec_mb/meson.build
+++ b/drivers/crypto/ipsec_mb/meson.build
@@ -37,3 +37,4 @@ sources = files(
         'pmd_zuc.c',
 )
 deps += ['bus_vdev', 'net', 'security']
+pmd_iova_as_va = true
diff --git a/drivers/crypto/null/meson.build b/drivers/crypto/null/meson.build
index acc16e7d81..68dc030075 100644
--- a/drivers/crypto/null/meson.build
+++ b/drivers/crypto/null/meson.build
@@ -9,3 +9,4 @@ endif
 
 deps += 'bus_vdev'
 sources = files('null_crypto_pmd.c', 'null_crypto_pmd_ops.c')
+pmd_iova_as_va = true
diff --git a/drivers/crypto/openssl/meson.build b/drivers/crypto/openssl/meson.build
index cd962da1d6..25c44d0064 100644
--- a/drivers/crypto/openssl/meson.build
+++ b/drivers/crypto/openssl/meson.build
@@ -15,3 +15,4 @@ endif
 deps += 'bus_vdev'
 sources = files('rte_openssl_pmd.c', 'rte_openssl_pmd_ops.c')
 ext_deps += dep
+pmd_iova_as_va = true
diff --git a/drivers/dma/skeleton/meson.build b/drivers/dma/skeleton/meson.build
index 8871b80956..2b48d4e031 100644
--- a/drivers/dma/skeleton/meson.build
+++ b/drivers/dma/skeleton/meson.build
@@ -5,3 +5,4 @@ deps += ['dmadev', 'kvargs', 'ring', 'bus_vdev']
 sources = files(
         'skeleton_dmadev.c',
 )
+pmd_iova_as_va = true
diff --git a/drivers/event/dsw/meson.build b/drivers/event/dsw/meson.build
index 2df0fac4ff..477a6e5910 100644
--- a/drivers/event/dsw/meson.build
+++ b/drivers/event/dsw/meson.build
@@ -6,3 +6,4 @@ if cc.has_argument('-Wno-format-nonliteral')
     cflags += '-Wno-format-nonliteral'
 endif
 sources = files('dsw_evdev.c', 'dsw_event.c', 'dsw_xstats.c')
+pmd_iova_as_va = true
diff --git a/drivers/event/opdl/meson.build b/drivers/event/opdl/meson.build
index 786d2f4e82..e1a3de7ee3 100644
--- a/drivers/event/opdl/meson.build
+++ b/drivers/event/opdl/meson.build
@@ -9,3 +9,4 @@ sources = files(
         'opdl_test.c',
 )
 deps += ['bus_vdev']
+pmd_iova_as_va = true
diff --git a/drivers/event/skeleton/meson.build b/drivers/event/skeleton/meson.build
index acfe156532..0ae514668c 100644
--- a/drivers/event/skeleton/meson.build
+++ b/drivers/event/skeleton/meson.build
@@ -3,3 +3,4 @@
 
 sources = files('skeleton_eventdev.c')
 deps += ['bus_pci', 'bus_vdev']
+pmd_iova_as_va = true
diff --git a/drivers/event/sw/meson.build b/drivers/event/sw/meson.build
index 6f81567efb..210cc1d048 100644
--- a/drivers/event/sw/meson.build
+++ b/drivers/event/sw/meson.build
@@ -9,3 +9,4 @@ sources = files(
         'sw_evdev.c',
 )
 deps += ['hash', 'bus_vdev']
+pmd_iova_as_va = true
diff --git a/drivers/mempool/bucket/meson.build b/drivers/mempool/bucket/meson.build
index 0051b6ac3c..31ba101677 100644
--- a/drivers/mempool/bucket/meson.build
+++ b/drivers/mempool/bucket/meson.build
@@ -12,3 +12,4 @@ if is_windows
 endif
 
 sources = files('rte_mempool_bucket.c')
+pmd_iova_as_va = true
diff --git a/drivers/mempool/ring/meson.build b/drivers/mempool/ring/meson.build
index a021e908cf..f75f2125d7 100644
--- a/drivers/mempool/ring/meson.build
+++ b/drivers/mempool/ring/meson.build
@@ -2,3 +2,4 @@
 # Copyright(c) 2017 Intel Corporation
 
 sources = files('rte_mempool_ring.c')
+pmd_iova_as_va = true
diff --git a/drivers/mempool/stack/meson.build b/drivers/mempool/stack/meson.build
index 580dde79eb..3b94ed5b5e 100644
--- a/drivers/mempool/stack/meson.build
+++ b/drivers/mempool/stack/meson.build
@@ -4,3 +4,4 @@
 sources = files('rte_mempool_stack.c')
 
 deps += ['stack']
+pmd_iova_as_va = true
diff --git a/drivers/net/af_packet/meson.build b/drivers/net/af_packet/meson.build
index c014e9b61b..92fafea363 100644
--- a/drivers/net/af_packet/meson.build
+++ b/drivers/net/af_packet/meson.build
@@ -6,3 +6,4 @@ if not is_linux
     reason = 'only supported on Linux'
 endif
 sources = files('rte_eth_af_packet.c')
+pmd_iova_as_va = true
diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build
index 1e0de23705..35812511e2 100644
--- a/drivers/net/af_xdp/meson.build
+++ b/drivers/net/af_xdp/meson.build
@@ -55,3 +55,5 @@ else
     build = false
     reason = 'missing header, "linux/if_xdp.h"'
 endif
+
+pmd_iova_as_va = true
diff --git a/drivers/net/bonding/meson.build b/drivers/net/bonding/meson.build
index 18ad7e21f3..b61166888e 100644
--- a/drivers/net/bonding/meson.build
+++ b/drivers/net/bonding/meson.build
@@ -22,3 +22,4 @@ deps += 'sched' # needed for rte_bitmap.h
 deps += ['ip_frag']
 
 headers = files('rte_eth_bond.h', 'rte_eth_bond_8023ad.h')
+pmd_iova_as_va = true
diff --git a/drivers/net/failsafe/meson.build b/drivers/net/failsafe/meson.build
index b8e5bf70f8..a90be869d1 100644
--- a/drivers/net/failsafe/meson.build
+++ b/drivers/net/failsafe/meson.build
@@ -27,3 +27,4 @@ sources = files(
         'failsafe_ops.c',
         'failsafe_rxtx.c',
 )
+pmd_iova_as_va = true
diff --git a/drivers/net/memif/meson.build b/drivers/net/memif/meson.build
index 680bc8631c..59140dc3dd 100644
--- a/drivers/net/memif/meson.build
+++ b/drivers/net/memif/meson.build
@@ -12,3 +12,4 @@ sources = files(
 )
 
 deps += ['hash']
+pmd_iova_as_va = true
diff --git a/drivers/net/null/meson.build b/drivers/net/null/meson.build
index 0251578aab..6b7adbd760 100644
--- a/drivers/net/null/meson.build
+++ b/drivers/net/null/meson.build
@@ -8,3 +8,4 @@ if is_windows
 endif
 
 sources = files('rte_eth_null.c')
+pmd_iova_as_va = true
diff --git a/drivers/net/pcap/meson.build b/drivers/net/pcap/meson.build
index ed7864eb9d..73c65dd2a6 100644
--- a/drivers/net/pcap/meson.build
+++ b/drivers/net/pcap/meson.build
@@ -15,3 +15,4 @@ ext_deps += pcap_dep
 if is_windows
     ext_deps += cc.find_library('iphlpapi', required: true)
 endif
+pmd_iova_as_va = true
diff --git a/drivers/net/ring/meson.build b/drivers/net/ring/meson.build
index 0156b37aad..45fa3492cf 100644
--- a/drivers/net/ring/meson.build
+++ b/drivers/net/ring/meson.build
@@ -9,3 +9,4 @@ endif
 
 sources = files('rte_eth_ring.c')
 headers = files('rte_eth_ring.h')
+pmd_iova_as_va = true
diff --git a/drivers/net/tap/meson.build b/drivers/net/tap/meson.build
index c09713a67b..da23599830 100644
--- a/drivers/net/tap/meson.build
+++ b/drivers/net/tap/meson.build
@@ -35,3 +35,4 @@ foreach arg:args
     config.set(arg[0], cc.has_header_symbol(arg[1], arg[2]))
 endforeach
 configure_file(output : 'tap_autoconf.h', configuration : config)
+pmd_iova_as_va = true
diff --git a/drivers/raw/skeleton/meson.build b/drivers/raw/skeleton/meson.build
index 950a33cc20..439ab8792d 100644
--- a/drivers/raw/skeleton/meson.build
+++ b/drivers/raw/skeleton/meson.build
@@ -6,3 +6,4 @@ sources = files(
         'skeleton_rawdev.c',
         'skeleton_rawdev_test.c',
 )
+pmd_iova_as_va = true
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [PATCH v1 1/4] build: add meson option to configure IOVA mode as VA
  2022-08-29 15:16           ` [PATCH v1 1/4] build: add meson option to configure IOVA mode " Shijith Thotton
@ 2022-08-29 18:18             ` Morten Brørup
  2022-08-30  8:32               ` Bruce Richardson
  0 siblings, 1 reply; 88+ messages in thread
From: Morten Brørup @ 2022-08-29 18:18 UTC (permalink / raw)
  To: Shijith Thotton, dev
  Cc: pbhagavatula, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	olivier.matz, stephen, thomas, Nicolas Chautru, Ciara Power,
	Konstantin Ananyev, Chengwen Feng, Kevin Laatz, Reshma Pattan,
	Maxime Coquelin, Chenbo Xia

> From: Shijith Thotton [mailto:sthotton@marvell.com]
> Sent: Monday, 29 August 2022 17.16
> 
> IOVA mode in DPDK is either PA or VA. The new build option iova_as_va
> configures the mode to VA at compile time and prevents setting it to PA
> at runtime. For now, all drivers which are not always enabled are
> disabled with this option. Supported driver can set the flag
> pmd_iova_as_va in its build file to enable build.
> 
> mbuf structure holds the physical (PA) and virtual address (VA) of a
> buffer. if IOVA mode is set to VA, PA is redundant as it is the same as
> VA. So PA field need not be updated and marked invalid if the build is
> configured to use only VA.
> 
> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> ---

[...]

> diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
> index e09b2549ca..992b8c64ab 100644
> --- a/app/test/test_mbuf.c
> +++ b/app/test/test_mbuf.c
> @@ -1232,11 +1232,13 @@ test_failing_mbuf_sanity_check(struct
> rte_mempool *pktmbuf_pool)
>  		return -1;
>  	}
> 
> -	badbuf = *buf;
> -	badbuf.buf_iova = 0;
> -	if (verify_mbuf_check_panics(&badbuf)) {
> -		printf("Error with bad-physaddr mbuf test\n");
> -		return -1;
> +	if (!rte_is_iova_as_va_build()) {
> +		badbuf = *buf;
> +		rte_mbuf_iova_set(&badbuf, 0);
> +		if (verify_mbuf_check_panics(&badbuf)) {
> +			printf("Error with bad-physaddr mbuf test\n");
> +			return -1;
> +		}
>  	}
> 
>  	badbuf = *buf;
> diff --git a/config/meson.build b/config/meson.build
> index 7f7b6c92fd..1ff1cd774b 100644
> --- a/config/meson.build
> +++ b/config/meson.build
> @@ -309,6 +309,9 @@ endif
>  if get_option('mbuf_refcnt_atomic')
>      dpdk_conf.set('RTE_MBUF_REFCNT_ATOMIC', true)
>  endif
> +if get_option('iova_as_va')
> +    dpdk_conf.set('RTE_IOVA_AS_VA', true)
> +endif
> 
>  compile_time_cpuflags = []
>  subdir(arch_subdir)
> diff --git a/drivers/meson.build b/drivers/meson.build
> index b22c2adda7..469e60f1fa 100644
> --- a/drivers/meson.build
> +++ b/drivers/meson.build
> @@ -103,6 +103,7 @@ foreach subpath:subdirs
>          ext_deps = []
>          pkgconfig_extra_libs = []
>          testpmd_sources = []
> +        pmd_iova_as_va = false
> 
>          if not enable_drivers.contains(drv_path)
>              build = false
> @@ -120,6 +121,11 @@ foreach subpath:subdirs
>              # pull in driver directory which should update all the
> local variables
>              subdir(drv_path)
> 
> +            if dpdk_conf.has('RTE_IOVA_AS_VA') and not pmd_iova_as_va
> and not always_enable.contains(drv_path)
> +                build = false
> +                reason = 'driver does not support IOVA as VA mode'
> +            endif
> +
>              # get dependency objs from strings
>              shared_deps = ext_deps
>              static_deps = ext_deps
> diff --git a/lib/eal/include/rte_common.h
> b/lib/eal/include/rte_common.h
> index a96cc2a138..0010ad7c7d 100644
> --- a/lib/eal/include/rte_common.h
> +++ b/lib/eal/include/rte_common.h
> @@ -921,6 +921,23 @@ __rte_noreturn void
>  rte_exit(int exit_code, const char *format, ...)
>  	__rte_format_printf(2, 3);
> 
> +/**
> + * Check if build is configured to use IOVA as VA.
> + *
> + * @return
> + *   1 if true, 0 otherwise
> + *
> + */
> +static inline int
> +rte_is_iova_as_va_build(void)
> +{
> +#ifdef RTE_IOVA_AS_VA
> +	return 1;
> +#else
> +	return 0;
> +#endif
> +}

The rte_is_iova_as_va_build() function is effectively a shadow of the RTE_IOVA_AS_VA definition. Why the need to camouflage RTE_IOVA_AS_VA through a function, instead of just using RTE_IOVA_AS_VA everywhere?



^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [PATCH v1 2/4] mbuf: add second dynamic field member for VA only build
  2022-08-29 15:16           ` [PATCH v1 2/4] mbuf: add second dynamic field member for VA only build Shijith Thotton
@ 2022-08-29 18:32             ` Morten Brørup
  2022-08-30  8:35               ` Bruce Richardson
  0 siblings, 1 reply; 88+ messages in thread
From: Morten Brørup @ 2022-08-29 18:32 UTC (permalink / raw)
  To: Shijith Thotton, dev
  Cc: pbhagavatula, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	olivier.matz, stephen, thomas


> From: Shijith Thotton [mailto:sthotton@marvell.com]
> Sent: Monday, 29 August 2022 17.16
> 
> mbuf physical address field is not used in builds which only uses VA.
> It is used to expand the dynamic field area.
> 
> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> ---
>  lib/mbuf/rte_mbuf_core.h | 26 +++++++++++++++++---------
>  lib/mbuf/rte_mbuf_dyn.c  |  2 ++
>  2 files changed, 19 insertions(+), 9 deletions(-)
> 
> diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> index 81cb07c2e4..98ce62fd6a 100644
> --- a/lib/mbuf/rte_mbuf_core.h
> +++ b/lib/mbuf/rte_mbuf_core.h
> @@ -579,15 +579,23 @@ struct rte_mbuf {
>  	RTE_MARKER cacheline0;
> 
>  	void *buf_addr;           /**< Virtual address of segment buffer.
> */
> -	/**
> -	 * Physical address of segment buffer.
> -	 * This field is invalid if the build is configured to use only
> -	 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is defined).
> -	 * Force alignment to 8-bytes, so as to ensure we have the exact
> -	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
> -	 * working on vector drivers easier.
> -	 */
> -	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
> +	RTE_STD_C11
> +	union {
> +		/**
> +		 * Physical address of segment buffer.
> +		 * This field is invalid if the build is configured to use
> only
> +		 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is
> defined).
> +		 * Force alignment to 8-bytes, so as to ensure we have the
> exact
> +		 * same mbuf cacheline0 layout for 32-bit and 64-bit. This
> makes
> +		 * working on vector drivers easier.
> +		 */
> +		rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
> +		/**
> +		 * Reserved for dynamic field in builds where physical
> address
> +		 * field is invalid.
> +		 */
> +		uint64_t dynfield2;
> +	};
> 
>  	/* next 8 bytes are initialised on RX descriptor rearm */
>  	RTE_MARKER64 rearm_data;

I know that the intention here is to keep the rte_mbuf structure intact, which will certainly improve the probability of getting this patch series into DPDK.

So, I will add a comment for the benefit of the other participants in the discussion:

With this patch, and in RTE_IOVA_AS_VA mode, it becomes possible to move m->next into the first cache line, so rte_pktmbuf_prefree_seg() does not have to touch the second cache line, thus potentially improving performance by eliminating one cache miss per freed packet segment. (I also recall someone mentioning that some PMDs set m->next on RX... If that is the case, a cache miss per packet might also be avoidable in those PMDs.)

Obviously, moving m->next to the first cache line is not related to this patch series, but would belong in a completely different patch.


^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v1 1/4] build: add meson option to configure IOVA mode as VA
  2022-08-29 18:18             ` Morten Brørup
@ 2022-08-30  8:32               ` Bruce Richardson
  0 siblings, 0 replies; 88+ messages in thread
From: Bruce Richardson @ 2022-08-30  8:32 UTC (permalink / raw)
  To: Morten Brørup
  Cc: Shijith Thotton, dev, pbhagavatula, Honnappa.Nagarahalli, jerinj,
	olivier.matz, stephen, thomas, Nicolas Chautru, Ciara Power,
	Konstantin Ananyev, Chengwen Feng, Kevin Laatz, Reshma Pattan,
	Maxime Coquelin, Chenbo Xia

On Mon, Aug 29, 2022 at 08:18:56PM +0200, Morten Brørup wrote:
> > From: Shijith Thotton [mailto:sthotton@marvell.com]
> > Sent: Monday, 29 August 2022 17.16
> > 
> > IOVA mode in DPDK is either PA or VA. The new build option iova_as_va
> > configures the mode to VA at compile time and prevents setting it to PA
> > at runtime. For now, all drivers which are not always enabled are
> > disabled with this option. Supported driver can set the flag
> > pmd_iova_as_va in its build file to enable build.
> > 
> > mbuf structure holds the physical (PA) and virtual address (VA) of a
> > buffer. if IOVA mode is set to VA, PA is redundant as it is the same as
> > VA. So PA field need not be updated and marked invalid if the build is
> > configured to use only VA.
> > 
> > Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> > ---
> 
> [...]
> 
> > diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
> > index e09b2549ca..992b8c64ab 100644
> > --- a/app/test/test_mbuf.c
> > +++ b/app/test/test_mbuf.c
> > @@ -1232,11 +1232,13 @@ test_failing_mbuf_sanity_check(struct
> > rte_mempool *pktmbuf_pool)
> >  		return -1;
> >  	}
> > 
> > -	badbuf = *buf;
> > -	badbuf.buf_iova = 0;
> > -	if (verify_mbuf_check_panics(&badbuf)) {
> > -		printf("Error with bad-physaddr mbuf test\n");
> > -		return -1;
> > +	if (!rte_is_iova_as_va_build()) {
> > +		badbuf = *buf;
> > +		rte_mbuf_iova_set(&badbuf, 0);
> > +		if (verify_mbuf_check_panics(&badbuf)) {
> > +			printf("Error with bad-physaddr mbuf test\n");
> > +			return -1;
> > +		}
> >  	}
> > 
> >  	badbuf = *buf;
> > diff --git a/config/meson.build b/config/meson.build
> > index 7f7b6c92fd..1ff1cd774b 100644
> > --- a/config/meson.build
> > +++ b/config/meson.build
> > @@ -309,6 +309,9 @@ endif
> >  if get_option('mbuf_refcnt_atomic')
> >      dpdk_conf.set('RTE_MBUF_REFCNT_ATOMIC', true)
> >  endif
> > +if get_option('iova_as_va')
> > +    dpdk_conf.set('RTE_IOVA_AS_VA', true)
> > +endif
> > 
> >  compile_time_cpuflags = []
> >  subdir(arch_subdir)
> > diff --git a/drivers/meson.build b/drivers/meson.build
> > index b22c2adda7..469e60f1fa 100644
> > --- a/drivers/meson.build
> > +++ b/drivers/meson.build
> > @@ -103,6 +103,7 @@ foreach subpath:subdirs
> >          ext_deps = []
> >          pkgconfig_extra_libs = []
> >          testpmd_sources = []
> > +        pmd_iova_as_va = false
> > 
> >          if not enable_drivers.contains(drv_path)
> >              build = false
> > @@ -120,6 +121,11 @@ foreach subpath:subdirs
> >              # pull in driver directory which should update all the
> > local variables
> >              subdir(drv_path)
> > 
> > +            if dpdk_conf.has('RTE_IOVA_AS_VA') and not pmd_iova_as_va
> > and not always_enable.contains(drv_path)
> > +                build = false
> > +                reason = 'driver does not support IOVA as VA mode'
> > +            endif
> > +
> >              # get dependency objs from strings
> >              shared_deps = ext_deps
> >              static_deps = ext_deps
> > diff --git a/lib/eal/include/rte_common.h
> > b/lib/eal/include/rte_common.h
> > index a96cc2a138..0010ad7c7d 100644
> > --- a/lib/eal/include/rte_common.h
> > +++ b/lib/eal/include/rte_common.h
> > @@ -921,6 +921,23 @@ __rte_noreturn void
> >  rte_exit(int exit_code, const char *format, ...)
> >  	__rte_format_printf(2, 3);
> > 
> > +/**
> > + * Check if build is configured to use IOVA as VA.
> > + *
> > + * @return
> > + *   1 if true, 0 otherwise
> > + *
> > + */
> > +static inline int
> > +rte_is_iova_as_va_build(void)
> > +{
> > +#ifdef RTE_IOVA_AS_VA
> > +	return 1;
> > +#else
> > +	return 0;
> > +#endif
> > +}
> 
> The rte_is_iova_as_va_build() function is effectively a shadow of the RTE_IOVA_AS_VA definition. Why the need to camouflage RTE_IOVA_AS_VA through a function, instead of just using RTE_IOVA_AS_VA everywhere?
> 
My reading is that it's not quite equivalent, and in the undef case it
can't be directly used in C code. You can't do "if (RTE_IOVA_AS_VA)", for
example. However, rather than adding a function, in meson you could also
add "dpdk_conf.set10(RTE_IOVA....)" to define a second macro that is 0 in
the undef case, and which therefore could be used in C conditionals.

/Bruce

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v1 2/4] mbuf: add second dynamic field member for VA only build
  2022-08-29 18:32             ` Morten Brørup
@ 2022-08-30  8:35               ` Bruce Richardson
  2022-08-30  8:41                 ` [EXT] " Pavan Nikhilesh Bhagavatula
  0 siblings, 1 reply; 88+ messages in thread
From: Bruce Richardson @ 2022-08-30  8:35 UTC (permalink / raw)
  To: Morten Brørup
  Cc: Shijith Thotton, dev, pbhagavatula, Honnappa.Nagarahalli, jerinj,
	olivier.matz, stephen, thomas

On Mon, Aug 29, 2022 at 08:32:20PM +0200, Morten Brørup wrote:
> 
> > From: Shijith Thotton [mailto:sthotton@marvell.com]
> > Sent: Monday, 29 August 2022 17.16
> > 
> > mbuf physical address field is not used in builds which only uses VA.
> > It is used to expand the dynamic field area.
> > 
> > Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> > ---
> >  lib/mbuf/rte_mbuf_core.h | 26 +++++++++++++++++---------
> >  lib/mbuf/rte_mbuf_dyn.c  |  2 ++
> >  2 files changed, 19 insertions(+), 9 deletions(-)
> > 
> > diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> > index 81cb07c2e4..98ce62fd6a 100644
> > --- a/lib/mbuf/rte_mbuf_core.h
> > +++ b/lib/mbuf/rte_mbuf_core.h
> > @@ -579,15 +579,23 @@ struct rte_mbuf {
> >  	RTE_MARKER cacheline0;
> > 
> >  	void *buf_addr;           /**< Virtual address of segment buffer.
> > */
> > -	/**
> > -	 * Physical address of segment buffer.
> > -	 * This field is invalid if the build is configured to use only
> > -	 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is defined).
> > -	 * Force alignment to 8-bytes, so as to ensure we have the exact
> > -	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
> > -	 * working on vector drivers easier.
> > -	 */
> > -	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
> > +	RTE_STD_C11
> > +	union {
> > +		/**
> > +		 * Physical address of segment buffer.
> > +		 * This field is invalid if the build is configured to use
> > only
> > +		 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is
> > defined).
> > +		 * Force alignment to 8-bytes, so as to ensure we have the
> > exact
> > +		 * same mbuf cacheline0 layout for 32-bit and 64-bit. This
> > makes
> > +		 * working on vector drivers easier.
> > +		 */
> > +		rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
> > +		/**
> > +		 * Reserved for dynamic field in builds where physical
> > address
> > +		 * field is invalid.
> > +		 */
> > +		uint64_t dynfield2;
> > +	};
> > 
> >  	/* next 8 bytes are initialised on RX descriptor rearm */
> >  	RTE_MARKER64 rearm_data;
> 
> I know that the intention here is to keep the rte_mbuf structure intact, which will certainly improve the probability of getting this patch series into DPDK.
> 
> So, I will add a comment for the benefit of the other participants in the discussion:
> 
> With this patch, and in RTE_IOVA_AS_VA mode, it becomes possible to move m->next into the first cache line, so rte_pktmbuf_prefree_seg() does not have to touch the second cache line, thus potentially improving performance by eliminating one cache miss per freed packet segment. (I also recall someone mentioning that some PMDs set m->next on RX... If that is the case, a cache miss per packet might also be avoidable in those PMDs.)
> 
> Obviously, moving m->next to the first cache line is not related to this patch series, but would belong in a completely different patch.
>

+1 to that, with the exception that if it is decided to move the next
pointer rather than use this as dynamic space, I think it *should* be in
this patch series, rather than mucking about with mbuf twice. :-) 

^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [EXT] Re: [PATCH v1 2/4] mbuf: add second dynamic field member for VA only build
  2022-08-30  8:35               ` Bruce Richardson
@ 2022-08-30  8:41                 ` Pavan Nikhilesh Bhagavatula
  2022-08-30 13:22                   ` Honnappa Nagarahalli
  0 siblings, 1 reply; 88+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2022-08-30  8:41 UTC (permalink / raw)
  To: Bruce Richardson, Morten Brørup
  Cc: Shijith Thotton, dev, Honnappa.Nagarahalli,
	Jerin Jacob Kollanukkaran, olivier.matz, stephen, thomas



> -----Original Message-----
> From: Bruce Richardson <bruce.richardson@intel.com>
> Sent: Tuesday, August 30, 2022 2:06 PM
> To: Morten Brørup <mb@smartsharesystems.com>
> Cc: Shijith Thotton <sthotton@marvell.com>; dev@dpdk.org; Pavan
> Nikhilesh Bhagavatula <pbhagavatula@marvell.com>;
> Honnappa.Nagarahalli@arm.com; Jerin Jacob Kollanukkaran
> <jerinj@marvell.com>; olivier.matz@6wind.com;
> stephen@networkplumber.org; thomas@monjalon.net
> Subject: [EXT] Re: [PATCH v1 2/4] mbuf: add second dynamic field member
> for VA only build
> 
> External Email
> 
> ----------------------------------------------------------------------
> On Mon, Aug 29, 2022 at 08:32:20PM +0200, Morten Brørup wrote:
> >
> > > From: Shijith Thotton [mailto:sthotton@marvell.com]
> > > Sent: Monday, 29 August 2022 17.16
> > >
> > > mbuf physical address field is not used in builds which only uses VA.
> > > It is used to expand the dynamic field area.
> > >
> > > Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> > > ---
> > >  lib/mbuf/rte_mbuf_core.h | 26 +++++++++++++++++---------
> > >  lib/mbuf/rte_mbuf_dyn.c  |  2 ++
> > >  2 files changed, 19 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> > > index 81cb07c2e4..98ce62fd6a 100644
> > > --- a/lib/mbuf/rte_mbuf_core.h
> > > +++ b/lib/mbuf/rte_mbuf_core.h
> > > @@ -579,15 +579,23 @@ struct rte_mbuf {
> > >  	RTE_MARKER cacheline0;
> > >
> > >  	void *buf_addr;           /**< Virtual address of segment buffer.
> > > */
> > > -	/**
> > > -	 * Physical address of segment buffer.
> > > -	 * This field is invalid if the build is configured to use only
> > > -	 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is defined).
> > > -	 * Force alignment to 8-bytes, so as to ensure we have the exact
> > > -	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
> > > -	 * working on vector drivers easier.
> > > -	 */
> > > -	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
> > > +	RTE_STD_C11
> > > +	union {
> > > +		/**
> > > +		 * Physical address of segment buffer.
> > > +		 * This field is invalid if the build is configured to use
> > > only
> > > +		 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is
> > > defined).
> > > +		 * Force alignment to 8-bytes, so as to ensure we have the
> > > exact
> > > +		 * same mbuf cacheline0 layout for 32-bit and 64-bit. This
> > > makes
> > > +		 * working on vector drivers easier.
> > > +		 */
> > > +		rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
> > > +		/**
> > > +		 * Reserved for dynamic field in builds where physical
> > > address
> > > +		 * field is invalid.
> > > +		 */
> > > +		uint64_t dynfield2;
> > > +	};
> > >
> > >  	/* next 8 bytes are initialised on RX descriptor rearm */
> > >  	RTE_MARKER64 rearm_data;
> >
> > I know that the intention here is to keep the rte_mbuf structure intact,
> which will certainly improve the probability of getting this patch series into
> DPDK.
> >
> > So, I will add a comment for the benefit of the other participants in the
> discussion:
> >
> > With this patch, and in RTE_IOVA_AS_VA mode, it becomes possible to
> move m->next into the first cache line, so rte_pktmbuf_prefree_seg() does
> not have to touch the second cache line, thus potentially improving
> performance by eliminating one cache miss per freed packet segment. (I also
> recall someone mentioning that some PMDs set m->next on RX... If that is
> the case, a cache miss per packet might also be avoidable in those PMDs.)
> >
> > Obviously, moving m->next to the first cache line is not related to this patch
> series, but would belong in a completely different patch.
> >
> 
> +1 to that, with the exception that if it is decided to move the next
> pointer rather than use this as dynamic space, I think it *should* be in
> this patch series, rather than mucking about with mbuf twice. :-)

+1 When RTE_IOVA_AS_VA is set we can set mbuf->next as the dynamic field and move it to mbuf->buf_iova.
mbuf->next write is one of the prominent hotspots on Arm platforms.

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH] mbuf: add mbuf physical address field to dynamic field
  2022-07-01 12:24   ` Shijith Thotton
  2022-07-03  7:31     ` Morten Brørup
@ 2022-08-30 13:07     ` Ferruh Yigit
  2022-09-12 13:19       ` [EXT] " Shijith Thotton
  1 sibling, 1 reply; 88+ messages in thread
From: Ferruh Yigit @ 2022-08-30 13:07 UTC (permalink / raw)
  To: Shijith Thotton, stephen
  Cc: dev, jerinj, olivier.matz, thomas, Bruce Richardson, Morten Brørup

On 7/1/2022 1:24 PM, Shijith Thotton wrote:
>>> If all devices are configured to run in IOVA mode as VA, physical
>>> address field of mbuf (buf_iova) won't be used. In such cases, buf_iova
>>> space is free to use as a dynamic field. So a new dynamic field member
>>> (dynfield2) is added in mbuf structure to make use of that space.
>>>
>>> A new mbuf flag RTE_MBUF_F_DYNFIELD2 is introduced to help identify the
>>> mbuf that can use dynfield2.
>>>
>>> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
>>
>> This seems like a complex and potentially error prone way to do this.
>> What is the use case?
>>
> 
> PCI drivers with the flag RTE_PCI_DRV_NEED_IOVA_AS_VA only works in IOVA mode as
> VA. buf_iova field of mbuf is not used by those PMDs and can be used as a
> dynamic area to save space.
> 

'RTE_PCI_DRV_NEED_IOVA_AS_VA' means device can *only* work in 
RTE_IOVA_VA mode, right?

Although there are many devices that support RTE_IOVA_VA mode, only a 
few of them work *only* with RTE_IOVA_VA mode, the rest can prefer to use 
RTE_IOVA_PA or RTE_IOVA_VA.
Also, using KNI forces the use of RTE_IOVA_PA mode.
And moving the 'buf_iova' field out of the first cache line will impact the 
performance for RTE_IOVA_PA mode.

Since KNI is going away and vfio is the preferred way, it can be OK to 
make 'buf_iova' a dynamic field in the long term, but I think it is better to 
do this slowly, like should we wait for KNI to go away first?


>> How much of a performance gain?
> 
> No change in performance.


^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [EXT] Re: [PATCH v1 2/4] mbuf: add second dynamic field member for VA only build
  2022-08-30  8:41                 ` [EXT] " Pavan Nikhilesh Bhagavatula
@ 2022-08-30 13:22                   ` Honnappa Nagarahalli
  2022-09-07 13:55                     ` Shijith Thotton
  0 siblings, 1 reply; 88+ messages in thread
From: Honnappa Nagarahalli @ 2022-08-30 13:22 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Bruce Richardson, Morten Brørup
  Cc: Shijith Thotton, dev, jerinj, olivier.matz, stephen, thomas, nd

<snip>
> >
> > ----------------------------------------------------------------------
> > On Mon, Aug 29, 2022 at 08:32:20PM +0200, Morten Brørup wrote:
> > >
> > > > From: Shijith Thotton [mailto:sthotton@marvell.com]
> > > > Sent: Monday, 29 August 2022 17.16
> > > >
> > > > mbuf physical address field is not used in builds which only uses VA.
> > > > It is used to expand the dynamic field area.
> > > >
> > > > Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> > > > ---
> > > >  lib/mbuf/rte_mbuf_core.h | 26 +++++++++++++++++---------
> > > > lib/mbuf/rte_mbuf_dyn.c  |  2 ++
> > > >  2 files changed, 19 insertions(+), 9 deletions(-)
> > > >
> > > > diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> > > > index 81cb07c2e4..98ce62fd6a 100644
> > > > --- a/lib/mbuf/rte_mbuf_core.h
> > > > +++ b/lib/mbuf/rte_mbuf_core.h
> > > > @@ -579,15 +579,23 @@ struct rte_mbuf {
> > > >  	RTE_MARKER cacheline0;
> > > >
> > > >  	void *buf_addr;           /**< Virtual address of segment buffer.
> > > > */
> > > > -	/**
> > > > -	 * Physical address of segment buffer.
> > > > -	 * This field is invalid if the build is configured to use only
> > > > -	 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is defined).
> > > > -	 * Force alignment to 8-bytes, so as to ensure we have the exact
> > > > -	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
> > > > -	 * working on vector drivers easier.
> > > > -	 */
> > > > -	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
> > > > +	RTE_STD_C11
> > > > +	union {
> > > > +		/**
> > > > +		 * Physical address of segment buffer.
> > > > +		 * This field is invalid if the build is configured to use
> > > > only
> > > > +		 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is
> > > > defined).
> > > > +		 * Force alignment to 8-bytes, so as to ensure we have the
> > > > exact
> > > > +		 * same mbuf cacheline0 layout for 32-bit and 64-bit. This
> > > > makes
> > > > +		 * working on vector drivers easier.
> > > > +		 */
> > > > +		rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
> > > > +		/**
> > > > +		 * Reserved for dynamic field in builds where physical
> > > > address
> > > > +		 * field is invalid.
> > > > +		 */
> > > > +		uint64_t dynfield2;
> > > > +	};
> > > >
> > > >  	/* next 8 bytes are initialised on RX descriptor rearm */
> > > >  	RTE_MARKER64 rearm_data;
> > >
> > > I know that the intention here is to keep the rte_mbuf structure
> > > intact,
> > which will certainly improve the probability of getting this patch
> > series into DPDK.
> > >
> > > So, I will add a comment for the benefit of the other participants
> > > in the
> > discussion:
> > >
> > > With this patch, and in RTE_IOVA_AS_VA mode, it becomes possible to
> > move m->next into the first cache line, so rte_pktmbuf_prefree_seg()
> > does not have to touch the second cache line, thus potentially
> > improving performance by eliminating one cache miss per freed packet
> > segment. (I also recall someone mentioning that some PMDs set m->next
> > on RX... If that is the case, a cache miss per packet might also be
> > avoidable in those PMDs.)
> > >
> > > Obviously, moving m->next to the first cache line is not related to
> > > this patch
> > series, but would belong in a completely different patch.
> > >
> >
> > +1 to that, with the exception that if it is decided to move the next
> > pointer rather than use this as dynamic space, I think it *should* be
> > in this patch series, rather than mucking about with mbuf twice. :-)
> 
> +1 When RTE_IOVA_AS_VA is set we can set mbuf->next as the dynamic field
> and move it to mbuf->buf_iova.
> mbuf->next write is one of the prominent hotspot in arm platforms.
+1 for reducing the cachelines that need to be touched

^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v2 0/5] mbuf dynamic field expansion
  2022-08-29 15:16           ` [PATCH v1 0/4] mbuf dynamic field expansion Shijith Thotton
@ 2022-09-07 13:43             ` Shijith Thotton
  2022-09-21  9:43               ` David Marchand
  2022-09-21 13:56               ` [PATCH v3 " Shijith Thotton
  2022-09-07 13:43             ` [PATCH v2 1/5] build: add meson option to configure IOVA mode as VA Shijith Thotton
                               ` (4 subsequent siblings)
  5 siblings, 2 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-07 13:43 UTC (permalink / raw)
  To: dev
  Cc: pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen, thomas

This is a continuation of the discussions[1] to add mbuf physical address field to dynamic field.
Previous version was to add PA field to dynamic field area based on the EAL IOVA mode option. It was
deemed unsafe as some components could still use the PA field without checking IOVA mode and there
are drivers which need PA to work. One suggestion was to make the IOVA mode check at compile time so
that drivers which need PA can be disabled during build. This series adds this new meson build
option. The second patch adds the mbuf PA field to the dynamic field on such builds. The last two patches enable
Marvell cnxk PMDs and software PMDs in IOVA as VA build as they work without PA field.

1. https://inbox.dpdk.org/dev/57d2ab7fff672716d37ba4078e2e3bb2db126607.1656605763.git.sthotton@marvell.com/.

v2:
 * Used RTE_IOVA_AS_VA instead of rte_is_iova_as_va_build().
 * Moved mbuf next pointer to first cacheline if RTE_IOVA_AS_VA = 1.

Shijith Thotton (5):
  build: add meson option to configure IOVA mode as VA
  mbuf: add second dynamic field member for VA only build
  lib: move mbuf next pointer to first cache line
  drivers: mark Marvell cnxk PMDs work with IOVA as VA
  drivers: mark software PMDs work with IOVA as VA

 app/test-bbdev/test_bbdev_perf.c         |  2 +-
 app/test-crypto-perf/cperf_test_common.c |  5 +--
 app/test/test_bpf.c                      |  2 +-
 app/test/test_dmadev.c                   | 33 ++++++--------
 app/test/test_mbuf.c                     | 12 +++---
 app/test/test_pcapng.c                   |  2 +-
 config/arm/meson.build                   |  8 +++-
 config/meson.build                       |  1 +
 drivers/common/cnxk/meson.build          |  1 +
 drivers/crypto/armv8/meson.build         |  1 +
 drivers/crypto/cnxk/cn10k_ipsec_la_ops.h |  4 +-
 drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  |  2 +-
 drivers/crypto/cnxk/meson.build          |  2 +
 drivers/crypto/ipsec_mb/meson.build      |  1 +
 drivers/crypto/null/meson.build          |  1 +
 drivers/crypto/openssl/meson.build       |  1 +
 drivers/dma/cnxk/meson.build             |  1 +
 drivers/dma/skeleton/meson.build         |  1 +
 drivers/event/cnxk/meson.build           |  1 +
 drivers/event/dsw/meson.build            |  1 +
 drivers/event/opdl/meson.build           |  1 +
 drivers/event/skeleton/meson.build       |  1 +
 drivers/event/sw/meson.build             |  1 +
 drivers/mempool/bucket/meson.build       |  1 +
 drivers/mempool/cnxk/meson.build         |  1 +
 drivers/mempool/ring/meson.build         |  1 +
 drivers/mempool/stack/meson.build        |  1 +
 drivers/meson.build                      |  6 +++
 drivers/net/af_packet/meson.build        |  1 +
 drivers/net/af_xdp/meson.build           |  2 +
 drivers/net/bonding/meson.build          |  1 +
 drivers/net/cnxk/cnxk_ethdev.h           |  1 -
 drivers/net/cnxk/meson.build             |  1 +
 drivers/net/failsafe/meson.build         |  1 +
 drivers/net/memif/meson.build            |  1 +
 drivers/net/null/meson.build             |  1 +
 drivers/net/pcap/meson.build             |  1 +
 drivers/net/ring/meson.build             |  1 +
 drivers/net/tap/meson.build              |  1 +
 drivers/raw/cnxk_bphy/meson.build        |  1 +
 drivers/raw/cnxk_gpio/meson.build        |  1 +
 drivers/raw/skeleton/meson.build         |  1 +
 lib/eal/linux/eal.c                      |  7 +++
 lib/mbuf/rte_mbuf.c                      |  8 ++--
 lib/mbuf/rte_mbuf.h                      | 17 +++++---
 lib/mbuf/rte_mbuf_core.h                 | 55 ++++++++++++++++++------
 lib/mbuf/rte_mbuf_dyn.c                  |  2 +
 lib/meson.build                          |  3 ++
 lib/vhost/vhost.h                        |  2 +-
 lib/vhost/vhost_crypto.c                 | 54 +++++++++++++++++------
 meson_options.txt                        |  2 +
 51 files changed, 188 insertions(+), 72 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v2 1/5] build: add meson option to configure IOVA mode as VA
  2022-08-29 15:16           ` [PATCH v1 0/4] mbuf dynamic field expansion Shijith Thotton
  2022-09-07 13:43             ` [PATCH v2 0/5] " Shijith Thotton
@ 2022-09-07 13:43             ` Shijith Thotton
  2022-09-07 15:31               ` Stephen Hemminger
  2022-09-07 13:43             ` [PATCH v2 2/5] mbuf: add second dynamic field member for VA only build Shijith Thotton
                               ` (3 subsequent siblings)
  5 siblings, 1 reply; 88+ messages in thread
From: Shijith Thotton @ 2022-09-07 13:43 UTC (permalink / raw)
  To: dev
  Cc: pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen, thomas,
	Nicolas Chautru, Ciara Power, Konstantin Ananyev, Chengwen Feng,
	Kevin Laatz, Reshma Pattan, Maxime Coquelin, Chenbo Xia

IOVA mode in DPDK is either PA or VA. The new build option iova_as_va
configures the mode to VA at compile time and prevents setting it to PA
at runtime. For now, all drivers which are not always enabled are
disabled with this option. A supported driver can set the flag
pmd_iova_as_va in its build file to enable its build.

mbuf structure holds the physical (PA) and virtual address (VA) of a
buffer. If IOVA mode is set to VA, PA is redundant as it is the same as
VA. So the PA field need not be updated, and it is marked invalid, if the
build is configured to use only VA.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 app/test-bbdev/test_bbdev_perf.c         |  2 +-
 app/test-crypto-perf/cperf_test_common.c |  5 +--
 app/test/test_bpf.c                      |  2 +-
 app/test/test_dmadev.c                   | 33 ++++++---------
 app/test/test_mbuf.c                     | 12 +++---
 app/test/test_pcapng.c                   |  2 +-
 config/meson.build                       |  1 +
 drivers/meson.build                      |  6 +++
 lib/eal/linux/eal.c                      |  7 +++
 lib/mbuf/rte_mbuf.c                      |  8 ++--
 lib/mbuf/rte_mbuf.h                      | 17 +++++---
 lib/mbuf/rte_mbuf_core.h                 | 10 +++++
 lib/vhost/vhost.h                        |  2 +-
 lib/vhost/vhost_crypto.c                 | 54 ++++++++++++++++++------
 meson_options.txt                        |  2 +
 15 files changed, 109 insertions(+), 54 deletions(-)

diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index 8fab52d821..f6aa25b67d 100644
--- a/app/test-bbdev/test_bbdev_perf.c
+++ b/app/test-bbdev/test_bbdev_perf.c
@@ -1001,7 +1001,7 @@ init_op_data_objs(struct rte_bbdev_op_data *bufs,
 					seg->length);
 				memcpy(data, seg->addr, seg->length);
 				m_head->buf_addr = data;
-				m_head->buf_iova = rte_malloc_virt2iova(data);
+				rte_mbuf_iova_set(m_head, rte_malloc_virt2iova(data));
 				m_head->data_off = 0;
 				m_head->data_len = seg->length;
 			} else {
diff --git a/app/test-crypto-perf/cperf_test_common.c b/app/test-crypto-perf/cperf_test_common.c
index 00aadc9a47..27646cd619 100644
--- a/app/test-crypto-perf/cperf_test_common.c
+++ b/app/test-crypto-perf/cperf_test_common.c
@@ -26,8 +26,7 @@ fill_single_seg_mbuf(struct rte_mbuf *m, struct rte_mempool *mp,
 	/* start of buffer is after mbuf structure and priv data */
 	m->priv_size = 0;
 	m->buf_addr = (char *)m + mbuf_hdr_size;
-	m->buf_iova = rte_mempool_virt2iova(obj) +
-		mbuf_offset + mbuf_hdr_size;
+	rte_mbuf_iova_set(m, rte_mempool_virt2iova(obj) + mbuf_offset + mbuf_hdr_size);
 	m->buf_len = segment_sz;
 	m->data_len = data_len;
 	m->pkt_len = data_len;
@@ -58,7 +57,7 @@ fill_multi_seg_mbuf(struct rte_mbuf *m, struct rte_mempool *mp,
 		/* start of buffer is after mbuf structure and priv data */
 		m->priv_size = 0;
 		m->buf_addr = (char *)m + mbuf_hdr_size;
-		m->buf_iova = next_seg_phys_addr;
+		rte_mbuf_iova_set(m, next_seg_phys_addr);
 		next_seg_phys_addr += mbuf_hdr_size + segment_sz;
 		m->buf_len = segment_sz;
 		m->data_len = data_len;
diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c
index 97f500809e..f5af5e8a3f 100644
--- a/app/test/test_bpf.c
+++ b/app/test/test_bpf.c
@@ -2600,7 +2600,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len,
 	uint8_t *db;
 
 	mb->buf_addr = buf;
-	mb->buf_iova = (uintptr_t)buf;
+	rte_mbuf_iova_set(mb, (uintptr_t)buf);
 	mb->buf_len = buf_len;
 	rte_mbuf_refcnt_set(mb, 1);
 
diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c
index 9e8e101f40..8306947eda 100644
--- a/app/test/test_dmadev.c
+++ b/app/test/test_dmadev.c
@@ -110,8 +110,8 @@ do_multi_copies(int16_t dev_id, uint16_t vchan,
 		for (j = 0; j < COPY_LEN/sizeof(uint64_t); j++)
 			src_data[j] = rte_rand();
 
-		if (rte_dma_copy(dev_id, vchan, srcs[i]->buf_iova + srcs[i]->data_off,
-				dsts[i]->buf_iova + dsts[i]->data_off, COPY_LEN, 0) != id_count++)
+		if (rte_dma_copy(dev_id, vchan, rte_pktmbuf_iova_offset(srcs[i], 0),
+				 rte_pktmbuf_iova_offset(dsts[i], 0), COPY_LEN, 0) != id_count++)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", i);
 	}
 	rte_dma_submit(dev_id, vchan);
@@ -317,9 +317,8 @@ test_failure_in_full_burst(int16_t dev_id, uint16_t vchan, bool fence,
 	rte_dma_stats_get(dev_id, vchan, &baseline); /* get a baseline set of stats */
 	for (i = 0; i < COMP_BURST_SZ; i++) {
 		int id = rte_dma_copy(dev_id, vchan,
-				(i == fail_idx ? 0 : (srcs[i]->buf_iova + srcs[i]->data_off)),
-				dsts[i]->buf_iova + dsts[i]->data_off,
-				COPY_LEN, OPT_FENCE(i));
+				      (i == fail_idx ? 0 : rte_pktmbuf_iova_offset(srcs[i], 0)),
+				      rte_pktmbuf_iova_offset(dsts[i], 0), COPY_LEN, OPT_FENCE(i));
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", i);
 		if (i == fail_idx)
@@ -407,9 +406,8 @@ test_individual_status_query_with_failure(int16_t dev_id, uint16_t vchan, bool f
 
 	for (j = 0; j < COMP_BURST_SZ; j++) {
 		int id = rte_dma_copy(dev_id, vchan,
-				(j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]->data_off)),
-				dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, OPT_FENCE(j));
+				      (j == fail_idx ? 0 : rte_pktmbuf_iova_offset(srcs[j], 0)),
+				      rte_pktmbuf_iova_offset(dsts[j], 0), COPY_LEN, OPT_FENCE(j));
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 		if (j == fail_idx)
@@ -470,9 +468,8 @@ test_single_item_status_query_with_failure(int16_t dev_id, uint16_t vchan,
 
 	for (j = 0; j < COMP_BURST_SZ; j++) {
 		int id = rte_dma_copy(dev_id, vchan,
-				(j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]->data_off)),
-				dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, 0);
+				      (j == fail_idx ? 0 : rte_pktmbuf_iova_offset(srcs[j], 0)),
+				      rte_pktmbuf_iova_offset(dsts[j], 0), COPY_LEN, 0);
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 		if (j == fail_idx)
@@ -529,15 +526,14 @@ test_multi_failure(int16_t dev_id, uint16_t vchan, struct rte_mbuf **srcs, struc
 
 	/* enqueue and gather completions in one go */
 	for (j = 0; j < COMP_BURST_SZ; j++) {
-		uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
+		uintptr_t src = rte_pktmbuf_iova_offset(srcs[j], 0);
 		/* set up for failure if the current index is anywhere is the fails array */
 		for (i = 0; i < num_fail; i++)
 			if (j == fail[i])
 				src = 0;
 
-		int id = rte_dma_copy(dev_id, vchan,
-				src, dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, 0);
+		int id = rte_dma_copy(dev_id, vchan, src, rte_pktmbuf_iova_offset(dsts[j], 0),
+				      COPY_LEN, 0);
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 	}
@@ -565,15 +561,14 @@ test_multi_failure(int16_t dev_id, uint16_t vchan, struct rte_mbuf **srcs, struc
 
 	/* enqueue and gather completions in bursts, but getting errors one at a time */
 	for (j = 0; j < COMP_BURST_SZ; j++) {
-		uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
+		uintptr_t src = rte_pktmbuf_iova_offset(srcs[j], 0);
 		/* set up for failure if the current index is anywhere is the fails array */
 		for (i = 0; i < num_fail; i++)
 			if (j == fail[i])
 				src = 0;
 
-		int id = rte_dma_copy(dev_id, vchan,
-				src, dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, 0);
+		int id = rte_dma_copy(dev_id, vchan, src, rte_pktmbuf_iova_offset(dsts[j], 0),
+				      COPY_LEN, 0);
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 	}
diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
index e09b2549ca..45431f2c9c 100644
--- a/app/test/test_mbuf.c
+++ b/app/test/test_mbuf.c
@@ -1232,11 +1232,13 @@ test_failing_mbuf_sanity_check(struct rte_mempool *pktmbuf_pool)
 		return -1;
 	}
 
-	badbuf = *buf;
-	badbuf.buf_iova = 0;
-	if (verify_mbuf_check_panics(&badbuf)) {
-		printf("Error with bad-physaddr mbuf test\n");
-		return -1;
+	if (!RTE_IOVA_AS_VA) {
+		badbuf = *buf;
+		rte_mbuf_iova_set(&badbuf, 0);
+		if (verify_mbuf_check_panics(&badbuf)) {
+			printf("Error with bad-physaddr mbuf test\n");
+			return -1;
+		}
 	}
 
 	badbuf = *buf;
diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c
index 320dacea34..abbf00f6da 100644
--- a/app/test/test_pcapng.c
+++ b/app/test/test_pcapng.c
@@ -40,7 +40,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len,
 	uint8_t *db;
 
 	mb->buf_addr = buf;
-	mb->buf_iova = (uintptr_t)buf;
+	rte_mbuf_iova_set(mb, (uintptr_t)buf);
 	mb->buf_len = buf_len;
 	rte_mbuf_refcnt_set(mb, 1);
 
diff --git a/config/meson.build b/config/meson.build
index 7f7b6c92fd..6b6c3e7eb6 100644
--- a/config/meson.build
+++ b/config/meson.build
@@ -309,6 +309,7 @@ endif
 if get_option('mbuf_refcnt_atomic')
     dpdk_conf.set('RTE_MBUF_REFCNT_ATOMIC', true)
 endif
+dpdk_conf.set10('RTE_IOVA_AS_VA', get_option('iova_as_va'))
 
 compile_time_cpuflags = []
 subdir(arch_subdir)
diff --git a/drivers/meson.build b/drivers/meson.build
index 376a64f4da..989770cffd 100644
--- a/drivers/meson.build
+++ b/drivers/meson.build
@@ -105,6 +105,7 @@ foreach subpath:subdirs
         ext_deps = []
         pkgconfig_extra_libs = []
         testpmd_sources = []
+        pmd_iova_as_va = false
 
         if not enable_drivers.contains(drv_path)
             build = false
@@ -122,6 +123,11 @@ foreach subpath:subdirs
             # pull in driver directory which should update all the local variables
             subdir(drv_path)
 
+            if dpdk_conf.get('RTE_IOVA_AS_VA') == 1 and not pmd_iova_as_va and not always_enable.contains(drv_path)
+                build = false
+                reason = 'driver does not support IOVA as VA mode'
+            endif
+
             # get dependency objs from strings
             shared_deps = ext_deps
             static_deps = ext_deps
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 37d29643a5..b70c4dcc5f 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -1127,6 +1127,13 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
+	if (rte_eal_iova_mode() == RTE_IOVA_PA && RTE_IOVA_AS_VA) {
+		rte_eal_init_alert(
+			"Cannot use IOVA as 'PA' since build is configured to use only 'VA'");
+		rte_errno = EINVAL;
+		return -1;
+	}
+
 	RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
 		rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");
 
diff --git a/lib/mbuf/rte_mbuf.c b/lib/mbuf/rte_mbuf.c
index a2307cebe6..5af290c53a 100644
--- a/lib/mbuf/rte_mbuf.c
+++ b/lib/mbuf/rte_mbuf.c
@@ -89,7 +89,7 @@ rte_pktmbuf_init(struct rte_mempool *mp,
 	/* start of buffer is after mbuf structure and priv data */
 	m->priv_size = priv_size;
 	m->buf_addr = (char *)m + mbuf_size;
-	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
+	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
 	m->buf_len = (uint16_t)buf_len;
 
 	/* keep some headroom between start of buffer and data */
@@ -187,8 +187,8 @@ __rte_pktmbuf_init_extmem(struct rte_mempool *mp,
 	RTE_ASSERT(ctx->off + ext_mem->elt_size <= ext_mem->buf_len);
 
 	m->buf_addr = RTE_PTR_ADD(ext_mem->buf_ptr, ctx->off);
-	m->buf_iova = ext_mem->buf_iova == RTE_BAD_IOVA ?
-		      RTE_BAD_IOVA : (ext_mem->buf_iova + ctx->off);
+	rte_mbuf_iova_set(m, ext_mem->buf_iova == RTE_BAD_IOVA ? RTE_BAD_IOVA :
+								 (ext_mem->buf_iova + ctx->off));
 
 	ctx->off += ext_mem->elt_size;
 	if (ctx->off + ext_mem->elt_size > ext_mem->buf_len) {
@@ -388,7 +388,7 @@ int rte_mbuf_check(const struct rte_mbuf *m, int is_header,
 		*reason = "bad mbuf pool";
 		return -1;
 	}
-	if (m->buf_iova == 0) {
+	if (m->buf_iova == 0 && !RTE_IOVA_AS_VA) {
 		*reason = "bad IO addr";
 		return -1;
 	}
diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h
index 9811e8c760..05be146bc2 100644
--- a/lib/mbuf/rte_mbuf.h
+++ b/lib/mbuf/rte_mbuf.h
@@ -146,7 +146,7 @@ static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
 static inline rte_iova_t
 rte_mbuf_data_iova(const struct rte_mbuf *mb)
 {
-	return mb->buf_iova + mb->data_off;
+	return (RTE_IOVA_AS_VA ? (uint64_t)mb->buf_addr : mb->buf_iova) + mb->data_off;
 }
 
 /**
@@ -164,7 +164,7 @@ rte_mbuf_data_iova(const struct rte_mbuf *mb)
 static inline rte_iova_t
 rte_mbuf_data_iova_default(const struct rte_mbuf *mb)
 {
-	return mb->buf_iova + RTE_PKTMBUF_HEADROOM;
+	return (RTE_IOVA_AS_VA ? (uint64_t)mb->buf_addr : mb->buf_iova) + RTE_PKTMBUF_HEADROOM;
 }
 
 /**
@@ -469,6 +469,13 @@ rte_mbuf_ext_refcnt_update(struct rte_mbuf_ext_shared_info *shinfo,
 				 __ATOMIC_ACQ_REL);
 }
 
+static inline void
+rte_mbuf_iova_set(struct rte_mbuf *m, rte_iova_t iova)
+{
+	if (!RTE_IOVA_AS_VA)
+		m->buf_iova = iova;
+}
+
 /** Mbuf prefetch */
 #define RTE_MBUF_PREFETCH_TO_FREE(m) do {       \
 	if ((m) != NULL)                        \
@@ -1056,7 +1063,7 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
 	RTE_ASSERT(shinfo->free_cb != NULL);
 
 	m->buf_addr = buf_addr;
-	m->buf_iova = buf_iova;
+	rte_mbuf_iova_set(m, buf_iova);
 	m->buf_len = buf_len;
 
 	m->data_len = 0;
@@ -1143,7 +1150,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
 
 	mi->data_off = m->data_off;
 	mi->data_len = m->data_len;
-	mi->buf_iova = m->buf_iova;
+	rte_mbuf_iova_set(mi, m->buf_iova);
 	mi->buf_addr = m->buf_addr;
 	mi->buf_len = m->buf_len;
 
@@ -1245,7 +1252,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
 
 	m->priv_size = priv_size;
 	m->buf_addr = (char *)m + mbuf_size;
-	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
+	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
 	m->buf_len = (uint16_t)buf_len;
 	rte_pktmbuf_reset_headroom(m);
 	m->data_len = 0;
diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index 3d6ddd6773..c6292e7252 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -581,6 +581,8 @@ struct rte_mbuf {
 	void *buf_addr;           /**< Virtual address of segment buffer. */
 	/**
 	 * Physical address of segment buffer.
+	 * This field is invalid if the build is configured to use only
+	 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is 1).
 	 * Force alignment to 8-bytes, so as to ensure we have the exact
 	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
 	 * working on vector drivers easier.
@@ -848,8 +850,12 @@ struct rte_mbuf_ext_shared_info {
  * @param o
  *   The offset into the data to calculate address from.
  */
+#if RTE_IOVA_AS_VA
+#define rte_pktmbuf_iova_offset(m, o) rte_pktmbuf_mtod_offset(m, rte_iova_t, o)
+#else
 #define rte_pktmbuf_iova_offset(m, o) \
 	(rte_iova_t)((m)->buf_iova + (m)->data_off + (o))
+#endif
 
 /**
  * A macro that returns the IO address that points to the start of the
@@ -858,7 +864,11 @@ struct rte_mbuf_ext_shared_info {
  * @param m
  *   The packet mbuf.
  */
+#if RTE_IOVA_AS_VA
+#define rte_pktmbuf_iova(m) rte_pktmbuf_mtod(m, rte_iova_t)
+#else
 #define rte_pktmbuf_iova(m) rte_pktmbuf_iova_offset(m, 0)
+#endif
 
 #ifdef __cplusplus
 }
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 40fac3b7c6..e8f7c76c0c 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -962,7 +962,7 @@ restore_mbuf(struct rte_mbuf *m)
 		/* start of buffer is after mbuf structure and priv data */
 
 		m->buf_addr = (char *)m + mbuf_size;
-		m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
+		rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
 		m = m->next;
 	}
 }
diff --git a/lib/vhost/vhost_crypto.c b/lib/vhost/vhost_crypto.c
index 54946f46d9..7b50735796 100644
--- a/lib/vhost/vhost_crypto.c
+++ b/lib/vhost/vhost_crypto.c
@@ -823,11 +823,17 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 	switch (vcrypto->option) {
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
 		m_src->data_len = cipher->para.src_data_len;
-		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
-				cipher->para.src_data_len);
+		if (!RTE_IOVA_AS_VA) {
+			m_src->buf_iova =
+				gpa_to_hpa(vcrypto->dev, desc->addr, cipher->para.src_data_len);
+			if (unlikely(m_src->buf_iova == 0)) {
+				VC_LOG_ERR("zero_copy may fail due to cross page data");
+				ret = VIRTIO_CRYPTO_ERR;
+				goto error_exit;
+			}
+		}
 		m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO);
-		if (unlikely(m_src->buf_iova == 0 ||
-				m_src->buf_addr == NULL)) {
+		if (unlikely(m_src->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
@@ -867,10 +873,17 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 
 	switch (vcrypto->option) {
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
-		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
-				desc->addr, cipher->para.dst_data_len);
+		if (!RTE_IOVA_AS_VA) {
+			m_dst->buf_iova =
+				gpa_to_hpa(vcrypto->dev, desc->addr, cipher->para.dst_data_len);
+			if (unlikely(m_dst->buf_iova == 0)) {
+				VC_LOG_ERR("zero_copy may fail due to cross page data");
+				ret = VIRTIO_CRYPTO_ERR;
+				goto error_exit;
+			}
+		}
 		m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW);
-		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
+		if (unlikely(m_dst->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
@@ -980,11 +993,17 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
 		m_src->data_len = chain->para.src_data_len;
 		m_dst->data_len = chain->para.dst_data_len;
-
-		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
-				chain->para.src_data_len);
+		if (!RTE_IOVA_AS_VA) {
+			m_src->buf_iova =
+				gpa_to_hpa(vcrypto->dev, desc->addr, chain->para.src_data_len);
+			if (unlikely(m_src->buf_iova == 0)) {
+				VC_LOG_ERR("zero_copy may fail due to cross page data");
+				ret = VIRTIO_CRYPTO_ERR;
+				goto error_exit;
+			}
+		}
 		m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO);
-		if (unlikely(m_src->buf_iova == 0 || m_src->buf_addr == NULL)) {
+		if (unlikely(m_src->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
@@ -1024,10 +1043,17 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 
 	switch (vcrypto->option) {
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
-		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
-				desc->addr, chain->para.dst_data_len);
+		if (!RTE_IOVA_AS_VA) {
+			m_dst->buf_iova =
+				gpa_to_hpa(vcrypto->dev, desc->addr, chain->para.dst_data_len);
+			if (unlikely(m_dst->buf_iova == 0)) {
+				VC_LOG_ERR("zero_copy may fail due to cross page data");
+				ret = VIRTIO_CRYPTO_ERR;
+				goto error_exit;
+			}
+		}
 		m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW);
-		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
+		if (unlikely(m_dst->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
diff --git a/meson_options.txt b/meson_options.txt
index 7c220ad68d..f0fa6cf04c 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -44,6 +44,8 @@ option('platform', type: 'string', value: 'native', description:
        'Platform to build, either "native", "generic" or a SoC. Please refer to the Linux build guide for more information.')
 option('enable_trace_fp', type: 'boolean', value: false, description:
        'enable fast path trace points.')
+option('iova_as_va', type: 'boolean', value: false, description:
+       'Build which only supports IOVA as VA mode. Unsupported drivers are disabled.')
 option('tests', type: 'boolean', value: true, description:
        'build unit tests')
 option('use_hpet', type: 'boolean', value: false, description:
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v2 2/5] mbuf: add second dynamic field member for VA only build
  2022-08-29 15:16           ` [PATCH v1 0/4] mbuf dynamic field expansion Shijith Thotton
  2022-09-07 13:43             ` [PATCH v2 0/5] " Shijith Thotton
  2022-09-07 13:43             ` [PATCH v2 1/5] build: add meson option to configure IOVA mode as VA Shijith Thotton
@ 2022-09-07 13:43             ` Shijith Thotton
  2022-09-07 13:43             ` [PATCH v2 3/5] lib: move mbuf next pointer to first cache line Shijith Thotton
                               ` (2 subsequent siblings)
  5 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-07 13:43 UTC (permalink / raw)
  To: dev
  Cc: pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen, thomas

The mbuf physical address field is not used in builds which only use VA.
Its space is used to expand the dynamic field area.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 lib/mbuf/rte_mbuf_core.h | 26 +++++++++++++++++---------
 lib/mbuf/rte_mbuf_dyn.c  |  2 ++
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index c6292e7252..94907f301d 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -579,15 +579,23 @@ struct rte_mbuf {
 	RTE_MARKER cacheline0;
 
 	void *buf_addr;           /**< Virtual address of segment buffer. */
-	/**
-	 * Physical address of segment buffer.
-	 * This field is invalid if the build is configured to use only
-	 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is 1).
-	 * Force alignment to 8-bytes, so as to ensure we have the exact
-	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
-	 * working on vector drivers easier.
-	 */
-	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
+	RTE_STD_C11
+	union {
+		/**
+		 * Physical address of segment buffer.
+		 * This field is invalid if the build is configured to use only
+		 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is 1).
+		 * Force alignment to 8-bytes, so as to ensure we have the exact
+		 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
+		 * working on vector drivers easier.
+		 */
+		rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
+		/**
+		 * Reserved for dynamic field in builds where physical address
+		 * field is invalid.
+		 */
+		uint64_t dynfield2;
+	};
 
 	/* next 8 bytes are initialised on RX descriptor rearm */
 	RTE_MARKER64 rearm_data;
diff --git a/lib/mbuf/rte_mbuf_dyn.c b/lib/mbuf/rte_mbuf_dyn.c
index 4ae79383b5..6a4cf96897 100644
--- a/lib/mbuf/rte_mbuf_dyn.c
+++ b/lib/mbuf/rte_mbuf_dyn.c
@@ -128,6 +128,8 @@ init_shared_mem(void)
 		 */
 		memset(shm, 0, sizeof(*shm));
 		mark_free(dynfield1);
+		if (RTE_IOVA_AS_VA)
+			mark_free(dynfield2);
 
 		/* init free_flags */
 		for (mask = RTE_MBUF_F_FIRST_FREE; mask <= RTE_MBUF_F_LAST_FREE; mask <<= 1)
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v2 3/5] lib: move mbuf next pointer to first cache line
  2022-08-29 15:16           ` [PATCH v1 0/4] mbuf dynamic field expansion Shijith Thotton
                               ` (2 preceding siblings ...)
  2022-09-07 13:43             ` [PATCH v2 2/5] mbuf: add second dynamic field member for VA only build Shijith Thotton
@ 2022-09-07 13:43             ` Shijith Thotton
  2022-09-07 13:43             ` [PATCH v2 4/5] drivers: mark Marvell cnxk PMDs work with IOVA as VA Shijith Thotton
  2022-09-07 13:43             ` [PATCH v2 5/5] drivers: mark software " Shijith Thotton
  5 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-07 13:43 UTC (permalink / raw)
  To: dev
  Cc: pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen, thomas,
	ferruh.yigit

Swapped position of mbuf next pointer and second dynamic field (dynfield2)
if the build is configured to use IOVA as VA. This is to move the mbuf
next pointer to first cache line. kni library is disabled for this
change as it depends on the offset value of next pointer.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 lib/mbuf/rte_mbuf_core.h | 29 +++++++++++++++++++++--------
 lib/meson.build          |  3 +++
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index 94907f301d..915dcd8653 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -590,11 +590,14 @@ struct rte_mbuf {
 		 * working on vector drivers easier.
 		 */
 		rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
+#if RTE_IOVA_AS_VA
 		/**
-		 * Reserved for dynamic field in builds where physical address
-		 * field is invalid.
+		 * Next segment of scattered packet.
+		 * This field is valid when physical address field is invalid.
+		 * Otherwise next pointer in the second cache line will be used.
 		 */
-		uint64_t dynfield2;
+		struct rte_mbuf *next;
+#endif
 	};
 
 	/* next 8 bytes are initialised on RX descriptor rearm */
@@ -711,11 +714,21 @@ struct rte_mbuf {
 	/* second cache line - fields only used in slow path or on TX */
 	RTE_MARKER cacheline1 __rte_cache_min_aligned;
 
-	/**
-	 * Next segment of scattered packet. Must be NULL in the last segment or
-	 * in case of non-segmented packet.
-	 */
-	struct rte_mbuf *next;
+	RTE_STD_C11
+	union {
+#if !RTE_IOVA_AS_VA
+		/**
+		 * Next segment of scattered packet. Must be NULL in the last
+		 * segment or in case of non-segmented packet.
+		 */
+		struct rte_mbuf *next;
+#endif
+		/**
+		 * Reserved for dynamic field when the next pointer is in first
+		 * cache line (i.e. RTE_IOVA_AS_VA is 1).
+		 */
+		uint64_t dynfield2;
+	};
 
 	/* fields to support TX offloads */
 	RTE_STD_C11
diff --git a/lib/meson.build b/lib/meson.build
index c648f7d800..73d93bc803 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -88,6 +88,9 @@ optional_libs = [
 disabled_libs = []
 opt_disabled_libs = run_command(list_dir_globs, get_option('disable_libs'),
         check: true).stdout().split()
+if dpdk_conf.get('RTE_IOVA_AS_VA') == 1
+    opt_disabled_libs += ['kni']
+endif
 foreach l:opt_disabled_libs
     if not optional_libs.contains(l)
         warning('Cannot disable mandatory library "@0@"'.format(l))
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v2 4/5] drivers: mark Marvell cnxk PMDs work with IOVA as VA
  2022-08-29 15:16           ` [PATCH v1 0/4] mbuf dynamic field expansion Shijith Thotton
                               ` (3 preceding siblings ...)
  2022-09-07 13:43             ` [PATCH v2 3/5] lib: move mbuf next pointer to first cache line Shijith Thotton
@ 2022-09-07 13:43             ` Shijith Thotton
  2022-09-07 13:43             ` [PATCH v2 5/5] drivers: mark software " Shijith Thotton
  5 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-07 13:43 UTC (permalink / raw)
  To: dev
  Cc: pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen, thomas,
	Jan Viktorin, Ruifeng Wang, Nithin Dabilpuram, Kiran Kumar K,
	Sunil Kumar Kori, Satha Rao, Ankur Dwivedi, Anoob Joseph,
	Tejasree Kondoj, Radha Mohan Chintakuntla, Veerasenareddy Burru,
	Ashwin Sekhar T K, Jakub Palider, Tomasz Duszynski

Enabled the flag pmd_iova_as_va in cnxk driver build files as they work
with IOVA as VA. Updated cn9k and cn10k soc build configurations to
enable the IOVA as VA build by default.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 config/arm/meson.build                   | 8 ++++++--
 drivers/common/cnxk/meson.build          | 1 +
 drivers/crypto/cnxk/cn10k_ipsec_la_ops.h | 4 ++--
 drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  | 2 +-
 drivers/crypto/cnxk/meson.build          | 2 ++
 drivers/dma/cnxk/meson.build             | 1 +
 drivers/event/cnxk/meson.build           | 1 +
 drivers/mempool/cnxk/meson.build         | 1 +
 drivers/net/cnxk/cnxk_ethdev.h           | 1 -
 drivers/net/cnxk/meson.build             | 1 +
 drivers/raw/cnxk_bphy/meson.build        | 1 +
 drivers/raw/cnxk_gpio/meson.build        | 1 +
 12 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/config/arm/meson.build b/config/arm/meson.build
index 9f1636e0d5..4e95e8b388 100644
--- a/config/arm/meson.build
+++ b/config/arm/meson.build
@@ -294,7 +294,8 @@ soc_cn10k = {
     'flags': [
         ['RTE_MAX_LCORE', 24],
         ['RTE_MAX_NUMA_NODES', 1],
-        ['RTE_MEMPOOL_ALIGN', 128]
+        ['RTE_MEMPOOL_ALIGN', 128],
+        ['RTE_IOVA_AS_VA', 1]
     ],
     'part_number': '0xd49',
     'extra_march_features': ['crypto'],
@@ -370,7 +371,10 @@ soc_cn9k = {
     'description': 'Marvell OCTEON 9',
     'implementer': '0x43',
     'part_number': '0xb2',
-    'numa': false
+    'numa': false,
+    'flags': [
+        ['RTE_IOVA_AS_VA', 1]
+    ]
 }
 
 soc_stingray = {
diff --git a/drivers/common/cnxk/meson.build b/drivers/common/cnxk/meson.build
index 6f808271d1..d019cfa8d1 100644
--- a/drivers/common/cnxk/meson.build
+++ b/drivers/common/cnxk/meson.build
@@ -86,3 +86,4 @@ sources += files('cnxk_telemetry_bphy.c',
 )
 
 deps += ['bus_pci', 'net', 'telemetry']
+pmd_iova_as_va = true
diff --git a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
index 66cfe6ca98..16db14344d 100644
--- a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
+++ b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
@@ -85,7 +85,7 @@ process_outb_sa(struct roc_cpt_lf *lf, struct rte_crypto_op *cop,
 
 	/* Prepare CPT instruction */
 	inst->w4.u64 = inst_w4_u64 | rte_pktmbuf_pkt_len(m_src);
-	dptr = rte_pktmbuf_iova(m_src);
+	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
 	inst->dptr = dptr;
 	inst->rptr = dptr;
 
@@ -102,7 +102,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct cn10k_ipsec_sa *sa,
 
 	/* Prepare CPT instruction */
 	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
-	dptr = rte_pktmbuf_iova(m_src);
+	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
 	inst->dptr = dptr;
 	inst->rptr = dptr;
 
diff --git a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
index e469596756..8b68e4c728 100644
--- a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
+++ b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
@@ -99,7 +99,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct cn9k_ipsec_sa *sa,
 
 	/* Prepare CPT instruction */
 	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
-	inst->dptr = inst->rptr = rte_pktmbuf_iova(m_src);
+	inst->dptr = inst->rptr = rte_pktmbuf_mtod(m_src, uint64_t);
 	inst->w7.u64 = sa->inst.w7;
 }
 #endif /* __CN9K_IPSEC_LA_OPS_H__ */
diff --git a/drivers/crypto/cnxk/meson.build b/drivers/crypto/cnxk/meson.build
index 23a1cc3aac..764e7bb99a 100644
--- a/drivers/crypto/cnxk/meson.build
+++ b/drivers/crypto/cnxk/meson.build
@@ -31,3 +31,5 @@ if get_option('buildtype').contains('debug')
 else
     cflags += [ '-ULA_IPSEC_DEBUG' ]
 endif
+
+pmd_iova_as_va = true
diff --git a/drivers/dma/cnxk/meson.build b/drivers/dma/cnxk/meson.build
index d4be4ee860..ef0e3db109 100644
--- a/drivers/dma/cnxk/meson.build
+++ b/drivers/dma/cnxk/meson.build
@@ -3,3 +3,4 @@
 
 deps += ['bus_pci', 'common_cnxk', 'dmadev']
 sources = files('cnxk_dmadev.c')
+pmd_iova_as_va = true
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index b27bae7b12..650d0d4256 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -479,3 +479,4 @@ foreach flag: extra_flags
 endforeach
 
 deps += ['bus_pci', 'common_cnxk', 'net_cnxk', 'crypto_cnxk']
+pmd_iova_as_va = true
diff --git a/drivers/mempool/cnxk/meson.build b/drivers/mempool/cnxk/meson.build
index d5d1978569..a328176457 100644
--- a/drivers/mempool/cnxk/meson.build
+++ b/drivers/mempool/cnxk/meson.build
@@ -17,3 +17,4 @@ sources = files(
 )
 
 deps += ['eal', 'mbuf', 'kvargs', 'bus_pci', 'common_cnxk', 'mempool']
+pmd_iova_as_va = true
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 4cb7c9e90c..abf1e4215f 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -690,7 +690,6 @@ cnxk_pktmbuf_detach(struct rte_mbuf *m)
 
 	m->priv_size = priv_size;
 	m->buf_addr = (char *)m + mbuf_size;
-	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
 	m->buf_len = (uint16_t)buf_len;
 	rte_pktmbuf_reset_headroom(m);
 	m->data_len = 0;
diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
index f347e98fce..01489b3a36 100644
--- a/drivers/net/cnxk/meson.build
+++ b/drivers/net/cnxk/meson.build
@@ -194,3 +194,4 @@ foreach flag: extra_flags
 endforeach
 
 headers = files('rte_pmd_cnxk.h')
+pmd_iova_as_va = true
diff --git a/drivers/raw/cnxk_bphy/meson.build b/drivers/raw/cnxk_bphy/meson.build
index 14147feaf4..781ed63e05 100644
--- a/drivers/raw/cnxk_bphy/meson.build
+++ b/drivers/raw/cnxk_bphy/meson.build
@@ -10,3 +10,4 @@ sources = files(
         'cnxk_bphy_irq.c',
 )
 headers = files('rte_pmd_bphy.h')
+pmd_iova_as_va = true
diff --git a/drivers/raw/cnxk_gpio/meson.build b/drivers/raw/cnxk_gpio/meson.build
index a75a5b9084..f9aed173b6 100644
--- a/drivers/raw/cnxk_gpio/meson.build
+++ b/drivers/raw/cnxk_gpio/meson.build
@@ -9,3 +9,4 @@ sources = files(
         'cnxk_gpio_selftest.c',
 )
 headers = files('rte_pmd_cnxk_gpio.h')
+pmd_iova_as_va = true
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v2 5/5] drivers: mark software PMDs work with IOVA as VA
  2022-08-29 15:16           ` [PATCH v1 0/4] mbuf dynamic field expansion Shijith Thotton
                               ` (4 preceding siblings ...)
  2022-09-07 13:43             ` [PATCH v2 4/5] drivers: mark Marvell cnxk PMDs work with IOVA as VA Shijith Thotton
@ 2022-09-07 13:43             ` Shijith Thotton
  5 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-07 13:43 UTC (permalink / raw)
  To: dev
  Cc: pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen, thomas,
	Ruifeng Wang, Fan Zhang, Pablo de Lara, Chengwen Feng,
	Kevin Laatz, Mattias Rönnblom, Liang Ma, Peter Mccarthy,
	Harry van Haaren, Artem V. Andreev, Andrew Rybchenko,
	John W. Linville, Ciara Loftus, Qi Zhang, Chas Williams,
	Min Hu (Connor),
	Gaetan Rivet, Jakub Grajciar, Tetsuya Mukawa, Sachin Saxena,
	Hemant Agrawal

Enabled software PMDs in IOVA as VA build as they work with IOVA as VA.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 drivers/crypto/armv8/meson.build    | 1 +
 drivers/crypto/ipsec_mb/meson.build | 1 +
 drivers/crypto/null/meson.build     | 1 +
 drivers/crypto/openssl/meson.build  | 1 +
 drivers/dma/skeleton/meson.build    | 1 +
 drivers/event/dsw/meson.build       | 1 +
 drivers/event/opdl/meson.build      | 1 +
 drivers/event/skeleton/meson.build  | 1 +
 drivers/event/sw/meson.build        | 1 +
 drivers/mempool/bucket/meson.build  | 1 +
 drivers/mempool/ring/meson.build    | 1 +
 drivers/mempool/stack/meson.build   | 1 +
 drivers/net/af_packet/meson.build   | 1 +
 drivers/net/af_xdp/meson.build      | 2 ++
 drivers/net/bonding/meson.build     | 1 +
 drivers/net/failsafe/meson.build    | 1 +
 drivers/net/memif/meson.build       | 1 +
 drivers/net/null/meson.build        | 1 +
 drivers/net/pcap/meson.build        | 1 +
 drivers/net/ring/meson.build        | 1 +
 drivers/net/tap/meson.build         | 1 +
 drivers/raw/skeleton/meson.build    | 1 +
 22 files changed, 23 insertions(+)

diff --git a/drivers/crypto/armv8/meson.build b/drivers/crypto/armv8/meson.build
index 5effba8bbc..a2c9d69e3f 100644
--- a/drivers/crypto/armv8/meson.build
+++ b/drivers/crypto/armv8/meson.build
@@ -17,3 +17,4 @@ endif
 ext_deps += dep
 deps += ['bus_vdev']
 sources = files('rte_armv8_pmd.c', 'rte_armv8_pmd_ops.c')
+pmd_iova_as_va = true
diff --git a/drivers/crypto/ipsec_mb/meson.build b/drivers/crypto/ipsec_mb/meson.build
index a89b29d6c3..785440b593 100644
--- a/drivers/crypto/ipsec_mb/meson.build
+++ b/drivers/crypto/ipsec_mb/meson.build
@@ -37,3 +37,4 @@ sources = files(
         'pmd_zuc.c',
 )
 deps += ['bus_vdev', 'net', 'security']
+pmd_iova_as_va = true
diff --git a/drivers/crypto/null/meson.build b/drivers/crypto/null/meson.build
index acc16e7d81..68dc030075 100644
--- a/drivers/crypto/null/meson.build
+++ b/drivers/crypto/null/meson.build
@@ -9,3 +9,4 @@ endif
 
 deps += 'bus_vdev'
 sources = files('null_crypto_pmd.c', 'null_crypto_pmd_ops.c')
+pmd_iova_as_va = true
diff --git a/drivers/crypto/openssl/meson.build b/drivers/crypto/openssl/meson.build
index cd962da1d6..25c44d0064 100644
--- a/drivers/crypto/openssl/meson.build
+++ b/drivers/crypto/openssl/meson.build
@@ -15,3 +15,4 @@ endif
 deps += 'bus_vdev'
 sources = files('rte_openssl_pmd.c', 'rte_openssl_pmd_ops.c')
 ext_deps += dep
+pmd_iova_as_va = true
diff --git a/drivers/dma/skeleton/meson.build b/drivers/dma/skeleton/meson.build
index 8871b80956..2b48d4e031 100644
--- a/drivers/dma/skeleton/meson.build
+++ b/drivers/dma/skeleton/meson.build
@@ -5,3 +5,4 @@ deps += ['dmadev', 'kvargs', 'ring', 'bus_vdev']
 sources = files(
         'skeleton_dmadev.c',
 )
+pmd_iova_as_va = true
diff --git a/drivers/event/dsw/meson.build b/drivers/event/dsw/meson.build
index 2df0fac4ff..477a6e5910 100644
--- a/drivers/event/dsw/meson.build
+++ b/drivers/event/dsw/meson.build
@@ -6,3 +6,4 @@ if cc.has_argument('-Wno-format-nonliteral')
     cflags += '-Wno-format-nonliteral'
 endif
 sources = files('dsw_evdev.c', 'dsw_event.c', 'dsw_xstats.c')
+pmd_iova_as_va = true
diff --git a/drivers/event/opdl/meson.build b/drivers/event/opdl/meson.build
index 786d2f4e82..e1a3de7ee3 100644
--- a/drivers/event/opdl/meson.build
+++ b/drivers/event/opdl/meson.build
@@ -9,3 +9,4 @@ sources = files(
         'opdl_test.c',
 )
 deps += ['bus_vdev']
+pmd_iova_as_va = true
diff --git a/drivers/event/skeleton/meson.build b/drivers/event/skeleton/meson.build
index acfe156532..0ae514668c 100644
--- a/drivers/event/skeleton/meson.build
+++ b/drivers/event/skeleton/meson.build
@@ -3,3 +3,4 @@
 
 sources = files('skeleton_eventdev.c')
 deps += ['bus_pci', 'bus_vdev']
+pmd_iova_as_va = true
diff --git a/drivers/event/sw/meson.build b/drivers/event/sw/meson.build
index 6f81567efb..210cc1d048 100644
--- a/drivers/event/sw/meson.build
+++ b/drivers/event/sw/meson.build
@@ -9,3 +9,4 @@ sources = files(
         'sw_evdev.c',
 )
 deps += ['hash', 'bus_vdev']
+pmd_iova_as_va = true
diff --git a/drivers/mempool/bucket/meson.build b/drivers/mempool/bucket/meson.build
index 0051b6ac3c..31ba101677 100644
--- a/drivers/mempool/bucket/meson.build
+++ b/drivers/mempool/bucket/meson.build
@@ -12,3 +12,4 @@ if is_windows
 endif
 
 sources = files('rte_mempool_bucket.c')
+pmd_iova_as_va = true
diff --git a/drivers/mempool/ring/meson.build b/drivers/mempool/ring/meson.build
index a021e908cf..f75f2125d7 100644
--- a/drivers/mempool/ring/meson.build
+++ b/drivers/mempool/ring/meson.build
@@ -2,3 +2,4 @@
 # Copyright(c) 2017 Intel Corporation
 
 sources = files('rte_mempool_ring.c')
+pmd_iova_as_va = true
diff --git a/drivers/mempool/stack/meson.build b/drivers/mempool/stack/meson.build
index 580dde79eb..3b94ed5b5e 100644
--- a/drivers/mempool/stack/meson.build
+++ b/drivers/mempool/stack/meson.build
@@ -4,3 +4,4 @@
 sources = files('rte_mempool_stack.c')
 
 deps += ['stack']
+pmd_iova_as_va = true
diff --git a/drivers/net/af_packet/meson.build b/drivers/net/af_packet/meson.build
index c014e9b61b..92fafea363 100644
--- a/drivers/net/af_packet/meson.build
+++ b/drivers/net/af_packet/meson.build
@@ -6,3 +6,4 @@ if not is_linux
     reason = 'only supported on Linux'
 endif
 sources = files('rte_eth_af_packet.c')
+pmd_iova_as_va = true
diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build
index 1e0de23705..35812511e2 100644
--- a/drivers/net/af_xdp/meson.build
+++ b/drivers/net/af_xdp/meson.build
@@ -55,3 +55,5 @@ else
     build = false
     reason = 'missing header, "linux/if_xdp.h"'
 endif
+
+pmd_iova_as_va = true
diff --git a/drivers/net/bonding/meson.build b/drivers/net/bonding/meson.build
index 18ad7e21f3..b61166888e 100644
--- a/drivers/net/bonding/meson.build
+++ b/drivers/net/bonding/meson.build
@@ -22,3 +22,4 @@ deps += 'sched' # needed for rte_bitmap.h
 deps += ['ip_frag']
 
 headers = files('rte_eth_bond.h', 'rte_eth_bond_8023ad.h')
+pmd_iova_as_va = true
diff --git a/drivers/net/failsafe/meson.build b/drivers/net/failsafe/meson.build
index b8e5bf70f8..a90be869d1 100644
--- a/drivers/net/failsafe/meson.build
+++ b/drivers/net/failsafe/meson.build
@@ -27,3 +27,4 @@ sources = files(
         'failsafe_ops.c',
         'failsafe_rxtx.c',
 )
+pmd_iova_as_va = true
diff --git a/drivers/net/memif/meson.build b/drivers/net/memif/meson.build
index 680bc8631c..59140dc3dd 100644
--- a/drivers/net/memif/meson.build
+++ b/drivers/net/memif/meson.build
@@ -12,3 +12,4 @@ sources = files(
 )
 
 deps += ['hash']
+pmd_iova_as_va = true
diff --git a/drivers/net/null/meson.build b/drivers/net/null/meson.build
index 0251578aab..6b7adbd760 100644
--- a/drivers/net/null/meson.build
+++ b/drivers/net/null/meson.build
@@ -8,3 +8,4 @@ if is_windows
 endif
 
 sources = files('rte_eth_null.c')
+pmd_iova_as_va = true
diff --git a/drivers/net/pcap/meson.build b/drivers/net/pcap/meson.build
index ed7864eb9d..73c65dd2a6 100644
--- a/drivers/net/pcap/meson.build
+++ b/drivers/net/pcap/meson.build
@@ -15,3 +15,4 @@ ext_deps += pcap_dep
 if is_windows
     ext_deps += cc.find_library('iphlpapi', required: true)
 endif
+pmd_iova_as_va = true
diff --git a/drivers/net/ring/meson.build b/drivers/net/ring/meson.build
index 0156b37aad..45fa3492cf 100644
--- a/drivers/net/ring/meson.build
+++ b/drivers/net/ring/meson.build
@@ -9,3 +9,4 @@ endif
 
 sources = files('rte_eth_ring.c')
 headers = files('rte_eth_ring.h')
+pmd_iova_as_va = true
diff --git a/drivers/net/tap/meson.build b/drivers/net/tap/meson.build
index c09713a67b..da23599830 100644
--- a/drivers/net/tap/meson.build
+++ b/drivers/net/tap/meson.build
@@ -35,3 +35,4 @@ foreach arg:args
     config.set(arg[0], cc.has_header_symbol(arg[1], arg[2]))
 endforeach
 configure_file(output : 'tap_autoconf.h', configuration : config)
+pmd_iova_as_va = true
diff --git a/drivers/raw/skeleton/meson.build b/drivers/raw/skeleton/meson.build
index 950a33cc20..439ab8792d 100644
--- a/drivers/raw/skeleton/meson.build
+++ b/drivers/raw/skeleton/meson.build
@@ -6,3 +6,4 @@ sources = files(
         'skeleton_rawdev.c',
         'skeleton_rawdev_test.c',
 )
+pmd_iova_as_va = true
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [EXT] Re: [PATCH v1 2/4] mbuf: add second dynamic field member for VA only build
  2022-08-30 13:22                   ` Honnappa Nagarahalli
@ 2022-09-07 13:55                     ` Shijith Thotton
  0 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-07 13:55 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Pavan Nikhilesh Bhagavatula,
	Bruce Richardson, Morten Brørup
  Cc: dev, Jerin Jacob Kollanukkaran, olivier.matz, stephen, thomas, nd

>> >
>> > ----------------------------------------------------------------------
>> > On Mon, Aug 29, 2022 at 08:32:20PM +0200, Morten Brørup wrote:
>> > >
>> > > > From: Shijith Thotton [mailto:sthotton@marvell.com]
>> > > > Sent: Monday, 29 August 2022 17.16
>> > > >
>> > > > mbuf physical address field is not used in builds which only uses VA.
>> > > > It is used to expand the dynamic field area.
>> > > >
>> > > > Signed-off-by: Shijith Thotton <sthotton@marvell.com>
>> > > > ---
>> > > >  lib/mbuf/rte_mbuf_core.h | 26 +++++++++++++++++---------
>> > > > lib/mbuf/rte_mbuf_dyn.c  |  2 ++
>> > > >  2 files changed, 19 insertions(+), 9 deletions(-)
>> > > >
>> > > > diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
>> > > > index 81cb07c2e4..98ce62fd6a 100644
>> > > > --- a/lib/mbuf/rte_mbuf_core.h
>> > > > +++ b/lib/mbuf/rte_mbuf_core.h
>> > > > @@ -579,15 +579,23 @@ struct rte_mbuf {
>> > > >  	RTE_MARKER cacheline0;
>> > > >
>> > > >  	void *buf_addr;           /**< Virtual address of segment buffer.
>> > > > */
>> > > > -	/**
>> > > > -	 * Physical address of segment buffer.
>> > > > -	 * This field is invalid if the build is configured to use only
>> > > > -	 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is defined).
>> > > > -	 * Force alignment to 8-bytes, so as to ensure we have the exact
>> > > > -	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
>> > > > -	 * working on vector drivers easier.
>> > > > -	 */
>> > > > -	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
>> > > > +	RTE_STD_C11
>> > > > +	union {
>> > > > +		/**
>> > > > +		 * Physical address of segment buffer.
>> > > > +		 * This field is invalid if the build is configured to use
>> > > > only
>> > > > +		 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is
>> > > > defined).
>> > > > +		 * Force alignment to 8-bytes, so as to ensure we have the
>> > > > exact
>> > > > +		 * same mbuf cacheline0 layout for 32-bit and 64-bit. This
>> > > > makes
>> > > > +		 * working on vector drivers easier.
>> > > > +		 */
>> > > > +		rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
>> > > > +		/**
>> > > > +		 * Reserved for dynamic field in builds where physical
>> > > > address
>> > > > +		 * field is invalid.
>> > > > +		 */
>> > > > +		uint64_t dynfield2;
>> > > > +	};
>> > > >
>> > > >  	/* next 8 bytes are initialised on RX descriptor rearm */
>> > > >  	RTE_MARKER64 rearm_data;
>> > >
>> > > I know that the intention here is to keep the rte_mbuf structure
>> > > intact,
>> > which will certainly improve the probability of getting this patch
>> > series into DPDK.
>> > >
>> > > So, I will add a comment for the benefit of the other participants
>> > > in the
>> > discussion:
>> > >
>> > > With this patch, and in RTE_IOVA_AS_VA mode, it becomes possible to
>> > move m->next into the first cache line, so rte_pktmbuf_prefree_seg()
>> > does not have to touch the second cache line, thus potentially
>> > improving performance by eliminating one cache miss per freed packet
>> > segment. (I also recall someone mentioning that some PMDs set m->next
>> > on RX... If that is the case, a cache miss per packet might also be
>> > avoidable in those PMDs.)
>> > >
>> > > Obviously, moving m->next to the first cache line is not related to
>> > > this patch
>> > series, but would belong in a completely different patch.
>> > >
>> >
>> > +1 to that, with the exception that if it is decided to move the next
>> > pointer rather than use this as dynamic space, I think it *should* be
>> > in this patch series, rather than mucking about with mbuf twice. :-)
>>
>> +1 When RTE_IOVA_AS_VA is set we can set mbuf->next as the dynamic field
>> and move it to mbuf->buf_iova.
>> mbuf->next write is one of the prominent hotspot in arm platforms.
>+1 for reducing the cachelines that need to be touched
 
Added a new patch to move next pointer to first cache line in v2. Please review.
https://patchwork.dpdk.org/project/dpdk/patch/20220907134340.3629224-4-sthotton@marvell.com/

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v2 1/5] build: add meson option to configure IOVA mode as VA
  2022-09-07 13:43             ` [PATCH v2 1/5] build: add meson option to configure IOVA mode as VA Shijith Thotton
@ 2022-09-07 15:31               ` Stephen Hemminger
  2022-09-07 15:38                 ` Bruce Richardson
  0 siblings, 1 reply; 88+ messages in thread
From: Stephen Hemminger @ 2022-09-07 15:31 UTC (permalink / raw)
  To: Shijith Thotton
  Cc: dev, pbhagavatula, Honnappa.Nagarahalli, bruce.richardson,
	jerinj, mb, olivier.matz, thomas, Nicolas Chautru, Ciara Power,
	Konstantin Ananyev, Chengwen Feng, Kevin Laatz, Reshma Pattan,
	Maxime Coquelin, Chenbo Xia

On Wed, 7 Sep 2022 19:13:36 +0530
Shijith Thotton <sthotton@marvell.com> wrote:

> IOVA mode in DPDK is either PA or VA. The new build option iova_as_va
> configures the mode to VA at compile time and prevents setting it to PA
> at runtime. For now, all drivers which are not always enabled are
> disabled with this option. Supported driver can set the flag
> pmd_iova_as_va in its build file to enable build.
> 
> mbuf structure holds the physical (PA) and virtual address (VA) of a
> buffer. If IOVA mode is set to VA, PA is redundant as it is the same as
> VA. So PA field need not be updated and marked invalid if the build is
> configured to use only VA.
> 
> Signed-off-by: Shijith Thotton <sthotton@marvell.com>

NAK
This works ok for use cases where NFV is building local version
of DPDK, but meson options don't work for distro vendors.

That is why DPDK went away from having so many config options.

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v2 1/5] build: add meson option to configure IOVA mode as VA
  2022-09-07 15:31               ` Stephen Hemminger
@ 2022-09-07 15:38                 ` Bruce Richardson
  2022-09-07 21:33                   ` Morten Brørup
  0 siblings, 1 reply; 88+ messages in thread
From: Bruce Richardson @ 2022-09-07 15:38 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Shijith Thotton, dev, pbhagavatula, Honnappa.Nagarahalli, jerinj,
	mb, olivier.matz, thomas, Nicolas Chautru, Ciara Power,
	Konstantin Ananyev, Chengwen Feng, Kevin Laatz, Reshma Pattan,
	Maxime Coquelin, Chenbo Xia

On Wed, Sep 07, 2022 at 08:31:09AM -0700, Stephen Hemminger wrote:
> On Wed, 7 Sep 2022 19:13:36 +0530 Shijith Thotton <sthotton@marvell.com>
> wrote:
> 
> > IOVA mode in DPDK is either PA or VA. The new build option iova_as_va
> > configures the mode to VA at compile time and prevents setting it to PA
> > at runtime. For now, all drivers which are not always enabled are
> > disabled with this option. Supported driver can set the flag
> > pmd_iova_as_va in its build file to enable build.
> > 
> > mbuf structure holds the physical (PA) and virtual address (VA) of a
> > buffer. If IOVA mode is set to VA, PA is redundant as it is the same as
> > VA. So PA field need not be updated and marked invalid if the build is
> > configured to use only VA.
> > 
> > Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> 
> NAK This works ok for use cases where NFV is building local version of
> DPDK, but meson options don't work for distro vendors.
> 
I think for something of this magnitude a build-time option is definitely
the way to go. Sure, it won't be available in distro-packages, but for
something like this it's just too invasive - and rather niche IMHO - to
make a runtime option.

/Bruce

^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [PATCH v2 1/5] build: add meson option to configure IOVA mode as VA
  2022-09-07 15:38                 ` Bruce Richardson
@ 2022-09-07 21:33                   ` Morten Brørup
  0 siblings, 0 replies; 88+ messages in thread
From: Morten Brørup @ 2022-09-07 21:33 UTC (permalink / raw)
  To: Bruce Richardson, Stephen Hemminger, Shijith Thotton
  Cc: dev, pbhagavatula, Honnappa.Nagarahalli, jerinj, olivier.matz,
	thomas, Nicolas Chautru, Ciara Power, Konstantin Ananyev,
	Chengwen Feng, Kevin Laatz, Reshma Pattan, Maxime Coquelin,
	Chenbo Xia

> From: Bruce Richardson [mailto:bruce.richardson@intel.com]
> Sent: Wednesday, 7 September 2022 17.38
> 
> On Wed, Sep 07, 2022 at 08:31:09AM -0700, Stephen Hemminger wrote:
> > On Wed, 7 Sep 2022 19:13:36 +0530 Shijith Thotton
> <sthotton@marvell.com>
> > wrote:
> >
> > > IOVA mode in DPDK is either PA or VA. The new build option
> iova_as_va
> > > configures the mode to VA at compile time and prevents setting it
> to PA
> > > at runtime. For now, all drivers which are not always enabled are
> > > disabled with this option. Supported driver can set the flag
> > > pmd_iova_as_va in its build file to enable build.
> > >
> > > mbuf structure holds the physical (PA) and virtual address (VA) of
> a
> > > buffer. If IOVA mode is set to VA, PA is redundant as it is the
> same as
> > > VA. So PA field need not be updated and marked invalid if the build
> is
> > > configured to use only VA.
> > >
> > > Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> >
> > NAK This works ok for use cases where NFV is building local version
> of
> > DPDK, but meson options don't work for distro vendors.
> >
> I think for something of this magnitude a build-time option is
> definitely
> the way to go. Sure, it won't be available in distro-packages, but for
> something like this it's just too invasive - and rather niche IMHO - to
> make a runtime option.
+1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [EXT] Re: [PATCH] mbuf: add mbuf physical address field to dynamic field
  2022-08-30 13:07     ` [PATCH] mbuf: add mbuf physical address field to dynamic field Ferruh Yigit
@ 2022-09-12 13:19       ` Shijith Thotton
  0 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-12 13:19 UTC (permalink / raw)
  To: Ferruh Yigit
  Cc: dev, Jerin Jacob Kollanukkaran, olivier.matz, thomas,
	Bruce Richardson, Morten Brørup, stephen

>On 7/1/2022 1:24 PM, Shijith Thotton wrote:
>>>> If all devices are configured to run in IOVA mode as VA, physical
>>>> address field of mbuf (buf_iova) won't be used. In such cases, buf_iova
>>>> space is free to use as a dynamic field. So a new dynamic field member
>>>> (dynfield2) is added in mbuf structure to make use of that space.
>>>>
>>>> A new mbuf flag RTE_MBUF_F_DYNFIELD2 is introduced to help identify the
>>>> mbuf that can use dynfield2.
>>>>
>>>> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
>>>
>>> This seems like a complex and potentially error prone way to do this.
>>> What is the use case?
>>>
>>
>> PCI drivers with the flag RTE_PCI_DRV_NEED_IOVA_AS_VA only works in IOVA
>mode as
>> VA. buf_iova field of mbuf is not used by those PMDs and can be used as a
>> dynamic area to save space.
>>
>
>'RTE_PCI_DRV_NEED_IOVA_AS_VA' means device can *only* work in
>RTE_IOVA_VA mode, right?
>
>Although there are many devices that support RTE_IOVA_VA mode, only a
>few of them work *only* with RTE_IOVA_VA mode; the rest can prefer to use
>RTE_IOVA_PA or RTE_IOVA_VA.
>Also using KNI forces to use RTE_IOVA_PA mode.

I have removed reference to *only* in the latest series [1].
So all PMDs which support IOVA as VA can be enabled with the build option iova_as_va.
But it will require PMDs to not use the PA field of mbuf (buf_iova) on such builds.

1. https://inbox.dpdk.org/dev/20220907134340.3629224-1-sthotton@marvell.com/

>And moving the 'buf_iova' field out of the first cache line will impact the
>performance for RTE_IOVA_PA mode.
>
 
buf_iova is not moved from its position in default build.
Mbuf next pointer uses buf_iova space in iova_as_va build.
Please review https://patchwork.dpdk.org/project/dpdk/patch/20220907134340.3629224-4-sthotton@marvell.com/

>Since KNI is going away and vfio is more preferred way, it can be OK to
>make 'buf_iova' a dynamic field in the long term, but I think it is better to
>do this slowly, like should we wait for KNI to go away first?
>
>
>>> How much of a performance gain?
>>
>> No change in performance.

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v2 0/5] mbuf dynamic field expansion
  2022-09-07 13:43             ` [PATCH v2 0/5] " Shijith Thotton
@ 2022-09-21  9:43               ` David Marchand
  2022-09-21 14:01                 ` [EXT] " Shijith Thotton
  2022-09-21 13:56               ` [PATCH v3 " Shijith Thotton
  1 sibling, 1 reply; 88+ messages in thread
From: David Marchand @ 2022-09-21  9:43 UTC (permalink / raw)
  To: Shijith Thotton
  Cc: dev, pbhagavatula, Honnappa.Nagarahalli, bruce.richardson,
	jerinj, mb, olivier.matz, stephen, thomas

On Wed, Sep 7, 2022 at 3:44 PM Shijith Thotton <sthotton@marvell.com> wrote:
>
> This is a continuation of the discussions[1] to add mbuf physical address field to dynamic field.
> Previous version was to add PA field to dynamic field area based on the EAL IOVA mode option. It was
> deemed unsafe as some components could still use the PA field without checking IOVA mode and there
> are drivers which need PA to work. One suggestion was to make the IOVA mode check at compile time so
> that drivers which need PA can be disabled during build. This series adds this new meson build
> option. Second patch adds mbuf PA field to dynamic field on such builds. Last two patches enable
> Marvell cnxk PMDs and software PMDs in IOVA as VA build as they work without PA field.
>
> 1. https://inbox.dpdk.org/dev/57d2ab7fff672716d37ba4078e2e3bb2db126607.1656605763.git.sthotton@marvell.com/.
>
> v2:
>  * Used RTE_IOVA_AS_VA instead of rte_is_iova_as_va_build().
>  * Moved mbuf next pointer to first cacheline if RTE_IOVA_AS_VA = 1.

It seems there was an issue with receiving this series from patchwork pov.
Please resend this v2 so that we get it through the CI.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v3 0/5] mbuf dynamic field expansion
  2022-09-07 13:43             ` [PATCH v2 0/5] " Shijith Thotton
  2022-09-21  9:43               ` David Marchand
@ 2022-09-21 13:56               ` Shijith Thotton
  2022-09-21 13:56                 ` [PATCH v3 1/5] build: add meson option to configure IOVA mode as VA Shijith Thotton
                                   ` (8 more replies)
  1 sibling, 9 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-21 13:56 UTC (permalink / raw)
  To: dev
  Cc: pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen, thomas,
	david.marchand

This is a continuation of the discussions[1] to add mbuf physical address field to dynamic field.
Previous version was to add PA field to dynamic field area based on the EAL IOVA mode option. It was
deemed unsafe as some components could still use the PA field without checking IOVA mode and there
are drivers which need PA to work. One suggestion was to make the IOVA mode check at compile time so
that drivers which need PA can be disabled during build. This series adds this new meson build
option. The second patch adds the mbuf PA field to the dynamic field area on such builds. The last two patches enable
Marvell cnxk PMDs and software PMDs in IOVA as VA build as they work without PA field.

1. https://inbox.dpdk.org/dev/57d2ab7fff672716d37ba4078e2e3bb2db126607.1656605763.git.sthotton@marvell.com/.

v3:
 * Cleared use of buf_iova from cnxk PMD.

v2:
 * Used RTE_IOVA_AS_VA instead of rte_is_iova_as_va_build().
 * Moved mbuf next pointer to first cacheline if RTE_IOVA_AS_VA = 1.

Shijith Thotton (5):
  build: add meson option to configure IOVA mode as VA
  mbuf: add second dynamic field member for VA only build
  lib: move mbuf next pointer to first cache line
  drivers: mark Marvell cnxk PMDs work with IOVA as VA
  drivers: mark software PMDs work with IOVA as VA

 app/test-bbdev/test_bbdev_perf.c         |  2 +-
 app/test-crypto-perf/cperf_test_common.c |  5 +--
 app/test/test_bpf.c                      |  2 +-
 app/test/test_dmadev.c                   | 33 ++++++--------
 app/test/test_mbuf.c                     | 12 +++---
 app/test/test_pcapng.c                   |  2 +-
 config/arm/meson.build                   |  8 +++-
 config/meson.build                       |  1 +
 drivers/common/cnxk/meson.build          |  1 +
 drivers/crypto/armv8/meson.build         |  1 +
 drivers/crypto/cnxk/cn10k_ipsec_la_ops.h |  4 +-
 drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  |  2 +-
 drivers/crypto/cnxk/meson.build          |  2 +
 drivers/crypto/ipsec_mb/meson.build      |  1 +
 drivers/crypto/null/meson.build          |  1 +
 drivers/crypto/openssl/meson.build       |  1 +
 drivers/dma/cnxk/meson.build             |  1 +
 drivers/dma/skeleton/meson.build         |  1 +
 drivers/event/cnxk/meson.build           |  1 +
 drivers/event/dsw/meson.build            |  1 +
 drivers/event/opdl/meson.build           |  1 +
 drivers/event/skeleton/meson.build       |  1 +
 drivers/event/sw/meson.build             |  1 +
 drivers/mempool/bucket/meson.build       |  1 +
 drivers/mempool/cnxk/meson.build         |  1 +
 drivers/mempool/ring/meson.build         |  1 +
 drivers/mempool/stack/meson.build        |  1 +
 drivers/meson.build                      |  6 +++
 drivers/net/af_packet/meson.build        |  1 +
 drivers/net/af_xdp/meson.build           |  2 +
 drivers/net/bonding/meson.build          |  1 +
 drivers/net/cnxk/cn10k_tx.h              | 55 +++++++-----------------
 drivers/net/cnxk/cn9k_tx.h               | 55 +++++++-----------------
 drivers/net/cnxk/cnxk_ethdev.h           |  1 -
 drivers/net/cnxk/meson.build             |  1 +
 drivers/net/failsafe/meson.build         |  1 +
 drivers/net/memif/meson.build            |  1 +
 drivers/net/null/meson.build             |  1 +
 drivers/net/pcap/meson.build             |  1 +
 drivers/net/ring/meson.build             |  1 +
 drivers/net/tap/meson.build              |  1 +
 drivers/raw/cnxk_bphy/meson.build        |  1 +
 drivers/raw/cnxk_gpio/meson.build        |  1 +
 drivers/raw/skeleton/meson.build         |  1 +
 lib/eal/linux/eal.c                      |  7 +++
 lib/mbuf/rte_mbuf.c                      |  8 ++--
 lib/mbuf/rte_mbuf.h                      | 17 +++++---
 lib/mbuf/rte_mbuf_core.h                 | 55 ++++++++++++++++++------
 lib/mbuf/rte_mbuf_dyn.c                  |  2 +
 lib/meson.build                          |  3 ++
 lib/vhost/vhost.h                        |  2 +-
 lib/vhost/vhost_crypto.c                 | 54 +++++++++++++++++------
 meson_options.txt                        |  2 +
 53 files changed, 220 insertions(+), 150 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v3 1/5] build: add meson option to configure IOVA mode as VA
  2022-09-21 13:56               ` [PATCH v3 " Shijith Thotton
@ 2022-09-21 13:56                 ` Shijith Thotton
  2022-09-28 12:52                   ` Olivier Matz
  2022-09-21 13:56                 ` [PATCH v3 2/5] mbuf: add second dynamic field member for VA only build Shijith Thotton
                                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 88+ messages in thread
From: Shijith Thotton @ 2022-09-21 13:56 UTC (permalink / raw)
  To: dev
  Cc: pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen, thomas,
	david.marchand, Nicolas Chautru, Ciara Power, Konstantin Ananyev,
	Chengwen Feng, Kevin Laatz, Reshma Pattan, Maxime Coquelin,
	Chenbo Xia

IOVA mode in DPDK is either PA or VA. The new build option iova_as_va
configures the mode to VA at compile time and prevents setting it to PA
at runtime. For now, all drivers which are not always enabled are
disabled with this option. A supported driver can set the flag
pmd_iova_as_va in its build file to enable its build.

mbuf structure holds the physical (PA) and virtual address (VA) of a
buffer. If IOVA mode is set to VA, PA is redundant as it is the same as
VA. So the PA field need not be updated and is marked invalid if the
build is configured to use only VA.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 app/test-bbdev/test_bbdev_perf.c         |  2 +-
 app/test-crypto-perf/cperf_test_common.c |  5 +--
 app/test/test_bpf.c                      |  2 +-
 app/test/test_dmadev.c                   | 33 ++++++---------
 app/test/test_mbuf.c                     | 12 +++---
 app/test/test_pcapng.c                   |  2 +-
 config/meson.build                       |  1 +
 drivers/meson.build                      |  6 +++
 lib/eal/linux/eal.c                      |  7 +++
 lib/mbuf/rte_mbuf.c                      |  8 ++--
 lib/mbuf/rte_mbuf.h                      | 17 +++++---
 lib/mbuf/rte_mbuf_core.h                 | 10 +++++
 lib/vhost/vhost.h                        |  2 +-
 lib/vhost/vhost_crypto.c                 | 54 ++++++++++++++++++------
 meson_options.txt                        |  2 +
 15 files changed, 109 insertions(+), 54 deletions(-)

diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index 8fab52d821..f6aa25b67d 100644
--- a/app/test-bbdev/test_bbdev_perf.c
+++ b/app/test-bbdev/test_bbdev_perf.c
@@ -1001,7 +1001,7 @@ init_op_data_objs(struct rte_bbdev_op_data *bufs,
 					seg->length);
 				memcpy(data, seg->addr, seg->length);
 				m_head->buf_addr = data;
-				m_head->buf_iova = rte_malloc_virt2iova(data);
+				rte_mbuf_iova_set(m_head, rte_malloc_virt2iova(data));
 				m_head->data_off = 0;
 				m_head->data_len = seg->length;
 			} else {
diff --git a/app/test-crypto-perf/cperf_test_common.c b/app/test-crypto-perf/cperf_test_common.c
index 00aadc9a47..27646cd619 100644
--- a/app/test-crypto-perf/cperf_test_common.c
+++ b/app/test-crypto-perf/cperf_test_common.c
@@ -26,8 +26,7 @@ fill_single_seg_mbuf(struct rte_mbuf *m, struct rte_mempool *mp,
 	/* start of buffer is after mbuf structure and priv data */
 	m->priv_size = 0;
 	m->buf_addr = (char *)m + mbuf_hdr_size;
-	m->buf_iova = rte_mempool_virt2iova(obj) +
-		mbuf_offset + mbuf_hdr_size;
+	rte_mbuf_iova_set(m, rte_mempool_virt2iova(obj) + mbuf_offset + mbuf_hdr_size);
 	m->buf_len = segment_sz;
 	m->data_len = data_len;
 	m->pkt_len = data_len;
@@ -58,7 +57,7 @@ fill_multi_seg_mbuf(struct rte_mbuf *m, struct rte_mempool *mp,
 		/* start of buffer is after mbuf structure and priv data */
 		m->priv_size = 0;
 		m->buf_addr = (char *)m + mbuf_hdr_size;
-		m->buf_iova = next_seg_phys_addr;
+		rte_mbuf_iova_set(m, next_seg_phys_addr);
 		next_seg_phys_addr += mbuf_hdr_size + segment_sz;
 		m->buf_len = segment_sz;
 		m->data_len = data_len;
diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c
index 97f500809e..f5af5e8a3f 100644
--- a/app/test/test_bpf.c
+++ b/app/test/test_bpf.c
@@ -2600,7 +2600,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len,
 	uint8_t *db;
 
 	mb->buf_addr = buf;
-	mb->buf_iova = (uintptr_t)buf;
+	rte_mbuf_iova_set(mb, (uintptr_t)buf);
 	mb->buf_len = buf_len;
 	rte_mbuf_refcnt_set(mb, 1);
 
diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c
index 9e8e101f40..8306947eda 100644
--- a/app/test/test_dmadev.c
+++ b/app/test/test_dmadev.c
@@ -110,8 +110,8 @@ do_multi_copies(int16_t dev_id, uint16_t vchan,
 		for (j = 0; j < COPY_LEN/sizeof(uint64_t); j++)
 			src_data[j] = rte_rand();
 
-		if (rte_dma_copy(dev_id, vchan, srcs[i]->buf_iova + srcs[i]->data_off,
-				dsts[i]->buf_iova + dsts[i]->data_off, COPY_LEN, 0) != id_count++)
+		if (rte_dma_copy(dev_id, vchan, rte_pktmbuf_iova_offset(srcs[i], 0),
+				 rte_pktmbuf_iova_offset(dsts[i], 0), COPY_LEN, 0) != id_count++)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", i);
 	}
 	rte_dma_submit(dev_id, vchan);
@@ -317,9 +317,8 @@ test_failure_in_full_burst(int16_t dev_id, uint16_t vchan, bool fence,
 	rte_dma_stats_get(dev_id, vchan, &baseline); /* get a baseline set of stats */
 	for (i = 0; i < COMP_BURST_SZ; i++) {
 		int id = rte_dma_copy(dev_id, vchan,
-				(i == fail_idx ? 0 : (srcs[i]->buf_iova + srcs[i]->data_off)),
-				dsts[i]->buf_iova + dsts[i]->data_off,
-				COPY_LEN, OPT_FENCE(i));
+				      (i == fail_idx ? 0 : rte_pktmbuf_iova_offset(srcs[i], 0)),
+				      rte_pktmbuf_iova_offset(dsts[i], 0), COPY_LEN, OPT_FENCE(i));
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", i);
 		if (i == fail_idx)
@@ -407,9 +406,8 @@ test_individual_status_query_with_failure(int16_t dev_id, uint16_t vchan, bool f
 
 	for (j = 0; j < COMP_BURST_SZ; j++) {
 		int id = rte_dma_copy(dev_id, vchan,
-				(j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]->data_off)),
-				dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, OPT_FENCE(j));
+				      (j == fail_idx ? 0 : rte_pktmbuf_iova_offset(srcs[j], 0)),
+				      rte_pktmbuf_iova_offset(dsts[j], 0), COPY_LEN, OPT_FENCE(j));
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 		if (j == fail_idx)
@@ -470,9 +468,8 @@ test_single_item_status_query_with_failure(int16_t dev_id, uint16_t vchan,
 
 	for (j = 0; j < COMP_BURST_SZ; j++) {
 		int id = rte_dma_copy(dev_id, vchan,
-				(j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]->data_off)),
-				dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, 0);
+				      (j == fail_idx ? 0 : rte_pktmbuf_iova_offset(srcs[j], 0)),
+				      rte_pktmbuf_iova_offset(dsts[j], 0), COPY_LEN, 0);
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 		if (j == fail_idx)
@@ -529,15 +526,14 @@ test_multi_failure(int16_t dev_id, uint16_t vchan, struct rte_mbuf **srcs, struc
 
 	/* enqueue and gather completions in one go */
 	for (j = 0; j < COMP_BURST_SZ; j++) {
-		uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
+		uintptr_t src = rte_pktmbuf_iova_offset(srcs[j], 0);
 		/* set up for failure if the current index is anywhere is the fails array */
 		for (i = 0; i < num_fail; i++)
 			if (j == fail[i])
 				src = 0;
 
-		int id = rte_dma_copy(dev_id, vchan,
-				src, dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, 0);
+		int id = rte_dma_copy(dev_id, vchan, src, rte_pktmbuf_iova_offset(dsts[j], 0),
+				      COPY_LEN, 0);
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 	}
@@ -565,15 +561,14 @@ test_multi_failure(int16_t dev_id, uint16_t vchan, struct rte_mbuf **srcs, struc
 
 	/* enqueue and gather completions in bursts, but getting errors one at a time */
 	for (j = 0; j < COMP_BURST_SZ; j++) {
-		uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
+		uintptr_t src = rte_pktmbuf_iova_offset(srcs[j], 0);
 		/* set up for failure if the current index is anywhere is the fails array */
 		for (i = 0; i < num_fail; i++)
 			if (j == fail[i])
 				src = 0;
 
-		int id = rte_dma_copy(dev_id, vchan,
-				src, dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, 0);
+		int id = rte_dma_copy(dev_id, vchan, src, rte_pktmbuf_iova_offset(dsts[j], 0),
+				      COPY_LEN, 0);
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 	}
diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
index e09b2549ca..45431f2c9c 100644
--- a/app/test/test_mbuf.c
+++ b/app/test/test_mbuf.c
@@ -1232,11 +1232,13 @@ test_failing_mbuf_sanity_check(struct rte_mempool *pktmbuf_pool)
 		return -1;
 	}
 
-	badbuf = *buf;
-	badbuf.buf_iova = 0;
-	if (verify_mbuf_check_panics(&badbuf)) {
-		printf("Error with bad-physaddr mbuf test\n");
-		return -1;
+	if (!RTE_IOVA_AS_VA) {
+		badbuf = *buf;
+		rte_mbuf_iova_set(&badbuf, 0);
+		if (verify_mbuf_check_panics(&badbuf)) {
+			printf("Error with bad-physaddr mbuf test\n");
+			return -1;
+		}
 	}
 
 	badbuf = *buf;
diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c
index 320dacea34..abbf00f6da 100644
--- a/app/test/test_pcapng.c
+++ b/app/test/test_pcapng.c
@@ -40,7 +40,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len,
 	uint8_t *db;
 
 	mb->buf_addr = buf;
-	mb->buf_iova = (uintptr_t)buf;
+	rte_mbuf_iova_set(mb, (uintptr_t)buf);
 	mb->buf_len = buf_len;
 	rte_mbuf_refcnt_set(mb, 1);
 
diff --git a/config/meson.build b/config/meson.build
index 7f7b6c92fd..6b6c3e7eb6 100644
--- a/config/meson.build
+++ b/config/meson.build
@@ -309,6 +309,7 @@ endif
 if get_option('mbuf_refcnt_atomic')
     dpdk_conf.set('RTE_MBUF_REFCNT_ATOMIC', true)
 endif
+dpdk_conf.set10('RTE_IOVA_AS_VA', get_option('iova_as_va'))
 
 compile_time_cpuflags = []
 subdir(arch_subdir)
diff --git a/drivers/meson.build b/drivers/meson.build
index 376a64f4da..989770cffd 100644
--- a/drivers/meson.build
+++ b/drivers/meson.build
@@ -105,6 +105,7 @@ foreach subpath:subdirs
         ext_deps = []
         pkgconfig_extra_libs = []
         testpmd_sources = []
+        pmd_iova_as_va = false
 
         if not enable_drivers.contains(drv_path)
             build = false
@@ -122,6 +123,11 @@ foreach subpath:subdirs
             # pull in driver directory which should update all the local variables
             subdir(drv_path)
 
+            if dpdk_conf.get('RTE_IOVA_AS_VA') == 1 and not pmd_iova_as_va and not always_enable.contains(drv_path)
+                build = false
+                reason = 'driver does not support IOVA as VA mode'
+            endif
+
             # get dependency objs from strings
             shared_deps = ext_deps
             static_deps = ext_deps
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 37d29643a5..b70c4dcc5f 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -1127,6 +1127,13 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
+	if (rte_eal_iova_mode() == RTE_IOVA_PA && RTE_IOVA_AS_VA) {
+		rte_eal_init_alert(
+			"Cannot use IOVA as 'PA' since build is configured to use only 'VA'");
+		rte_errno = EINVAL;
+		return -1;
+	}
+
 	RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
 		rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");
 
diff --git a/lib/mbuf/rte_mbuf.c b/lib/mbuf/rte_mbuf.c
index a2307cebe6..5af290c53a 100644
--- a/lib/mbuf/rte_mbuf.c
+++ b/lib/mbuf/rte_mbuf.c
@@ -89,7 +89,7 @@ rte_pktmbuf_init(struct rte_mempool *mp,
 	/* start of buffer is after mbuf structure and priv data */
 	m->priv_size = priv_size;
 	m->buf_addr = (char *)m + mbuf_size;
-	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
+	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
 	m->buf_len = (uint16_t)buf_len;
 
 	/* keep some headroom between start of buffer and data */
@@ -187,8 +187,8 @@ __rte_pktmbuf_init_extmem(struct rte_mempool *mp,
 	RTE_ASSERT(ctx->off + ext_mem->elt_size <= ext_mem->buf_len);
 
 	m->buf_addr = RTE_PTR_ADD(ext_mem->buf_ptr, ctx->off);
-	m->buf_iova = ext_mem->buf_iova == RTE_BAD_IOVA ?
-		      RTE_BAD_IOVA : (ext_mem->buf_iova + ctx->off);
+	rte_mbuf_iova_set(m, ext_mem->buf_iova == RTE_BAD_IOVA ? RTE_BAD_IOVA :
+								 (ext_mem->buf_iova + ctx->off));
 
 	ctx->off += ext_mem->elt_size;
 	if (ctx->off + ext_mem->elt_size > ext_mem->buf_len) {
@@ -388,7 +388,7 @@ int rte_mbuf_check(const struct rte_mbuf *m, int is_header,
 		*reason = "bad mbuf pool";
 		return -1;
 	}
-	if (m->buf_iova == 0) {
+	if (m->buf_iova == 0 && !RTE_IOVA_AS_VA) {
 		*reason = "bad IO addr";
 		return -1;
 	}
diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h
index 9811e8c760..05be146bc2 100644
--- a/lib/mbuf/rte_mbuf.h
+++ b/lib/mbuf/rte_mbuf.h
@@ -146,7 +146,7 @@ static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
 static inline rte_iova_t
 rte_mbuf_data_iova(const struct rte_mbuf *mb)
 {
-	return mb->buf_iova + mb->data_off;
+	return (RTE_IOVA_AS_VA ? (uint64_t)mb->buf_addr : mb->buf_iova) + mb->data_off;
 }
 
 /**
@@ -164,7 +164,7 @@ rte_mbuf_data_iova(const struct rte_mbuf *mb)
 static inline rte_iova_t
 rte_mbuf_data_iova_default(const struct rte_mbuf *mb)
 {
-	return mb->buf_iova + RTE_PKTMBUF_HEADROOM;
+	return (RTE_IOVA_AS_VA ? (uint64_t)mb->buf_addr : mb->buf_iova) + RTE_PKTMBUF_HEADROOM;
 }
 
 /**
@@ -469,6 +469,13 @@ rte_mbuf_ext_refcnt_update(struct rte_mbuf_ext_shared_info *shinfo,
 				 __ATOMIC_ACQ_REL);
 }
 
+static inline void
+rte_mbuf_iova_set(struct rte_mbuf *m, rte_iova_t iova)
+{
+	if (!RTE_IOVA_AS_VA)
+		m->buf_iova = iova;
+}
+
 /** Mbuf prefetch */
 #define RTE_MBUF_PREFETCH_TO_FREE(m) do {       \
 	if ((m) != NULL)                        \
@@ -1056,7 +1063,7 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
 	RTE_ASSERT(shinfo->free_cb != NULL);
 
 	m->buf_addr = buf_addr;
-	m->buf_iova = buf_iova;
+	rte_mbuf_iova_set(m, buf_iova);
 	m->buf_len = buf_len;
 
 	m->data_len = 0;
@@ -1143,7 +1150,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
 
 	mi->data_off = m->data_off;
 	mi->data_len = m->data_len;
-	mi->buf_iova = m->buf_iova;
+	rte_mbuf_iova_set(mi, m->buf_iova);
 	mi->buf_addr = m->buf_addr;
 	mi->buf_len = m->buf_len;
 
@@ -1245,7 +1252,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
 
 	m->priv_size = priv_size;
 	m->buf_addr = (char *)m + mbuf_size;
-	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
+	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
 	m->buf_len = (uint16_t)buf_len;
 	rte_pktmbuf_reset_headroom(m);
 	m->data_len = 0;
diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index 3d6ddd6773..c6292e7252 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -581,6 +581,8 @@ struct rte_mbuf {
 	void *buf_addr;           /**< Virtual address of segment buffer. */
 	/**
 	 * Physical address of segment buffer.
+	 * This field is invalid if the build is configured to use only
+	 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is 1).
 	 * Force alignment to 8-bytes, so as to ensure we have the exact
 	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
 	 * working on vector drivers easier.
@@ -848,8 +850,12 @@ struct rte_mbuf_ext_shared_info {
  * @param o
  *   The offset into the data to calculate address from.
  */
+#if RTE_IOVA_AS_VA
+#define rte_pktmbuf_iova_offset(m, o) rte_pktmbuf_mtod_offset(m, rte_iova_t, o)
+#else
 #define rte_pktmbuf_iova_offset(m, o) \
 	(rte_iova_t)((m)->buf_iova + (m)->data_off + (o))
+#endif
 
 /**
  * A macro that returns the IO address that points to the start of the
@@ -858,7 +864,11 @@ struct rte_mbuf_ext_shared_info {
  * @param m
  *   The packet mbuf.
  */
+#if RTE_IOVA_AS_VA
+#define rte_pktmbuf_iova(m) rte_pktmbuf_mtod(m, rte_iova_t)
+#else
 #define rte_pktmbuf_iova(m) rte_pktmbuf_iova_offset(m, 0)
+#endif
 
 #ifdef __cplusplus
 }
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 782d916ae0..05cde6e118 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -967,7 +967,7 @@ restore_mbuf(struct rte_mbuf *m)
 		/* start of buffer is after mbuf structure and priv data */
 
 		m->buf_addr = (char *)m + mbuf_size;
-		m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
+		rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
 		m = m->next;
 	}
 }
diff --git a/lib/vhost/vhost_crypto.c b/lib/vhost/vhost_crypto.c
index 54946f46d9..7b50735796 100644
--- a/lib/vhost/vhost_crypto.c
+++ b/lib/vhost/vhost_crypto.c
@@ -823,11 +823,17 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 	switch (vcrypto->option) {
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
 		m_src->data_len = cipher->para.src_data_len;
-		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
-				cipher->para.src_data_len);
+		if (!RTE_IOVA_AS_VA) {
+			m_src->buf_iova =
+				gpa_to_hpa(vcrypto->dev, desc->addr, cipher->para.src_data_len);
+			if (unlikely(m_src->buf_iova == 0)) {
+				VC_LOG_ERR("zero_copy may fail due to cross page data");
+				ret = VIRTIO_CRYPTO_ERR;
+				goto error_exit;
+			}
+		}
 		m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO);
-		if (unlikely(m_src->buf_iova == 0 ||
-				m_src->buf_addr == NULL)) {
+		if (unlikely(m_src->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
@@ -867,10 +873,17 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 
 	switch (vcrypto->option) {
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
-		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
-				desc->addr, cipher->para.dst_data_len);
+		if (!RTE_IOVA_AS_VA) {
+			m_dst->buf_iova =
+				gpa_to_hpa(vcrypto->dev, desc->addr, cipher->para.dst_data_len);
+			if (unlikely(m_dst->buf_iova == 0)) {
+				VC_LOG_ERR("zero_copy may fail due to cross page data");
+				ret = VIRTIO_CRYPTO_ERR;
+				goto error_exit;
+			}
+		}
 		m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW);
-		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
+		if (unlikely(m_dst->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
@@ -980,11 +993,17 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
 		m_src->data_len = chain->para.src_data_len;
 		m_dst->data_len = chain->para.dst_data_len;
-
-		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
-				chain->para.src_data_len);
+		if (!RTE_IOVA_AS_VA) {
+			m_src->buf_iova =
+				gpa_to_hpa(vcrypto->dev, desc->addr, chain->para.src_data_len);
+			if (unlikely(m_src->buf_iova == 0)) {
+				VC_LOG_ERR("zero_copy may fail due to cross page data");
+				ret = VIRTIO_CRYPTO_ERR;
+				goto error_exit;
+			}
+		}
 		m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO);
-		if (unlikely(m_src->buf_iova == 0 || m_src->buf_addr == NULL)) {
+		if (unlikely(m_src->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
@@ -1024,10 +1043,17 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 
 	switch (vcrypto->option) {
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
-		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
-				desc->addr, chain->para.dst_data_len);
+		if (!RTE_IOVA_AS_VA) {
+			m_dst->buf_iova =
+				gpa_to_hpa(vcrypto->dev, desc->addr, chain->para.dst_data_len);
+			if (unlikely(m_dst->buf_iova == 0)) {
+				VC_LOG_ERR("zero_copy may fail due to cross page data");
+				ret = VIRTIO_CRYPTO_ERR;
+				goto error_exit;
+			}
+		}
 		m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW);
-		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
+		if (unlikely(m_dst->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
diff --git a/meson_options.txt b/meson_options.txt
index 7c220ad68d..f0fa6cf04c 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -44,6 +44,8 @@ option('platform', type: 'string', value: 'native', description:
        'Platform to build, either "native", "generic" or a SoC. Please refer to the Linux build guide for more information.')
 option('enable_trace_fp', type: 'boolean', value: false, description:
        'enable fast path trace points.')
+option('iova_as_va', type: 'boolean', value: false, description:
+       'Build which only supports IOVA as VA mode. Unsupported drivers are disabled.')
 option('tests', type: 'boolean', value: true, description:
        'build unit tests')
 option('use_hpet', type: 'boolean', value: false, description:
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v3 2/5] mbuf: add second dynamic field member for VA only build
  2022-09-21 13:56               ` [PATCH v3 " Shijith Thotton
  2022-09-21 13:56                 ` [PATCH v3 1/5] build: add meson option to configure IOVA mode as VA Shijith Thotton
@ 2022-09-21 13:56                 ` Shijith Thotton
  2022-09-28  7:24                   ` Thomas Monjalon
  2022-09-28 12:52                   ` Olivier Matz
  2022-09-21 13:56                 ` [PATCH v3 3/5] lib: move mbuf next pointer to first cache line Shijith Thotton
                                   ` (6 subsequent siblings)
  8 siblings, 2 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-21 13:56 UTC (permalink / raw)
  To: dev
  Cc: pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen, thomas,
	david.marchand

The mbuf physical address field is not used in builds which only use VA.
In such builds, it is used to expand the dynamic field area.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 lib/mbuf/rte_mbuf_core.h | 26 +++++++++++++++++---------
 lib/mbuf/rte_mbuf_dyn.c  |  2 ++
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index c6292e7252..94907f301d 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -579,15 +579,23 @@ struct rte_mbuf {
 	RTE_MARKER cacheline0;
 
 	void *buf_addr;           /**< Virtual address of segment buffer. */
-	/**
-	 * Physical address of segment buffer.
-	 * This field is invalid if the build is configured to use only
-	 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is 1).
-	 * Force alignment to 8-bytes, so as to ensure we have the exact
-	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
-	 * working on vector drivers easier.
-	 */
-	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
+	RTE_STD_C11
+	union {
+		/**
+		 * Physical address of segment buffer.
+		 * This field is invalid if the build is configured to use only
+		 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is 1).
+		 * Force alignment to 8-bytes, so as to ensure we have the exact
+		 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
+		 * working on vector drivers easier.
+		 */
+		rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
+		/**
+		 * Reserved for dynamic field in builds where physical address
+		 * field is invalid.
+		 */
+		uint64_t dynfield2;
+	};
 
 	/* next 8 bytes are initialised on RX descriptor rearm */
 	RTE_MARKER64 rearm_data;
diff --git a/lib/mbuf/rte_mbuf_dyn.c b/lib/mbuf/rte_mbuf_dyn.c
index 4ae79383b5..6a4cf96897 100644
--- a/lib/mbuf/rte_mbuf_dyn.c
+++ b/lib/mbuf/rte_mbuf_dyn.c
@@ -128,6 +128,8 @@ init_shared_mem(void)
 		 */
 		memset(shm, 0, sizeof(*shm));
 		mark_free(dynfield1);
+		if (RTE_IOVA_AS_VA)
+			mark_free(dynfield2);
 
 		/* init free_flags */
 		for (mask = RTE_MBUF_F_FIRST_FREE; mask <= RTE_MBUF_F_LAST_FREE; mask <<= 1)
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v3 3/5] lib: move mbuf next pointer to first cache line
  2022-09-21 13:56               ` [PATCH v3 " Shijith Thotton
  2022-09-21 13:56                 ` [PATCH v3 1/5] build: add meson option to configure IOVA mode as VA Shijith Thotton
  2022-09-21 13:56                 ` [PATCH v3 2/5] mbuf: add second dynamic field member for VA only build Shijith Thotton
@ 2022-09-21 13:56                 ` Shijith Thotton
  2022-09-21 14:07                   ` Morten Brørup
  2022-09-28 12:52                   ` Olivier Matz
  2022-09-21 13:56                 ` [PATCH v3 4/5] drivers: mark Marvell cnxk PMDs work with IOVA as VA Shijith Thotton
                                   ` (5 subsequent siblings)
  8 siblings, 2 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-21 13:56 UTC (permalink / raw)
  To: dev
  Cc: pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen, thomas,
	david.marchand

Swapped position of mbuf next pointer and second dynamic field (dynfield2)
if the build is configured to use IOVA as VA. This is to move the mbuf
next pointer to first cache line. kni library is disabled for this
change as it depends on the offset value of next pointer.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 lib/mbuf/rte_mbuf_core.h | 29 +++++++++++++++++++++--------
 lib/meson.build          |  3 +++
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index 94907f301d..915dcd8653 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -590,11 +590,14 @@ struct rte_mbuf {
 		 * working on vector drivers easier.
 		 */
 		rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
+#if RTE_IOVA_AS_VA
 		/**
-		 * Reserved for dynamic field in builds where physical address
-		 * field is invalid.
+		 * Next segment of scattered packet.
+		 * This field is valid when physical address field is invalid.
+		 * Otherwise next pointer in the second cache line will be used.
 		 */
-		uint64_t dynfield2;
+		struct rte_mbuf *next;
+#endif
 	};
 
 	/* next 8 bytes are initialised on RX descriptor rearm */
@@ -711,11 +714,21 @@ struct rte_mbuf {
 	/* second cache line - fields only used in slow path or on TX */
 	RTE_MARKER cacheline1 __rte_cache_min_aligned;
 
-	/**
-	 * Next segment of scattered packet. Must be NULL in the last segment or
-	 * in case of non-segmented packet.
-	 */
-	struct rte_mbuf *next;
+	RTE_STD_C11
+	union {
+#if !RTE_IOVA_AS_VA
+		/**
+		 * Next segment of scattered packet. Must be NULL in the last
+		 * segment or in case of non-segmented packet.
+		 */
+		struct rte_mbuf *next;
+#endif
+		/**
+		 * Reserved for dynamic field when the next pointer is in first
+		 * cache line (i.e. RTE_IOVA_AS_VA is 1).
+		 */
+		uint64_t dynfield2;
+	};
 
 	/* fields to support TX offloads */
 	RTE_STD_C11
diff --git a/lib/meson.build b/lib/meson.build
index c648f7d800..73d93bc803 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -88,6 +88,9 @@ optional_libs = [
 disabled_libs = []
 opt_disabled_libs = run_command(list_dir_globs, get_option('disable_libs'),
         check: true).stdout().split()
+if dpdk_conf.get('RTE_IOVA_AS_VA') == 1
+    opt_disabled_libs += ['kni']
+endif
 foreach l:opt_disabled_libs
     if not optional_libs.contains(l)
         warning('Cannot disable mandatory library "@0@"'.format(l))
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v3 4/5] drivers: mark Marvell cnxk PMDs work with IOVA as VA
  2022-09-21 13:56               ` [PATCH v3 " Shijith Thotton
                                   ` (2 preceding siblings ...)
  2022-09-21 13:56                 ` [PATCH v3 3/5] lib: move mbuf next pointer to first cache line Shijith Thotton
@ 2022-09-21 13:56                 ` Shijith Thotton
  2022-09-28 12:53                   ` Olivier Matz
  2022-10-07 20:17                   ` Olivier Matz
  2022-09-21 13:56                 ` [PATCH v3 5/5] drivers: mark software " Shijith Thotton
                                   ` (4 subsequent siblings)
  8 siblings, 2 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-21 13:56 UTC (permalink / raw)
  To: dev
  Cc: pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen, thomas,
	david.marchand, Ruifeng Wang, Jan Viktorin, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Ankur Dwivedi,
	Anoob Joseph, Tejasree Kondoj, Radha Mohan Chintakuntla,
	Veerasenareddy Burru, Ashwin Sekhar T K, Jakub Palider,
	Tomasz Duszynski

Enabled the flag pmd_iova_as_va in cnxk driver build files as they work
with IOVA as VA. Updated cn9k and cn10k soc build configurations to
enable the IOVA as VA build by default.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 config/arm/meson.build                   |  8 +++-
 drivers/common/cnxk/meson.build          |  1 +
 drivers/crypto/cnxk/cn10k_ipsec_la_ops.h |  4 +-
 drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  |  2 +-
 drivers/crypto/cnxk/meson.build          |  2 +
 drivers/dma/cnxk/meson.build             |  1 +
 drivers/event/cnxk/meson.build           |  1 +
 drivers/mempool/cnxk/meson.build         |  1 +
 drivers/net/cnxk/cn10k_tx.h              | 55 +++++++-----------------
 drivers/net/cnxk/cn9k_tx.h               | 55 +++++++-----------------
 drivers/net/cnxk/cnxk_ethdev.h           |  1 -
 drivers/net/cnxk/meson.build             |  1 +
 drivers/raw/cnxk_bphy/meson.build        |  1 +
 drivers/raw/cnxk_gpio/meson.build        |  1 +
 14 files changed, 50 insertions(+), 84 deletions(-)

diff --git a/config/arm/meson.build b/config/arm/meson.build
index 9f1636e0d5..4e95e8b388 100644
--- a/config/arm/meson.build
+++ b/config/arm/meson.build
@@ -294,7 +294,8 @@ soc_cn10k = {
     'flags': [
         ['RTE_MAX_LCORE', 24],
         ['RTE_MAX_NUMA_NODES', 1],
-        ['RTE_MEMPOOL_ALIGN', 128]
+        ['RTE_MEMPOOL_ALIGN', 128],
+        ['RTE_IOVA_AS_VA', 1]
     ],
     'part_number': '0xd49',
     'extra_march_features': ['crypto'],
@@ -370,7 +371,10 @@ soc_cn9k = {
     'description': 'Marvell OCTEON 9',
     'implementer': '0x43',
     'part_number': '0xb2',
-    'numa': false
+    'numa': false,
+    'flags': [
+        ['RTE_IOVA_AS_VA', 1]
+    ]
 }
 
 soc_stingray = {
diff --git a/drivers/common/cnxk/meson.build b/drivers/common/cnxk/meson.build
index 6f808271d1..d019cfa8d1 100644
--- a/drivers/common/cnxk/meson.build
+++ b/drivers/common/cnxk/meson.build
@@ -86,3 +86,4 @@ sources += files('cnxk_telemetry_bphy.c',
 )
 
 deps += ['bus_pci', 'net', 'telemetry']
+pmd_iova_as_va = true
diff --git a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
index 66cfe6ca98..16db14344d 100644
--- a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
+++ b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
@@ -85,7 +85,7 @@ process_outb_sa(struct roc_cpt_lf *lf, struct rte_crypto_op *cop,
 
 	/* Prepare CPT instruction */
 	inst->w4.u64 = inst_w4_u64 | rte_pktmbuf_pkt_len(m_src);
-	dptr = rte_pktmbuf_iova(m_src);
+	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
 	inst->dptr = dptr;
 	inst->rptr = dptr;
 
@@ -102,7 +102,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct cn10k_ipsec_sa *sa,
 
 	/* Prepare CPT instruction */
 	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
-	dptr = rte_pktmbuf_iova(m_src);
+	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
 	inst->dptr = dptr;
 	inst->rptr = dptr;
 
diff --git a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
index e469596756..8b68e4c728 100644
--- a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
+++ b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
@@ -99,7 +99,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct cn9k_ipsec_sa *sa,
 
 	/* Prepare CPT instruction */
 	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
-	inst->dptr = inst->rptr = rte_pktmbuf_iova(m_src);
+	inst->dptr = inst->rptr = rte_pktmbuf_mtod(m_src, uint64_t);
 	inst->w7.u64 = sa->inst.w7;
 }
 #endif /* __CN9K_IPSEC_LA_OPS_H__ */
diff --git a/drivers/crypto/cnxk/meson.build b/drivers/crypto/cnxk/meson.build
index 23a1cc3aac..764e7bb99a 100644
--- a/drivers/crypto/cnxk/meson.build
+++ b/drivers/crypto/cnxk/meson.build
@@ -31,3 +31,5 @@ if get_option('buildtype').contains('debug')
 else
     cflags += [ '-ULA_IPSEC_DEBUG' ]
 endif
+
+pmd_iova_as_va = true
diff --git a/drivers/dma/cnxk/meson.build b/drivers/dma/cnxk/meson.build
index d4be4ee860..ef0e3db109 100644
--- a/drivers/dma/cnxk/meson.build
+++ b/drivers/dma/cnxk/meson.build
@@ -3,3 +3,4 @@
 
 deps += ['bus_pci', 'common_cnxk', 'dmadev']
 sources = files('cnxk_dmadev.c')
+pmd_iova_as_va = true
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index b27bae7b12..650d0d4256 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -479,3 +479,4 @@ foreach flag: extra_flags
 endforeach
 
 deps += ['bus_pci', 'common_cnxk', 'net_cnxk', 'crypto_cnxk']
+pmd_iova_as_va = true
diff --git a/drivers/mempool/cnxk/meson.build b/drivers/mempool/cnxk/meson.build
index d5d1978569..a328176457 100644
--- a/drivers/mempool/cnxk/meson.build
+++ b/drivers/mempool/cnxk/meson.build
@@ -17,3 +17,4 @@ sources = files(
 )
 
 deps += ['eal', 'mbuf', 'kvargs', 'bus_pci', 'common_cnxk', 'mempool']
+pmd_iova_as_va = true
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index ea13866b20..2ef62da132 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -1775,14 +1775,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 		mbuf2 = (uint64_t *)tx_pkts[2];
 		mbuf3 = (uint64_t *)tx_pkts[3];
 
-		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
-				     offsetof(struct rte_mbuf, buf_iova));
 		/*
 		 * Get mbuf's, olflags, iova, pktlen, dataoff
 		 * dataoff_iovaX.D[0] = iova,
@@ -1790,28 +1782,24 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 		 * len_olflagsX.D[0] = ol_flags,
 		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
 		 */
-		dataoff_iova0 = vld1q_u64(mbuf0);
-		len_olflags0 = vld1q_u64(mbuf0 + 2);
-		dataoff_iova1 = vld1q_u64(mbuf1);
-		len_olflags1 = vld1q_u64(mbuf1 + 2);
-		dataoff_iova2 = vld1q_u64(mbuf2);
-		len_olflags2 = vld1q_u64(mbuf2 + 2);
-		dataoff_iova3 = vld1q_u64(mbuf3);
-		len_olflags3 = vld1q_u64(mbuf3 + 2);
+		dataoff_iova0 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf0), 1);
+		len_olflags0 = vld1q_u64(mbuf0 + 3);
+		dataoff_iova1 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf1)->data_off, vld1q_u64(mbuf1), 1);
+		len_olflags1 = vld1q_u64(mbuf1 + 3);
+		dataoff_iova2 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf2)->data_off, vld1q_u64(mbuf2), 1);
+		len_olflags2 = vld1q_u64(mbuf2 + 3);
+		dataoff_iova3 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf3)->data_off, vld1q_u64(mbuf3), 1);
+		len_olflags3 = vld1q_u64(mbuf3 + 3);
 
 		/* Move mbufs to point pool */
-		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
+		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct rte_mbuf, pool));
+		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct rte_mbuf, pool));
+		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct rte_mbuf, pool));
+		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct rte_mbuf, pool));
 
 		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
 			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
@@ -1861,17 +1849,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
 		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
 
-		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
-		const uint64x2_t and_mask0 = {
-			0xFFFFFFFFFFFFFFFF,
-			0x000000000000FFFF,
-		};
-
-		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
-		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
-		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
-		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
-
 		/*
 		 * Pick only 16 bits of pktlen preset at bits 63:32
 		 * and place them at bits 15:0.
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index 6ce81f5c96..f5d99ccb5a 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -1005,14 +1005,6 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		mbuf2 = (uint64_t *)tx_pkts[2];
 		mbuf3 = (uint64_t *)tx_pkts[3];
 
-		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
-				     offsetof(struct rte_mbuf, buf_iova));
 		/*
 		 * Get mbuf's, olflags, iova, pktlen, dataoff
 		 * dataoff_iovaX.D[0] = iova,
@@ -1020,28 +1012,24 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		 * len_olflagsX.D[0] = ol_flags,
 		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
 		 */
-		dataoff_iova0 = vld1q_u64(mbuf0);
-		len_olflags0 = vld1q_u64(mbuf0 + 2);
-		dataoff_iova1 = vld1q_u64(mbuf1);
-		len_olflags1 = vld1q_u64(mbuf1 + 2);
-		dataoff_iova2 = vld1q_u64(mbuf2);
-		len_olflags2 = vld1q_u64(mbuf2 + 2);
-		dataoff_iova3 = vld1q_u64(mbuf3);
-		len_olflags3 = vld1q_u64(mbuf3 + 2);
+		dataoff_iova0 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf0), 1);
+		len_olflags0 = vld1q_u64(mbuf0 + 3);
+		dataoff_iova1 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf1)->data_off, vld1q_u64(mbuf1), 1);
+		len_olflags1 = vld1q_u64(mbuf1 + 3);
+		dataoff_iova2 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf2)->data_off, vld1q_u64(mbuf2), 1);
+		len_olflags2 = vld1q_u64(mbuf2 + 3);
+		dataoff_iova3 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf3)->data_off, vld1q_u64(mbuf3), 1);
+		len_olflags3 = vld1q_u64(mbuf3 + 3);
 
 		/* Move mbufs to point pool */
-		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
+		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct rte_mbuf, pool));
+		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct rte_mbuf, pool));
+		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct rte_mbuf, pool));
+		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct rte_mbuf, pool));
 
 		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
 			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
@@ -1091,17 +1079,6 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
 		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
 
-		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
-		const uint64x2_t and_mask0 = {
-			0xFFFFFFFFFFFFFFFF,
-			0x000000000000FFFF,
-		};
-
-		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
-		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
-		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
-		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
-
 		/*
 		 * Pick only 16 bits of pktlen preset at bits 63:32
 		 * and place them at bits 15:0.
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 4cb7c9e90c..abf1e4215f 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -690,7 +690,6 @@ cnxk_pktmbuf_detach(struct rte_mbuf *m)
 
 	m->priv_size = priv_size;
 	m->buf_addr = (char *)m + mbuf_size;
-	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
 	m->buf_len = (uint16_t)buf_len;
 	rte_pktmbuf_reset_headroom(m);
 	m->data_len = 0;
diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
index f347e98fce..01489b3a36 100644
--- a/drivers/net/cnxk/meson.build
+++ b/drivers/net/cnxk/meson.build
@@ -194,3 +194,4 @@ foreach flag: extra_flags
 endforeach
 
 headers = files('rte_pmd_cnxk.h')
+pmd_iova_as_va = true
diff --git a/drivers/raw/cnxk_bphy/meson.build b/drivers/raw/cnxk_bphy/meson.build
index 14147feaf4..781ed63e05 100644
--- a/drivers/raw/cnxk_bphy/meson.build
+++ b/drivers/raw/cnxk_bphy/meson.build
@@ -10,3 +10,4 @@ sources = files(
         'cnxk_bphy_irq.c',
 )
 headers = files('rte_pmd_bphy.h')
+pmd_iova_as_va = true
diff --git a/drivers/raw/cnxk_gpio/meson.build b/drivers/raw/cnxk_gpio/meson.build
index a75a5b9084..f9aed173b6 100644
--- a/drivers/raw/cnxk_gpio/meson.build
+++ b/drivers/raw/cnxk_gpio/meson.build
@@ -9,3 +9,4 @@ sources = files(
         'cnxk_gpio_selftest.c',
 )
 headers = files('rte_pmd_cnxk_gpio.h')
+pmd_iova_as_va = true
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v3 5/5] drivers: mark software PMDs work with IOVA as VA
  2022-09-21 13:56               ` [PATCH v3 " Shijith Thotton
                                   ` (3 preceding siblings ...)
  2022-09-21 13:56                 ` [PATCH v3 4/5] drivers: mark Marvell cnxk PMDs work with IOVA as VA Shijith Thotton
@ 2022-09-21 13:56                 ` Shijith Thotton
  2022-09-28  5:41                 ` [PATCH v3 0/5] mbuf dynamic field expansion Shijith Thotton
                                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-21 13:56 UTC (permalink / raw)
  To: dev
  Cc: pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen, thomas,
	david.marchand, Ruifeng Wang, Fan Zhang, Pablo de Lara,
	Chengwen Feng, Kevin Laatz, Mattias Rönnblom, Liang Ma,
	Peter Mccarthy, Harry van Haaren, Artem V. Andreev,
	Andrew Rybchenko, John W. Linville, Ciara Loftus, Qi Zhang,
	Chas Williams, Min Hu (Connor),
	Gaetan Rivet, Jakub Grajciar, Tetsuya Mukawa, Sachin Saxena,
	Hemant Agrawal

Enabled software PMDs in IOVA as VA build as they work with IOVA as VA.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 drivers/crypto/armv8/meson.build    | 1 +
 drivers/crypto/ipsec_mb/meson.build | 1 +
 drivers/crypto/null/meson.build     | 1 +
 drivers/crypto/openssl/meson.build  | 1 +
 drivers/dma/skeleton/meson.build    | 1 +
 drivers/event/dsw/meson.build       | 1 +
 drivers/event/opdl/meson.build      | 1 +
 drivers/event/skeleton/meson.build  | 1 +
 drivers/event/sw/meson.build        | 1 +
 drivers/mempool/bucket/meson.build  | 1 +
 drivers/mempool/ring/meson.build    | 1 +
 drivers/mempool/stack/meson.build   | 1 +
 drivers/net/af_packet/meson.build   | 1 +
 drivers/net/af_xdp/meson.build      | 2 ++
 drivers/net/bonding/meson.build     | 1 +
 drivers/net/failsafe/meson.build    | 1 +
 drivers/net/memif/meson.build       | 1 +
 drivers/net/null/meson.build        | 1 +
 drivers/net/pcap/meson.build        | 1 +
 drivers/net/ring/meson.build        | 1 +
 drivers/net/tap/meson.build         | 1 +
 drivers/raw/skeleton/meson.build    | 1 +
 22 files changed, 23 insertions(+)

diff --git a/drivers/crypto/armv8/meson.build b/drivers/crypto/armv8/meson.build
index 5effba8bbc..a2c9d69e3f 100644
--- a/drivers/crypto/armv8/meson.build
+++ b/drivers/crypto/armv8/meson.build
@@ -17,3 +17,4 @@ endif
 ext_deps += dep
 deps += ['bus_vdev']
 sources = files('rte_armv8_pmd.c', 'rte_armv8_pmd_ops.c')
+pmd_iova_as_va = true
diff --git a/drivers/crypto/ipsec_mb/meson.build b/drivers/crypto/ipsec_mb/meson.build
index a89b29d6c3..785440b593 100644
--- a/drivers/crypto/ipsec_mb/meson.build
+++ b/drivers/crypto/ipsec_mb/meson.build
@@ -37,3 +37,4 @@ sources = files(
         'pmd_zuc.c',
 )
 deps += ['bus_vdev', 'net', 'security']
+pmd_iova_as_va = true
diff --git a/drivers/crypto/null/meson.build b/drivers/crypto/null/meson.build
index acc16e7d81..68dc030075 100644
--- a/drivers/crypto/null/meson.build
+++ b/drivers/crypto/null/meson.build
@@ -9,3 +9,4 @@ endif
 
 deps += 'bus_vdev'
 sources = files('null_crypto_pmd.c', 'null_crypto_pmd_ops.c')
+pmd_iova_as_va = true
diff --git a/drivers/crypto/openssl/meson.build b/drivers/crypto/openssl/meson.build
index cd962da1d6..25c44d0064 100644
--- a/drivers/crypto/openssl/meson.build
+++ b/drivers/crypto/openssl/meson.build
@@ -15,3 +15,4 @@ endif
 deps += 'bus_vdev'
 sources = files('rte_openssl_pmd.c', 'rte_openssl_pmd_ops.c')
 ext_deps += dep
+pmd_iova_as_va = true
diff --git a/drivers/dma/skeleton/meson.build b/drivers/dma/skeleton/meson.build
index 8871b80956..2b48d4e031 100644
--- a/drivers/dma/skeleton/meson.build
+++ b/drivers/dma/skeleton/meson.build
@@ -5,3 +5,4 @@ deps += ['dmadev', 'kvargs', 'ring', 'bus_vdev']
 sources = files(
         'skeleton_dmadev.c',
 )
+pmd_iova_as_va = true
diff --git a/drivers/event/dsw/meson.build b/drivers/event/dsw/meson.build
index 2df0fac4ff..477a6e5910 100644
--- a/drivers/event/dsw/meson.build
+++ b/drivers/event/dsw/meson.build
@@ -6,3 +6,4 @@ if cc.has_argument('-Wno-format-nonliteral')
     cflags += '-Wno-format-nonliteral'
 endif
 sources = files('dsw_evdev.c', 'dsw_event.c', 'dsw_xstats.c')
+pmd_iova_as_va = true
diff --git a/drivers/event/opdl/meson.build b/drivers/event/opdl/meson.build
index 786d2f4e82..e1a3de7ee3 100644
--- a/drivers/event/opdl/meson.build
+++ b/drivers/event/opdl/meson.build
@@ -9,3 +9,4 @@ sources = files(
         'opdl_test.c',
 )
 deps += ['bus_vdev']
+pmd_iova_as_va = true
diff --git a/drivers/event/skeleton/meson.build b/drivers/event/skeleton/meson.build
index acfe156532..0ae514668c 100644
--- a/drivers/event/skeleton/meson.build
+++ b/drivers/event/skeleton/meson.build
@@ -3,3 +3,4 @@
 
 sources = files('skeleton_eventdev.c')
 deps += ['bus_pci', 'bus_vdev']
+pmd_iova_as_va = true
diff --git a/drivers/event/sw/meson.build b/drivers/event/sw/meson.build
index 6f81567efb..210cc1d048 100644
--- a/drivers/event/sw/meson.build
+++ b/drivers/event/sw/meson.build
@@ -9,3 +9,4 @@ sources = files(
         'sw_evdev.c',
 )
 deps += ['hash', 'bus_vdev']
+pmd_iova_as_va = true
diff --git a/drivers/mempool/bucket/meson.build b/drivers/mempool/bucket/meson.build
index 0051b6ac3c..31ba101677 100644
--- a/drivers/mempool/bucket/meson.build
+++ b/drivers/mempool/bucket/meson.build
@@ -12,3 +12,4 @@ if is_windows
 endif
 
 sources = files('rte_mempool_bucket.c')
+pmd_iova_as_va = true
diff --git a/drivers/mempool/ring/meson.build b/drivers/mempool/ring/meson.build
index a021e908cf..f75f2125d7 100644
--- a/drivers/mempool/ring/meson.build
+++ b/drivers/mempool/ring/meson.build
@@ -2,3 +2,4 @@
 # Copyright(c) 2017 Intel Corporation
 
 sources = files('rte_mempool_ring.c')
+pmd_iova_as_va = true
diff --git a/drivers/mempool/stack/meson.build b/drivers/mempool/stack/meson.build
index 580dde79eb..3b94ed5b5e 100644
--- a/drivers/mempool/stack/meson.build
+++ b/drivers/mempool/stack/meson.build
@@ -4,3 +4,4 @@
 sources = files('rte_mempool_stack.c')
 
 deps += ['stack']
+pmd_iova_as_va = true
diff --git a/drivers/net/af_packet/meson.build b/drivers/net/af_packet/meson.build
index c014e9b61b..92fafea363 100644
--- a/drivers/net/af_packet/meson.build
+++ b/drivers/net/af_packet/meson.build
@@ -6,3 +6,4 @@ if not is_linux
     reason = 'only supported on Linux'
 endif
 sources = files('rte_eth_af_packet.c')
+pmd_iova_as_va = true
diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build
index 1e0de23705..35812511e2 100644
--- a/drivers/net/af_xdp/meson.build
+++ b/drivers/net/af_xdp/meson.build
@@ -55,3 +55,5 @@ else
     build = false
     reason = 'missing header, "linux/if_xdp.h"'
 endif
+
+pmd_iova_as_va = true
diff --git a/drivers/net/bonding/meson.build b/drivers/net/bonding/meson.build
index 18ad7e21f3..b61166888e 100644
--- a/drivers/net/bonding/meson.build
+++ b/drivers/net/bonding/meson.build
@@ -22,3 +22,4 @@ deps += 'sched' # needed for rte_bitmap.h
 deps += ['ip_frag']
 
 headers = files('rte_eth_bond.h', 'rte_eth_bond_8023ad.h')
+pmd_iova_as_va = true
diff --git a/drivers/net/failsafe/meson.build b/drivers/net/failsafe/meson.build
index b8e5bf70f8..a90be869d1 100644
--- a/drivers/net/failsafe/meson.build
+++ b/drivers/net/failsafe/meson.build
@@ -27,3 +27,4 @@ sources = files(
         'failsafe_ops.c',
         'failsafe_rxtx.c',
 )
+pmd_iova_as_va = true
diff --git a/drivers/net/memif/meson.build b/drivers/net/memif/meson.build
index 680bc8631c..59140dc3dd 100644
--- a/drivers/net/memif/meson.build
+++ b/drivers/net/memif/meson.build
@@ -12,3 +12,4 @@ sources = files(
 )
 
 deps += ['hash']
+pmd_iova_as_va = true
diff --git a/drivers/net/null/meson.build b/drivers/net/null/meson.build
index 0251578aab..6b7adbd760 100644
--- a/drivers/net/null/meson.build
+++ b/drivers/net/null/meson.build
@@ -8,3 +8,4 @@ if is_windows
 endif
 
 sources = files('rte_eth_null.c')
+pmd_iova_as_va = true
diff --git a/drivers/net/pcap/meson.build b/drivers/net/pcap/meson.build
index ed7864eb9d..73c65dd2a6 100644
--- a/drivers/net/pcap/meson.build
+++ b/drivers/net/pcap/meson.build
@@ -15,3 +15,4 @@ ext_deps += pcap_dep
 if is_windows
     ext_deps += cc.find_library('iphlpapi', required: true)
 endif
+pmd_iova_as_va = true
diff --git a/drivers/net/ring/meson.build b/drivers/net/ring/meson.build
index 0156b37aad..45fa3492cf 100644
--- a/drivers/net/ring/meson.build
+++ b/drivers/net/ring/meson.build
@@ -9,3 +9,4 @@ endif
 
 sources = files('rte_eth_ring.c')
 headers = files('rte_eth_ring.h')
+pmd_iova_as_va = true
diff --git a/drivers/net/tap/meson.build b/drivers/net/tap/meson.build
index c09713a67b..da23599830 100644
--- a/drivers/net/tap/meson.build
+++ b/drivers/net/tap/meson.build
@@ -35,3 +35,4 @@ foreach arg:args
     config.set(arg[0], cc.has_header_symbol(arg[1], arg[2]))
 endforeach
 configure_file(output : 'tap_autoconf.h', configuration : config)
+pmd_iova_as_va = true
diff --git a/drivers/raw/skeleton/meson.build b/drivers/raw/skeleton/meson.build
index 950a33cc20..439ab8792d 100644
--- a/drivers/raw/skeleton/meson.build
+++ b/drivers/raw/skeleton/meson.build
@@ -6,3 +6,4 @@ sources = files(
         'skeleton_rawdev.c',
         'skeleton_rawdev_test.c',
 )
+pmd_iova_as_va = true
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [EXT] Re: [PATCH v2 0/5] mbuf dynamic field expansion
  2022-09-21  9:43               ` David Marchand
@ 2022-09-21 14:01                 ` Shijith Thotton
  0 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-21 14:01 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, Pavan Nikhilesh Bhagavatula, Honnappa.Nagarahalli,
	bruce.richardson, Jerin Jacob Kollanukkaran, mb, olivier.matz,
	stephen, thomas

>>
>> This is a continuation of the discussions[1] to add mbuf physical address field to
>dynamic field.
>> Previous version was to add PA field to dynamic field area based on the EAL
>IOVA mode option. It was
>> deemed unsafe as some components could still use the PA field without
>checking IOVA mode and there
>> are drivers which need PA to work. One suggestion was to make the IOVA mode
>check at compile time so
>> that drivers which need PA can be disabled during build. This series adds this
>new meson build
>> options. Second patch adds mbuf PA field to dynamic field on such builds. Last
>two patches enable
>> Marvell cnxk PMDs and software PMDs in IOVA as VA build as they work without
>PA field.
>>
>> 1. https://urldefense.proofpoint.com/v2/url?u=https-
>3A__inbox.dpdk.org_dev_57d2ab7fff672716d37ba4078e2e3bb2db126607.1656605
>763.git.sthotton-
>40marvell.com_&d=DwIBaQ&c=nKjWec2b6R0mOyPaz7xtfQ&r=G9w4KsPaQLACBf
>GCL35PtiRH996yqJDxAZwrWegU2qQ&m=RVZ3bqz5IbpylWX3m-
>OEHVvbDom50kJbVz2VAWw9H3FdDKDd_gCWP2HM3bKpEVrq&s=M9Pruz-
>xzKgnYz6wvM-rH2539EZ4H_lflREfPsIZU7M&e=  .
>>
>> v2:
>>  * Used RTE_IOVA_AS_VA instead of rte_is_iova_as_va_build().
>>  * Moved mbuf next pointer to first cacheline if RTE_IOVA_AS_VA = 1.
>
>It seems there was an issue with receiving this series from patchwork pov.
>Please resend this v2 so that we get it through the CI.
>

I have sent v3 (https://patchwork.dpdk.org/project/dpdk/list/?series=24751).
Please try pulling it. I forgot to add thread info in v2.

Thanks,
Shijith

^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [PATCH v3 3/5] lib: move mbuf next pointer to first cache line
  2022-09-21 13:56                 ` [PATCH v3 3/5] lib: move mbuf next pointer to first cache line Shijith Thotton
@ 2022-09-21 14:07                   ` Morten Brørup
  2022-09-28 12:52                   ` Olivier Matz
  1 sibling, 0 replies; 88+ messages in thread
From: Morten Brørup @ 2022-09-21 14:07 UTC (permalink / raw)
  To: Shijith Thotton, dev
  Cc: pbhagavatula, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	olivier.matz, stephen, thomas, david.marchand

> From: Shijith Thotton [mailto:sthotton@marvell.com]
> Sent: Wednesday, 21 September 2022 15.56
> 
> Swapped position of mbuf next pointer and second dynamic field
> (dynfield2)
> if the build is configured to use IOVA as VA. This is to move the mbuf
> next pointer to first cache line. kni library is disabled for this
> change as it depends on the offset value of next pointer.
> 
> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> ---

Series-Acked-by: Morten Brørup <mb@smartsharesystems.com>


^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [PATCH v3 0/5] mbuf dynamic field expansion
  2022-09-21 13:56               ` [PATCH v3 " Shijith Thotton
                                   ` (4 preceding siblings ...)
  2022-09-21 13:56                 ` [PATCH v3 5/5] drivers: mark software " Shijith Thotton
@ 2022-09-28  5:41                 ` Shijith Thotton
  2022-09-28 12:52                 ` Olivier Matz
                                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-28  5:41 UTC (permalink / raw)
  To: Shijith Thotton, dev
  Cc: Pavan Nikhilesh Bhagavatula, Honnappa.Nagarahalli,
	bruce.richardson, Jerin Jacob Kollanukkaran, mb, olivier.matz,
	stephen, thomas, david.marchand

>
>This is a continuation of the discussions[1] to add mbuf physical address field to
>dynamic field.
>Previous version was to add PA field to dynamic field area based on the EAL IOVA
>mode option. It was
>deemed unsafe as some components could still use the PA field without checking
>IOVA mode and there
>are drivers which need PA to work. One suggestion was to make the IOVA mode
>check at compile time so
>that drivers which need PA can be disabled during build. This series adds this new
>meson build
>options. Second patch adds mbuf PA field to dynamic field on such builds. Last two
>patches enable
>Marvell cnxk PMDs and software PMDs in IOVA as VA build as they work without
>PA field.
>
>1.
>https://inbox.dpdk.org/dev/57d2ab7fff672716d37ba4078e2e3bb2db126607.16566
>05763.git.sthotton@marvell.com/.
>
>v3:
> * Cleared use of buf_iova from cnxk PMD.
>
>v2:
> * Used RTE_IOVA_AS_VA instead of rte_is_iova_as_va_build().
> * Moved mbuf next pointer to first cacheline if RTE_IOVA_AS_VA = 1.
>
>Shijith Thotton (5):
>  build: add meson option to configure IOVA mode as VA
>  mbuf: add second dynamic field member for VA only build
>  lib: move mbuf next pointer to first cache line
>  drivers: mark Marvell cnxk PMDs work with IOVA as VA
>  drivers: mark software PMDs work with IOVA as VA
>

Hi All,

Please comment if any changes are needed on the series.
Right now, there is 1 ack from Morten.

Thanks,
Shijith

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v3 2/5] mbuf: add second dynamic field member for VA only build
  2022-09-21 13:56                 ` [PATCH v3 2/5] mbuf: add second dynamic field member for VA only build Shijith Thotton
@ 2022-09-28  7:24                   ` Thomas Monjalon
  2022-09-28 12:52                     ` Olivier Matz
  2022-09-28 12:52                   ` Olivier Matz
  1 sibling, 1 reply; 88+ messages in thread
From: Thomas Monjalon @ 2022-09-28  7:24 UTC (permalink / raw)
  To: Shijith Thotton
  Cc: dev, pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen,
	david.marchand

21/09/2022 15:56, Shijith Thotton:
> mbuf physical address field is not used in builds which only use VA. In
> such builds, its space is used to expand the dynamic field area.
> 
> Signed-off-by: Shijith Thotton <sthotton@marvell.com>

We cannot condition the use of the dynamic field.
I think it is enough justification to reject this patch.

And about adding a compilation option for IOVA in the first patch of this series,
I think it is not the direction the majority wants DPDK to go.
We tend to avoid compilation options.

> @@ -579,15 +579,23 @@ struct rte_mbuf {
>  	RTE_MARKER cacheline0;
>  
>  	void *buf_addr;           /**< Virtual address of segment buffer. */
> -	/**
> -	 * Physical address of segment buffer.
> -	 * This field is invalid if the build is configured to use only
> -	 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is 1).
> -	 * Force alignment to 8-bytes, so as to ensure we have the exact
> -	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
> -	 * working on vector drivers easier.
> -	 */
> -	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
> +	RTE_STD_C11
> +	union {
> +		/**
> +		 * Physical address of segment buffer.
> +		 * This field is invalid if the build is configured to use only
> +		 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is 1).
> +		 * Force alignment to 8-bytes, so as to ensure we have the exact
> +		 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
> +		 * working on vector drivers easier.
> +		 */
> +		rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
> +		/**
> +		 * Reserved for dynamic field in builds where physical address
> +		 * field is invalid.
> +		 */
> +		uint64_t dynfield2;
> +	};




^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v3 0/5] mbuf dynamic field expansion
  2022-09-21 13:56               ` [PATCH v3 " Shijith Thotton
                                   ` (5 preceding siblings ...)
  2022-09-28  5:41                 ` [PATCH v3 0/5] mbuf dynamic field expansion Shijith Thotton
@ 2022-09-28 12:52                 ` Olivier Matz
  2022-09-29  4:51                   ` [EXT] " Shijith Thotton
  2022-10-07 13:50                 ` Thomas Monjalon
  2022-10-07 19:30                 ` [PATCH v4 0/7] " Shijith Thotton
  8 siblings, 1 reply; 88+ messages in thread
From: Olivier Matz @ 2022-09-28 12:52 UTC (permalink / raw)
  To: Shijith Thotton
  Cc: dev, pbhagavatula, Honnappa.Nagarahalli, bruce.richardson,
	jerinj, mb, stephen, thomas, david.marchand

Hi Shijith,

On Wed, Sep 21, 2022 at 07:26:16PM +0530, Shijith Thotton wrote:
> This is a continuation of the discussions[1] to add mbuf physical address field to dynamic field.
> Previous version was to add PA field to dynamic field area based on the EAL IOVA mode option. It was
> deemed unsafe as some components could still use the PA field without checking IOVA mode and there
> are drivers which need PA to work. One suggestion was to make the IOVA mode check at compile time so
> that drivers which need PA can be disabled during build. This series adds this new meson build
> options. Second patch adds mbuf PA field to dynamic field on such builds. Last two patches enable
> Marvell cnxk PMDs and software PMDs in IOVA as VA build as they work without PA field.

Thank you for this patchset.

To be honest, initially I had real reservations about removing the use
of buf_iova for some specific platforms.

But what made me change my mind is that the removal of buf_iova will
likely happen in the long-term future. It looks like there is a consensus
on this. I think your patchset is a good way to prepare this transition.

What is missing, I think, is a good description of the problem you are
solving:

- more space for dynamic mbuf fields -> why? can you give more detail about
  this need?
- increase performance -> you previously said that it was not your point,
  but if we move the next field into the first cache line, I think this
  has to be highlighted. Out of curiosity, did you make any measurements?

I'm sending separate comments as replies to the patches.

Olivier


> 
> 1. https://inbox.dpdk.org/dev/57d2ab7fff672716d37ba4078e2e3bb2db126607.1656605763.git.sthotton@marvell.com/.
> 
> v3:
>  * Cleared use of buf_iova from cnxk PMD.
> 
> v2:
>  * Used RTE_IOVA_AS_VA instead of rte_is_iova_as_va_build().
>  * Moved mbuf next pointer to first cacheline if RTE_IOVA_AS_VA = 1.
> 
> Shijith Thotton (5):
>   build: add meson option to configure IOVA mode as VA
>   mbuf: add second dynamic field member for VA only build
>   lib: move mbuf next pointer to first cache line
>   drivers: mark Marvell cnxk PMDs work with IOVA as VA
>   drivers: mark software PMDs work with IOVA as VA
> 
>  app/test-bbdev/test_bbdev_perf.c         |  2 +-
>  app/test-crypto-perf/cperf_test_common.c |  5 +--
>  app/test/test_bpf.c                      |  2 +-
>  app/test/test_dmadev.c                   | 33 ++++++--------
>  app/test/test_mbuf.c                     | 12 +++---
>  app/test/test_pcapng.c                   |  2 +-
>  config/arm/meson.build                   |  8 +++-
>  config/meson.build                       |  1 +
>  drivers/common/cnxk/meson.build          |  1 +
>  drivers/crypto/armv8/meson.build         |  1 +
>  drivers/crypto/cnxk/cn10k_ipsec_la_ops.h |  4 +-
>  drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  |  2 +-
>  drivers/crypto/cnxk/meson.build          |  2 +
>  drivers/crypto/ipsec_mb/meson.build      |  1 +
>  drivers/crypto/null/meson.build          |  1 +
>  drivers/crypto/openssl/meson.build       |  1 +
>  drivers/dma/cnxk/meson.build             |  1 +
>  drivers/dma/skeleton/meson.build         |  1 +
>  drivers/event/cnxk/meson.build           |  1 +
>  drivers/event/dsw/meson.build            |  1 +
>  drivers/event/opdl/meson.build           |  1 +
>  drivers/event/skeleton/meson.build       |  1 +
>  drivers/event/sw/meson.build             |  1 +
>  drivers/mempool/bucket/meson.build       |  1 +
>  drivers/mempool/cnxk/meson.build         |  1 +
>  drivers/mempool/ring/meson.build         |  1 +
>  drivers/mempool/stack/meson.build        |  1 +
>  drivers/meson.build                      |  6 +++
>  drivers/net/af_packet/meson.build        |  1 +
>  drivers/net/af_xdp/meson.build           |  2 +
>  drivers/net/bonding/meson.build          |  1 +
>  drivers/net/cnxk/cn10k_tx.h              | 55 +++++++-----------------
>  drivers/net/cnxk/cn9k_tx.h               | 55 +++++++-----------------
>  drivers/net/cnxk/cnxk_ethdev.h           |  1 -
>  drivers/net/cnxk/meson.build             |  1 +
>  drivers/net/failsafe/meson.build         |  1 +
>  drivers/net/memif/meson.build            |  1 +
>  drivers/net/null/meson.build             |  1 +
>  drivers/net/pcap/meson.build             |  1 +
>  drivers/net/ring/meson.build             |  1 +
>  drivers/net/tap/meson.build              |  1 +
>  drivers/raw/cnxk_bphy/meson.build        |  1 +
>  drivers/raw/cnxk_gpio/meson.build        |  1 +
>  drivers/raw/skeleton/meson.build         |  1 +
>  lib/eal/linux/eal.c                      |  7 +++
>  lib/mbuf/rte_mbuf.c                      |  8 ++--
>  lib/mbuf/rte_mbuf.h                      | 17 +++++---
>  lib/mbuf/rte_mbuf_core.h                 | 55 ++++++++++++++++++------
>  lib/mbuf/rte_mbuf_dyn.c                  |  2 +
>  lib/meson.build                          |  3 ++
>  lib/vhost/vhost.h                        |  2 +-
>  lib/vhost/vhost_crypto.c                 | 54 +++++++++++++++++------
>  meson_options.txt                        |  2 +
>  53 files changed, 220 insertions(+), 150 deletions(-)
> 
> -- 
> 2.25.1
> 

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v3 1/5] build: add meson option to configure IOVA mode as VA
  2022-09-21 13:56                 ` [PATCH v3 1/5] build: add meson option to configure IOVA mode as VA Shijith Thotton
@ 2022-09-28 12:52                   ` Olivier Matz
  2022-09-29  5:48                     ` [EXT] " Shijith Thotton
  0 siblings, 1 reply; 88+ messages in thread
From: Olivier Matz @ 2022-09-28 12:52 UTC (permalink / raw)
  To: Shijith Thotton
  Cc: dev, pbhagavatula, Honnappa.Nagarahalli, bruce.richardson,
	jerinj, mb, stephen, thomas, david.marchand, Nicolas Chautru,
	Ciara Power, Konstantin Ananyev, Chengwen Feng, Kevin Laatz,
	Reshma Pattan, Maxime Coquelin, Chenbo Xia

On Wed, Sep 21, 2022 at 07:26:17PM +0530, Shijith Thotton wrote:
> IOVA mode in DPDK is either PA or VA. The new build option iova_as_va
> configures the mode to VA at compile time and prevents setting it to PA
> at runtime. For now, all drivers which are not always enabled are
> disabled with this option. Supported driver can set the flag
> pmd_iova_as_va in its build file to enable build.
> 
> mbuf structure holds the physical (PA) and virtual address (VA) of a
> buffer. if IOVA mode is set to VA, PA is redundant as it is the same as
> VA. So PA field need not be updated and marked invalid if the build is
> configured to use only VA.
> 
> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> ---
>  app/test-bbdev/test_bbdev_perf.c         |  2 +-
>  app/test-crypto-perf/cperf_test_common.c |  5 +--
>  app/test/test_bpf.c                      |  2 +-
>  app/test/test_dmadev.c                   | 33 ++++++---------
>  app/test/test_mbuf.c                     | 12 +++---
>  app/test/test_pcapng.c                   |  2 +-
>  config/meson.build                       |  1 +
>  drivers/meson.build                      |  6 +++
>  lib/eal/linux/eal.c                      |  7 +++
>  lib/mbuf/rte_mbuf.c                      |  8 ++--
>  lib/mbuf/rte_mbuf.h                      | 17 +++++---
>  lib/mbuf/rte_mbuf_core.h                 | 10 +++++
>  lib/vhost/vhost.h                        |  2 +-
>  lib/vhost/vhost_crypto.c                 | 54 ++++++++++++++++++------
>  meson_options.txt                        |  2 +
>  15 files changed, 109 insertions(+), 54 deletions(-)
> 
> diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
> index 8fab52d821..f6aa25b67d 100644
> --- a/app/test-bbdev/test_bbdev_perf.c
> +++ b/app/test-bbdev/test_bbdev_perf.c
> @@ -1001,7 +1001,7 @@ init_op_data_objs(struct rte_bbdev_op_data *bufs,
>  					seg->length);
>  				memcpy(data, seg->addr, seg->length);
>  				m_head->buf_addr = data;
> -				m_head->buf_iova = rte_malloc_virt2iova(data);
> +				rte_mbuf_iova_set(m_head, rte_malloc_virt2iova(data));

Wouldn't it be better to have a preliminary patch that replaces direct
accesses to m->buf_iova by rte_mbuf_iova_*() functions in app and libs?
This would make this commit smaller to read.

If I understand properly, the drivers/ part has to be done at the same time
as setting "pmd_iova_as_va" in the meson config.

>  				m_head->data_off = 0;
>  				m_head->data_len = seg->length;
>  			} else {
> diff --git a/app/test-crypto-perf/cperf_test_common.c b/app/test-crypto-perf/cperf_test_common.c
> index 00aadc9a47..27646cd619 100644
> --- a/app/test-crypto-perf/cperf_test_common.c
> +++ b/app/test-crypto-perf/cperf_test_common.c
> @@ -26,8 +26,7 @@ fill_single_seg_mbuf(struct rte_mbuf *m, struct rte_mempool *mp,
>  	/* start of buffer is after mbuf structure and priv data */
>  	m->priv_size = 0;
>  	m->buf_addr = (char *)m + mbuf_hdr_size;
> -	m->buf_iova = rte_mempool_virt2iova(obj) +
> -		mbuf_offset + mbuf_hdr_size;
> +	rte_mbuf_iova_set(m, rte_mempool_virt2iova(obj) + mbuf_offset + mbuf_hdr_size);
>  	m->buf_len = segment_sz;
>  	m->data_len = data_len;
>  	m->pkt_len = data_len;
> @@ -58,7 +57,7 @@ fill_multi_seg_mbuf(struct rte_mbuf *m, struct rte_mempool *mp,
>  		/* start of buffer is after mbuf structure and priv data */
>  		m->priv_size = 0;
>  		m->buf_addr = (char *)m + mbuf_hdr_size;
> -		m->buf_iova = next_seg_phys_addr;
> +		rte_mbuf_iova_set(m, next_seg_phys_addr);
>  		next_seg_phys_addr += mbuf_hdr_size + segment_sz;
>  		m->buf_len = segment_sz;
>  		m->data_len = data_len;
> diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c
> index 97f500809e..f5af5e8a3f 100644
> --- a/app/test/test_bpf.c
> +++ b/app/test/test_bpf.c
> @@ -2600,7 +2600,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len,
>  	uint8_t *db;
>  
>  	mb->buf_addr = buf;
> -	mb->buf_iova = (uintptr_t)buf;
> +	rte_mbuf_iova_set(mb, (uintptr_t)buf);
>  	mb->buf_len = buf_len;
>  	rte_mbuf_refcnt_set(mb, 1);
>  
> diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c
> index 9e8e101f40..8306947eda 100644
> --- a/app/test/test_dmadev.c
> +++ b/app/test/test_dmadev.c
> @@ -110,8 +110,8 @@ do_multi_copies(int16_t dev_id, uint16_t vchan,
>  		for (j = 0; j < COPY_LEN/sizeof(uint64_t); j++)
>  			src_data[j] = rte_rand();
>  
> -		if (rte_dma_copy(dev_id, vchan, srcs[i]->buf_iova + srcs[i]->data_off,
> -				dsts[i]->buf_iova + dsts[i]->data_off, COPY_LEN, 0) != id_count++)
> +		if (rte_dma_copy(dev_id, vchan, rte_pktmbuf_iova_offset(srcs[i], 0),
> +				 rte_pktmbuf_iova_offset(dsts[i], 0), COPY_LEN, 0) != id_count++)
>  			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", i);
>  	}
>  	rte_dma_submit(dev_id, vchan);
> @@ -317,9 +317,8 @@ test_failure_in_full_burst(int16_t dev_id, uint16_t vchan, bool fence,
>  	rte_dma_stats_get(dev_id, vchan, &baseline); /* get a baseline set of stats */
>  	for (i = 0; i < COMP_BURST_SZ; i++) {
>  		int id = rte_dma_copy(dev_id, vchan,
> -				(i == fail_idx ? 0 : (srcs[i]->buf_iova + srcs[i]->data_off)),
> -				dsts[i]->buf_iova + dsts[i]->data_off,
> -				COPY_LEN, OPT_FENCE(i));
> +				      (i == fail_idx ? 0 : rte_pktmbuf_iova_offset(srcs[i], 0)),
> +				      rte_pktmbuf_iova_offset(dsts[i], 0), COPY_LEN, OPT_FENCE(i));
>  		if (id < 0)
>  			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", i);
>  		if (i == fail_idx)
> @@ -407,9 +406,8 @@ test_individual_status_query_with_failure(int16_t dev_id, uint16_t vchan, bool f
>  
>  	for (j = 0; j < COMP_BURST_SZ; j++) {
>  		int id = rte_dma_copy(dev_id, vchan,
> -				(j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]->data_off)),
> -				dsts[j]->buf_iova + dsts[j]->data_off,
> -				COPY_LEN, OPT_FENCE(j));
> +				      (j == fail_idx ? 0 : rte_pktmbuf_iova_offset(srcs[j], 0)),
> +				      rte_pktmbuf_iova_offset(dsts[j], 0), COPY_LEN, OPT_FENCE(j));
>  		if (id < 0)
>  			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
>  		if (j == fail_idx)
> @@ -470,9 +468,8 @@ test_single_item_status_query_with_failure(int16_t dev_id, uint16_t vchan,
>  
>  	for (j = 0; j < COMP_BURST_SZ; j++) {
>  		int id = rte_dma_copy(dev_id, vchan,
> -				(j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]->data_off)),
> -				dsts[j]->buf_iova + dsts[j]->data_off,
> -				COPY_LEN, 0);
> +				      (j == fail_idx ? 0 : rte_pktmbuf_iova_offset(srcs[j], 0)),
> +				      rte_pktmbuf_iova_offset(dsts[j], 0), COPY_LEN, 0);
>  		if (id < 0)
>  			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
>  		if (j == fail_idx)
> @@ -529,15 +526,14 @@ test_multi_failure(int16_t dev_id, uint16_t vchan, struct rte_mbuf **srcs, struc
>  
>  	/* enqueue and gather completions in one go */
>  	for (j = 0; j < COMP_BURST_SZ; j++) {
> -		uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
> +		uintptr_t src = rte_pktmbuf_iova_offset(srcs[j], 0);
>  		/* set up for failure if the current index is anywhere is the fails array */
>  		for (i = 0; i < num_fail; i++)
>  			if (j == fail[i])
>  				src = 0;
>  
> -		int id = rte_dma_copy(dev_id, vchan,
> -				src, dsts[j]->buf_iova + dsts[j]->data_off,
> -				COPY_LEN, 0);
> +		int id = rte_dma_copy(dev_id, vchan, src, rte_pktmbuf_iova_offset(dsts[j], 0),
> +				      COPY_LEN, 0);
>  		if (id < 0)
>  			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
>  	}
> @@ -565,15 +561,14 @@ test_multi_failure(int16_t dev_id, uint16_t vchan, struct rte_mbuf **srcs, struc
>  
>  	/* enqueue and gather completions in bursts, but getting errors one at a time */
>  	for (j = 0; j < COMP_BURST_SZ; j++) {
> -		uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
> +		uintptr_t src = rte_pktmbuf_iova_offset(srcs[j], 0);
>  		/* set up for failure if the current index is anywhere is the fails array */
>  		for (i = 0; i < num_fail; i++)
>  			if (j == fail[i])
>  				src = 0;
>  
> -		int id = rte_dma_copy(dev_id, vchan,
> -				src, dsts[j]->buf_iova + dsts[j]->data_off,
> -				COPY_LEN, 0);
> +		int id = rte_dma_copy(dev_id, vchan, src, rte_pktmbuf_iova_offset(dsts[j], 0),
> +				      COPY_LEN, 0);
>  		if (id < 0)
>  			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
>  	}
> diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
> index e09b2549ca..45431f2c9c 100644
> --- a/app/test/test_mbuf.c
> +++ b/app/test/test_mbuf.c
> @@ -1232,11 +1232,13 @@ test_failing_mbuf_sanity_check(struct rte_mempool *pktmbuf_pool)
>  		return -1;
>  	}
>  
> -	badbuf = *buf;
> -	badbuf.buf_iova = 0;
> -	if (verify_mbuf_check_panics(&badbuf)) {
> -		printf("Error with bad-physaddr mbuf test\n");
> -		return -1;
> +	if (!RTE_IOVA_AS_VA) {
> +		badbuf = *buf;
> +		rte_mbuf_iova_set(&badbuf, 0);
> +		if (verify_mbuf_check_panics(&badbuf)) {
> +			printf("Error with bad-physaddr mbuf test\n");
> +			return -1;
> +		}
>  	}
>  
>  	badbuf = *buf;
> diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c
> index 320dacea34..abbf00f6da 100644
> --- a/app/test/test_pcapng.c
> +++ b/app/test/test_pcapng.c
> @@ -40,7 +40,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len,
>  	uint8_t *db;
>  
>  	mb->buf_addr = buf;
> -	mb->buf_iova = (uintptr_t)buf;
> +	rte_mbuf_iova_set(mb, (uintptr_t)buf);
>  	mb->buf_len = buf_len;
>  	rte_mbuf_refcnt_set(mb, 1);
>  
> diff --git a/config/meson.build b/config/meson.build
> index 7f7b6c92fd..6b6c3e7eb6 100644
> --- a/config/meson.build
> +++ b/config/meson.build
> @@ -309,6 +309,7 @@ endif
>  if get_option('mbuf_refcnt_atomic')
>      dpdk_conf.set('RTE_MBUF_REFCNT_ATOMIC', true)
>  endif
> +dpdk_conf.set10('RTE_IOVA_AS_VA', get_option('iova_as_va'))
>  
>  compile_time_cpuflags = []
>  subdir(arch_subdir)
> diff --git a/drivers/meson.build b/drivers/meson.build
> index 376a64f4da..989770cffd 100644
> --- a/drivers/meson.build
> +++ b/drivers/meson.build
> @@ -105,6 +105,7 @@ foreach subpath:subdirs
>          ext_deps = []
>          pkgconfig_extra_libs = []
>          testpmd_sources = []
> +        pmd_iova_as_va = false

This option should be documented; however, I don't know where the proper
place is. A comment here would be a good start I think.

I'm trying to find a more explicit name, but it's not easy.
What do you think about pmd_supports_disable_iova_as_pa?

Explicit is always better: it could prevent someone adding a new driver from
blindly copying the flag from a template driver.

>  
>          if not enable_drivers.contains(drv_path)
>              build = false
> @@ -122,6 +123,11 @@ foreach subpath:subdirs
>              # pull in driver directory which should update all the local variables
>              subdir(drv_path)
>  
> +            if dpdk_conf.get('RTE_IOVA_AS_VA') == 1 and not pmd_iova_as_va and not always_enable.contains(drv_path)
> +                build = false
> +                reason = 'driver does not support IOVA as VA mode'
> +            endif
> +
>              # get dependency objs from strings
>              shared_deps = ext_deps
>              static_deps = ext_deps
> diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
> index 37d29643a5..b70c4dcc5f 100644
> --- a/lib/eal/linux/eal.c
> +++ b/lib/eal/linux/eal.c
> @@ -1127,6 +1127,13 @@ rte_eal_init(int argc, char **argv)
>  		return -1;
>  	}
>  
> +	if (rte_eal_iova_mode() == RTE_IOVA_PA && RTE_IOVA_AS_VA) {
> +		rte_eal_init_alert(
> +			"Cannot use IOVA as 'PA' since build is configured to use only 'VA'");
> +		rte_errno = EINVAL;
> +		return -1;
> +	}
> +
>  	RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
>  		rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");
>  
> diff --git a/lib/mbuf/rte_mbuf.c b/lib/mbuf/rte_mbuf.c
> index a2307cebe6..5af290c53a 100644
> --- a/lib/mbuf/rte_mbuf.c
> +++ b/lib/mbuf/rte_mbuf.c
> @@ -89,7 +89,7 @@ rte_pktmbuf_init(struct rte_mempool *mp,
>  	/* start of buffer is after mbuf structure and priv data */
>  	m->priv_size = priv_size;
>  	m->buf_addr = (char *)m + mbuf_size;
> -	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
> +	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
>  	m->buf_len = (uint16_t)buf_len;
>  
>  	/* keep some headroom between start of buffer and data */
> @@ -187,8 +187,8 @@ __rte_pktmbuf_init_extmem(struct rte_mempool *mp,
>  	RTE_ASSERT(ctx->off + ext_mem->elt_size <= ext_mem->buf_len);
>  
>  	m->buf_addr = RTE_PTR_ADD(ext_mem->buf_ptr, ctx->off);
> -	m->buf_iova = ext_mem->buf_iova == RTE_BAD_IOVA ?
> -		      RTE_BAD_IOVA : (ext_mem->buf_iova + ctx->off);
> +	rte_mbuf_iova_set(m, ext_mem->buf_iova == RTE_BAD_IOVA ? RTE_BAD_IOVA :
> +								 (ext_mem->buf_iova + ctx->off));
>  
>  	ctx->off += ext_mem->elt_size;
>  	if (ctx->off + ext_mem->elt_size > ext_mem->buf_len) {
> @@ -388,7 +388,7 @@ int rte_mbuf_check(const struct rte_mbuf *m, int is_header,
>  		*reason = "bad mbuf pool";
>  		return -1;
>  	}
> -	if (m->buf_iova == 0) {
> +	if (m->buf_iova == 0 && !RTE_IOVA_AS_VA) {
>  		*reason = "bad IO addr";
>  		return -1;
>  	}
> diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h
> index 9811e8c760..05be146bc2 100644
> --- a/lib/mbuf/rte_mbuf.h
> +++ b/lib/mbuf/rte_mbuf.h
> @@ -146,7 +146,7 @@ static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
>  static inline rte_iova_t
>  rte_mbuf_data_iova(const struct rte_mbuf *mb)
>  {
> -	return mb->buf_iova + mb->data_off;
> +	return (RTE_IOVA_AS_VA ? (uint64_t)mb->buf_addr : mb->buf_iova) + mb->data_off;

nit: cast should be rte_iova_t instead of uint64_t

>  }
>  
>  /**
> @@ -164,7 +164,7 @@ rte_mbuf_data_iova(const struct rte_mbuf *mb)
>  static inline rte_iova_t
>  rte_mbuf_data_iova_default(const struct rte_mbuf *mb)
>  {
> -	return mb->buf_iova + RTE_PKTMBUF_HEADROOM;
> +	return (RTE_IOVA_AS_VA ? (uint64_t)mb->buf_addr : mb->buf_iova) + RTE_PKTMBUF_HEADROOM;
>  }

same here

>  
>  /**
> @@ -469,6 +469,13 @@ rte_mbuf_ext_refcnt_update(struct rte_mbuf_ext_shared_info *shinfo,
>  				 __ATOMIC_ACQ_REL);
>  }
>  
> +static inline void
> +rte_mbuf_iova_set(struct rte_mbuf *m, rte_iova_t iova)
> +{
> +	if (!RTE_IOVA_AS_VA)
> +		m->buf_iova = iova;
> +}
> +
>  /** Mbuf prefetch */
>  #define RTE_MBUF_PREFETCH_TO_FREE(m) do {       \
>  	if ((m) != NULL)                        \
> @@ -1056,7 +1063,7 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
>  	RTE_ASSERT(shinfo->free_cb != NULL);
>  
>  	m->buf_addr = buf_addr;
> -	m->buf_iova = buf_iova;
> +	rte_mbuf_iova_set(m, buf_iova);
>  	m->buf_len = buf_len;
>  
>  	m->data_len = 0;
> @@ -1143,7 +1150,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
>  
>  	mi->data_off = m->data_off;
>  	mi->data_len = m->data_len;
> -	mi->buf_iova = m->buf_iova;
> +	rte_mbuf_iova_set(mi, m->buf_iova);
>  	mi->buf_addr = m->buf_addr;
>  	mi->buf_len = m->buf_len;
>  
> @@ -1245,7 +1252,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
>  
>  	m->priv_size = priv_size;
>  	m->buf_addr = (char *)m + mbuf_size;
> -	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
> +	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
>  	m->buf_len = (uint16_t)buf_len;
>  	rte_pktmbuf_reset_headroom(m);
>  	m->data_len = 0;
> diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> index 3d6ddd6773..c6292e7252 100644
> --- a/lib/mbuf/rte_mbuf_core.h
> +++ b/lib/mbuf/rte_mbuf_core.h
> @@ -581,6 +581,8 @@ struct rte_mbuf {
>  	void *buf_addr;           /**< Virtual address of segment buffer. */
>  	/**
>  	 * Physical address of segment buffer.
> +	 * This field is invalid if the build is configured to use only
> +	 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is 1).
>  	 * Force alignment to 8-bytes, so as to ensure we have the exact
>  	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
>  	 * working on vector drivers easier.

If the field is invalid, can't we add an #if condition ? I mean:

#if !RTE_IOVA_AS_VA
        rte_iova_t buf_iova;
#else
        uint64_t dummy;
#endif

I think it is preferable, because it would ensure that we never use
buf_iova when RTE_IOVA_AS_VA is set (especially useful when compiling
out-of-tree drivers).

This would certainly require changing some of the static inline
functions to use #if instead of if(), but I think it's worth the effort.


> @@ -848,8 +850,12 @@ struct rte_mbuf_ext_shared_info {
>   * @param o
>   *   The offset into the data to calculate address from.
>   */
> +#if RTE_IOVA_AS_VA
> +#define rte_pktmbuf_iova_offset(m, o) rte_pktmbuf_mtod_offset(m, rte_iova_t, o)
> +#else
>  #define rte_pktmbuf_iova_offset(m, o) \
>  	(rte_iova_t)((m)->buf_iova + (m)->data_off + (o))
> +#endif
>  
>  /**
>   * A macro that returns the IO address that points to the start of the
> @@ -858,7 +864,11 @@ struct rte_mbuf_ext_shared_info {
>   * @param m
>   *   The packet mbuf.
>   */
> +#if RTE_IOVA_AS_VA
> +#define rte_pktmbuf_iova(m) rte_pktmbuf_mtod(m, rte_iova_t)
> +#else
>  #define rte_pktmbuf_iova(m) rte_pktmbuf_iova_offset(m, 0)
> +#endif
>  
>  #ifdef __cplusplus
>  }
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> index 782d916ae0..05cde6e118 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -967,7 +967,7 @@ restore_mbuf(struct rte_mbuf *m)
>  		/* start of buffer is after mbuf structure and priv data */
>  
>  		m->buf_addr = (char *)m + mbuf_size;
> -		m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
> +		rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
>  		m = m->next;
>  	}
>  }
> diff --git a/lib/vhost/vhost_crypto.c b/lib/vhost/vhost_crypto.c
> index 54946f46d9..7b50735796 100644
> --- a/lib/vhost/vhost_crypto.c
> +++ b/lib/vhost/vhost_crypto.c
> @@ -823,11 +823,17 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
>  	switch (vcrypto->option) {
>  	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
>  		m_src->data_len = cipher->para.src_data_len;
> -		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
> -				cipher->para.src_data_len);
> +		if (!RTE_IOVA_AS_VA) {
> +			m_src->buf_iova =
> +				gpa_to_hpa(vcrypto->dev, desc->addr, cipher->para.src_data_len);
> +			if (unlikely(m_src->buf_iova == 0)) {
> +				VC_LOG_ERR("zero_copy may fail due to cross page data");
> +				ret = VIRTIO_CRYPTO_ERR;
> +				goto error_exit;
> +			}
> +		}
>  		m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO);
> -		if (unlikely(m_src->buf_iova == 0 ||
> -				m_src->buf_addr == NULL)) {
> +		if (unlikely(m_src->buf_addr == NULL)) {
>  			VC_LOG_ERR("zero_copy may fail due to cross page data");
>  			ret = VIRTIO_CRYPTO_ERR;
>  			goto error_exit;
> @@ -867,10 +873,17 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
>  
>  	switch (vcrypto->option) {
>  	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
> -		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
> -				desc->addr, cipher->para.dst_data_len);
> +		if (!RTE_IOVA_AS_VA) {
> +			m_dst->buf_iova =
> +				gpa_to_hpa(vcrypto->dev, desc->addr, cipher->para.dst_data_len);
> +			if (unlikely(m_dst->buf_iova == 0)) {
> +				VC_LOG_ERR("zero_copy may fail due to cross page data");
> +				ret = VIRTIO_CRYPTO_ERR;
> +				goto error_exit;
> +			}
> +		}
>  		m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW);
> -		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
> +		if (unlikely(m_dst->buf_addr == NULL)) {
>  			VC_LOG_ERR("zero_copy may fail due to cross page data");
>  			ret = VIRTIO_CRYPTO_ERR;
>  			goto error_exit;
> @@ -980,11 +993,17 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
>  	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
>  		m_src->data_len = chain->para.src_data_len;
>  		m_dst->data_len = chain->para.dst_data_len;
> -
> -		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
> -				chain->para.src_data_len);
> +		if (!RTE_IOVA_AS_VA) {
> +			m_src->buf_iova =
> +				gpa_to_hpa(vcrypto->dev, desc->addr, chain->para.src_data_len);
> +			if (unlikely(m_src->buf_iova == 0)) {
> +				VC_LOG_ERR("zero_copy may fail due to cross page data");
> +				ret = VIRTIO_CRYPTO_ERR;
> +				goto error_exit;
> +			}
> +		}
>  		m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO);
> -		if (unlikely(m_src->buf_iova == 0 || m_src->buf_addr == NULL)) {
> +		if (unlikely(m_src->buf_addr == NULL)) {
>  			VC_LOG_ERR("zero_copy may fail due to cross page data");
>  			ret = VIRTIO_CRYPTO_ERR;
>  			goto error_exit;
> @@ -1024,10 +1043,17 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
>  
>  	switch (vcrypto->option) {
>  	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
> -		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
> -				desc->addr, chain->para.dst_data_len);
> +		if (!RTE_IOVA_AS_VA) {
> +			m_dst->buf_iova =
> +				gpa_to_hpa(vcrypto->dev, desc->addr, chain->para.dst_data_len);
> +			if (unlikely(m_dst->buf_iova == 0)) {
> +				VC_LOG_ERR("zero_copy may fail due to cross page data");
> +				ret = VIRTIO_CRYPTO_ERR;
> +				goto error_exit;
> +			}
> +		}
>  		m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW);
> -		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
> +		if (unlikely(m_dst->buf_addr == NULL)) {
>  			VC_LOG_ERR("zero_copy may fail due to cross page data");
>  			ret = VIRTIO_CRYPTO_ERR;
>  			goto error_exit;
> diff --git a/meson_options.txt b/meson_options.txt
> index 7c220ad68d..f0fa6cf04c 100644
> --- a/meson_options.txt
> +++ b/meson_options.txt
> @@ -44,6 +44,8 @@ option('platform', type: 'string', value: 'native', description:
>         'Platform to build, either "native", "generic" or a SoC. Please refer to the Linux build guide for more information.')
>  option('enable_trace_fp', type: 'boolean', value: false, description:
>         'enable fast path trace points.')
> +option('iova_as_va', type: 'boolean', value: false, description:
> +       'Build which only supports IOVA as VA mode. Unsupported drivers are disabled.')

I wonder if we can find a better name for the option. Currently, it is a bit
confusing to me, because iova_as_va=false does not mean that iova_as_va is
disabled.

What about iova_as_pa=true|false, or enable_iova_as_pa=true|false, or
disable_iova_as_pa=true|false?

The help string would then maybe be easier to come up with, something like
"Enable or disable support for IOVA as PA mode."

We can also explain that enabling this option removes the buf_iova field from
the mbuf.

>  option('tests', type: 'boolean', value: true, description:
>         'build unit tests')
>  option('use_hpet', type: 'boolean', value: false, description:
> -- 
> 2.25.1
> 

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v3 2/5] mbuf: add second dynamic field member for VA only build
  2022-09-21 13:56                 ` [PATCH v3 2/5] mbuf: add second dynamic field member for VA only build Shijith Thotton
  2022-09-28  7:24                   ` Thomas Monjalon
@ 2022-09-28 12:52                   ` Olivier Matz
  1 sibling, 0 replies; 88+ messages in thread
From: Olivier Matz @ 2022-09-28 12:52 UTC (permalink / raw)
  To: Shijith Thotton
  Cc: dev, pbhagavatula, Honnappa.Nagarahalli, bruce.richardson,
	jerinj, mb, stephen, thomas, david.marchand

Hi,

On Wed, Sep 21, 2022 at 07:26:18PM +0530, Shijith Thotton wrote:
> mbuf physical address field is not used in builds which only uses VA. It
> is used to expand the dynamic field area.
> 
> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> ---
>  lib/mbuf/rte_mbuf_core.h | 26 +++++++++++++++++---------
>  lib/mbuf/rte_mbuf_dyn.c  |  2 ++
>  2 files changed, 19 insertions(+), 9 deletions(-)
> 
> diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> index c6292e7252..94907f301d 100644
> --- a/lib/mbuf/rte_mbuf_core.h
> +++ b/lib/mbuf/rte_mbuf_core.h
> @@ -579,15 +579,23 @@ struct rte_mbuf {
>  	RTE_MARKER cacheline0;
>  
>  	void *buf_addr;           /**< Virtual address of segment buffer. */
> -	/**
> -	 * Physical address of segment buffer.
> -	 * This field is invalid if the build is configured to use only
> -	 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is 1).
> -	 * Force alignment to 8-bytes, so as to ensure we have the exact
> -	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
> -	 * working on vector drivers easier.
> -	 */
> -	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
> +	RTE_STD_C11
> +	union {
> +		/**
> +		 * Physical address of segment buffer.
> +		 * This field is invalid if the build is configured to use only
> +		 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is 1).
> +		 * Force alignment to 8-bytes, so as to ensure we have the exact
> +		 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
> +		 * working on vector drivers easier.
> +		 */
> +		rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
> +		/**
> +		 * Reserved for dynamic field in builds where physical address
> +		 * field is invalid.
> +		 */
> +		uint64_t dynfield2;
> +	};

Same comment as on the previous patch: using a #if instead of the union here
looks better to me to ensure that we never use buf_iova when RTE_IOVA_AS_VA=1.

>  
>  	/* next 8 bytes are initialised on RX descriptor rearm */
>  	RTE_MARKER64 rearm_data;
> diff --git a/lib/mbuf/rte_mbuf_dyn.c b/lib/mbuf/rte_mbuf_dyn.c
> index 4ae79383b5..6a4cf96897 100644
> --- a/lib/mbuf/rte_mbuf_dyn.c
> +++ b/lib/mbuf/rte_mbuf_dyn.c
> @@ -128,6 +128,8 @@ init_shared_mem(void)
>  		 */
>  		memset(shm, 0, sizeof(*shm));
>  		mark_free(dynfield1);
> +		if (RTE_IOVA_AS_VA)
> +			mark_free(dynfield2);

In this case, it will have to be a #if here too.

>  
>  		/* init free_flags */
>  		for (mask = RTE_MBUF_F_FIRST_FREE; mask <= RTE_MBUF_F_LAST_FREE; mask <<= 1)


Also, I think we can add in the RTE_IOVA_AS_VA documentation that it
replaces buf_iova with 8 bytes of dynamic field space.

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v3 2/5] mbuf: add second dynamic field member for VA only build
  2022-09-28  7:24                   ` Thomas Monjalon
@ 2022-09-28 12:52                     ` Olivier Matz
  2022-09-28 19:33                       ` Thomas Monjalon
  2022-09-28 19:48                       ` Stephen Hemminger
  0 siblings, 2 replies; 88+ messages in thread
From: Olivier Matz @ 2022-09-28 12:52 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: Shijith Thotton, dev, pbhagavatula, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, stephen, david.marchand

On Wed, Sep 28, 2022 at 09:24:51AM +0200, Thomas Monjalon wrote:
> 21/09/2022 15:56, Shijith Thotton:
> > mbuf physical address field is not used in builds which only uses VA. It
> > is used to expand the dynamic field area.
> > 
> > Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> 
> We cannot condition the use of the dynamic field.
> I think it is enough justification to reject this patch.

I don't think it is an issue.

> And about adding a compilation option for IOVA in the first patch of this series,
> I think it is not the direction the majority wants DPDK to go.
> We tend to avoid compilation options.

In general, I agree that we don't want to have many custom compile-time options,
especially if they impact ABI. It has several issues that have already been
widely discussed.

However, in this specific case, we can suppose that removing buf_iova is a
long-term goal (in years). Having this compile-time option is a way to test this
approach, and progressively prepare the drivers to support it. Then, in few
years (if we are still convinced), we may announce an abi breakage and switch to
this new mode by default.

Olivier

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v3 3/5] lib: move mbuf next pointer to first cache line
  2022-09-21 13:56                 ` [PATCH v3 3/5] lib: move mbuf next pointer to first cache line Shijith Thotton
  2022-09-21 14:07                   ` Morten Brørup
@ 2022-09-28 12:52                   ` Olivier Matz
  2022-09-29  6:14                     ` [EXT] " Shijith Thotton
  1 sibling, 1 reply; 88+ messages in thread
From: Olivier Matz @ 2022-09-28 12:52 UTC (permalink / raw)
  To: Shijith Thotton
  Cc: dev, pbhagavatula, Honnappa.Nagarahalli, bruce.richardson,
	jerinj, mb, stephen, thomas, david.marchand

On Wed, Sep 21, 2022 at 07:26:19PM +0530, Shijith Thotton wrote:
> Swapped position of mbuf next pointer and second dynamic field (dynfield2)
> if the build is configured to use IOVA as VA. This is to move the mbuf
> next pointer to first cache line. kni library is disabled for this
> change as it depends on the offset value of next pointer.
> 
> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> ---
>  lib/mbuf/rte_mbuf_core.h | 29 +++++++++++++++++++++--------
>  lib/meson.build          |  3 +++
>  2 files changed, 24 insertions(+), 8 deletions(-)
> 
> diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> index 94907f301d..915dcd8653 100644
> --- a/lib/mbuf/rte_mbuf_core.h
> +++ b/lib/mbuf/rte_mbuf_core.h
> @@ -590,11 +590,14 @@ struct rte_mbuf {
>  		 * working on vector drivers easier.
>  		 */
>  		rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
> +#if RTE_IOVA_AS_VA
>  		/**
> -		 * Reserved for dynamic field in builds where physical address
> -		 * field is invalid.
> +		 * Next segment of scattered packet.
> +		 * This field is valid when physical address field is invalid.
> +		 * Otherwise next pointer in the second cache line will be used.
>  		 */
> -		uint64_t dynfield2;
> +		struct rte_mbuf *next;
> +#endif
>  	};
>  
>  	/* next 8 bytes are initialised on RX descriptor rearm */
> @@ -711,11 +714,21 @@ struct rte_mbuf {
>  	/* second cache line - fields only used in slow path or on TX */
>  	RTE_MARKER cacheline1 __rte_cache_min_aligned;
>  
> -	/**
> -	 * Next segment of scattered packet. Must be NULL in the last segment or
> -	 * in case of non-segmented packet.
> -	 */
> -	struct rte_mbuf *next;
> +	RTE_STD_C11
> +	union {
> +#if !RTE_IOVA_AS_VA
> +		/**
> +		 * Next segment of scattered packet. Must be NULL in the last
> +		 * segment or in case of non-segmented packet.
> +		 */
> +		struct rte_mbuf *next;
> +#endif
> +		/**
> +		 * Reserved for dynamic field when the next pointer is in first
> +		 * cache line (i.e. RTE_IOVA_AS_VA is 1).
> +		 */
> +		uint64_t dynfield2;
> +	};

Same comment as on the other patches about union vs #if.

>  
>  	/* fields to support TX offloads */
>  	RTE_STD_C11
> diff --git a/lib/meson.build b/lib/meson.build
> index c648f7d800..73d93bc803 100644
> --- a/lib/meson.build
> +++ b/lib/meson.build
> @@ -88,6 +88,9 @@ optional_libs = [
>  disabled_libs = []
>  opt_disabled_libs = run_command(list_dir_globs, get_option('disable_libs'),
>          check: true).stdout().split()
> +if dpdk_conf.get('RTE_IOVA_AS_VA') == 1
> +    opt_disabled_libs += ['kni']
> +endif

I guess this should be in the previous patch instead, since kni uses
m->buf_iova

>  foreach l:opt_disabled_libs
>      if not optional_libs.contains(l)
>          warning('Cannot disable mandatory library "@0@"'.format(l))


After this change, the documentation of RTE_IOVA_AS_VA can be enhanced to
explain that it also moves the next pointer to the first cache line, possibly
increasing the performance.

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v3 4/5] drivers: mark Marvell cnxk PMDs work with IOVA as VA
  2022-09-21 13:56                 ` [PATCH v3 4/5] drivers: mark Marvell cnxk PMDs work with IOVA as VA Shijith Thotton
@ 2022-09-28 12:53                   ` Olivier Matz
  2022-09-29  6:19                     ` [EXT] " Shijith Thotton
  2022-10-07 20:17                   ` Olivier Matz
  1 sibling, 1 reply; 88+ messages in thread
From: Olivier Matz @ 2022-09-28 12:53 UTC (permalink / raw)
  To: Shijith Thotton
  Cc: dev, pbhagavatula, Honnappa.Nagarahalli, bruce.richardson,
	jerinj, mb, stephen, thomas, david.marchand, Ruifeng Wang,
	Jan Viktorin, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Ankur Dwivedi, Anoob Joseph, Tejasree Kondoj,
	Radha Mohan Chintakuntla, Veerasenareddy Burru,
	Ashwin Sekhar T K, Jakub Palider, Tomasz Duszynski

On Wed, Sep 21, 2022 at 07:26:20PM +0530, Shijith Thotton wrote:
> Enabled the flag pmd_iova_as_va in cnxk driver build files as they work
> with IOVA as VA. Updated cn9k and cn10k soc build configurations to
> enable the IOVA as VA build by default.
> 
> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> ---
>  config/arm/meson.build                   |  8 +++-
>  drivers/common/cnxk/meson.build          |  1 +
>  drivers/crypto/cnxk/cn10k_ipsec_la_ops.h |  4 +-
>  drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  |  2 +-
>  drivers/crypto/cnxk/meson.build          |  2 +
>  drivers/dma/cnxk/meson.build             |  1 +
>  drivers/event/cnxk/meson.build           |  1 +
>  drivers/mempool/cnxk/meson.build         |  1 +
>  drivers/net/cnxk/cn10k_tx.h              | 55 +++++++-----------------
>  drivers/net/cnxk/cn9k_tx.h               | 55 +++++++-----------------
>  drivers/net/cnxk/cnxk_ethdev.h           |  1 -
>  drivers/net/cnxk/meson.build             |  1 +
>  drivers/raw/cnxk_bphy/meson.build        |  1 +
>  drivers/raw/cnxk_gpio/meson.build        |  1 +
>  14 files changed, 50 insertions(+), 84 deletions(-)
> 
> diff --git a/config/arm/meson.build b/config/arm/meson.build
> index 9f1636e0d5..4e95e8b388 100644
> --- a/config/arm/meson.build
> +++ b/config/arm/meson.build
> @@ -294,7 +294,8 @@ soc_cn10k = {
>      'flags': [
>          ['RTE_MAX_LCORE', 24],
>          ['RTE_MAX_NUMA_NODES', 1],
> -        ['RTE_MEMPOOL_ALIGN', 128]
> +        ['RTE_MEMPOOL_ALIGN', 128],
> +        ['RTE_IOVA_AS_VA', 1]
>      ],
>      'part_number': '0xd49',
>      'extra_march_features': ['crypto'],
> @@ -370,7 +371,10 @@ soc_cn9k = {
>      'description': 'Marvell OCTEON 9',
>      'implementer': '0x43',
>      'part_number': '0xb2',
> -    'numa': false
> +    'numa': false,
> +    'flags': [
> +        ['RTE_IOVA_AS_VA', 1]
> +    ]
>  }

I think this could go in a separate patch: "disable IOVA as PA for octeontx2/3"

The reason is that this patch clearly breaks the API (m->buf_iova field
becomes invalid) and the ABI (mbuf fields are moved) for these
architectures. This ABI breakage has to be advertised in the release
note. In fact, it should have been advertised before, but I suppose it
does not impact general purpose arm distributions, so I guess it is ok.

One other thing to highlight: enabling RTE_IOVA_AS_VA means that it
disables all drivers that do not have the pmd_iova_as_va flag. Are there
use-cases where drivers other than cnxk are used? For instance, is there
a PCI bus which is likely to be used by a driver/* ?


>  
>  soc_stingray = {
> diff --git a/drivers/common/cnxk/meson.build b/drivers/common/cnxk/meson.build
> index 6f808271d1..d019cfa8d1 100644
> --- a/drivers/common/cnxk/meson.build
> +++ b/drivers/common/cnxk/meson.build
> @@ -86,3 +86,4 @@ sources += files('cnxk_telemetry_bphy.c',
>  )
>  
>  deps += ['bus_pci', 'net', 'telemetry']
> +pmd_iova_as_va = true
> diff --git a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
> index 66cfe6ca98..16db14344d 100644
> --- a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
> +++ b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
> @@ -85,7 +85,7 @@ process_outb_sa(struct roc_cpt_lf *lf, struct rte_crypto_op *cop,
>  
>  	/* Prepare CPT instruction */
>  	inst->w4.u64 = inst_w4_u64 | rte_pktmbuf_pkt_len(m_src);
> -	dptr = rte_pktmbuf_iova(m_src);
> +	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
>  	inst->dptr = dptr;
>  	inst->rptr = dptr;
>  
> @@ -102,7 +102,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct cn10k_ipsec_sa *sa,
>  
>  	/* Prepare CPT instruction */
>  	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
> -	dptr = rte_pktmbuf_iova(m_src);
> +	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
>  	inst->dptr = dptr;
>  	inst->rptr = dptr;
>  
> diff --git a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
> index e469596756..8b68e4c728 100644
> --- a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
> +++ b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
> @@ -99,7 +99,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct cn9k_ipsec_sa *sa,
>  
>  	/* Prepare CPT instruction */
>  	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
> -	inst->dptr = inst->rptr = rte_pktmbuf_iova(m_src);
> +	inst->dptr = inst->rptr = rte_pktmbuf_mtod(m_src, uint64_t);
>  	inst->w7.u64 = sa->inst.w7;
>  }
>  #endif /* __CN9K_IPSEC_LA_OPS_H__ */
> diff --git a/drivers/crypto/cnxk/meson.build b/drivers/crypto/cnxk/meson.build
> index 23a1cc3aac..764e7bb99a 100644
> --- a/drivers/crypto/cnxk/meson.build
> +++ b/drivers/crypto/cnxk/meson.build
> @@ -31,3 +31,5 @@ if get_option('buildtype').contains('debug')
>  else
>      cflags += [ '-ULA_IPSEC_DEBUG' ]
>  endif
> +
> +pmd_iova_as_va = true
> diff --git a/drivers/dma/cnxk/meson.build b/drivers/dma/cnxk/meson.build
> index d4be4ee860..ef0e3db109 100644
> --- a/drivers/dma/cnxk/meson.build
> +++ b/drivers/dma/cnxk/meson.build
> @@ -3,3 +3,4 @@
>  
>  deps += ['bus_pci', 'common_cnxk', 'dmadev']
>  sources = files('cnxk_dmadev.c')
> +pmd_iova_as_va = true
> diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
> index b27bae7b12..650d0d4256 100644
> --- a/drivers/event/cnxk/meson.build
> +++ b/drivers/event/cnxk/meson.build
> @@ -479,3 +479,4 @@ foreach flag: extra_flags
>  endforeach
>  
>  deps += ['bus_pci', 'common_cnxk', 'net_cnxk', 'crypto_cnxk']
> +pmd_iova_as_va = true
> diff --git a/drivers/mempool/cnxk/meson.build b/drivers/mempool/cnxk/meson.build
> index d5d1978569..a328176457 100644
> --- a/drivers/mempool/cnxk/meson.build
> +++ b/drivers/mempool/cnxk/meson.build
> @@ -17,3 +17,4 @@ sources = files(
>  )
>  
>  deps += ['eal', 'mbuf', 'kvargs', 'bus_pci', 'common_cnxk', 'mempool']
> +pmd_iova_as_va = true
> diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
> index ea13866b20..2ef62da132 100644
> --- a/drivers/net/cnxk/cn10k_tx.h
> +++ b/drivers/net/cnxk/cn10k_tx.h
> @@ -1775,14 +1775,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
>  		mbuf2 = (uint64_t *)tx_pkts[2];
>  		mbuf3 = (uint64_t *)tx_pkts[3];
>  
> -		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
> -				     offsetof(struct rte_mbuf, buf_iova));
>  		/*
>  		 * Get mbuf's, olflags, iova, pktlen, dataoff
>  		 * dataoff_iovaX.D[0] = iova,
> @@ -1790,28 +1782,24 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
>  		 * len_olflagsX.D[0] = ol_flags,
>  		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
>  		 */
> -		dataoff_iova0 = vld1q_u64(mbuf0);
> -		len_olflags0 = vld1q_u64(mbuf0 + 2);
> -		dataoff_iova1 = vld1q_u64(mbuf1);
> -		len_olflags1 = vld1q_u64(mbuf1 + 2);
> -		dataoff_iova2 = vld1q_u64(mbuf2);
> -		len_olflags2 = vld1q_u64(mbuf2 + 2);
> -		dataoff_iova3 = vld1q_u64(mbuf3);
> -		len_olflags3 = vld1q_u64(mbuf3 + 2);
> +		dataoff_iova0 =
> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf0), 1);
> +		len_olflags0 = vld1q_u64(mbuf0 + 3);
> +		dataoff_iova1 =
> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf1), 1);
> +		len_olflags1 = vld1q_u64(mbuf1 + 3);
> +		dataoff_iova2 =
> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf2), 1);
> +		len_olflags2 = vld1q_u64(mbuf2 + 3);
> +		dataoff_iova3 =
> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf3), 1);
> +		len_olflags3 = vld1q_u64(mbuf3 + 3);
>  
>  		/* Move mbufs to point pool */
> -		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
> -				     offsetof(struct rte_mbuf, pool) -
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
> -				     offsetof(struct rte_mbuf, pool) -
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
> -				     offsetof(struct rte_mbuf, pool) -
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
> -				     offsetof(struct rte_mbuf, pool) -
> -				     offsetof(struct rte_mbuf, buf_iova));
> +		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct rte_mbuf, pool));
> +		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct rte_mbuf, pool));
> +		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct rte_mbuf, pool));
> +		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct rte_mbuf, pool));
>  
>  		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
>  			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
> @@ -1861,17 +1849,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
>  		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
>  		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
>  
> -		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
> -		const uint64x2_t and_mask0 = {
> -			0xFFFFFFFFFFFFFFFF,
> -			0x000000000000FFFF,
> -		};
> -
> -		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
> -		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
> -		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
> -		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
> -
>  		/*
>  		 * Pick only 16 bits of pktlen preset at bits 63:32
>  		 * and place them at bits 15:0.
> diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
> index 6ce81f5c96..f5d99ccb5a 100644
> --- a/drivers/net/cnxk/cn9k_tx.h
> +++ b/drivers/net/cnxk/cn9k_tx.h
> @@ -1005,14 +1005,6 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  		mbuf2 = (uint64_t *)tx_pkts[2];
>  		mbuf3 = (uint64_t *)tx_pkts[3];
>  
> -		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
> -				     offsetof(struct rte_mbuf, buf_iova));
>  		/*
>  		 * Get mbuf's, olflags, iova, pktlen, dataoff
>  		 * dataoff_iovaX.D[0] = iova,
> @@ -1020,28 +1012,24 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  		 * len_olflagsX.D[0] = ol_flags,
>  		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
>  		 */
> -		dataoff_iova0 = vld1q_u64(mbuf0);
> -		len_olflags0 = vld1q_u64(mbuf0 + 2);
> -		dataoff_iova1 = vld1q_u64(mbuf1);
> -		len_olflags1 = vld1q_u64(mbuf1 + 2);
> -		dataoff_iova2 = vld1q_u64(mbuf2);
> -		len_olflags2 = vld1q_u64(mbuf2 + 2);
> -		dataoff_iova3 = vld1q_u64(mbuf3);
> -		len_olflags3 = vld1q_u64(mbuf3 + 2);
> +		dataoff_iova0 =
> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf0), 1);
> +		len_olflags0 = vld1q_u64(mbuf0 + 3);
> +		dataoff_iova1 =
> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf1)->data_off, vld1q_u64(mbuf1), 1);
> +		len_olflags1 = vld1q_u64(mbuf1 + 3);
> +		dataoff_iova2 =
> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf2)->data_off, vld1q_u64(mbuf2), 1);
> +		len_olflags2 = vld1q_u64(mbuf2 + 3);
> +		dataoff_iova3 =
> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf3)->data_off, vld1q_u64(mbuf3), 1);
> +		len_olflags3 = vld1q_u64(mbuf3 + 3);
>  
>  		/* Move mbufs to point pool */
> -		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
> -				     offsetof(struct rte_mbuf, pool) -
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
> -				     offsetof(struct rte_mbuf, pool) -
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
> -				     offsetof(struct rte_mbuf, pool) -
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
> -				     offsetof(struct rte_mbuf, pool) -
> -				     offsetof(struct rte_mbuf, buf_iova));
> +		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct rte_mbuf, pool));
> +		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct rte_mbuf, pool));
> +		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct rte_mbuf, pool));
> +		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct rte_mbuf, pool));
>  
>  		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
>  			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
> @@ -1091,17 +1079,6 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
>  		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
>  
> -		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
> -		const uint64x2_t and_mask0 = {
> -			0xFFFFFFFFFFFFFFFF,
> -			0x000000000000FFFF,
> -		};
> -
> -		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
> -		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
> -		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
> -		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
> -
>  		/*
>  		 * Pick only 16 bits of pktlen preset at bits 63:32
>  		 * and place them at bits 15:0.
> diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
> index 4cb7c9e90c..abf1e4215f 100644
> --- a/drivers/net/cnxk/cnxk_ethdev.h
> +++ b/drivers/net/cnxk/cnxk_ethdev.h
> @@ -690,7 +690,6 @@ cnxk_pktmbuf_detach(struct rte_mbuf *m)
>  
>  	m->priv_size = priv_size;
>  	m->buf_addr = (char *)m + mbuf_size;
> -	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
>  	m->buf_len = (uint16_t)buf_len;
>  	rte_pktmbuf_reset_headroom(m);
>  	m->data_len = 0;
> diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
> index f347e98fce..01489b3a36 100644
> --- a/drivers/net/cnxk/meson.build
> +++ b/drivers/net/cnxk/meson.build
> @@ -194,3 +194,4 @@ foreach flag: extra_flags
>  endforeach
>  
>  headers = files('rte_pmd_cnxk.h')
> +pmd_iova_as_va = true
> diff --git a/drivers/raw/cnxk_bphy/meson.build b/drivers/raw/cnxk_bphy/meson.build
> index 14147feaf4..781ed63e05 100644
> --- a/drivers/raw/cnxk_bphy/meson.build
> +++ b/drivers/raw/cnxk_bphy/meson.build
> @@ -10,3 +10,4 @@ sources = files(
>          'cnxk_bphy_irq.c',
>  )
>  headers = files('rte_pmd_bphy.h')
> +pmd_iova_as_va = true
> diff --git a/drivers/raw/cnxk_gpio/meson.build b/drivers/raw/cnxk_gpio/meson.build
> index a75a5b9084..f9aed173b6 100644
> --- a/drivers/raw/cnxk_gpio/meson.build
> +++ b/drivers/raw/cnxk_gpio/meson.build
> @@ -9,3 +9,4 @@ sources = files(
>          'cnxk_gpio_selftest.c',
>  )
>  headers = files('rte_pmd_cnxk_gpio.h')
> +pmd_iova_as_va = true
> -- 
> 2.25.1
> 

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v3 2/5] mbuf: add second dynamic field member for VA only build
  2022-09-28 12:52                     ` Olivier Matz
@ 2022-09-28 19:33                       ` Thomas Monjalon
  2022-09-28 19:48                       ` Stephen Hemminger
  1 sibling, 0 replies; 88+ messages in thread
From: Thomas Monjalon @ 2022-09-28 19:33 UTC (permalink / raw)
  To: Olivier Matz
  Cc: Shijith Thotton, dev, pbhagavatula, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, stephen, david.marchand

28/09/2022 14:52, Olivier Matz:
> On Wed, Sep 28, 2022 at 09:24:51AM +0200, Thomas Monjalon wrote:
> > 21/09/2022 15:56, Shijith Thotton:
> > > mbuf physical address field is not used in builds which only uses VA. It
> > > is used to expand the dynamic field area.
> > > 
> > > Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> > 
> > We cannot condition the use of the dynamic field.
> > I think it is enough justification to reject this patch.
> 
> I don't think it is an issue.
> 
> > And about adding a compilation option for IOVA in the first patch of this series,
> > I think it is not the direction the majority wants DPDK to go.
> > We tend to avoid compilation options.
> 
> In general, I agree that we don't want to have many custom compile-time options,
> especially if they impact ABI. It has several issues that have already been
> widely discussed.
> 
> However, in this specific case, we can suppose that removing buf_iova is a
> long-term goal (in years). Having this compile-time option is a way to test this
> approach, and progressively prepare the drivers to support it. Then, in few
> years (if we are still convinced), we may announce an abi breakage and switch to
> this new mode by default.

You convinced me.




^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v3 2/5] mbuf: add second dynamic field member for VA only build
  2022-09-28 12:52                     ` Olivier Matz
  2022-09-28 19:33                       ` Thomas Monjalon
@ 2022-09-28 19:48                       ` Stephen Hemminger
  2022-09-29  6:13                         ` [EXT] " Shijith Thotton
  1 sibling, 1 reply; 88+ messages in thread
From: Stephen Hemminger @ 2022-09-28 19:48 UTC (permalink / raw)
  To: Olivier Matz
  Cc: Thomas Monjalon, Shijith Thotton, dev, pbhagavatula,
	Honnappa.Nagarahalli, bruce.richardson, jerinj, mb,
	david.marchand

On Wed, 28 Sep 2022 14:52:47 +0200
Olivier Matz <olivier.matz@6wind.com> wrote:

> On Wed, Sep 28, 2022 at 09:24:51AM +0200, Thomas Monjalon wrote:
> > 21/09/2022 15:56, Shijith Thotton:  
> > > mbuf physical address field is not used in builds which only uses VA. It
> > > is used to expand the dynamic field area.
> > > 
> > > Signed-off-by: Shijith Thotton <sthotton@marvell.com>  
> > 
> > We cannot condition the use of the dynamic field.
> > I think it is enough justification to reject this patch.  
> 
> I don't think it is an issue.
> 
> > And about adding a compilation option for IOVA in the first patch of this series,
> > I think it is not the direction the majority wants DPDK to go.
> > We tend to avoid compilation options.  
> 
> In general, I agree that we don't want to have many custom compile-time options,
> especially if they impact ABI. It has several issues that have already been
> widely discussed.
> 
> However, in this specific case, we can suppose that removing buf_iova is a
> long-term goal (in years). Having this compile-time option is a way to test this
> approach, and progressively prepare the drivers to support it. Then, in few
> years (if we are still convinced), we may announce an abi breakage and switch to
> this new mode by default.

Since the field is invalid if the compile option is set,
shouldn't the field be inside an #ifdef, so that if a driver or application
were to make the mistake of using it directly, it would fail at compile
time instead of at runtime?

Leaving booby traps for applications and drivers is bad design.

^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [EXT] Re: [PATCH v3 0/5] mbuf dynamic field expansion
  2022-09-28 12:52                 ` Olivier Matz
@ 2022-09-29  4:51                   ` Shijith Thotton
  0 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-29  4:51 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Pavan Nikhilesh Bhagavatula, Honnappa.Nagarahalli,
	bruce.richardson, Jerin Jacob Kollanukkaran, mb, stephen, thomas,
	david.marchand

Hi Olivier,

Thanks for the review.

>On Wed, Sep 21, 2022 at 07:26:16PM +0530, Shijith Thotton wrote:
>> This is a continuation of the discussions[1] to add mbuf physical address field to
>dynamic field.
>> Previous version was to add PA field to dynamic field area based on the EAL
>IOVA mode option. It was
>> deemed unsafe as some components could still use the PA field without
>checking IOVA mode and there
>> are drivers which need PA to work. One suggestion was to make the IOVA mode
>check at compile time so
>> that drivers which need PA can be disabled during build. This series adds this
>new meson build
>> options. Second patch adds mbuf PA field to dynamic field on such builds. Last
>two patches enable
>> Marvell cnxk PMDs and software PMDs in IOVA as VA build as they work without
>PA field.
>
>Thank you for this patchset.
>
>To be honest, initially I was really reluctant to remove the use of
>buf_iova for some specific platforms.
>
>But what made me change my mind is that the removal of buf_iova will
>likely happen in the long-term future. It looks like there is a consensus on
>this. I think your patchset is a good way to prepare this transition.
>
>What is missing, I think, is a good description of the problem you are
>solving:
>
>- more space for dynamic mbuf fields -> why? can you give more detail about
>  this need?
 
The idea was to let an app/lib use an additional 8 bytes of dynamic field area.

>- increase performance -> you previously said that it was not your point,
>  but if we move the next field into the first cache line, I think this
>  has to be highlighted. Out of curiosity, did you make measurements?
>

I have yet to do it. I will send an update once I have the numbers.

>>
>> 1. https://urldefense.proofpoint.com/v2/url?u=https-
>3A__inbox.dpdk.org_dev_57d2ab7fff672716d37ba4078e2e3bb2db126607.1656605
>763.git.sthotton-
>40marvell.com_&d=DwIBAg&c=nKjWec2b6R0mOyPaz7xtfQ&r=G9w4KsPaQLACBf
>GCL35PtiRH996yqJDxAZwrWegU2qQ&m=O9JeIb0lfExyVnC8dV3WUADowh165KkS
>3s9JrmAjLwj8Uw5Iyb0tqSQ9YvQWpbIc&s=DaHEYGwUqUmAFmQ9Jkj8jGnOS4aw8
>iZ8Tcww-jPTdFE&e=  .
>>
>> v3:
>>  * Cleared use of buf_iova from cnxk PMD.
>>
>> v2:
>>  * Used RTE_IOVA_AS_VA instead of rte_is_iova_as_va_build().
>>  * Moved mbuf next pointer to first cacheline if RTE_IOVA_AS_VA = 1.
>>
>> Shijith Thotton (5):
>>   build: add meson option to configure IOVA mode as VA
>>   mbuf: add second dynamic field member for VA only build
>>   lib: move mbuf next pointer to first cache line
>>   drivers: mark Marvell cnxk PMDs work with IOVA as VA
>>   drivers: mark software PMDs work with IOVA as VA
>>
>>  app/test-bbdev/test_bbdev_perf.c         |  2 +-
>>  app/test-crypto-perf/cperf_test_common.c |  5 +--
>>  app/test/test_bpf.c                      |  2 +-
>>  app/test/test_dmadev.c                   | 33 ++++++--------
>>  app/test/test_mbuf.c                     | 12 +++---
>>  app/test/test_pcapng.c                   |  2 +-
>>  config/arm/meson.build                   |  8 +++-
>>  config/meson.build                       |  1 +
>>  drivers/common/cnxk/meson.build          |  1 +
>>  drivers/crypto/armv8/meson.build         |  1 +
>>  drivers/crypto/cnxk/cn10k_ipsec_la_ops.h |  4 +-
>>  drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  |  2 +-
>>  drivers/crypto/cnxk/meson.build          |  2 +
>>  drivers/crypto/ipsec_mb/meson.build      |  1 +
>>  drivers/crypto/null/meson.build          |  1 +
>>  drivers/crypto/openssl/meson.build       |  1 +
>>  drivers/dma/cnxk/meson.build             |  1 +
>>  drivers/dma/skeleton/meson.build         |  1 +
>>  drivers/event/cnxk/meson.build           |  1 +
>>  drivers/event/dsw/meson.build            |  1 +
>>  drivers/event/opdl/meson.build           |  1 +
>>  drivers/event/skeleton/meson.build       |  1 +
>>  drivers/event/sw/meson.build             |  1 +
>>  drivers/mempool/bucket/meson.build       |  1 +
>>  drivers/mempool/cnxk/meson.build         |  1 +
>>  drivers/mempool/ring/meson.build         |  1 +
>>  drivers/mempool/stack/meson.build        |  1 +
>>  drivers/meson.build                      |  6 +++
>>  drivers/net/af_packet/meson.build        |  1 +
>>  drivers/net/af_xdp/meson.build           |  2 +
>>  drivers/net/bonding/meson.build          |  1 +
>>  drivers/net/cnxk/cn10k_tx.h              | 55 +++++++-----------------
>>  drivers/net/cnxk/cn9k_tx.h               | 55 +++++++-----------------
>>  drivers/net/cnxk/cnxk_ethdev.h           |  1 -
>>  drivers/net/cnxk/meson.build             |  1 +
>>  drivers/net/failsafe/meson.build         |  1 +
>>  drivers/net/memif/meson.build            |  1 +
>>  drivers/net/null/meson.build             |  1 +
>>  drivers/net/pcap/meson.build             |  1 +
>>  drivers/net/ring/meson.build             |  1 +
>>  drivers/net/tap/meson.build              |  1 +
>>  drivers/raw/cnxk_bphy/meson.build        |  1 +
>>  drivers/raw/cnxk_gpio/meson.build        |  1 +
>>  drivers/raw/skeleton/meson.build         |  1 +
>>  lib/eal/linux/eal.c                      |  7 +++
>>  lib/mbuf/rte_mbuf.c                      |  8 ++--
>>  lib/mbuf/rte_mbuf.h                      | 17 +++++---
>>  lib/mbuf/rte_mbuf_core.h                 | 55 ++++++++++++++++++------
>>  lib/mbuf/rte_mbuf_dyn.c                  |  2 +
>>  lib/meson.build                          |  3 ++
>>  lib/vhost/vhost.h                        |  2 +-
>>  lib/vhost/vhost_crypto.c                 | 54 +++++++++++++++++------
>>  meson_options.txt                        |  2 +
>>  53 files changed, 220 insertions(+), 150 deletions(-)
>>
>> --
>> 2.25.1
>>

^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [EXT] Re: [PATCH v3 1/5] build: add meson option to configure IOVA mode as VA
  2022-09-28 12:52                   ` Olivier Matz
@ 2022-09-29  5:48                     ` Shijith Thotton
  0 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-29  5:48 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Pavan Nikhilesh Bhagavatula, Honnappa.Nagarahalli,
	bruce.richardson, Jerin Jacob Kollanukkaran, mb, stephen, thomas,
	david.marchand, Nicolas Chautru, Ciara Power, Konstantin Ananyev,
	Chengwen Feng, Kevin Laatz, Reshma Pattan, Maxime Coquelin,
	Chenbo Xia

>> IOVA mode in DPDK is either PA or VA. The new build option iova_as_va
>> configures the mode to VA at compile time and prevents setting it to PA
>> at runtime. For now, all drivers which are not always enabled are
>> disabled with this option. Supported driver can set the flag
>> pmd_iova_as_va in its build file to enable build.
>>
>> The mbuf structure holds the physical (PA) and virtual address (VA) of a
>> buffer. If IOVA mode is set to VA, PA is redundant as it is the same as
>> VA. So the PA field need not be updated, and it is marked invalid if the
>> build is configured to use only VA.
>>
>> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
>> ---
>>  app/test-bbdev/test_bbdev_perf.c         |  2 +-
>>  app/test-crypto-perf/cperf_test_common.c |  5 +--
>>  app/test/test_bpf.c                      |  2 +-
>>  app/test/test_dmadev.c                   | 33 ++++++---------
>>  app/test/test_mbuf.c                     | 12 +++---
>>  app/test/test_pcapng.c                   |  2 +-
>>  config/meson.build                       |  1 +
>>  drivers/meson.build                      |  6 +++
>>  lib/eal/linux/eal.c                      |  7 +++
>>  lib/mbuf/rte_mbuf.c                      |  8 ++--
>>  lib/mbuf/rte_mbuf.h                      | 17 +++++---
>>  lib/mbuf/rte_mbuf_core.h                 | 10 +++++
>>  lib/vhost/vhost.h                        |  2 +-
>>  lib/vhost/vhost_crypto.c                 | 54 ++++++++++++++++++------
>>  meson_options.txt                        |  2 +
>>  15 files changed, 109 insertions(+), 54 deletions(-)
>>
>> diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-
>bbdev/test_bbdev_perf.c
>> index 8fab52d821..f6aa25b67d 100644
>> --- a/app/test-bbdev/test_bbdev_perf.c
>> +++ b/app/test-bbdev/test_bbdev_perf.c
>> @@ -1001,7 +1001,7 @@ init_op_data_objs(struct rte_bbdev_op_data *bufs,
>>  					seg->length);
>>  				memcpy(data, seg->addr, seg->length);
>>  				m_head->buf_addr = data;
>> -				m_head->buf_iova = rte_malloc_virt2iova(data);
>> +				rte_mbuf_iova_set(m_head,
>rte_malloc_virt2iova(data));
>
>Wouldn't it be better to have a preliminary patch that replaces direct
>accesses to m->buf_iova by rte_mbuf_iova_*() functions in app and libs?
>This would make this commit smaller to read.
 
Yes. I will add this change in v4.

>
>If I understand properly, the drivers/ part has to be done at the same time
>as setting "pmd_iova_as_va" in the meson config.
>

This approach was taken as per previous discussions. Also, removing buf_iova from
a PMD would require proper testing and performance checks. The current approach
gives ample time for this.

>>  				m_head->data_off = 0;
>>  				m_head->data_len = seg->length;
>>  			} else {
>> diff --git a/app/test-crypto-perf/cperf_test_common.c b/app/test-crypto-
>perf/cperf_test_common.c
>> index 00aadc9a47..27646cd619 100644
>> --- a/app/test-crypto-perf/cperf_test_common.c
>> +++ b/app/test-crypto-perf/cperf_test_common.c
>> @@ -26,8 +26,7 @@ fill_single_seg_mbuf(struct rte_mbuf *m, struct
>rte_mempool *mp,
>>  	/* start of buffer is after mbuf structure and priv data */
>>  	m->priv_size = 0;
>>  	m->buf_addr = (char *)m + mbuf_hdr_size;
>> -	m->buf_iova = rte_mempool_virt2iova(obj) +
>> -		mbuf_offset + mbuf_hdr_size;
>> +	rte_mbuf_iova_set(m, rte_mempool_virt2iova(obj) + mbuf_offset +
>mbuf_hdr_size);
>>  	m->buf_len = segment_sz;
>>  	m->data_len = data_len;
>>  	m->pkt_len = data_len;
>> @@ -58,7 +57,7 @@ fill_multi_seg_mbuf(struct rte_mbuf *m, struct
>rte_mempool *mp,
>>  		/* start of buffer is after mbuf structure and priv data */
>>  		m->priv_size = 0;
>>  		m->buf_addr = (char *)m + mbuf_hdr_size;
>> -		m->buf_iova = next_seg_phys_addr;
>> +		rte_mbuf_iova_set(m, next_seg_phys_addr);
>>  		next_seg_phys_addr += mbuf_hdr_size + segment_sz;
>>  		m->buf_len = segment_sz;
>>  		m->data_len = data_len;
>> diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c
>> index 97f500809e..f5af5e8a3f 100644
>> --- a/app/test/test_bpf.c
>> +++ b/app/test/test_bpf.c
>> @@ -2600,7 +2600,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t
>buf[], uint32_t buf_len,
>>  	uint8_t *db;
>>
>>  	mb->buf_addr = buf;
>> -	mb->buf_iova = (uintptr_t)buf;
>> +	rte_mbuf_iova_set(mb, (uintptr_t)buf);
>>  	mb->buf_len = buf_len;
>>  	rte_mbuf_refcnt_set(mb, 1);
>>
>> diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c
>> index 9e8e101f40..8306947eda 100644
>> --- a/app/test/test_dmadev.c
>> +++ b/app/test/test_dmadev.c
>> @@ -110,8 +110,8 @@ do_multi_copies(int16_t dev_id, uint16_t vchan,
>>  		for (j = 0; j < COPY_LEN/sizeof(uint64_t); j++)
>>  			src_data[j] = rte_rand();
>>
>> -		if (rte_dma_copy(dev_id, vchan, srcs[i]->buf_iova + srcs[i]-
>>data_off,
>> -				dsts[i]->buf_iova + dsts[i]->data_off, COPY_LEN,
>0) != id_count++)
>> +		if (rte_dma_copy(dev_id, vchan, rte_pktmbuf_iova_offset(srcs[i],
>0),
>> +				 rte_pktmbuf_iova_offset(dsts[i], 0), COPY_LEN,
>0) != id_count++)
>>  			ERR_RETURN("Error with rte_dma_copy for buffer %u\n",
>i);
>>  	}
>>  	rte_dma_submit(dev_id, vchan);
>> @@ -317,9 +317,8 @@ test_failure_in_full_burst(int16_t dev_id, uint16_t
>vchan, bool fence,
>>  	rte_dma_stats_get(dev_id, vchan, &baseline); /* get a baseline set of
>stats */
>>  	for (i = 0; i < COMP_BURST_SZ; i++) {
>>  		int id = rte_dma_copy(dev_id, vchan,
>> -				(i == fail_idx ? 0 : (srcs[i]->buf_iova + srcs[i]-
>>data_off)),
>> -				dsts[i]->buf_iova + dsts[i]->data_off,
>> -				COPY_LEN, OPT_FENCE(i));
>> +				      (i == fail_idx ? 0 :
>rte_pktmbuf_iova_offset(srcs[i], 0)),
>> +				      rte_pktmbuf_iova_offset(dsts[i], 0),
>COPY_LEN, OPT_FENCE(i));
>>  		if (id < 0)
>>  			ERR_RETURN("Error with rte_dma_copy for buffer %u\n",
>i);
>>  		if (i == fail_idx)
>> @@ -407,9 +406,8 @@ test_individual_status_query_with_failure(int16_t
>dev_id, uint16_t vchan, bool f
>>
>>  	for (j = 0; j < COMP_BURST_SZ; j++) {
>>  		int id = rte_dma_copy(dev_id, vchan,
>> -				(j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]-
>>data_off)),
>> -				dsts[j]->buf_iova + dsts[j]->data_off,
>> -				COPY_LEN, OPT_FENCE(j));
>> +				      (j == fail_idx ? 0 :
>rte_pktmbuf_iova_offset(srcs[j], 0)),
>> +				      rte_pktmbuf_iova_offset(dsts[j], 0),
>COPY_LEN, OPT_FENCE(j));
>>  		if (id < 0)
>>  			ERR_RETURN("Error with rte_dma_copy for buffer %u\n",
>j);
>>  		if (j == fail_idx)
>> @@ -470,9 +468,8 @@ test_single_item_status_query_with_failure(int16_t
>dev_id, uint16_t vchan,
>>
>>  	for (j = 0; j < COMP_BURST_SZ; j++) {
>>  		int id = rte_dma_copy(dev_id, vchan,
>> -				(j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]-
>>data_off)),
>> -				dsts[j]->buf_iova + dsts[j]->data_off,
>> -				COPY_LEN, 0);
>> +				      (j == fail_idx ? 0 :
>rte_pktmbuf_iova_offset(srcs[j], 0)),
>> +				      rte_pktmbuf_iova_offset(dsts[j], 0),
>COPY_LEN, 0);
>>  		if (id < 0)
>>  			ERR_RETURN("Error with rte_dma_copy for buffer %u\n",
>j);
>>  		if (j == fail_idx)
>> @@ -529,15 +526,14 @@ test_multi_failure(int16_t dev_id, uint16_t vchan,
>struct rte_mbuf **srcs, struc
>>
>>  	/* enqueue and gather completions in one go */
>>  	for (j = 0; j < COMP_BURST_SZ; j++) {
>> -		uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
>> +		uintptr_t src = rte_pktmbuf_iova_offset(srcs[j], 0);
>>  		/* set up for failure if the current index is anywhere is the fails
>array */
>>  		for (i = 0; i < num_fail; i++)
>>  			if (j == fail[i])
>>  				src = 0;
>>
>> -		int id = rte_dma_copy(dev_id, vchan,
>> -				src, dsts[j]->buf_iova + dsts[j]->data_off,
>> -				COPY_LEN, 0);
>> +		int id = rte_dma_copy(dev_id, vchan, src,
>rte_pktmbuf_iova_offset(dsts[j], 0),
>> +				      COPY_LEN, 0);
>>  		if (id < 0)
>>  			ERR_RETURN("Error with rte_dma_copy for buffer %u\n",
>j);
>>  	}
>> @@ -565,15 +561,14 @@ test_multi_failure(int16_t dev_id, uint16_t vchan,
>struct rte_mbuf **srcs, struc
>>
>>  	/* enqueue and gather completions in bursts, but getting errors one at a
>time */
>>  	for (j = 0; j < COMP_BURST_SZ; j++) {
>> -		uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
>> +		uintptr_t src = rte_pktmbuf_iova_offset(srcs[j], 0);
>>  		/* set up for failure if the current index is anywhere is the fails
>array */
>>  		for (i = 0; i < num_fail; i++)
>>  			if (j == fail[i])
>>  				src = 0;
>>
>> -		int id = rte_dma_copy(dev_id, vchan,
>> -				src, dsts[j]->buf_iova + dsts[j]->data_off,
>> -				COPY_LEN, 0);
>> +		int id = rte_dma_copy(dev_id, vchan, src,
>rte_pktmbuf_iova_offset(dsts[j], 0),
>> +				      COPY_LEN, 0);
>>  		if (id < 0)
>>  			ERR_RETURN("Error with rte_dma_copy for buffer %u\n",
>j);
>>  	}
>> diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
>> index e09b2549ca..45431f2c9c 100644
>> --- a/app/test/test_mbuf.c
>> +++ b/app/test/test_mbuf.c
>> @@ -1232,11 +1232,13 @@ test_failing_mbuf_sanity_check(struct
>rte_mempool *pktmbuf_pool)
>>  		return -1;
>>  	}
>>
>> -	badbuf = *buf;
>> -	badbuf.buf_iova = 0;
>> -	if (verify_mbuf_check_panics(&badbuf)) {
>> -		printf("Error with bad-physaddr mbuf test\n");
>> -		return -1;
>> +	if (!RTE_IOVA_AS_VA) {
>> +		badbuf = *buf;
>> +		rte_mbuf_iova_set(&badbuf, 0);
>> +		if (verify_mbuf_check_panics(&badbuf)) {
>> +			printf("Error with bad-physaddr mbuf test\n");
>> +			return -1;
>> +		}
>>  	}
>>
>>  	badbuf = *buf;
>> diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c
>> index 320dacea34..abbf00f6da 100644
>> --- a/app/test/test_pcapng.c
>> +++ b/app/test/test_pcapng.c
>> @@ -40,7 +40,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[],
>uint32_t buf_len,
>>  	uint8_t *db;
>>
>>  	mb->buf_addr = buf;
>> -	mb->buf_iova = (uintptr_t)buf;
>> +	rte_mbuf_iova_set(mb, (uintptr_t)buf);
>>  	mb->buf_len = buf_len;
>>  	rte_mbuf_refcnt_set(mb, 1);
>>
>> diff --git a/config/meson.build b/config/meson.build
>> index 7f7b6c92fd..6b6c3e7eb6 100644
>> --- a/config/meson.build
>> +++ b/config/meson.build
>> @@ -309,6 +309,7 @@ endif
>>  if get_option('mbuf_refcnt_atomic')
>>      dpdk_conf.set('RTE_MBUF_REFCNT_ATOMIC', true)
>>  endif
>> +dpdk_conf.set10('RTE_IOVA_AS_VA', get_option('iova_as_va'))
>>
>>  compile_time_cpuflags = []
>>  subdir(arch_subdir)
>> diff --git a/drivers/meson.build b/drivers/meson.build
>> index 376a64f4da..989770cffd 100644
>> --- a/drivers/meson.build
>> +++ b/drivers/meson.build
>> @@ -105,6 +105,7 @@ foreach subpath:subdirs
>>          ext_deps = []
>>          pkgconfig_extra_libs = []
>>          testpmd_sources = []
>> +        pmd_iova_as_va = false
>
>This option should be documented, however I don't know where is the proper
>place. A comment here would be a good start I think.
>
 
Will add.

>I'm trying to find a more explicit name, but it's not easy.
>What do you think about pmd_supports_disable_iova_as_pa?
>

Makes sense. I will change to pmd_supports_disable_iova_as_pa.

>Explicit is always better, it could avoid someone adding a new driver to
>blindly copy the flag from a template driver.
>
>>
>>          if not enable_drivers.contains(drv_path)
>>              build = false
>> @@ -122,6 +123,11 @@ foreach subpath:subdirs
>>              # pull in driver directory which should update all the local variables
>>              subdir(drv_path)
>>
>> +            if dpdk_conf.get('RTE_IOVA_AS_VA') == 1 and not pmd_iova_as_va and
>not always_enable.contains(drv_path)
>> +                build = false
>> +                reason = 'driver does not support IOVA as VA mode'
>> +            endif
>> +
>>              # get dependency objs from strings
>>              shared_deps = ext_deps
>>              static_deps = ext_deps
>> diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
>> index 37d29643a5..b70c4dcc5f 100644
>> --- a/lib/eal/linux/eal.c
>> +++ b/lib/eal/linux/eal.c
>> @@ -1127,6 +1127,13 @@ rte_eal_init(int argc, char **argv)
>>  		return -1;
>>  	}
>>
>> +	if (rte_eal_iova_mode() == RTE_IOVA_PA && RTE_IOVA_AS_VA) {
>> +		rte_eal_init_alert(
>> +			"Cannot use IOVA as 'PA' since build is configured to use
>only 'VA'");
>> +		rte_errno = EINVAL;
>> +		return -1;
>> +	}
>> +
>>  	RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
>>  		rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");
>>
>> diff --git a/lib/mbuf/rte_mbuf.c b/lib/mbuf/rte_mbuf.c
>> index a2307cebe6..5af290c53a 100644
>> --- a/lib/mbuf/rte_mbuf.c
>> +++ b/lib/mbuf/rte_mbuf.c
>> @@ -89,7 +89,7 @@ rte_pktmbuf_init(struct rte_mempool *mp,
>>  	/* start of buffer is after mbuf structure and priv data */
>>  	m->priv_size = priv_size;
>>  	m->buf_addr = (char *)m + mbuf_size;
>> -	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
>> +	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
>>  	m->buf_len = (uint16_t)buf_len;
>>
>>  	/* keep some headroom between start of buffer and data */
>> @@ -187,8 +187,8 @@ __rte_pktmbuf_init_extmem(struct rte_mempool *mp,
>>  	RTE_ASSERT(ctx->off + ext_mem->elt_size <= ext_mem->buf_len);
>>
>>  	m->buf_addr = RTE_PTR_ADD(ext_mem->buf_ptr, ctx->off);
>> -	m->buf_iova = ext_mem->buf_iova == RTE_BAD_IOVA ?
>> -		      RTE_BAD_IOVA : (ext_mem->buf_iova + ctx->off);
>> +	rte_mbuf_iova_set(m, ext_mem->buf_iova == RTE_BAD_IOVA ?
>RTE_BAD_IOVA :
>> +								 (ext_mem-
>>buf_iova + ctx->off));
>>
>>  	ctx->off += ext_mem->elt_size;
>>  	if (ctx->off + ext_mem->elt_size > ext_mem->buf_len) {
>> @@ -388,7 +388,7 @@ int rte_mbuf_check(const struct rte_mbuf *m, int
>is_header,
>>  		*reason = "bad mbuf pool";
>>  		return -1;
>>  	}
>> -	if (m->buf_iova == 0) {
>> +	if (m->buf_iova == 0 && !RTE_IOVA_AS_VA) {
>>  		*reason = "bad IO addr";
>>  		return -1;
>>  	}
>> diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h
>> index 9811e8c760..05be146bc2 100644
>> --- a/lib/mbuf/rte_mbuf.h
>> +++ b/lib/mbuf/rte_mbuf.h
>> @@ -146,7 +146,7 @@ static inline uint16_t rte_pktmbuf_priv_size(struct
>rte_mempool *mp);
>>  static inline rte_iova_t
>>  rte_mbuf_data_iova(const struct rte_mbuf *mb)
>>  {
>> -	return mb->buf_iova + mb->data_off;
>> +	return (RTE_IOVA_AS_VA ? (uint64_t)mb->buf_addr : mb->buf_iova) +
>mb->data_off;
>
>nit: cast should be rte_iova_t instead of uint64_t
 
Will change.

>
>>  }
>>
>>  /**
>> @@ -164,7 +164,7 @@ rte_mbuf_data_iova(const struct rte_mbuf *mb)
>>  static inline rte_iova_t
>>  rte_mbuf_data_iova_default(const struct rte_mbuf *mb)
>>  {
>> -	return mb->buf_iova + RTE_PKTMBUF_HEADROOM;
>> +	return (RTE_IOVA_AS_VA ? (uint64_t)mb->buf_addr : mb->buf_iova) +
>RTE_PKTMBUF_HEADROOM;
>>  }
>
>same here
>

Will change.

>>
>>  /**
>> @@ -469,6 +469,13 @@ rte_mbuf_ext_refcnt_update(struct
>rte_mbuf_ext_shared_info *shinfo,
>>  				 __ATOMIC_ACQ_REL);
>>  }
>>
>> +static inline void
>> +rte_mbuf_iova_set(struct rte_mbuf *m, rte_iova_t iova)
>> +{
>> +	if (!RTE_IOVA_AS_VA)
>> +		m->buf_iova = iova;
>> +}
>> +
>>  /** Mbuf prefetch */
>>  #define RTE_MBUF_PREFETCH_TO_FREE(m) do {       \
>>  	if ((m) != NULL)                        \
>> @@ -1056,7 +1063,7 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void
>*buf_addr,
>>  	RTE_ASSERT(shinfo->free_cb != NULL);
>>
>>  	m->buf_addr = buf_addr;
>> -	m->buf_iova = buf_iova;
>> +	rte_mbuf_iova_set(m, buf_iova);
>>  	m->buf_len = buf_len;
>>
>>  	m->data_len = 0;
>> @@ -1143,7 +1150,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf
>*mi, struct rte_mbuf *m)
>>
>>  	mi->data_off = m->data_off;
>>  	mi->data_len = m->data_len;
>> -	mi->buf_iova = m->buf_iova;
>> +	rte_mbuf_iova_set(mi, m->buf_iova);
>>  	mi->buf_addr = m->buf_addr;
>>  	mi->buf_len = m->buf_len;
>>
>> @@ -1245,7 +1252,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf
>*m)
>>
>>  	m->priv_size = priv_size;
>>  	m->buf_addr = (char *)m + mbuf_size;
>> -	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
>> +	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
>>  	m->buf_len = (uint16_t)buf_len;
>>  	rte_pktmbuf_reset_headroom(m);
>>  	m->data_len = 0;
>> diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
>> index 3d6ddd6773..c6292e7252 100644
>> --- a/lib/mbuf/rte_mbuf_core.h
>> +++ b/lib/mbuf/rte_mbuf_core.h
>> @@ -581,6 +581,8 @@ struct rte_mbuf {
>>  	void *buf_addr;           /**< Virtual address of segment buffer. */
>>  	/**
>>  	 * Physical address of segment buffer.
>> +	 * This field is invalid if the build is configured to use only
>> +	 * virtual address as IOVA (i.e. RTE_IOVA_AS_VA is 1).
>>  	 * Force alignment to 8-bytes, so as to ensure we have the exact
>>  	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
>>  	 * working on vector drivers easier.
>
>If the field is invalid, can't we add an #if condition ? I mean:
>
>#if !RTE_IOVA_AS_VA
>        rte_iova_t buf_iova;
>#else
>        uint64_t dummy;
>#endif
>
>I think it is preferable, because it would ensure that we never use
>buf_iova when RTE_IOVA_AS_VA is set (especially useful when compiling
>out-of-tree drivers).
>
>This would certainly require to change some of the static inline
>functions to use #if instead of if(), but I think it's worth the effort.
>
>
 
Agree. I will change in v4.

>> @@ -848,8 +850,12 @@ struct rte_mbuf_ext_shared_info {
>>   * @param o
>>   *   The offset into the data to calculate address from.
>>   */
>> +#if RTE_IOVA_AS_VA
>> +#define rte_pktmbuf_iova_offset(m, o) rte_pktmbuf_mtod_offset(m,
>rte_iova_t, o)
>> +#else
>>  #define rte_pktmbuf_iova_offset(m, o) \
>>  	(rte_iova_t)((m)->buf_iova + (m)->data_off + (o))
>> +#endif
>>
>>  /**
>>   * A macro that returns the IO address that points to the start of the
>> @@ -858,7 +864,11 @@ struct rte_mbuf_ext_shared_info {
>>   * @param m
>>   *   The packet mbuf.
>>   */
>> +#if RTE_IOVA_AS_VA
>> +#define rte_pktmbuf_iova(m) rte_pktmbuf_mtod(m, rte_iova_t)
>> +#else
>>  #define rte_pktmbuf_iova(m) rte_pktmbuf_iova_offset(m, 0)
>> +#endif
>>
>>  #ifdef __cplusplus
>>  }
>> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
>> index 782d916ae0..05cde6e118 100644
>> --- a/lib/vhost/vhost.h
>> +++ b/lib/vhost/vhost.h
>> @@ -967,7 +967,7 @@ restore_mbuf(struct rte_mbuf *m)
>>  		/* start of buffer is after mbuf structure and priv data */
>>
>>  		m->buf_addr = (char *)m + mbuf_size;
>> -		m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
>> +		rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
>>  		m = m->next;
>>  	}
>>  }
>> diff --git a/lib/vhost/vhost_crypto.c b/lib/vhost/vhost_crypto.c
>> index 54946f46d9..7b50735796 100644
>> --- a/lib/vhost/vhost_crypto.c
>> +++ b/lib/vhost/vhost_crypto.c
>> @@ -823,11 +823,17 @@ prepare_sym_cipher_op(struct vhost_crypto
>*vcrypto, struct rte_crypto_op *op,
>>  	switch (vcrypto->option) {
>>  	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
>>  		m_src->data_len = cipher->para.src_data_len;
>> -		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
>> -				cipher->para.src_data_len);
>> +		if (!RTE_IOVA_AS_VA) {
>> +			m_src->buf_iova =
>> +				gpa_to_hpa(vcrypto->dev, desc->addr, cipher-
>>para.src_data_len);
>> +			if (unlikely(m_src->buf_iova == 0)) {
>> +				VC_LOG_ERR("zero_copy may fail due to cross
>page data");
>> +				ret = VIRTIO_CRYPTO_ERR;
>> +				goto error_exit;
>> +			}
>> +		}
>>  		m_src->buf_addr = get_data_ptr(vc_req, desc,
>VHOST_ACCESS_RO);
>> -		if (unlikely(m_src->buf_iova == 0 ||
>> -				m_src->buf_addr == NULL)) {
>> +		if (unlikely(m_src->buf_addr == NULL)) {
>>  			VC_LOG_ERR("zero_copy may fail due to cross page
>data");
>>  			ret = VIRTIO_CRYPTO_ERR;
>>  			goto error_exit;
>> @@ -867,10 +873,17 @@ prepare_sym_cipher_op(struct vhost_crypto
>*vcrypto, struct rte_crypto_op *op,
>>
>>  	switch (vcrypto->option) {
>>  	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
>> -		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
>> -				desc->addr, cipher->para.dst_data_len);
>> +		if (!RTE_IOVA_AS_VA) {
>> +			m_dst->buf_iova =
>> +				gpa_to_hpa(vcrypto->dev, desc->addr, cipher-
>>para.dst_data_len);
>> +			if (unlikely(m_dst->buf_iova == 0)) {
>> +				VC_LOG_ERR("zero_copy may fail due to cross
>page data");
>> +				ret = VIRTIO_CRYPTO_ERR;
>> +				goto error_exit;
>> +			}
>> +		}
>>  		m_dst->buf_addr = get_data_ptr(vc_req, desc,
>VHOST_ACCESS_RW);
>> -		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
>> +		if (unlikely(m_dst->buf_addr == NULL)) {
>>  			VC_LOG_ERR("zero_copy may fail due to cross page
>data");
>>  			ret = VIRTIO_CRYPTO_ERR;
>>  			goto error_exit;
>> @@ -980,11 +993,17 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto,
>struct rte_crypto_op *op,
>>  	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
>>  		m_src->data_len = chain->para.src_data_len;
>>  		m_dst->data_len = chain->para.dst_data_len;
>> -
>> -		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
>> -				chain->para.src_data_len);
>> +		if (!RTE_IOVA_AS_VA) {
>> +			m_src->buf_iova =
>> +				gpa_to_hpa(vcrypto->dev, desc->addr, chain-
>>para.src_data_len);
>> +			if (unlikely(m_src->buf_iova == 0)) {
>> +				VC_LOG_ERR("zero_copy may fail due to cross
>page data");
>> +				ret = VIRTIO_CRYPTO_ERR;
>> +				goto error_exit;
>> +			}
>> +		}
>>  		m_src->buf_addr = get_data_ptr(vc_req, desc,
>VHOST_ACCESS_RO);
>> -		if (unlikely(m_src->buf_iova == 0 || m_src->buf_addr == NULL)) {
>> +		if (unlikely(m_src->buf_addr == NULL)) {
>>  			VC_LOG_ERR("zero_copy may fail due to cross page
>data");
>>  			ret = VIRTIO_CRYPTO_ERR;
>>  			goto error_exit;
>> @@ -1024,10 +1043,17 @@ prepare_sym_chain_op(struct vhost_crypto
>*vcrypto, struct rte_crypto_op *op,
>>
>>  	switch (vcrypto->option) {
>>  	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
>> -		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
>> -				desc->addr, chain->para.dst_data_len);
>> +		if (!RTE_IOVA_AS_VA) {
>> +			m_dst->buf_iova =
>> +				gpa_to_hpa(vcrypto->dev, desc->addr, chain-
>>para.dst_data_len);
>> +			if (unlikely(m_dst->buf_iova == 0)) {
>> +				VC_LOG_ERR("zero_copy may fail due to cross
>page data");
>> +				ret = VIRTIO_CRYPTO_ERR;
>> +				goto error_exit;
>> +			}
>> +		}
>>  		m_dst->buf_addr = get_data_ptr(vc_req, desc,
>VHOST_ACCESS_RW);
>> -		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
>> +		if (unlikely(m_dst->buf_addr == NULL)) {
>>  			VC_LOG_ERR("zero_copy may fail due to cross page
>data");
>>  			ret = VIRTIO_CRYPTO_ERR;
>>  			goto error_exit;
>> diff --git a/meson_options.txt b/meson_options.txt
>> index 7c220ad68d..f0fa6cf04c 100644
>> --- a/meson_options.txt
>> +++ b/meson_options.txt
>> @@ -44,6 +44,8 @@ option('platform', type: 'string', value: 'native', description:
>>         'Platform to build, either "native", "generic" or a SoC. Please refer to the
>Linux build guide for more information.')
>>  option('enable_trace_fp', type: 'boolean', value: false, description:
>>         'enable fast path trace points.')
>> +option('iova_as_va', type: 'boolean', value: false, description:
>> +       'Build which only supports IOVA as VA mode. Unsupported drivers are
>disabled.')
>
>I wonder if we can find a better name for the option. Currently, it is a bit
>confusing to me, because iova_as_va=false does not mean that iova_as_va is
>disabled.
>
>What about iova_as_pa=true|false, or enable_iova_as_pa=true|false, or
>disable_iova_as_pa=true|false?
>
 
Agreed. I will go with the option enable_iova_as_pa and rename the macro to RTE_IOVA_AS_PA.

>The help string is maybe easier to find, something like
>"Enable or disable support for IOVA as PA mode."
>
>We can also explain that enabling this option removes the buf_iova field from
>the mbuf.
>
>>  option('tests', type: 'boolean', value: true, description:
>>         'build unit tests')
>>  option('use_hpet', type: 'boolean', value: false, description:
>> --
>> 2.25.1
>>

^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [EXT] Re: [PATCH v3 2/5] mbuf: add second dynamic field member for VA only build
  2022-09-28 19:48                       ` Stephen Hemminger
@ 2022-09-29  6:13                         ` Shijith Thotton
  0 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-29  6:13 UTC (permalink / raw)
  To: Stephen Hemminger, Olivier Matz
  Cc: Thomas Monjalon, dev, Pavan Nikhilesh Bhagavatula,
	Honnappa.Nagarahalli, bruce.richardson,
	Jerin Jacob Kollanukkaran, mb, david.marchand


>> > > mbuf physical address field is not used in builds which only use VA. It
>> > > is used to expand the dynamic field area.
>> > >
>> > > Signed-off-by: Shijith Thotton <sthotton@marvell.com>
>> >
>> > We cannot condition the use of the dynamic field.
>> > I think it is enough justification to reject this patch.
>>
>> I don't think it is an issue.
>>
>> > And about adding a compilation option for IOVA in the first patch of this series,
>> > I think it is not the direction the majority wants DPDK to go.
>> > We tend to avoid compilation options.
>>
>> In general, I agree that we don't want to have many custom compile-time
>options,
>> especially if they impact ABI. It has several issues that have already been
>> widely discussed.
>>
>> However, in this specific case, we can suppose that removing buf_iova is a
>> long-term goal (in years). Having this compile-time option is a way to test this
>> approach, and progressively prepare the drivers to support it. Then, in few
>> years (if we are still convinced), we may announce an abi breakage and switch to
>> this new mode by default.
>
>Since the field is invalid if the compile option is set,
>shouldn't the field be inside an #ifdef so that if a driver or application
>were to make the mistake of using it directly, it would fail at compile
>time instead of runtime?
>
>Leaving booby traps for applications and drivers is bad design.
 
Will move to using #ifdef in v4.

^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [EXT] Re: [PATCH v3 3/5] lib: move mbuf next pointer to first cache line
  2022-09-28 12:52                   ` Olivier Matz
@ 2022-09-29  6:14                     ` Shijith Thotton
  0 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-29  6:14 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Pavan Nikhilesh Bhagavatula, Honnappa.Nagarahalli,
	bruce.richardson, Jerin Jacob Kollanukkaran, mb, stephen, thomas,
	david.marchand

>> Swapped position of mbuf next pointer and second dynamic field (dynfield2)
>> if the build is configured to use IOVA as VA. This is to move the mbuf
>> next pointer to first cache line. kni library is disabled for this
>> change as it depends on the offset value of next pointer.
>>
>> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
>> ---
>>  lib/mbuf/rte_mbuf_core.h | 29 +++++++++++++++++++++--------
>>  lib/meson.build          |  3 +++
>>  2 files changed, 24 insertions(+), 8 deletions(-)
>>
>> diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
>> index 94907f301d..915dcd8653 100644
>> --- a/lib/mbuf/rte_mbuf_core.h
>> +++ b/lib/mbuf/rte_mbuf_core.h
>> @@ -590,11 +590,14 @@ struct rte_mbuf {
>>  		 * working on vector drivers easier.
>>  		 */
>>  		rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
>> +#if RTE_IOVA_AS_VA
>>  		/**
>> -		 * Reserved for dynamic field in builds where physical address
>> -		 * field is invalid.
>> +		 * Next segment of scattered packet.
>> +		 * This field is valid when physical address field is invalid.
>> +		 * Otherwise next pointer in the second cache line will be used.
>>  		 */
>> -		uint64_t dynfield2;
>> +		struct rte_mbuf *next;
>> +#endif
>>  	};
>>
>>  	/* next 8 bytes are initialised on RX descriptor rearm */
>> @@ -711,11 +714,21 @@ struct rte_mbuf {
>>  	/* second cache line - fields only used in slow path or on TX */
>>  	RTE_MARKER cacheline1 __rte_cache_min_aligned;
>>
>> -	/**
>> -	 * Next segment of scattered packet. Must be NULL in the last segment or
>> -	 * in case of non-segmented packet.
>> -	 */
>> -	struct rte_mbuf *next;
>> +	RTE_STD_C11
>> +	union {
>> +#if !RTE_IOVA_AS_VA
>> +		/**
>> +		 * Next segment of scattered packet. Must be NULL in the last
>> +		 * segment or in case of non-segmented packet.
>> +		 */
>> +		struct rte_mbuf *next;
>> +#endif
>> +		/**
>> +		 * Reserved for dynamic field when the next pointer is in first
>> +		 * cache line (i.e. RTE_IOVA_AS_VA is 1).
>> +		 */
>> +		uint64_t dynfield2;
>> +	};
>
>Same comment as on the other patches about union vs #if.
 
Okay. Will change.

>
>>
>>  	/* fields to support TX offloads */
>>  	RTE_STD_C11
>> diff --git a/lib/meson.build b/lib/meson.build
>> index c648f7d800..73d93bc803 100644
>> --- a/lib/meson.build
>> +++ b/lib/meson.build
>> @@ -88,6 +88,9 @@ optional_libs = [
>>  disabled_libs = []
>>  opt_disabled_libs = run_command(list_dir_globs, get_option('disable_libs'),
>>          check: true).stdout().split()
>> +if dpdk_conf.get('RTE_IOVA_AS_VA') == 1
>> +    opt_disabled_libs += ['kni']
>> +endif
>
>I guess this should be in the previous patch instead, since kni uses
>m->buf_iova
>
 
Ack.

>>  foreach l:opt_disabled_libs
>>      if not optional_libs.contains(l)
>>          warning('Cannot disable mandatory library "@0@"'.format(l))
>
>
>After this change, the documentation of RTE_IOVA_AS_VA can be enhanced to
>explain that it also moves the next pointer to the first cache line, possibly
>increasing the performance.
 
Okay.

^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [EXT] Re: [PATCH v3 4/5] drivers: mark Marvell cnxk PMDs work with IOVA as VA
  2022-09-28 12:53                   ` Olivier Matz
@ 2022-09-29  6:19                     ` Shijith Thotton
  2022-09-29  7:44                       ` Olivier Matz
  0 siblings, 1 reply; 88+ messages in thread
From: Shijith Thotton @ 2022-09-29  6:19 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Pavan Nikhilesh Bhagavatula, Honnappa.Nagarahalli,
	bruce.richardson, Jerin Jacob Kollanukkaran, mb, stephen, thomas,
	david.marchand, Ruifeng Wang, Jan Viktorin,
	Nithin Kumar Dabilpuram, Kiran Kumar Kokkilagadda,
	Sunil Kumar Kori, Satha Koteswara Rao Kottidi, Ankur Dwivedi,
	Anoob Joseph, Tejasree Kondoj, Radha Chintakuntla,
	Veerasenareddy Burru, Ashwin Sekhar T K, Jakub Palider,
	Tomasz Duszynski

>> Enabled the flag pmd_iova_as_va in cnxk driver build files as they work
>> with IOVA as VA. Updated cn9k and cn10k soc build configurations to
>> enable the IOVA as VA build by default.
>>
>> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
>> ---
>>  config/arm/meson.build                   |  8 +++-
>>  drivers/common/cnxk/meson.build          |  1 +
>>  drivers/crypto/cnxk/cn10k_ipsec_la_ops.h |  4 +-
>>  drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  |  2 +-
>>  drivers/crypto/cnxk/meson.build          |  2 +
>>  drivers/dma/cnxk/meson.build             |  1 +
>>  drivers/event/cnxk/meson.build           |  1 +
>>  drivers/mempool/cnxk/meson.build         |  1 +
>>  drivers/net/cnxk/cn10k_tx.h              | 55 +++++++-----------------
>>  drivers/net/cnxk/cn9k_tx.h               | 55 +++++++-----------------
>>  drivers/net/cnxk/cnxk_ethdev.h           |  1 -
>>  drivers/net/cnxk/meson.build             |  1 +
>>  drivers/raw/cnxk_bphy/meson.build        |  1 +
>>  drivers/raw/cnxk_gpio/meson.build        |  1 +
>>  14 files changed, 50 insertions(+), 84 deletions(-)
>>
>> diff --git a/config/arm/meson.build b/config/arm/meson.build
>> index 9f1636e0d5..4e95e8b388 100644
>> --- a/config/arm/meson.build
>> +++ b/config/arm/meson.build
>> @@ -294,7 +294,8 @@ soc_cn10k = {
>>      'flags': [
>>          ['RTE_MAX_LCORE', 24],
>>          ['RTE_MAX_NUMA_NODES', 1],
>> -        ['RTE_MEMPOOL_ALIGN', 128]
>> +        ['RTE_MEMPOOL_ALIGN', 128],
>> +        ['RTE_IOVA_AS_VA', 1]
>>      ],
>>      'part_number': '0xd49',
>>      'extra_march_features': ['crypto'],
>> @@ -370,7 +371,10 @@ soc_cn9k = {
>>      'description': 'Marvell OCTEON 9',
>>      'implementer': '0x43',
>>      'part_number': '0xb2',
>> -    'numa': false
>> +    'numa': false,
>> +    'flags': [
>> +        ['RTE_IOVA_AS_VA', 1]
>> +    ]
>>  }
>
>I think this could go in a separate patch: "disable IOVA as PA for octeontx2/3"
>
>The reason is that this patch clearly breaks the API (m->buf_iova field
>becomes invalid) and the ABI (mbuf fields are moved) for these
>architectures. This ABI breakage has to be advertised in the release
>note. In fact, it should have been advertised before, but I suppose it
>does not impact general purpose arm distributions, so I guess it is ok.
>
>One other thing to highlight: enabling RTE_IOVA_AS_VA means that it
>disable all drivers that do not have the pmd_iova_as_va flag. Are there
>use-cases where drivers other than cnxk are used? For instance, is there
>a PCI bus which is likely to be used by a driver/* ?
>

All always-enabled drivers remain enabled in this mode, including
bus/pci, bus/vdev and mempool/ring.

>>
>>  soc_stingray = {
>> diff --git a/drivers/common/cnxk/meson.build
>b/drivers/common/cnxk/meson.build
>> index 6f808271d1..d019cfa8d1 100644
>> --- a/drivers/common/cnxk/meson.build
>> +++ b/drivers/common/cnxk/meson.build
>> @@ -86,3 +86,4 @@ sources += files('cnxk_telemetry_bphy.c',
>>  )
>>
>>  deps += ['bus_pci', 'net', 'telemetry']
>> +pmd_iova_as_va = true
>> diff --git a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
>b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
>> index 66cfe6ca98..16db14344d 100644
>> --- a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
>> +++ b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
>> @@ -85,7 +85,7 @@ process_outb_sa(struct roc_cpt_lf *lf, struct rte_crypto_op
>*cop,
>>
>>  	/* Prepare CPT instruction */
>>  	inst->w4.u64 = inst_w4_u64 | rte_pktmbuf_pkt_len(m_src);
>> -	dptr = rte_pktmbuf_iova(m_src);
>> +	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
>>  	inst->dptr = dptr;
>>  	inst->rptr = dptr;
>>
>> @@ -102,7 +102,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct
>cn10k_ipsec_sa *sa,
>>
>>  	/* Prepare CPT instruction */
>>  	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
>> -	dptr = rte_pktmbuf_iova(m_src);
>> +	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
>>  	inst->dptr = dptr;
>>  	inst->rptr = dptr;
>>
>> diff --git a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
>b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
>> index e469596756..8b68e4c728 100644
>> --- a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
>> +++ b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
>> @@ -99,7 +99,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct
>cn9k_ipsec_sa *sa,
>>
>>  	/* Prepare CPT instruction */
>>  	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
>> -	inst->dptr = inst->rptr = rte_pktmbuf_iova(m_src);
>> +	inst->dptr = inst->rptr = rte_pktmbuf_mtod(m_src, uint64_t);
>>  	inst->w7.u64 = sa->inst.w7;
>>  }
>>  #endif /* __CN9K_IPSEC_LA_OPS_H__ */
>> diff --git a/drivers/crypto/cnxk/meson.build b/drivers/crypto/cnxk/meson.build
>> index 23a1cc3aac..764e7bb99a 100644
>> --- a/drivers/crypto/cnxk/meson.build
>> +++ b/drivers/crypto/cnxk/meson.build
>> @@ -31,3 +31,5 @@ if get_option('buildtype').contains('debug')
>>  else
>>      cflags += [ '-ULA_IPSEC_DEBUG' ]
>>  endif
>> +
>> +pmd_iova_as_va = true
>> diff --git a/drivers/dma/cnxk/meson.build b/drivers/dma/cnxk/meson.build
>> index d4be4ee860..ef0e3db109 100644
>> --- a/drivers/dma/cnxk/meson.build
>> +++ b/drivers/dma/cnxk/meson.build
>> @@ -3,3 +3,4 @@
>>
>>  deps += ['bus_pci', 'common_cnxk', 'dmadev']
>>  sources = files('cnxk_dmadev.c')
>> +pmd_iova_as_va = true
>> diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
>> index b27bae7b12..650d0d4256 100644
>> --- a/drivers/event/cnxk/meson.build
>> +++ b/drivers/event/cnxk/meson.build
>> @@ -479,3 +479,4 @@ foreach flag: extra_flags
>>  endforeach
>>
>>  deps += ['bus_pci', 'common_cnxk', 'net_cnxk', 'crypto_cnxk']
>> +pmd_iova_as_va = true
>> diff --git a/drivers/mempool/cnxk/meson.build
>b/drivers/mempool/cnxk/meson.build
>> index d5d1978569..a328176457 100644
>> --- a/drivers/mempool/cnxk/meson.build
>> +++ b/drivers/mempool/cnxk/meson.build
>> @@ -17,3 +17,4 @@ sources = files(
>>  )
>>
>>  deps += ['eal', 'mbuf', 'kvargs', 'bus_pci', 'common_cnxk', 'mempool']
>> +pmd_iova_as_va = true
>> diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
>> index ea13866b20..2ef62da132 100644
>> --- a/drivers/net/cnxk/cn10k_tx.h
>> +++ b/drivers/net/cnxk/cn10k_tx.h
>> @@ -1775,14 +1775,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue,
>uint64_t *ws,
>>  		mbuf2 = (uint64_t *)tx_pkts[2];
>>  		mbuf3 = (uint64_t *)tx_pkts[3];
>>
>> -		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
>> -				     offsetof(struct rte_mbuf, buf_iova));
>>  		/*
>>  		 * Get mbuf's, olflags, iova, pktlen, dataoff
>>  		 * dataoff_iovaX.D[0] = iova,
>> @@ -1790,28 +1782,24 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue,
>uint64_t *ws,
>>  		 * len_olflagsX.D[0] = ol_flags,
>>  		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
>>  		 */
>> -		dataoff_iova0 = vld1q_u64(mbuf0);
>> -		len_olflags0 = vld1q_u64(mbuf0 + 2);
>> -		dataoff_iova1 = vld1q_u64(mbuf1);
>> -		len_olflags1 = vld1q_u64(mbuf1 + 2);
>> -		dataoff_iova2 = vld1q_u64(mbuf2);
>> -		len_olflags2 = vld1q_u64(mbuf2 + 2);
>> -		dataoff_iova3 = vld1q_u64(mbuf3);
>> -		len_olflags3 = vld1q_u64(mbuf3 + 2);
>> +		dataoff_iova0 =
>> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off,
>vld1q_u64(mbuf0), 1);
>> +		len_olflags0 = vld1q_u64(mbuf0 + 3);
>> +		dataoff_iova1 =
>> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off,
>vld1q_u64(mbuf1), 1);
>> +		len_olflags1 = vld1q_u64(mbuf1 + 3);
>> +		dataoff_iova2 =
>> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off,
>vld1q_u64(mbuf2), 1);
>> +		len_olflags2 = vld1q_u64(mbuf2 + 3);
>> +		dataoff_iova3 =
>> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off,
>vld1q_u64(mbuf3), 1);
>> +		len_olflags3 = vld1q_u64(mbuf3 + 3);
>>
>>  		/* Move mbufs to point pool */
>> -		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
>> -				     offsetof(struct rte_mbuf, pool) -
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
>> -				     offsetof(struct rte_mbuf, pool) -
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
>> -				     offsetof(struct rte_mbuf, pool) -
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
>> -				     offsetof(struct rte_mbuf, pool) -
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> +		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct rte_mbuf,
>pool));
>> +		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct rte_mbuf,
>pool));
>> +		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct rte_mbuf,
>pool));
>> +		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct rte_mbuf,
>pool));
>>
>>  		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
>>  			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
>> @@ -1861,17 +1849,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue,
>uint64_t *ws,
>>  		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
>>  		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
>>
>> -		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
>> -		const uint64x2_t and_mask0 = {
>> -			0xFFFFFFFFFFFFFFFF,
>> -			0x000000000000FFFF,
>> -		};
>> -
>> -		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
>> -		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
>> -		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
>> -		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
>> -
>>  		/*
>>  		 * Pick only 16 bits of pktlen preset at bits 63:32
>>  		 * and place them at bits 15:0.
>> diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
>> index 6ce81f5c96..f5d99ccb5a 100644
>> --- a/drivers/net/cnxk/cn9k_tx.h
>> +++ b/drivers/net/cnxk/cn9k_tx.h
>> @@ -1005,14 +1005,6 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct
>rte_mbuf **tx_pkts,
>>  		mbuf2 = (uint64_t *)tx_pkts[2];
>>  		mbuf3 = (uint64_t *)tx_pkts[3];
>>
>> -		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
>> -				     offsetof(struct rte_mbuf, buf_iova));
>>  		/*
>>  		 * Get mbuf's, olflags, iova, pktlen, dataoff
>>  		 * dataoff_iovaX.D[0] = iova,
>> @@ -1020,28 +1012,24 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct
>rte_mbuf **tx_pkts,
>>  		 * len_olflagsX.D[0] = ol_flags,
>>  		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
>>  		 */
>> -		dataoff_iova0 = vld1q_u64(mbuf0);
>> -		len_olflags0 = vld1q_u64(mbuf0 + 2);
>> -		dataoff_iova1 = vld1q_u64(mbuf1);
>> -		len_olflags1 = vld1q_u64(mbuf1 + 2);
>> -		dataoff_iova2 = vld1q_u64(mbuf2);
>> -		len_olflags2 = vld1q_u64(mbuf2 + 2);
>> -		dataoff_iova3 = vld1q_u64(mbuf3);
>> -		len_olflags3 = vld1q_u64(mbuf3 + 2);
>> +		dataoff_iova0 =
>> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off,
>vld1q_u64(mbuf0), 1);
>> +		len_olflags0 = vld1q_u64(mbuf0 + 3);
>> +		dataoff_iova1 =
>> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf1)->data_off,
>vld1q_u64(mbuf1), 1);
>> +		len_olflags1 = vld1q_u64(mbuf1 + 3);
>> +		dataoff_iova2 =
>> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf2)->data_off,
>vld1q_u64(mbuf2), 1);
>> +		len_olflags2 = vld1q_u64(mbuf2 + 3);
>> +		dataoff_iova3 =
>> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf3)->data_off,
>vld1q_u64(mbuf3), 1);
>> +		len_olflags3 = vld1q_u64(mbuf3 + 3);
>>
>>  		/* Move mbufs to point pool */
>> -		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
>> -				     offsetof(struct rte_mbuf, pool) -
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
>> -				     offsetof(struct rte_mbuf, pool) -
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
>> -				     offsetof(struct rte_mbuf, pool) -
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
>> -				     offsetof(struct rte_mbuf, pool) -
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> +		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct rte_mbuf,
>pool));
>> +		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct rte_mbuf,
>pool));
>> +		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct rte_mbuf,
>pool));
>> +		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct rte_mbuf,
>pool));
>>
>>  		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
>>  			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
>> @@ -1091,17 +1079,6 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct
>rte_mbuf **tx_pkts,
>>  		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
>>  		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
>>
>> -		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
>> -		const uint64x2_t and_mask0 = {
>> -			0xFFFFFFFFFFFFFFFF,
>> -			0x000000000000FFFF,
>> -		};
>> -
>> -		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
>> -		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
>> -		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
>> -		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
>> -
>>  		/*
>>  		 * Pick only 16 bits of pktlen preset at bits 63:32
>>  		 * and place them at bits 15:0.
>> diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
>> index 4cb7c9e90c..abf1e4215f 100644
>> --- a/drivers/net/cnxk/cnxk_ethdev.h
>> +++ b/drivers/net/cnxk/cnxk_ethdev.h
>> @@ -690,7 +690,6 @@ cnxk_pktmbuf_detach(struct rte_mbuf *m)
>>
>>  	m->priv_size = priv_size;
>>  	m->buf_addr = (char *)m + mbuf_size;
>> -	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
>>  	m->buf_len = (uint16_t)buf_len;
>>  	rte_pktmbuf_reset_headroom(m);
>>  	m->data_len = 0;
>> diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
>> index f347e98fce..01489b3a36 100644
>> --- a/drivers/net/cnxk/meson.build
>> +++ b/drivers/net/cnxk/meson.build
>> @@ -194,3 +194,4 @@ foreach flag: extra_flags
>>  endforeach
>>
>>  headers = files('rte_pmd_cnxk.h')
>> +pmd_iova_as_va = true
>> diff --git a/drivers/raw/cnxk_bphy/meson.build
>b/drivers/raw/cnxk_bphy/meson.build
>> index 14147feaf4..781ed63e05 100644
>> --- a/drivers/raw/cnxk_bphy/meson.build
>> +++ b/drivers/raw/cnxk_bphy/meson.build
>> @@ -10,3 +10,4 @@ sources = files(
>>          'cnxk_bphy_irq.c',
>>  )
>>  headers = files('rte_pmd_bphy.h')
>> +pmd_iova_as_va = true
>> diff --git a/drivers/raw/cnxk_gpio/meson.build
>b/drivers/raw/cnxk_gpio/meson.build
>> index a75a5b9084..f9aed173b6 100644
>> --- a/drivers/raw/cnxk_gpio/meson.build
>> +++ b/drivers/raw/cnxk_gpio/meson.build
>> @@ -9,3 +9,4 @@ sources = files(
>>          'cnxk_gpio_selftest.c',
>>  )
>>  headers = files('rte_pmd_cnxk_gpio.h')
>> +pmd_iova_as_va = true
>> --
>> 2.25.1
>>

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [EXT] Re: [PATCH v3 4/5] drivers: mark Marvell cnxk PMDs work with IOVA as VA
  2022-09-29  6:19                     ` [EXT] " Shijith Thotton
@ 2022-09-29  7:44                       ` Olivier Matz
  2022-09-29  8:10                         ` Shijith Thotton
  0 siblings, 1 reply; 88+ messages in thread
From: Olivier Matz @ 2022-09-29  7:44 UTC (permalink / raw)
  To: Shijith Thotton
  Cc: dev, Pavan Nikhilesh Bhagavatula, Honnappa.Nagarahalli,
	bruce.richardson, Jerin Jacob Kollanukkaran, mb, stephen, thomas,
	david.marchand, Ruifeng Wang, Jan Viktorin,
	Nithin Kumar Dabilpuram, Kiran Kumar Kokkilagadda,
	Sunil Kumar Kori, Satha Koteswara Rao Kottidi, Ankur Dwivedi,
	Anoob Joseph, Tejasree Kondoj, Radha Chintakuntla,
	Veerasenareddy Burru, Ashwin Sekhar T K, Jakub Palider,
	Tomasz Duszynski

On Thu, Sep 29, 2022 at 06:19:32AM +0000, Shijith Thotton wrote:
> >> Enabled the flag pmd_iova_as_va in cnxk driver build files as they work
> >> with IOVA as VA. Updated cn9k and cn10k soc build configurations to
> >> enable the IOVA as VA build by default.
> >>
> >> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> >> ---
> >>  config/arm/meson.build                   |  8 +++-
> >>  drivers/common/cnxk/meson.build          |  1 +
> >>  drivers/crypto/cnxk/cn10k_ipsec_la_ops.h |  4 +-
> >>  drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  |  2 +-
> >>  drivers/crypto/cnxk/meson.build          |  2 +
> >>  drivers/dma/cnxk/meson.build             |  1 +
> >>  drivers/event/cnxk/meson.build           |  1 +
> >>  drivers/mempool/cnxk/meson.build         |  1 +
> >>  drivers/net/cnxk/cn10k_tx.h              | 55 +++++++-----------------
> >>  drivers/net/cnxk/cn9k_tx.h               | 55 +++++++-----------------
> >>  drivers/net/cnxk/cnxk_ethdev.h           |  1 -
> >>  drivers/net/cnxk/meson.build             |  1 +
> >>  drivers/raw/cnxk_bphy/meson.build        |  1 +
> >>  drivers/raw/cnxk_gpio/meson.build        |  1 +
> >>  14 files changed, 50 insertions(+), 84 deletions(-)
> >>
> >> diff --git a/config/arm/meson.build b/config/arm/meson.build
> >> index 9f1636e0d5..4e95e8b388 100644
> >> --- a/config/arm/meson.build
> >> +++ b/config/arm/meson.build
> >> @@ -294,7 +294,8 @@ soc_cn10k = {
> >>      'flags': [
> >>          ['RTE_MAX_LCORE', 24],
> >>          ['RTE_MAX_NUMA_NODES', 1],
> >> -        ['RTE_MEMPOOL_ALIGN', 128]
> >> +        ['RTE_MEMPOOL_ALIGN', 128],
> >> +        ['RTE_IOVA_AS_VA', 1]
> >>      ],
> >>      'part_number': '0xd49',
> >>      'extra_march_features': ['crypto'],
> >> @@ -370,7 +371,10 @@ soc_cn9k = {
> >>      'description': 'Marvell OCTEON 9',
> >>      'implementer': '0x43',
> >>      'part_number': '0xb2',
> >> -    'numa': false
> >> +    'numa': false,
> >> +    'flags': [
> >> +        ['RTE_IOVA_AS_VA', 1]
> >> +    ]
> >>  }
> >
> >I think this could go in a separate patch: "disable IOVA as PA for octeontx2/3"
> >
> >The reason is that this patch clearly breaks the API (m->buf_iova field
> >becomes invalid) and the ABI (mbuf fields are moved) for these
> >architectures. This ABI breakage has to be advertised in the release
> >note. In fact, it should have been advertised before, but I suppose it
> >does not impact general purpose arm distributions, so I guess it is ok.
> >
> >One other thing to highlight: enabling RTE_IOVA_AS_VA means that it
> >disable all drivers that do not have the pmd_iova_as_va flag. Are there
> >use-cases where drivers other than cnxk are used? For instance, is there
> >a PCI bus which is likely to be used by a driver/* ?
> >
> 
> All always enable drivers are enabled in this mode, which include
> bus/pci, bus/vdev and mempool/ring.

I was thinking about use cases where a pci PMD (NIC, crypto, ...) is
used in addition to the SOC drivers. These PMD won't compile when IOVA
as PA is disabled, and the use case will be broken.

This is probably a corner case (people at Marvell will know better than
me), I just wanted to highlight it. Should we document it?

Thanks,
Olivier

^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [EXT] Re: [PATCH v3 4/5] drivers: mark Marvell cnxk PMDs work with IOVA as VA
  2022-09-29  7:44                       ` Olivier Matz
@ 2022-09-29  8:10                         ` Shijith Thotton
  0 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-09-29  8:10 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Pavan Nikhilesh Bhagavatula, Honnappa.Nagarahalli,
	bruce.richardson, Jerin Jacob Kollanukkaran, mb, stephen, thomas,
	david.marchand, Ruifeng Wang, Jan Viktorin,
	Nithin Kumar Dabilpuram, Kiran Kumar Kokkilagadda,
	Sunil Kumar Kori, Satha Koteswara Rao Kottidi, Ankur Dwivedi,
	Anoob Joseph, Tejasree Kondoj, Radha Chintakuntla,
	Veerasenareddy Burru, Ashwin Sekhar T K, Jakub Palider,
	Tomasz Duszynski

>> >> Enabled the flag pmd_iova_as_va in cnxk driver build files as they work
>> >> with IOVA as VA. Updated cn9k and cn10k soc build configurations to
>> >> enable the IOVA as VA build by default.
>> >>
>> >> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
>> >> ---
>> >>  config/arm/meson.build                   |  8 +++-
>> >>  drivers/common/cnxk/meson.build          |  1 +
>> >>  drivers/crypto/cnxk/cn10k_ipsec_la_ops.h |  4 +-
>> >>  drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  |  2 +-
>> >>  drivers/crypto/cnxk/meson.build          |  2 +
>> >>  drivers/dma/cnxk/meson.build             |  1 +
>> >>  drivers/event/cnxk/meson.build           |  1 +
>> >>  drivers/mempool/cnxk/meson.build         |  1 +
>> >>  drivers/net/cnxk/cn10k_tx.h              | 55 +++++++-----------------
>> >>  drivers/net/cnxk/cn9k_tx.h               | 55 +++++++-----------------
>> >>  drivers/net/cnxk/cnxk_ethdev.h           |  1 -
>> >>  drivers/net/cnxk/meson.build             |  1 +
>> >>  drivers/raw/cnxk_bphy/meson.build        |  1 +
>> >>  drivers/raw/cnxk_gpio/meson.build        |  1 +
>> >>  14 files changed, 50 insertions(+), 84 deletions(-)
>> >>
>> >> diff --git a/config/arm/meson.build b/config/arm/meson.build
>> >> index 9f1636e0d5..4e95e8b388 100644
>> >> --- a/config/arm/meson.build
>> >> +++ b/config/arm/meson.build
>> >> @@ -294,7 +294,8 @@ soc_cn10k = {
>> >>      'flags': [
>> >>          ['RTE_MAX_LCORE', 24],
>> >>          ['RTE_MAX_NUMA_NODES', 1],
>> >> -        ['RTE_MEMPOOL_ALIGN', 128]
>> >> +        ['RTE_MEMPOOL_ALIGN', 128],
>> >> +        ['RTE_IOVA_AS_VA', 1]
>> >>      ],
>> >>      'part_number': '0xd49',
>> >>      'extra_march_features': ['crypto'],
>> >> @@ -370,7 +371,10 @@ soc_cn9k = {
>> >>      'description': 'Marvell OCTEON 9',
>> >>      'implementer': '0x43',
>> >>      'part_number': '0xb2',
>> >> -    'numa': false
>> >> +    'numa': false,
>> >> +    'flags': [
>> >> +        ['RTE_IOVA_AS_VA', 1]
>> >> +    ]
>> >>  }
>> >
>> >I think this could go in a separate patch: "disable IOVA as PA for octeontx2/3"
>> >
>> >The reason is that this patch clearly breaks the API (m->buf_iova field
>> >becomes invalid) and the ABI (mbuf fields are moved) for these
>> >architectures. This ABI breakage has to be advertised in the release
>> >note. In fact, it should have been advertised before, but I suppose it
>> >does not impact general purpose arm distributions, so I guess it is ok.
>> >
>> >One other thing to highlight: enabling RTE_IOVA_AS_VA means that it
>> >disable all drivers that do not have the pmd_iova_as_va flag. Are there
>> >use-cases where drivers other than cnxk are used? For instance, is there
>> >a PCI bus which is likely to be used by a driver/* ?
>> >
>>
>> All always enable drivers are enabled in this mode, which include
>> bus/pci, bus/vdev and mempool/ring.
>
>I was thinking about use cases where a pci PMD (NIC, crypto, ...) is
>used in addition to the SOC drivers. These PMD won't compile when IOVA
>as PA is disabled, and the use case will be broken.
>
>This is probably a corner case (people at Marvell will know better than
>me), I just wanted to highlight it. Should we document it?

Thanks for pointing this out. I will add it to the cnxk platform documentation.

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v3 0/5] mbuf dynamic field expansion
  2022-09-21 13:56               ` [PATCH v3 " Shijith Thotton
                                   ` (6 preceding siblings ...)
  2022-09-28 12:52                 ` Olivier Matz
@ 2022-10-07 13:50                 ` Thomas Monjalon
  2022-10-07 19:35                   ` [EXT] " Shijith Thotton
  2022-10-07 19:30                 ` [PATCH v4 0/7] " Shijith Thotton
  8 siblings, 1 reply; 88+ messages in thread
From: Thomas Monjalon @ 2022-10-07 13:50 UTC (permalink / raw)
  To: Shijith Thotton
  Cc: dev, pbhagavatula, Shijith Thotton, Honnappa.Nagarahalli,
	bruce.richardson, jerinj, mb, olivier.matz, stephen,
	david.marchand

21/09/2022 15:56, Shijith Thotton:
> This is a continuation of the discussions[1] to add mbuf physical address field to dynamic field.
> Previous version was to add PA field to dynamic field area based on the EAL IOVA mode option. It was
> deemed unsafe as some components could still use the PA field without checking IOVA mode and there
> are drivers which need PA to work. One suggestion was to make the IOVA mode check at compile time so
> that drivers which need PA can be disabled during build. This series adds this new meson build
> options. Second patch adds mbuf PA field to dynamic field on such builds. Last two patches enable
> Marvell cnxk PMDs and software PMDs in IOVA as VA build as they work without PA field.

Shijith, in case it was not clear,
we can accept this change only in -rc1, which closes today,
and we haven't received the expected v4 yet.




^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v4 0/7] mbuf dynamic field expansion
  2022-09-21 13:56               ` [PATCH v3 " Shijith Thotton
                                   ` (7 preceding siblings ...)
  2022-10-07 13:50                 ` Thomas Monjalon
@ 2022-10-07 19:30                 ` Shijith Thotton
  2022-10-07 19:30                   ` [PATCH v4 1/7] mbuf: add API to get and set mbuf physical address Shijith Thotton
                                     ` (8 more replies)
  8 siblings, 9 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 19:30 UTC (permalink / raw)
  To: dev
  Cc: Shijith Thotton, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	mb, olivier.matz, stephen, thomas, ferruh.yigit, pbhagavatula,
	david.marchand

This is a continuation of the discussions[1] to add mbuf physical address field to dynamic field.
Previous version was to add PA field to dynamic field area based on the EAL IOVA mode option. It was
deemed unsafe as some components could still use the PA field without checking IOVA mode and there
are drivers which need PA to work. One suggestion was to make the IOVA mode check at compile time so
that drivers which need PA can be disabled during build. This series adds a new meson build
option for that. The fourth patch adds the mbuf PA field to the dynamic field on such builds. The last two patches enable
Marvell cnxk PMDs and software PMDs in IOVA as PA disabled build as they work without PA field.

1. https://inbox.dpdk.org/dev/57d2ab7fff672716d37ba4078e2e3bb2db126607.1656605763.git.sthotton@marvell.com/.

v4:
 * Restructured changes to multiple patches.
 * Moved to #if scheme instead of union.
 * Updated release notes.

v3:
 * Cleared use of buf_iova from cnxk PMD.

v2:
 * Used RTE_IOVA_AS_VA instead of rte_is_iova_as_va_build().
 * Moved mbuf next pointer to first cacheline if RTE_IOVA_AS_VA = 1.

Shijith Thotton (7):
  mbuf: add API to get and set mbuf physical address
  test/dma: use API to get mbuf data physical address
  build: add meson option to configure IOVA mode as PA
  mbuf: add second dynamic field member
  lib: move mbuf next pointer to first cache line
  drivers: mark cnxk PMDs work with IOVA as PA disabled
  drivers: mark software PMDs work with IOVA as PA disabled

 app/test-bbdev/test_bbdev_perf.c         |  2 +-
 app/test-crypto-perf/cperf_test_common.c |  5 +--
 app/test/test_bpf.c                      |  2 +-
 app/test/test_dmadev.c                   | 33 ++++++--------
 app/test/test_mbuf.c                     | 12 +++---
 app/test/test_pcapng.c                   |  2 +-
 config/arm/meson.build                   |  8 +++-
 config/meson.build                       |  1 +
 doc/guides/platform/cnxk.rst             |  3 +-
 doc/guides/rel_notes/release_22_11.rst   |  3 ++
 drivers/common/cnxk/meson.build          |  1 +
 drivers/crypto/armv8/meson.build         |  1 +
 drivers/crypto/cnxk/cn10k_ipsec_la_ops.h |  4 +-
 drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  |  2 +-
 drivers/crypto/cnxk/meson.build          |  2 +
 drivers/crypto/ipsec_mb/meson.build      |  1 +
 drivers/crypto/null/meson.build          |  1 +
 drivers/crypto/openssl/meson.build       |  1 +
 drivers/dma/cnxk/meson.build             |  1 +
 drivers/dma/skeleton/meson.build         |  1 +
 drivers/event/cnxk/meson.build           |  1 +
 drivers/event/dsw/meson.build            |  1 +
 drivers/event/opdl/meson.build           |  1 +
 drivers/event/skeleton/meson.build       |  1 +
 drivers/event/sw/meson.build             |  1 +
 drivers/mempool/bucket/meson.build       |  1 +
 drivers/mempool/cnxk/meson.build         |  1 +
 drivers/mempool/ring/meson.build         |  1 +
 drivers/mempool/stack/meson.build        |  1 +
 drivers/meson.build                      |  6 +++
 drivers/net/af_packet/meson.build        |  1 +
 drivers/net/af_xdp/meson.build           |  2 +
 drivers/net/bonding/meson.build          |  1 +
 drivers/net/cnxk/cn10k_ethdev.c          |  4 +-
 drivers/net/cnxk/cn10k_tx.h              | 55 +++++++-----------------
 drivers/net/cnxk/cn9k_ethdev.c           |  4 +-
 drivers/net/cnxk/cn9k_tx.h               | 55 +++++++-----------------
 drivers/net/cnxk/cnxk_ethdev.h           |  1 -
 drivers/net/cnxk/meson.build             |  1 +
 drivers/net/failsafe/meson.build         |  1 +
 drivers/net/hns3/meson.build             |  6 +++
 drivers/net/memif/meson.build            |  1 +
 drivers/net/null/meson.build             |  1 +
 drivers/net/pcap/meson.build             |  1 +
 drivers/net/ring/meson.build             |  1 +
 drivers/net/tap/meson.build              |  1 +
 drivers/raw/cnxk_bphy/meson.build        |  1 +
 drivers/raw/cnxk_gpio/meson.build        |  1 +
 drivers/raw/skeleton/meson.build         |  1 +
 lib/eal/linux/eal.c                      |  6 +++
 lib/kni/rte_kni.c                        |  3 +-
 lib/mbuf/rte_mbuf.c                      | 12 +++---
 lib/mbuf/rte_mbuf.h                      | 37 +++++++++++++---
 lib/mbuf/rte_mbuf_core.h                 | 26 +++++++++--
 lib/mbuf/rte_mbuf_dyn.c                  |  3 ++
 lib/meson.build                          |  3 ++
 lib/pipeline/rte_table_action.c          |  2 +-
 lib/vhost/vhost.h                        |  2 +-
 lib/vhost/vhost_crypto.c                 | 25 ++++++-----
 meson_options.txt                        |  2 +
 60 files changed, 210 insertions(+), 151 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v4 1/7] mbuf: add API to get and set mbuf physical address
  2022-10-07 19:30                 ` [PATCH v4 0/7] " Shijith Thotton
@ 2022-10-07 19:30                   ` Shijith Thotton
  2022-10-07 20:16                     ` Olivier Matz
  2022-10-07 19:30                   ` [PATCH v4 2/7] test/dma: use API to get mbuf data " Shijith Thotton
                                     ` (7 subsequent siblings)
  8 siblings, 1 reply; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 19:30 UTC (permalink / raw)
  To: dev
  Cc: Shijith Thotton, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	mb, olivier.matz, stephen, thomas, ferruh.yigit, pbhagavatula,
	Nicolas Chautru, Ciara Power, Konstantin Ananyev, Reshma Pattan,
	Cristian Dumitrescu, Maxime Coquelin, Chenbo Xia

Added APIs rte_mbuf_iova_set and rte_mbuf_iova_get to set and get the
physical address of an mbuf respectively. Updated applications and
library to use the same.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 app/test-bbdev/test_bbdev_perf.c         |  2 +-
 app/test-crypto-perf/cperf_test_common.c |  5 ++---
 app/test/test_bpf.c                      |  2 +-
 app/test/test_mbuf.c                     |  2 +-
 app/test/test_pcapng.c                   |  2 +-
 lib/kni/rte_kni.c                        |  3 +--
 lib/mbuf/rte_mbuf.c                      | 12 +++++-----
 lib/mbuf/rte_mbuf.h                      | 28 +++++++++++++++++++-----
 lib/mbuf/rte_mbuf_core.h                 |  3 +--
 lib/pipeline/rte_table_action.c          |  2 +-
 lib/vhost/vhost.h                        |  2 +-
 lib/vhost/vhost_crypto.c                 | 25 ++++++++++-----------
 12 files changed, 51 insertions(+), 37 deletions(-)

diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index 311e5d1a96..e7fbf71f6d 100644
--- a/app/test-bbdev/test_bbdev_perf.c
+++ b/app/test-bbdev/test_bbdev_perf.c
@@ -1002,7 +1002,7 @@ init_op_data_objs(struct rte_bbdev_op_data *bufs,
 					seg->length);
 				memcpy(data, seg->addr, seg->length);
 				m_head->buf_addr = data;
-				m_head->buf_iova = rte_malloc_virt2iova(data);
+				rte_mbuf_iova_set(m_head, rte_malloc_virt2iova(data));
 				m_head->data_off = 0;
 				m_head->data_len = seg->length;
 			} else {
diff --git a/app/test-crypto-perf/cperf_test_common.c b/app/test-crypto-perf/cperf_test_common.c
index 00aadc9a47..27646cd619 100644
--- a/app/test-crypto-perf/cperf_test_common.c
+++ b/app/test-crypto-perf/cperf_test_common.c
@@ -26,8 +26,7 @@ fill_single_seg_mbuf(struct rte_mbuf *m, struct rte_mempool *mp,
 	/* start of buffer is after mbuf structure and priv data */
 	m->priv_size = 0;
 	m->buf_addr = (char *)m + mbuf_hdr_size;
-	m->buf_iova = rte_mempool_virt2iova(obj) +
-		mbuf_offset + mbuf_hdr_size;
+	rte_mbuf_iova_set(m, rte_mempool_virt2iova(obj) + mbuf_offset + mbuf_hdr_size);
 	m->buf_len = segment_sz;
 	m->data_len = data_len;
 	m->pkt_len = data_len;
@@ -58,7 +57,7 @@ fill_multi_seg_mbuf(struct rte_mbuf *m, struct rte_mempool *mp,
 		/* start of buffer is after mbuf structure and priv data */
 		m->priv_size = 0;
 		m->buf_addr = (char *)m + mbuf_hdr_size;
-		m->buf_iova = next_seg_phys_addr;
+		rte_mbuf_iova_set(m, next_seg_phys_addr);
 		next_seg_phys_addr += mbuf_hdr_size + segment_sz;
 		m->buf_len = segment_sz;
 		m->data_len = data_len;
diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c
index 97f500809e..f5af5e8a3f 100644
--- a/app/test/test_bpf.c
+++ b/app/test/test_bpf.c
@@ -2600,7 +2600,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len,
 	uint8_t *db;
 
 	mb->buf_addr = buf;
-	mb->buf_iova = (uintptr_t)buf;
+	rte_mbuf_iova_set(mb, (uintptr_t)buf);
 	mb->buf_len = buf_len;
 	rte_mbuf_refcnt_set(mb, 1);
 
diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
index e09b2549ca..22e45e66c1 100644
--- a/app/test/test_mbuf.c
+++ b/app/test/test_mbuf.c
@@ -1233,7 +1233,7 @@ test_failing_mbuf_sanity_check(struct rte_mempool *pktmbuf_pool)
 	}
 
 	badbuf = *buf;
-	badbuf.buf_iova = 0;
+	rte_mbuf_iova_set(&badbuf, 0);
 	if (verify_mbuf_check_panics(&badbuf)) {
 		printf("Error with bad-physaddr mbuf test\n");
 		return -1;
diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c
index 320dacea34..abbf00f6da 100644
--- a/app/test/test_pcapng.c
+++ b/app/test/test_pcapng.c
@@ -40,7 +40,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len,
 	uint8_t *db;
 
 	mb->buf_addr = buf;
-	mb->buf_iova = (uintptr_t)buf;
+	rte_mbuf_iova_set(mb, (uintptr_t)buf);
 	mb->buf_len = buf_len;
 	rte_mbuf_refcnt_set(mb, 1);
 
diff --git a/lib/kni/rte_kni.c b/lib/kni/rte_kni.c
index 7971c56bb4..3737a91de7 100644
--- a/lib/kni/rte_kni.c
+++ b/lib/kni/rte_kni.c
@@ -357,8 +357,7 @@ static void *
 va2pa(struct rte_mbuf *m)
 {
 	return (void *)((unsigned long)m -
-			((unsigned long)m->buf_addr -
-			 (unsigned long)m->buf_iova));
+			((unsigned long)m->buf_addr - (unsigned long)rte_mbuf_iova_get(m)));
 }
 
 static void *
diff --git a/lib/mbuf/rte_mbuf.c b/lib/mbuf/rte_mbuf.c
index a2307cebe6..16f6ed6731 100644
--- a/lib/mbuf/rte_mbuf.c
+++ b/lib/mbuf/rte_mbuf.c
@@ -89,7 +89,7 @@ rte_pktmbuf_init(struct rte_mempool *mp,
 	/* start of buffer is after mbuf structure and priv data */
 	m->priv_size = priv_size;
 	m->buf_addr = (char *)m + mbuf_size;
-	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
+	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
 	m->buf_len = (uint16_t)buf_len;
 
 	/* keep some headroom between start of buffer and data */
@@ -187,8 +187,8 @@ __rte_pktmbuf_init_extmem(struct rte_mempool *mp,
 	RTE_ASSERT(ctx->off + ext_mem->elt_size <= ext_mem->buf_len);
 
 	m->buf_addr = RTE_PTR_ADD(ext_mem->buf_ptr, ctx->off);
-	m->buf_iova = ext_mem->buf_iova == RTE_BAD_IOVA ?
-		      RTE_BAD_IOVA : (ext_mem->buf_iova + ctx->off);
+	rte_mbuf_iova_set(m, ext_mem->buf_iova == RTE_BAD_IOVA ? RTE_BAD_IOVA :
+								 (ext_mem->buf_iova + ctx->off));
 
 	ctx->off += ext_mem->elt_size;
 	if (ctx->off + ext_mem->elt_size > ext_mem->buf_len) {
@@ -388,7 +388,7 @@ int rte_mbuf_check(const struct rte_mbuf *m, int is_header,
 		*reason = "bad mbuf pool";
 		return -1;
 	}
-	if (m->buf_iova == 0) {
+	if (rte_mbuf_iova_get(m) == 0) {
 		*reason = "bad IO addr";
 		return -1;
 	}
@@ -669,8 +669,8 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
 
 	__rte_mbuf_sanity_check(m, 1);
 
-	fprintf(f, "dump mbuf at %p, iova=%#"PRIx64", buf_len=%u\n",
-		m, m->buf_iova, m->buf_len);
+	fprintf(f, "dump mbuf at %p, iova=%#" PRIx64 ", buf_len=%u\n", m, rte_mbuf_iova_get(m),
+		m->buf_len);
 	fprintf(f, "  pkt_len=%u, ol_flags=%#"PRIx64", nb_segs=%u, port=%u",
 		m->pkt_len, m->ol_flags, m->nb_segs, m->port);
 
diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h
index 9811e8c760..c552dfbcac 100644
--- a/lib/mbuf/rte_mbuf.h
+++ b/lib/mbuf/rte_mbuf.h
@@ -135,6 +135,24 @@ rte_mbuf_prefetch_part2(struct rte_mbuf *m)
 
 static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
 
+/**
+ * Get the mbuf physical address.
+ */
+static inline rte_iova_t
+rte_mbuf_iova_get(const struct rte_mbuf *m)
+{
+	return m->buf_iova;
+}
+
+/**
+ * Set the mbuf physical address.
+ */
+static inline void
+rte_mbuf_iova_set(struct rte_mbuf *m, rte_iova_t iova)
+{
+	m->buf_iova = iova;
+}
+
 /**
  * Return the IO address of the beginning of the mbuf data
  *
@@ -146,7 +164,7 @@ static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
 static inline rte_iova_t
 rte_mbuf_data_iova(const struct rte_mbuf *mb)
 {
-	return mb->buf_iova + mb->data_off;
+	return rte_mbuf_iova_get(mb) + mb->data_off;
 }
 
 /**
@@ -164,7 +182,7 @@ rte_mbuf_data_iova(const struct rte_mbuf *mb)
 static inline rte_iova_t
 rte_mbuf_data_iova_default(const struct rte_mbuf *mb)
 {
-	return mb->buf_iova + RTE_PKTMBUF_HEADROOM;
+	return rte_mbuf_iova_get(mb) + RTE_PKTMBUF_HEADROOM;
 }
 
 /**
@@ -1056,7 +1074,7 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
 	RTE_ASSERT(shinfo->free_cb != NULL);
 
 	m->buf_addr = buf_addr;
-	m->buf_iova = buf_iova;
+	rte_mbuf_iova_set(m, buf_iova);
 	m->buf_len = buf_len;
 
 	m->data_len = 0;
@@ -1143,7 +1161,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
 
 	mi->data_off = m->data_off;
 	mi->data_len = m->data_len;
-	mi->buf_iova = m->buf_iova;
+	rte_mbuf_iova_set(mi, rte_mbuf_iova_get(m));
 	mi->buf_addr = m->buf_addr;
 	mi->buf_len = m->buf_len;
 
@@ -1245,7 +1263,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
 
 	m->priv_size = priv_size;
 	m->buf_addr = (char *)m + mbuf_size;
-	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
+	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
 	m->buf_len = (uint16_t)buf_len;
 	rte_pktmbuf_reset_headroom(m);
 	m->data_len = 0;
diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index e672d59b36..51a12a1fb9 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -736,8 +736,7 @@ struct rte_mbuf_ext_shared_info {
  * @param o
  *   The offset into the data to calculate address from.
  */
-#define rte_pktmbuf_iova_offset(m, o) \
-	(rte_iova_t)((m)->buf_iova + (m)->data_off + (o))
+#define rte_pktmbuf_iova_offset(m, o) (rte_iova_t)(rte_mbuf_iova_get(m) + (m)->data_off + (o))
 
 /**
  * A macro that returns the IO address that points to the start of the
diff --git a/lib/pipeline/rte_table_action.c b/lib/pipeline/rte_table_action.c
index b1310be565..06a8cdcf05 100644
--- a/lib/pipeline/rte_table_action.c
+++ b/lib/pipeline/rte_table_action.c
@@ -1929,7 +1929,7 @@ pkt_work_sym_crypto(struct rte_mbuf *mbuf, struct sym_crypto_data *data,
 
 	op->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
 	op->sess_type = RTE_CRYPTO_OP_WITH_SESSION;
-	op->phys_addr = mbuf->buf_iova + cfg->op_offset - sizeof(*mbuf);
+	op->phys_addr = rte_mbuf_iova_get(mbuf) + cfg->op_offset - sizeof(*mbuf);
 	op->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
 	sym->m_src = mbuf;
 	sym->m_dst = NULL;
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index eeeda681cc..ef211ed519 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -971,7 +971,7 @@ restore_mbuf(struct rte_mbuf *m)
 		/* start of buffer is after mbuf structure and priv data */
 
 		m->buf_addr = (char *)m + mbuf_size;
-		m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
+		rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
 		m = m->next;
 	}
 }
diff --git a/lib/vhost/vhost_crypto.c b/lib/vhost/vhost_crypto.c
index 54946f46d9..3077a21ae8 100644
--- a/lib/vhost/vhost_crypto.c
+++ b/lib/vhost/vhost_crypto.c
@@ -823,11 +823,10 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 	switch (vcrypto->option) {
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
 		m_src->data_len = cipher->para.src_data_len;
-		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
-				cipher->para.src_data_len);
+		rte_mbuf_iova_set(m_src,
+				  gpa_to_hpa(vcrypto->dev, desc->addr, cipher->para.src_data_len));
 		m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO);
-		if (unlikely(m_src->buf_iova == 0 ||
-				m_src->buf_addr == NULL)) {
+		if (unlikely(rte_mbuf_iova_get(m_src) == 0 || m_src->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
@@ -867,10 +866,10 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 
 	switch (vcrypto->option) {
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
-		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
-				desc->addr, cipher->para.dst_data_len);
+		rte_mbuf_iova_set(m_dst,
+				  gpa_to_hpa(vcrypto->dev, desc->addr, cipher->para.dst_data_len));
 		m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW);
-		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
+		if (unlikely(rte_mbuf_iova_get(m_dst) == 0 || m_dst->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
@@ -981,10 +980,10 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 		m_src->data_len = chain->para.src_data_len;
 		m_dst->data_len = chain->para.dst_data_len;
 
-		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
-				chain->para.src_data_len);
+		rte_mbuf_iova_set(m_src,
+				  gpa_to_hpa(vcrypto->dev, desc->addr, chain->para.src_data_len));
 		m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO);
-		if (unlikely(m_src->buf_iova == 0 || m_src->buf_addr == NULL)) {
+		if (unlikely(rte_mbuf_iova_get(m_src) == 0 || m_src->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
@@ -1024,10 +1023,10 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 
 	switch (vcrypto->option) {
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
-		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
-				desc->addr, chain->para.dst_data_len);
+		rte_mbuf_iova_set(m_dst,
+				  gpa_to_hpa(vcrypto->dev, desc->addr, chain->para.dst_data_len));
 		m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW);
-		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
+		if (unlikely(rte_mbuf_iova_get(m_dst) == 0 || m_dst->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v4 2/7] test/dma: use API to get mbuf data physical address
  2022-10-07 19:30                 ` [PATCH v4 0/7] " Shijith Thotton
  2022-10-07 19:30                   ` [PATCH v4 1/7] mbuf: add API to get and set mbuf physical address Shijith Thotton
@ 2022-10-07 19:30                   ` Shijith Thotton
  2022-10-07 20:17                     ` Olivier Matz
  2022-10-07 19:30                   ` [PATCH v4 3/7] build: add meson option to configure IOVA mode as PA Shijith Thotton
                                     ` (6 subsequent siblings)
  8 siblings, 1 reply; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 19:30 UTC (permalink / raw)
  To: dev
  Cc: Shijith Thotton, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	mb, olivier.matz, stephen, thomas, ferruh.yigit, pbhagavatula,
	Chengwen Feng, Kevin Laatz

Used rte_mbuf_data_iova API to get the physical address of mbuf data.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 app/test/test_dmadev.c | 33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c
index 9e8e101f40..fe62e98af8 100644
--- a/app/test/test_dmadev.c
+++ b/app/test/test_dmadev.c
@@ -110,8 +110,8 @@ do_multi_copies(int16_t dev_id, uint16_t vchan,
 		for (j = 0; j < COPY_LEN/sizeof(uint64_t); j++)
 			src_data[j] = rte_rand();
 
-		if (rte_dma_copy(dev_id, vchan, srcs[i]->buf_iova + srcs[i]->data_off,
-				dsts[i]->buf_iova + dsts[i]->data_off, COPY_LEN, 0) != id_count++)
+		if (rte_dma_copy(dev_id, vchan, rte_mbuf_data_iova(srcs[i]),
+				 rte_mbuf_data_iova(dsts[i]), COPY_LEN, 0) != id_count++)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", i);
 	}
 	rte_dma_submit(dev_id, vchan);
@@ -317,9 +317,8 @@ test_failure_in_full_burst(int16_t dev_id, uint16_t vchan, bool fence,
 	rte_dma_stats_get(dev_id, vchan, &baseline); /* get a baseline set of stats */
 	for (i = 0; i < COMP_BURST_SZ; i++) {
 		int id = rte_dma_copy(dev_id, vchan,
-				(i == fail_idx ? 0 : (srcs[i]->buf_iova + srcs[i]->data_off)),
-				dsts[i]->buf_iova + dsts[i]->data_off,
-				COPY_LEN, OPT_FENCE(i));
+				      (i == fail_idx ? 0 : rte_mbuf_data_iova(srcs[i])),
+				      rte_mbuf_data_iova(dsts[i]), COPY_LEN, OPT_FENCE(i));
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", i);
 		if (i == fail_idx)
@@ -407,9 +406,8 @@ test_individual_status_query_with_failure(int16_t dev_id, uint16_t vchan, bool f
 
 	for (j = 0; j < COMP_BURST_SZ; j++) {
 		int id = rte_dma_copy(dev_id, vchan,
-				(j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]->data_off)),
-				dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, OPT_FENCE(j));
+				      (j == fail_idx ? 0 : rte_mbuf_data_iova(srcs[j])),
+				      rte_mbuf_data_iova(dsts[j]), COPY_LEN, OPT_FENCE(j));
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 		if (j == fail_idx)
@@ -470,9 +468,8 @@ test_single_item_status_query_with_failure(int16_t dev_id, uint16_t vchan,
 
 	for (j = 0; j < COMP_BURST_SZ; j++) {
 		int id = rte_dma_copy(dev_id, vchan,
-				(j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]->data_off)),
-				dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, 0);
+				      (j == fail_idx ? 0 : rte_mbuf_data_iova(srcs[j])),
+				      rte_mbuf_data_iova(dsts[j]), COPY_LEN, 0);
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 		if (j == fail_idx)
@@ -529,15 +526,14 @@ test_multi_failure(int16_t dev_id, uint16_t vchan, struct rte_mbuf **srcs, struc
 
 	/* enqueue and gather completions in one go */
 	for (j = 0; j < COMP_BURST_SZ; j++) {
-		uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
+		uintptr_t src = rte_mbuf_data_iova(srcs[j]);
 		/* set up for failure if the current index is anywhere is the fails array */
 		for (i = 0; i < num_fail; i++)
 			if (j == fail[i])
 				src = 0;
 
-		int id = rte_dma_copy(dev_id, vchan,
-				src, dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, 0);
+		int id = rte_dma_copy(dev_id, vchan, src, rte_mbuf_data_iova(dsts[j]),
+				      COPY_LEN, 0);
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 	}
@@ -565,15 +561,14 @@ test_multi_failure(int16_t dev_id, uint16_t vchan, struct rte_mbuf **srcs, struc
 
 	/* enqueue and gather completions in bursts, but getting errors one at a time */
 	for (j = 0; j < COMP_BURST_SZ; j++) {
-		uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
+		uintptr_t src = rte_mbuf_data_iova(srcs[j]);
 		/* set up for failure if the current index is anywhere is the fails array */
 		for (i = 0; i < num_fail; i++)
 			if (j == fail[i])
 				src = 0;
 
-		int id = rte_dma_copy(dev_id, vchan,
-				src, dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, 0);
+		int id = rte_dma_copy(dev_id, vchan, src, rte_mbuf_data_iova(dsts[j]),
+				      COPY_LEN, 0);
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 	}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v4 3/7] build: add meson option to configure IOVA mode as PA
  2022-10-07 19:30                 ` [PATCH v4 0/7] " Shijith Thotton
  2022-10-07 19:30                   ` [PATCH v4 1/7] mbuf: add API to get and set mbuf physical address Shijith Thotton
  2022-10-07 19:30                   ` [PATCH v4 2/7] test/dma: use API to get mbuf data " Shijith Thotton
@ 2022-10-07 19:30                   ` Shijith Thotton
  2022-10-07 19:30                   ` [PATCH v4 4/7] mbuf: add second dynamic field member Shijith Thotton
                                     ` (5 subsequent siblings)
  8 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 19:30 UTC (permalink / raw)
  To: dev
  Cc: Shijith Thotton, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	mb, olivier.matz, stephen, thomas, ferruh.yigit, pbhagavatula,
	Dongdong Liu, Yisen Zhuang

IOVA mode in DPDK is either PA or VA. The new build option
enable_iova_as_pa configures the mode to PA at compile time. By default,
this option is enabled. If the option is disabled, only drivers which
support it are enabled during build. A supporting driver can set the flag
pmd_supports_disable_iova_as_pa in its build file.

The mbuf structure holds the physical (PA) and virtual address (VA) of a
buffer. If IOVA as PA is disabled at compile time, the PA field (buf_iova)
of mbuf is redundant as it is the same as VA and is replaced by a dummy
field.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 app/test/test_mbuf.c         | 12 +++++++-----
 config/meson.build           |  1 +
 drivers/meson.build          |  6 ++++++
 drivers/net/hns3/meson.build |  6 ++++++
 lib/eal/linux/eal.c          |  6 ++++++
 lib/mbuf/rte_mbuf.c          |  2 +-
 lib/mbuf/rte_mbuf.h          |  9 +++++++++
 lib/mbuf/rte_mbuf_core.h     |  6 ++++++
 lib/meson.build              |  3 +++
 meson_options.txt            |  2 ++
 10 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
index 22e45e66c1..2d66786ace 100644
--- a/app/test/test_mbuf.c
+++ b/app/test/test_mbuf.c
@@ -1232,11 +1232,13 @@ test_failing_mbuf_sanity_check(struct rte_mempool *pktmbuf_pool)
 		return -1;
 	}
 
-	badbuf = *buf;
-	rte_mbuf_iova_set(&badbuf, 0);
-	if (verify_mbuf_check_panics(&badbuf)) {
-		printf("Error with bad-physaddr mbuf test\n");
-		return -1;
+	if (RTE_IOVA_AS_PA) {
+		badbuf = *buf;
+		rte_mbuf_iova_set(&badbuf, 0);
+		if (verify_mbuf_check_panics(&badbuf)) {
+			printf("Error with bad-physaddr mbuf test\n");
+			return -1;
+		}
 	}
 
 	badbuf = *buf;
diff --git a/config/meson.build b/config/meson.build
index 7f7b6c92fd..0fc209db01 100644
--- a/config/meson.build
+++ b/config/meson.build
@@ -309,6 +309,7 @@ endif
 if get_option('mbuf_refcnt_atomic')
     dpdk_conf.set('RTE_MBUF_REFCNT_ATOMIC', true)
 endif
+dpdk_conf.set10('RTE_IOVA_AS_PA', get_option('enable_iova_as_pa'))
 
 compile_time_cpuflags = []
 subdir(arch_subdir)
diff --git a/drivers/meson.build b/drivers/meson.build
index f6ba5ba4fb..2a29c210b5 100644
--- a/drivers/meson.build
+++ b/drivers/meson.build
@@ -106,6 +106,7 @@ foreach subpath:subdirs
         ext_deps = []
         pkgconfig_extra_libs = []
         testpmd_sources = []
+        pmd_supports_disable_iova_as_pa = false
 
         if not enable_drivers.contains(drv_path)
             build = false
@@ -123,6 +124,11 @@ foreach subpath:subdirs
             # pull in driver directory which should update all the local variables
             subdir(drv_path)
 
+            if dpdk_conf.get('RTE_IOVA_AS_PA') == 0 and not pmd_supports_disable_iova_as_pa and not always_enable.contains(drv_path)
+                build = false
+                reason = 'driver does not support disabling IOVA as PA mode'
+            endif
+
             # get dependency objs from strings
             shared_deps = ext_deps
             static_deps = ext_deps
diff --git a/drivers/net/hns3/meson.build b/drivers/net/hns3/meson.build
index f2aede94ed..39d426f232 100644
--- a/drivers/net/hns3/meson.build
+++ b/drivers/net/hns3/meson.build
@@ -13,6 +13,12 @@ if arch_subdir != 'x86' and arch_subdir != 'arm' or not dpdk_conf.get('RTE_ARCH_
     subdir_done()
 endif
 
+if dpdk_conf.get('RTE_IOVA_AS_PA') == 0
+    build = false
+    reason = 'driver does not support disabling IOVA as PA mode'
+    subdir_done()
+endif
+
 sources = files(
         'hns3_cmd.c',
         'hns3_dcb.c',
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 46bf52cef0..a6eb45c65a 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -1128,6 +1128,12 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
+	if (rte_eal_iova_mode() == RTE_IOVA_PA && !RTE_IOVA_AS_PA) {
+		rte_eal_init_alert("Cannot use IOVA as 'PA' as it is disabled during build");
+		rte_errno = EINVAL;
+		return -1;
+	}
+
 	RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
 		rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");
 
diff --git a/lib/mbuf/rte_mbuf.c b/lib/mbuf/rte_mbuf.c
index 16f6ed6731..cfd8062f1e 100644
--- a/lib/mbuf/rte_mbuf.c
+++ b/lib/mbuf/rte_mbuf.c
@@ -388,7 +388,7 @@ int rte_mbuf_check(const struct rte_mbuf *m, int is_header,
 		*reason = "bad mbuf pool";
 		return -1;
 	}
-	if (rte_mbuf_iova_get(m) == 0) {
+	if (RTE_IOVA_AS_PA && rte_mbuf_iova_get(m) == 0) {
 		*reason = "bad IO addr";
 		return -1;
 	}
diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h
index c552dfbcac..481e1ec326 100644
--- a/lib/mbuf/rte_mbuf.h
+++ b/lib/mbuf/rte_mbuf.h
@@ -141,7 +141,11 @@ static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
 static inline rte_iova_t
 rte_mbuf_iova_get(const struct rte_mbuf *m)
 {
+#if RTE_IOVA_AS_PA
 	return m->buf_iova;
+#else
+	return (rte_iova_t)m->buf_addr;
+#endif
 }
 
 /**
@@ -150,7 +154,12 @@ rte_mbuf_iova_get(const struct rte_mbuf *m)
 static inline void
 rte_mbuf_iova_set(struct rte_mbuf *m, rte_iova_t iova)
 {
+#if RTE_IOVA_AS_PA
 	m->buf_iova = iova;
+#else
+	RTE_SET_USED(m);
+	RTE_SET_USED(iova);
+#endif
 }
 
 /**
diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index 51a12a1fb9..91c2211b44 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -467,13 +467,19 @@ struct rte_mbuf {
 	RTE_MARKER cacheline0;
 
 	void *buf_addr;           /**< Virtual address of segment buffer. */
+#if RTE_IOVA_AS_PA
 	/**
 	 * Physical address of segment buffer.
+	 * This field is undefined if the build is configured to use only
+	 * virtual address as IOVA (i.e. RTE_IOVA_AS_PA is 0).
 	 * Force alignment to 8-bytes, so as to ensure we have the exact
 	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
 	 * working on vector drivers easier.
 	 */
 	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
+#else
+	uint64_t dummy;
+#endif
 
 	/* next 8 bytes are initialised on RX descriptor rearm */
 	RTE_MARKER64 rearm_data;
diff --git a/lib/meson.build b/lib/meson.build
index c648f7d800..c071a6c8e0 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -88,6 +88,9 @@ optional_libs = [
 disabled_libs = []
 opt_disabled_libs = run_command(list_dir_globs, get_option('disable_libs'),
         check: true).stdout().split()
+if dpdk_conf.get('RTE_IOVA_AS_PA') == 0
+    opt_disabled_libs += ['kni']
+endif
 foreach l:opt_disabled_libs
     if not optional_libs.contains(l)
         warning('Cannot disable mandatory library "@0@"'.format(l))
diff --git a/meson_options.txt b/meson_options.txt
index 7c220ad68d..f6880410e2 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -44,6 +44,8 @@ option('platform', type: 'string', value: 'native', description:
        'Platform to build, either "native", "generic" or a SoC. Please refer to the Linux build guide for more information.')
 option('enable_trace_fp', type: 'boolean', value: false, description:
        'enable fast path trace points.')
+option('enable_iova_as_pa', type: 'boolean', value: true, description:
+       'Enable or disable support for IOVA as PA mode. Disabling this option removes the buf_iova field of mbuf.')
 option('tests', type: 'boolean', value: true, description:
        'build unit tests')
 option('use_hpet', type: 'boolean', value: false, description:
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v4 4/7] mbuf: add second dynamic field member
  2022-10-07 19:30                 ` [PATCH v4 0/7] " Shijith Thotton
                                     ` (2 preceding siblings ...)
  2022-10-07 19:30                   ` [PATCH v4 3/7] build: add meson option to configure IOVA mode as PA Shijith Thotton
@ 2022-10-07 19:30                   ` Shijith Thotton
  2022-10-07 19:30                   ` [PATCH v4 5/7] lib: move mbuf next pointer to first cache line Shijith Thotton
                                     ` (4 subsequent siblings)
  8 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 19:30 UTC (permalink / raw)
  To: dev
  Cc: Shijith Thotton, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	mb, olivier.matz, stephen, thomas, ferruh.yigit, pbhagavatula

If IOVA as PA is disabled during build, mbuf physical address field is
undefined. This space is used to add the second dynamic field.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 lib/mbuf/rte_mbuf_core.h | 6 +++++-
 lib/mbuf/rte_mbuf_dyn.c  | 3 +++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index 91c2211b44..dc6c54015e 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -478,7 +478,11 @@ struct rte_mbuf {
 	 */
 	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
 #else
-	uint64_t dummy;
+	/**
+	 * Reserved for dynamic field in builds where physical address
+	 * field is undefined.
+	 */
+	uint64_t dynfield2;
 #endif
 
 	/* next 8 bytes are initialised on RX descriptor rearm */
diff --git a/lib/mbuf/rte_mbuf_dyn.c b/lib/mbuf/rte_mbuf_dyn.c
index 4ae79383b5..35839e938c 100644
--- a/lib/mbuf/rte_mbuf_dyn.c
+++ b/lib/mbuf/rte_mbuf_dyn.c
@@ -128,6 +128,9 @@ init_shared_mem(void)
 		 */
 		memset(shm, 0, sizeof(*shm));
 		mark_free(dynfield1);
+#if !RTE_IOVA_AS_PA
+		mark_free(dynfield2);
+#endif
 
 		/* init free_flags */
 		for (mask = RTE_MBUF_F_FIRST_FREE; mask <= RTE_MBUF_F_LAST_FREE; mask <<= 1)
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v4 5/7] lib: move mbuf next pointer to first cache line
  2022-10-07 19:30                 ` [PATCH v4 0/7] " Shijith Thotton
                                     ` (3 preceding siblings ...)
  2022-10-07 19:30                   ` [PATCH v4 4/7] mbuf: add second dynamic field member Shijith Thotton
@ 2022-10-07 19:30                   ` Shijith Thotton
  2022-10-07 19:30                   ` [PATCH v4 6/7] drivers: mark cnxk PMDs work with IOVA as PA disabled Shijith Thotton
                                     ` (3 subsequent siblings)
  8 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 19:30 UTC (permalink / raw)
  To: dev
  Cc: Shijith Thotton, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	mb, olivier.matz, stephen, thomas, ferruh.yigit, pbhagavatula

Swapped position of mbuf next pointer and second dynamic field (dynfield2)
if the build is configured to disable IOVA as PA. This is to move the
mbuf next pointer to first cache line.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 doc/guides/rel_notes/release_22_11.rst |  3 +++
 lib/mbuf/rte_mbuf_core.h               | 19 ++++++++++++++-----
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/doc/guides/rel_notes/release_22_11.rst b/doc/guides/rel_notes/release_22_11.rst
index 0b4740abd1..006d1f5988 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -269,6 +269,9 @@ ABI Changes
 * eventdev: Added ``weight`` and ``affinity`` fields
   to ``rte_event_queue_conf`` structure.
 
+* mbuf: Replaced ``buf_iova`` field with ``next`` field and added a new field
+  ``dynfield2`` at its place in second cacheline if ``RTE_IOVA_AS_PA`` is 0.
+
 
 Known Issues
 ------------
diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index dc6c54015e..37d3fcc3b8 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -479,10 +479,11 @@ struct rte_mbuf {
 	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
 #else
 	/**
-	 * Reserved for dynamic field in builds where physical address
-	 * field is undefined.
+	 * Next segment of scattered packet.
+	 * This field is valid when physical address field is undefined.
+	 * Otherwise next pointer in the second cache line will be used.
 	 */
-	uint64_t dynfield2;
+	struct rte_mbuf *next;
 #endif
 
 	/* next 8 bytes are initialised on RX descriptor rearm */
@@ -599,11 +600,19 @@ struct rte_mbuf {
 	/* second cache line - fields only used in slow path or on TX */
 	RTE_MARKER cacheline1 __rte_cache_min_aligned;
 
+#if RTE_IOVA_AS_PA
 	/**
-	 * Next segment of scattered packet. Must be NULL in the last segment or
-	 * in case of non-segmented packet.
+	 * Next segment of scattered packet. Must be NULL in the last
+	 * segment or in case of non-segmented packet.
 	 */
 	struct rte_mbuf *next;
+#else
+	/**
+	 * Reserved for dynamic field when the next pointer is in first
+	 * cache line (i.e. RTE_IOVA_AS_PA is 0).
+	 */
+	uint64_t dynfield2;
+#endif
 
 	/* fields to support TX offloads */
 	RTE_STD_C11
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v4 6/7] drivers: mark cnxk PMDs work with IOVA as PA disabled
  2022-10-07 19:30                 ` [PATCH v4 0/7] " Shijith Thotton
                                     ` (4 preceding siblings ...)
  2022-10-07 19:30                   ` [PATCH v4 5/7] lib: move mbuf next pointer to first cache line Shijith Thotton
@ 2022-10-07 19:30                   ` Shijith Thotton
  2022-10-07 19:30                   ` [PATCH v4 7/7] drivers: mark software " Shijith Thotton
                                     ` (2 subsequent siblings)
  8 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 19:30 UTC (permalink / raw)
  To: dev
  Cc: Shijith Thotton, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	mb, olivier.matz, stephen, thomas, ferruh.yigit, pbhagavatula,
	Ruifeng Wang, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Ankur Dwivedi, Anoob Joseph, Tejasree Kondoj,
	Radha Mohan Chintakuntla, Veerasenareddy Burru,
	Ashwin Sekhar T K, Jakub Palider, Tomasz Duszynski

Enabled the flag pmd_supports_disable_iova_as_pa in cnxk driver build
files as they work with IOVA as VA. Updated cn9k and cn10k soc build
configurations to disable the IOVA as PA build by default.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 config/arm/meson.build                   |  8 +++-
 doc/guides/platform/cnxk.rst             |  3 +-
 drivers/common/cnxk/meson.build          |  1 +
 drivers/crypto/cnxk/cn10k_ipsec_la_ops.h |  4 +-
 drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  |  2 +-
 drivers/crypto/cnxk/meson.build          |  2 +
 drivers/dma/cnxk/meson.build             |  1 +
 drivers/event/cnxk/meson.build           |  1 +
 drivers/mempool/cnxk/meson.build         |  1 +
 drivers/net/cnxk/cn10k_ethdev.c          |  4 +-
 drivers/net/cnxk/cn10k_tx.h              | 55 +++++++-----------------
 drivers/net/cnxk/cn9k_ethdev.c           |  4 +-
 drivers/net/cnxk/cn9k_tx.h               | 55 +++++++-----------------
 drivers/net/cnxk/cnxk_ethdev.h           |  1 -
 drivers/net/cnxk/meson.build             |  1 +
 drivers/raw/cnxk_bphy/meson.build        |  1 +
 drivers/raw/cnxk_gpio/meson.build        |  1 +
 17 files changed, 56 insertions(+), 89 deletions(-)

diff --git a/config/arm/meson.build b/config/arm/meson.build
index 9f1636e0d5..6f55a36b56 100644
--- a/config/arm/meson.build
+++ b/config/arm/meson.build
@@ -294,7 +294,8 @@ soc_cn10k = {
     'flags': [
         ['RTE_MAX_LCORE', 24],
         ['RTE_MAX_NUMA_NODES', 1],
-        ['RTE_MEMPOOL_ALIGN', 128]
+        ['RTE_MEMPOOL_ALIGN', 128],
+        ['RTE_IOVA_AS_PA', 0]
     ],
     'part_number': '0xd49',
     'extra_march_features': ['crypto'],
@@ -370,7 +371,10 @@ soc_cn9k = {
     'description': 'Marvell OCTEON 9',
     'implementer': '0x43',
     'part_number': '0xb2',
-    'numa': false
+    'numa': false,
+    'flags': [
+        ['RTE_IOVA_AS_PA', 0]
+    ]
 }
 
 soc_stingray = {
diff --git a/doc/guides/platform/cnxk.rst b/doc/guides/platform/cnxk.rst
index 97b2be5c37..d0fdf9011d 100644
--- a/doc/guides/platform/cnxk.rst
+++ b/doc/guides/platform/cnxk.rst
@@ -574,7 +574,8 @@ Compile DPDK
 ------------
 
 DPDK may be compiled either natively on OCTEON CN9K/CN10K platform or cross-compiled on
-an x86 based platform.
+an x86 based platform. Meson build option ``enable_iova_as_pa`` is disabled on cnxk
+platforms, so only PMDs that support building with it disabled are enabled on cnxk platform builds.
 
 Native Compilation
 ~~~~~~~~~~~~~~~~~~
diff --git a/drivers/common/cnxk/meson.build b/drivers/common/cnxk/meson.build
index 127fcbcdc5..849735921c 100644
--- a/drivers/common/cnxk/meson.build
+++ b/drivers/common/cnxk/meson.build
@@ -87,3 +87,4 @@ sources += files('cnxk_telemetry_bphy.c',
 )
 
 deps += ['bus_pci', 'net', 'telemetry']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
index e220863799..21502e0eb2 100644
--- a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
+++ b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
@@ -86,7 +86,7 @@ process_outb_sa(struct roc_cpt_lf *lf, struct rte_crypto_op *cop,
 
 	/* Prepare CPT instruction */
 	inst->w4.u64 = inst_w4_u64 | rte_pktmbuf_pkt_len(m_src);
-	dptr = rte_pktmbuf_iova(m_src);
+	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
 	inst->dptr = dptr;
 	inst->rptr = dptr;
 
@@ -103,7 +103,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct cn10k_ipsec_sa *sa,
 
 	/* Prepare CPT instruction */
 	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
-	dptr = rte_pktmbuf_iova(m_src);
+	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
 	inst->dptr = dptr;
 	inst->rptr = dptr;
 
diff --git a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
index e469596756..8b68e4c728 100644
--- a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
+++ b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
@@ -99,7 +99,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct cn9k_ipsec_sa *sa,
 
 	/* Prepare CPT instruction */
 	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
-	inst->dptr = inst->rptr = rte_pktmbuf_iova(m_src);
+	inst->dptr = inst->rptr = rte_pktmbuf_mtod(m_src, uint64_t);
 	inst->w7.u64 = sa->inst.w7;
 }
 #endif /* __CN9K_IPSEC_LA_OPS_H__ */
diff --git a/drivers/crypto/cnxk/meson.build b/drivers/crypto/cnxk/meson.build
index 8db861f908..a5acabab2b 100644
--- a/drivers/crypto/cnxk/meson.build
+++ b/drivers/crypto/cnxk/meson.build
@@ -31,3 +31,5 @@ if get_option('buildtype').contains('debug')
 else
     cflags += [ '-ULA_IPSEC_DEBUG','-UCNXK_CRYPTODEV_DEBUG' ]
 endif
+
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/dma/cnxk/meson.build b/drivers/dma/cnxk/meson.build
index d4be4ee860..252e5ff78b 100644
--- a/drivers/dma/cnxk/meson.build
+++ b/drivers/dma/cnxk/meson.build
@@ -3,3 +3,4 @@
 
 deps += ['bus_pci', 'common_cnxk', 'dmadev']
 sources = files('cnxk_dmadev.c')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index b27bae7b12..aa42ab3a90 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -479,3 +479,4 @@ foreach flag: extra_flags
 endforeach
 
 deps += ['bus_pci', 'common_cnxk', 'net_cnxk', 'crypto_cnxk']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/mempool/cnxk/meson.build b/drivers/mempool/cnxk/meson.build
index d5d1978569..d8bcc41ca0 100644
--- a/drivers/mempool/cnxk/meson.build
+++ b/drivers/mempool/cnxk/meson.build
@@ -17,3 +17,4 @@ sources = files(
 )
 
 deps += ['eal', 'mbuf', 'kvargs', 'bus_pci', 'common_cnxk', 'mempool']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c
index e8faeebe1f..0b33b3a496 100644
--- a/drivers/net/cnxk/cn10k_ethdev.c
+++ b/drivers/net/cnxk/cn10k_ethdev.c
@@ -67,9 +67,9 @@ nix_tx_offload_flags(struct rte_eth_dev *eth_dev)
 	RTE_BUILD_BUG_ON(RTE_MBUF_OUTL2_LEN_BITS != 7);
 	RTE_BUILD_BUG_ON(RTE_MBUF_OUTL3_LEN_BITS != 9);
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
-			 offsetof(struct rte_mbuf, buf_iova) + 8);
+			 offsetof(struct rte_mbuf, buf_addr) + 16);
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
-			 offsetof(struct rte_mbuf, buf_iova) + 16);
+			 offsetof(struct rte_mbuf, buf_addr) + 24);
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
 			 offsetof(struct rte_mbuf, ol_flags) + 12);
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, tx_offload) !=
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index b87fb29951..3e7494a6b2 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -1900,14 +1900,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 		mbuf2 = (uint64_t *)tx_pkts[2];
 		mbuf3 = (uint64_t *)tx_pkts[3];
 
-		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
-				     offsetof(struct rte_mbuf, buf_iova));
 		/*
 		 * Get mbuf's, olflags, iova, pktlen, dataoff
 		 * dataoff_iovaX.D[0] = iova,
@@ -1915,28 +1907,24 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 		 * len_olflagsX.D[0] = ol_flags,
 		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
 		 */
-		dataoff_iova0 = vld1q_u64(mbuf0);
-		len_olflags0 = vld1q_u64(mbuf0 + 2);
-		dataoff_iova1 = vld1q_u64(mbuf1);
-		len_olflags1 = vld1q_u64(mbuf1 + 2);
-		dataoff_iova2 = vld1q_u64(mbuf2);
-		len_olflags2 = vld1q_u64(mbuf2 + 2);
-		dataoff_iova3 = vld1q_u64(mbuf3);
-		len_olflags3 = vld1q_u64(mbuf3 + 2);
+		dataoff_iova0 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf0), 1);
+		len_olflags0 = vld1q_u64(mbuf0 + 3);
+		dataoff_iova1 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf1), 1);
+		len_olflags1 = vld1q_u64(mbuf1 + 3);
+		dataoff_iova2 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf2), 1);
+		len_olflags2 = vld1q_u64(mbuf2 + 3);
+		dataoff_iova3 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf3), 1);
+		len_olflags3 = vld1q_u64(mbuf3 + 3);
 
 		/* Move mbufs to point pool */
-		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
+		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct rte_mbuf, pool));
+		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct rte_mbuf, pool));
+		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct rte_mbuf, pool));
+		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct rte_mbuf, pool));
 
 		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
 			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
@@ -1986,17 +1974,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
 		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
 
-		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
-		const uint64x2_t and_mask0 = {
-			0xFFFFFFFFFFFFFFFF,
-			0x000000000000FFFF,
-		};
-
-		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
-		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
-		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
-		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
-
 		/*
 		 * Pick only 16 bits of pktlen preset at bits 63:32
 		 * and place them at bits 15:0.
diff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c
index 4fb0e2d94e..3b702d9696 100644
--- a/drivers/net/cnxk/cn9k_ethdev.c
+++ b/drivers/net/cnxk/cn9k_ethdev.c
@@ -67,9 +67,9 @@ nix_tx_offload_flags(struct rte_eth_dev *eth_dev)
 	RTE_BUILD_BUG_ON(RTE_MBUF_OUTL2_LEN_BITS != 7);
 	RTE_BUILD_BUG_ON(RTE_MBUF_OUTL3_LEN_BITS != 9);
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
-			 offsetof(struct rte_mbuf, buf_iova) + 8);
+			 offsetof(struct rte_mbuf, buf_addr) + 16);
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
-			 offsetof(struct rte_mbuf, buf_iova) + 16);
+			 offsetof(struct rte_mbuf, buf_addr) + 24);
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
 			 offsetof(struct rte_mbuf, ol_flags) + 12);
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, tx_offload) !=
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index 6ce81f5c96..f5d99ccb5a 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -1005,14 +1005,6 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		mbuf2 = (uint64_t *)tx_pkts[2];
 		mbuf3 = (uint64_t *)tx_pkts[3];
 
-		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
-				     offsetof(struct rte_mbuf, buf_iova));
 		/*
 		 * Get mbuf's, olflags, iova, pktlen, dataoff
 		 * dataoff_iovaX.D[0] = iova,
@@ -1020,28 +1012,24 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		 * len_olflagsX.D[0] = ol_flags,
 		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
 		 */
-		dataoff_iova0 = vld1q_u64(mbuf0);
-		len_olflags0 = vld1q_u64(mbuf0 + 2);
-		dataoff_iova1 = vld1q_u64(mbuf1);
-		len_olflags1 = vld1q_u64(mbuf1 + 2);
-		dataoff_iova2 = vld1q_u64(mbuf2);
-		len_olflags2 = vld1q_u64(mbuf2 + 2);
-		dataoff_iova3 = vld1q_u64(mbuf3);
-		len_olflags3 = vld1q_u64(mbuf3 + 2);
+		dataoff_iova0 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf0), 1);
+		len_olflags0 = vld1q_u64(mbuf0 + 3);
+		dataoff_iova1 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf1)->data_off, vld1q_u64(mbuf1), 1);
+		len_olflags1 = vld1q_u64(mbuf1 + 3);
+		dataoff_iova2 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf2)->data_off, vld1q_u64(mbuf2), 1);
+		len_olflags2 = vld1q_u64(mbuf2 + 3);
+		dataoff_iova3 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf3)->data_off, vld1q_u64(mbuf3), 1);
+		len_olflags3 = vld1q_u64(mbuf3 + 3);
 
 		/* Move mbufs to point pool */
-		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
+		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct rte_mbuf, pool));
+		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct rte_mbuf, pool));
+		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct rte_mbuf, pool));
+		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct rte_mbuf, pool));
 
 		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
 			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
@@ -1091,17 +1079,6 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
 		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
 
-		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
-		const uint64x2_t and_mask0 = {
-			0xFFFFFFFFFFFFFFFF,
-			0x000000000000FFFF,
-		};
-
-		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
-		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
-		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
-		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
-
 		/*
 		 * Pick only 16 bits of pktlen preset at bits 63:32
 		 * and place them at bits 15:0.
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index c09e9bff8e..db8c9f59d3 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -697,7 +697,6 @@ cnxk_pktmbuf_detach(struct rte_mbuf *m)
 
 	m->priv_size = priv_size;
 	m->buf_addr = (char *)m + mbuf_size;
-	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
 	m->buf_len = (uint16_t)buf_len;
 	rte_pktmbuf_reset_headroom(m);
 	m->data_len = 0;
diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
index f347e98fce..5efb2000cf 100644
--- a/drivers/net/cnxk/meson.build
+++ b/drivers/net/cnxk/meson.build
@@ -194,3 +194,4 @@ foreach flag: extra_flags
 endforeach
 
 headers = files('rte_pmd_cnxk.h')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/raw/cnxk_bphy/meson.build b/drivers/raw/cnxk_bphy/meson.build
index 14147feaf4..ffb0ee6b7e 100644
--- a/drivers/raw/cnxk_bphy/meson.build
+++ b/drivers/raw/cnxk_bphy/meson.build
@@ -10,3 +10,4 @@ sources = files(
         'cnxk_bphy_irq.c',
 )
 headers = files('rte_pmd_bphy.h')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/raw/cnxk_gpio/meson.build b/drivers/raw/cnxk_gpio/meson.build
index a75a5b9084..f52a7be9eb 100644
--- a/drivers/raw/cnxk_gpio/meson.build
+++ b/drivers/raw/cnxk_gpio/meson.build
@@ -9,3 +9,4 @@ sources = files(
         'cnxk_gpio_selftest.c',
 )
 headers = files('rte_pmd_cnxk_gpio.h')
+pmd_supports_disable_iova_as_pa = true
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v4 7/7] drivers: mark software PMDs work with IOVA as PA disabled
  2022-10-07 19:30                 ` [PATCH v4 0/7] " Shijith Thotton
                                     ` (5 preceding siblings ...)
  2022-10-07 19:30                   ` [PATCH v4 6/7] drivers: mark cnxk PMDs work with IOVA as PA disabled Shijith Thotton
@ 2022-10-07 19:30                   ` Shijith Thotton
  2022-10-07 20:19                   ` [PATCH v4 0/7] mbuf dynamic field expansion Olivier Matz
  2022-10-07 21:02                   ` [PATCH v5 " Shijith Thotton
  8 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 19:30 UTC (permalink / raw)
  To: dev
  Cc: Shijith Thotton, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	mb, olivier.matz, stephen, thomas, ferruh.yigit, pbhagavatula,
	Ruifeng Wang, Kai Ji, Pablo de Lara, Chengwen Feng, Kevin Laatz,
	Mattias Rönnblom, Liang Ma, Peter Mccarthy,
	Harry van Haaren, Artem V. Andreev, Andrew Rybchenko,
	John W. Linville, Ciara Loftus, Qi Zhang, Chas Williams,
	Min Hu (Connor),
	Gaetan Rivet, Jakub Grajciar, Tetsuya Mukawa, Sachin Saxena,
	Hemant Agrawal

Enabled software PMDs in builds with IOVA as PA disabled, as they work
with IOVA as VA.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
 drivers/crypto/armv8/meson.build    | 1 +
 drivers/crypto/ipsec_mb/meson.build | 1 +
 drivers/crypto/null/meson.build     | 1 +
 drivers/crypto/openssl/meson.build  | 1 +
 drivers/dma/skeleton/meson.build    | 1 +
 drivers/event/dsw/meson.build       | 1 +
 drivers/event/opdl/meson.build      | 1 +
 drivers/event/skeleton/meson.build  | 1 +
 drivers/event/sw/meson.build        | 1 +
 drivers/mempool/bucket/meson.build  | 1 +
 drivers/mempool/ring/meson.build    | 1 +
 drivers/mempool/stack/meson.build   | 1 +
 drivers/net/af_packet/meson.build   | 1 +
 drivers/net/af_xdp/meson.build      | 2 ++
 drivers/net/bonding/meson.build     | 1 +
 drivers/net/failsafe/meson.build    | 1 +
 drivers/net/memif/meson.build       | 1 +
 drivers/net/null/meson.build        | 1 +
 drivers/net/pcap/meson.build        | 1 +
 drivers/net/ring/meson.build        | 1 +
 drivers/net/tap/meson.build         | 1 +
 drivers/raw/skeleton/meson.build    | 1 +
 22 files changed, 23 insertions(+)

diff --git a/drivers/crypto/armv8/meson.build b/drivers/crypto/armv8/meson.build
index 5effba8bbc..700fb80eb2 100644
--- a/drivers/crypto/armv8/meson.build
+++ b/drivers/crypto/armv8/meson.build
@@ -17,3 +17,4 @@ endif
 ext_deps += dep
 deps += ['bus_vdev']
 sources = files('rte_armv8_pmd.c', 'rte_armv8_pmd_ops.c')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/crypto/ipsec_mb/meson.build b/drivers/crypto/ipsec_mb/meson.build
index 64fc22611d..ec147d2110 100644
--- a/drivers/crypto/ipsec_mb/meson.build
+++ b/drivers/crypto/ipsec_mb/meson.build
@@ -41,3 +41,4 @@ sources = files(
         'pmd_zuc.c',
 )
 deps += ['bus_vdev', 'net', 'security']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/crypto/null/meson.build b/drivers/crypto/null/meson.build
index acc16e7d81..59a7508f18 100644
--- a/drivers/crypto/null/meson.build
+++ b/drivers/crypto/null/meson.build
@@ -9,3 +9,4 @@ endif
 
 deps += 'bus_vdev'
 sources = files('null_crypto_pmd.c', 'null_crypto_pmd_ops.c')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/crypto/openssl/meson.build b/drivers/crypto/openssl/meson.build
index cd962da1d6..d165c32ae8 100644
--- a/drivers/crypto/openssl/meson.build
+++ b/drivers/crypto/openssl/meson.build
@@ -15,3 +15,4 @@ endif
 deps += 'bus_vdev'
 sources = files('rte_openssl_pmd.c', 'rte_openssl_pmd_ops.c')
 ext_deps += dep
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/dma/skeleton/meson.build b/drivers/dma/skeleton/meson.build
index 8871b80956..2b0422ce61 100644
--- a/drivers/dma/skeleton/meson.build
+++ b/drivers/dma/skeleton/meson.build
@@ -5,3 +5,4 @@ deps += ['dmadev', 'kvargs', 'ring', 'bus_vdev']
 sources = files(
         'skeleton_dmadev.c',
 )
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/event/dsw/meson.build b/drivers/event/dsw/meson.build
index 2df0fac4ff..e6808c0f71 100644
--- a/drivers/event/dsw/meson.build
+++ b/drivers/event/dsw/meson.build
@@ -6,3 +6,4 @@ if cc.has_argument('-Wno-format-nonliteral')
     cflags += '-Wno-format-nonliteral'
 endif
 sources = files('dsw_evdev.c', 'dsw_event.c', 'dsw_xstats.c')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/event/opdl/meson.build b/drivers/event/opdl/meson.build
index 786d2f4e82..7abef44609 100644
--- a/drivers/event/opdl/meson.build
+++ b/drivers/event/opdl/meson.build
@@ -9,3 +9,4 @@ sources = files(
         'opdl_test.c',
 )
 deps += ['bus_vdev']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/event/skeleton/meson.build b/drivers/event/skeleton/meson.build
index acfe156532..fa6a5e0a9f 100644
--- a/drivers/event/skeleton/meson.build
+++ b/drivers/event/skeleton/meson.build
@@ -3,3 +3,4 @@
 
 sources = files('skeleton_eventdev.c')
 deps += ['bus_pci', 'bus_vdev']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/event/sw/meson.build b/drivers/event/sw/meson.build
index 6f81567efb..8d815dfa84 100644
--- a/drivers/event/sw/meson.build
+++ b/drivers/event/sw/meson.build
@@ -9,3 +9,4 @@ sources = files(
         'sw_evdev.c',
 )
 deps += ['hash', 'bus_vdev']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/mempool/bucket/meson.build b/drivers/mempool/bucket/meson.build
index 0051b6ac3c..94c060904b 100644
--- a/drivers/mempool/bucket/meson.build
+++ b/drivers/mempool/bucket/meson.build
@@ -12,3 +12,4 @@ if is_windows
 endif
 
 sources = files('rte_mempool_bucket.c')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/mempool/ring/meson.build b/drivers/mempool/ring/meson.build
index a021e908cf..65d203d4b7 100644
--- a/drivers/mempool/ring/meson.build
+++ b/drivers/mempool/ring/meson.build
@@ -2,3 +2,4 @@
 # Copyright(c) 2017 Intel Corporation
 
 sources = files('rte_mempool_ring.c')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/mempool/stack/meson.build b/drivers/mempool/stack/meson.build
index 580dde79eb..961e90fc04 100644
--- a/drivers/mempool/stack/meson.build
+++ b/drivers/mempool/stack/meson.build
@@ -4,3 +4,4 @@
 sources = files('rte_mempool_stack.c')
 
 deps += ['stack']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/af_packet/meson.build b/drivers/net/af_packet/meson.build
index c014e9b61b..bab008d083 100644
--- a/drivers/net/af_packet/meson.build
+++ b/drivers/net/af_packet/meson.build
@@ -6,3 +6,4 @@ if not is_linux
     reason = 'only supported on Linux'
 endif
 sources = files('rte_eth_af_packet.c')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build
index 1e0de23705..7bbab52d8b 100644
--- a/drivers/net/af_xdp/meson.build
+++ b/drivers/net/af_xdp/meson.build
@@ -55,3 +55,5 @@ else
     build = false
     reason = 'missing header, "linux/if_xdp.h"'
 endif
+
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/bonding/meson.build b/drivers/net/bonding/meson.build
index 18ad7e21f3..29022712cb 100644
--- a/drivers/net/bonding/meson.build
+++ b/drivers/net/bonding/meson.build
@@ -22,3 +22,4 @@ deps += 'sched' # needed for rte_bitmap.h
 deps += ['ip_frag']
 
 headers = files('rte_eth_bond.h', 'rte_eth_bond_8023ad.h')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/failsafe/meson.build b/drivers/net/failsafe/meson.build
index b8e5bf70f8..bf8f791984 100644
--- a/drivers/net/failsafe/meson.build
+++ b/drivers/net/failsafe/meson.build
@@ -27,3 +27,4 @@ sources = files(
         'failsafe_ops.c',
         'failsafe_rxtx.c',
 )
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/memif/meson.build b/drivers/net/memif/meson.build
index 680bc8631c..28416a982f 100644
--- a/drivers/net/memif/meson.build
+++ b/drivers/net/memif/meson.build
@@ -12,3 +12,4 @@ sources = files(
 )
 
 deps += ['hash']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/null/meson.build b/drivers/net/null/meson.build
index 0251578aab..4a483955a7 100644
--- a/drivers/net/null/meson.build
+++ b/drivers/net/null/meson.build
@@ -8,3 +8,4 @@ if is_windows
 endif
 
 sources = files('rte_eth_null.c')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/pcap/meson.build b/drivers/net/pcap/meson.build
index ed7864eb9d..a5a2971f0e 100644
--- a/drivers/net/pcap/meson.build
+++ b/drivers/net/pcap/meson.build
@@ -15,3 +15,4 @@ ext_deps += pcap_dep
 if is_windows
     ext_deps += cc.find_library('iphlpapi', required: true)
 endif
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/ring/meson.build b/drivers/net/ring/meson.build
index 0156b37aad..72792e26b0 100644
--- a/drivers/net/ring/meson.build
+++ b/drivers/net/ring/meson.build
@@ -9,3 +9,4 @@ endif
 
 sources = files('rte_eth_ring.c')
 headers = files('rte_eth_ring.h')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/tap/meson.build b/drivers/net/tap/meson.build
index c09713a67b..4c9a9eac2b 100644
--- a/drivers/net/tap/meson.build
+++ b/drivers/net/tap/meson.build
@@ -35,3 +35,4 @@ foreach arg:args
     config.set(arg[0], cc.has_header_symbol(arg[1], arg[2]))
 endforeach
 configure_file(output : 'tap_autoconf.h', configuration : config)
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/raw/skeleton/meson.build b/drivers/raw/skeleton/meson.build
index 950a33cc20..bfb8fd8bcc 100644
--- a/drivers/raw/skeleton/meson.build
+++ b/drivers/raw/skeleton/meson.build
@@ -6,3 +6,4 @@ sources = files(
         'skeleton_rawdev.c',
         'skeleton_rawdev_test.c',
 )
+pmd_supports_disable_iova_as_pa = true
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [EXT] Re: [PATCH v3 0/5] mbuf dynamic field expansion
  2022-10-07 13:50                 ` Thomas Monjalon
@ 2022-10-07 19:35                   ` Shijith Thotton
  0 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 19:35 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: dev, Pavan Nikhilesh Bhagavatula, Honnappa.Nagarahalli,
	bruce.richardson, Jerin Jacob Kollanukkaran, mb, olivier.matz,
	stephen, david.marchand

>> This is a continuation of the discussions[1] to add mbuf physical address field to
>dynamic field.
>> Previous version was to add PA field to dynamic field area based on the EAL
>IOVA mode option. It was
>> deemed unsafe as some components could still use the PA field without
>checking IOVA mode and there
>> are drivers which need PA to work. One suggestion was to make the IOVA mode
>check at compile time so
>> that drivers which need PA can be disabled during build. This series adds this
>new meson build
>> option. Second patch adds mbuf PA field to dynamic field on such builds. Last
>two patches enable
>> Marvell cnxk PMDs and software PMDs in IOVA as VA build as they work without
>PA field.
>
>Shijith, in case it was not clear,
>we can accept this change only in -rc1 closing today,
>and we didn't receive the expected v4 yet.
>

Hi Thomas,

Sorry for the delay, I was not aware of this deadline. I have posted v4 now.
https://patchwork.dpdk.org/project/dpdk/list/?series=25039

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v4 1/7] mbuf: add API to get and set mbuf physical address
  2022-10-07 19:30                   ` [PATCH v4 1/7] mbuf: add API to get and set mbuf physical address Shijith Thotton
@ 2022-10-07 20:16                     ` Olivier Matz
  2022-10-07 20:20                       ` [EXT] " Shijith Thotton
  0 siblings, 1 reply; 88+ messages in thread
From: Olivier Matz @ 2022-10-07 20:16 UTC (permalink / raw)
  To: Shijith Thotton
  Cc: dev, Honnappa.Nagarahalli, bruce.richardson, jerinj, mb, stephen,
	thomas, ferruh.yigit, pbhagavatula, Nicolas Chautru, Ciara Power,
	Konstantin Ananyev, Reshma Pattan, Cristian Dumitrescu,
	Maxime Coquelin, Chenbo Xia

Hi,

On Sat, Oct 08, 2022 at 01:00:23AM +0530, Shijith Thotton wrote:
> Added APIs rte_mbuf_iova_set and rte_mbuf_iova_get to set and get the
> physical address of an mbuf respectively. Updated applications and
> library to use the same.
> 
> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> ---
>  app/test-bbdev/test_bbdev_perf.c         |  2 +-
>  app/test-crypto-perf/cperf_test_common.c |  5 ++---
>  app/test/test_bpf.c                      |  2 +-
>  app/test/test_mbuf.c                     |  2 +-
>  app/test/test_pcapng.c                   |  2 +-
>  lib/kni/rte_kni.c                        |  3 +--
>  lib/mbuf/rte_mbuf.c                      | 12 +++++-----
>  lib/mbuf/rte_mbuf.h                      | 28 +++++++++++++++++++-----
>  lib/mbuf/rte_mbuf_core.h                 |  3 +--
>  lib/pipeline/rte_table_action.c          |  2 +-
>  lib/vhost/vhost.h                        |  2 +-
>  lib/vhost/vhost_crypto.c                 | 25 ++++++++++-----------
>  12 files changed, 51 insertions(+), 37 deletions(-)
> 
> diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
> index 311e5d1a96..e7fbf71f6d 100644
> --- a/app/test-bbdev/test_bbdev_perf.c
> +++ b/app/test-bbdev/test_bbdev_perf.c
> @@ -1002,7 +1002,7 @@ init_op_data_objs(struct rte_bbdev_op_data *bufs,
>  					seg->length);
>  				memcpy(data, seg->addr, seg->length);
>  				m_head->buf_addr = data;
> -				m_head->buf_iova = rte_malloc_virt2iova(data);
> +				rte_mbuf_iova_set(m_head, rte_malloc_virt2iova(data));
>  				m_head->data_off = 0;
>  				m_head->data_len = seg->length;
>  			} else {
> diff --git a/app/test-crypto-perf/cperf_test_common.c b/app/test-crypto-perf/cperf_test_common.c
> index 00aadc9a47..27646cd619 100644
> --- a/app/test-crypto-perf/cperf_test_common.c
> +++ b/app/test-crypto-perf/cperf_test_common.c
> @@ -26,8 +26,7 @@ fill_single_seg_mbuf(struct rte_mbuf *m, struct rte_mempool *mp,
>  	/* start of buffer is after mbuf structure and priv data */
>  	m->priv_size = 0;
>  	m->buf_addr = (char *)m + mbuf_hdr_size;
> -	m->buf_iova = rte_mempool_virt2iova(obj) +
> -		mbuf_offset + mbuf_hdr_size;
> +	rte_mbuf_iova_set(m, rte_mempool_virt2iova(obj) + mbuf_offset + mbuf_hdr_size);
>  	m->buf_len = segment_sz;
>  	m->data_len = data_len;
>  	m->pkt_len = data_len;
> @@ -58,7 +57,7 @@ fill_multi_seg_mbuf(struct rte_mbuf *m, struct rte_mempool *mp,
>  		/* start of buffer is after mbuf structure and priv data */
>  		m->priv_size = 0;
>  		m->buf_addr = (char *)m + mbuf_hdr_size;
> -		m->buf_iova = next_seg_phys_addr;
> +		rte_mbuf_iova_set(m, next_seg_phys_addr);
>  		next_seg_phys_addr += mbuf_hdr_size + segment_sz;
>  		m->buf_len = segment_sz;
>  		m->data_len = data_len;
> diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c
> index 97f500809e..f5af5e8a3f 100644
> --- a/app/test/test_bpf.c
> +++ b/app/test/test_bpf.c
> @@ -2600,7 +2600,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len,
>  	uint8_t *db;
>  
>  	mb->buf_addr = buf;
> -	mb->buf_iova = (uintptr_t)buf;
> +	rte_mbuf_iova_set(mb, (uintptr_t)buf);
>  	mb->buf_len = buf_len;
>  	rte_mbuf_refcnt_set(mb, 1);
>  
> diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
> index e09b2549ca..22e45e66c1 100644
> --- a/app/test/test_mbuf.c
> +++ b/app/test/test_mbuf.c
> @@ -1233,7 +1233,7 @@ test_failing_mbuf_sanity_check(struct rte_mempool *pktmbuf_pool)
>  	}
>  
>  	badbuf = *buf;
> -	badbuf.buf_iova = 0;
> +	rte_mbuf_iova_set(&badbuf, 0);
>  	if (verify_mbuf_check_panics(&badbuf)) {
>  		printf("Error with bad-physaddr mbuf test\n");
>  		return -1;
> diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c
> index 320dacea34..abbf00f6da 100644
> --- a/app/test/test_pcapng.c
> +++ b/app/test/test_pcapng.c
> @@ -40,7 +40,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len,
>  	uint8_t *db;
>  
>  	mb->buf_addr = buf;
> -	mb->buf_iova = (uintptr_t)buf;
> +	rte_mbuf_iova_set(mb, (uintptr_t)buf);
>  	mb->buf_len = buf_len;
>  	rte_mbuf_refcnt_set(mb, 1);
>  
> diff --git a/lib/kni/rte_kni.c b/lib/kni/rte_kni.c
> index 7971c56bb4..3737a91de7 100644
> --- a/lib/kni/rte_kni.c
> +++ b/lib/kni/rte_kni.c
> @@ -357,8 +357,7 @@ static void *
>  va2pa(struct rte_mbuf *m)
>  {
>  	return (void *)((unsigned long)m -
> -			((unsigned long)m->buf_addr -
> -			 (unsigned long)m->buf_iova));
> +			((unsigned long)m->buf_addr - (unsigned long)rte_mbuf_iova_get(m)));
>  }
>  
>  static void *
> diff --git a/lib/mbuf/rte_mbuf.c b/lib/mbuf/rte_mbuf.c
> index a2307cebe6..16f6ed6731 100644
> --- a/lib/mbuf/rte_mbuf.c
> +++ b/lib/mbuf/rte_mbuf.c
> @@ -89,7 +89,7 @@ rte_pktmbuf_init(struct rte_mempool *mp,
>  	/* start of buffer is after mbuf structure and priv data */
>  	m->priv_size = priv_size;
>  	m->buf_addr = (char *)m + mbuf_size;
> -	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
> +	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
>  	m->buf_len = (uint16_t)buf_len;
>  
>  	/* keep some headroom between start of buffer and data */
> @@ -187,8 +187,8 @@ __rte_pktmbuf_init_extmem(struct rte_mempool *mp,
>  	RTE_ASSERT(ctx->off + ext_mem->elt_size <= ext_mem->buf_len);
>  
>  	m->buf_addr = RTE_PTR_ADD(ext_mem->buf_ptr, ctx->off);
> -	m->buf_iova = ext_mem->buf_iova == RTE_BAD_IOVA ?
> -		      RTE_BAD_IOVA : (ext_mem->buf_iova + ctx->off);
> +	rte_mbuf_iova_set(m, ext_mem->buf_iova == RTE_BAD_IOVA ? RTE_BAD_IOVA :
> +								 (ext_mem->buf_iova + ctx->off));
>  
>  	ctx->off += ext_mem->elt_size;
>  	if (ctx->off + ext_mem->elt_size > ext_mem->buf_len) {
> @@ -388,7 +388,7 @@ int rte_mbuf_check(const struct rte_mbuf *m, int is_header,
>  		*reason = "bad mbuf pool";
>  		return -1;
>  	}
> -	if (m->buf_iova == 0) {
> +	if (rte_mbuf_iova_get(m) == 0) {
>  		*reason = "bad IO addr";
>  		return -1;
>  	}
> @@ -669,8 +669,8 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
>  
>  	__rte_mbuf_sanity_check(m, 1);
>  
> -	fprintf(f, "dump mbuf at %p, iova=%#"PRIx64", buf_len=%u\n",
> -		m, m->buf_iova, m->buf_len);
> +	fprintf(f, "dump mbuf at %p, iova=%#" PRIx64 ", buf_len=%u\n", m, rte_mbuf_iova_get(m),
> +		m->buf_len);
>  	fprintf(f, "  pkt_len=%u, ol_flags=%#"PRIx64", nb_segs=%u, port=%u",
>  		m->pkt_len, m->ol_flags, m->nb_segs, m->port);
>  
> diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h
> index 9811e8c760..c552dfbcac 100644
> --- a/lib/mbuf/rte_mbuf.h
> +++ b/lib/mbuf/rte_mbuf.h
> @@ -135,6 +135,24 @@ rte_mbuf_prefetch_part2(struct rte_mbuf *m)
>  
>  static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
>  
> +/**
> + * Get the mbuf physical address.
> + */

Not blocking for the rc1, but the comment should be clarified.

"Get the IOVA address of the mbuf data buffer." looks more accurate
to me.

Also, even if it does not bring a big added-value, it's better to
have documentation for @param and @return.

> +static inline rte_iova_t
> +rte_mbuf_iova_get(const struct rte_mbuf *m)
> +{
> +	return m->buf_iova;
> +}
> +
> +/**
> + * Set the mbuf physical address.
> + */

Same

> +static inline void
> +rte_mbuf_iova_set(struct rte_mbuf *m, rte_iova_t iova)
> +{
> +	m->buf_iova = iova;
> +}
> +
>  /**
>   * Return the IO address of the beginning of the mbuf data
>   *
> @@ -146,7 +164,7 @@ static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
>  static inline rte_iova_t
>  rte_mbuf_data_iova(const struct rte_mbuf *mb)
>  {
> -	return mb->buf_iova + mb->data_off;
> +	return rte_mbuf_iova_get(mb) + mb->data_off;
>  }
>  
>  /**
> @@ -164,7 +182,7 @@ rte_mbuf_data_iova(const struct rte_mbuf *mb)
>  static inline rte_iova_t
>  rte_mbuf_data_iova_default(const struct rte_mbuf *mb)
>  {
> -	return mb->buf_iova + RTE_PKTMBUF_HEADROOM;
> +	return rte_mbuf_iova_get(mb) + RTE_PKTMBUF_HEADROOM;
>  }
>  
>  /**
> @@ -1056,7 +1074,7 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
>  	RTE_ASSERT(shinfo->free_cb != NULL);
>  
>  	m->buf_addr = buf_addr;
> -	m->buf_iova = buf_iova;
> +	rte_mbuf_iova_set(m, buf_iova);
>  	m->buf_len = buf_len;
>  
>  	m->data_len = 0;
> @@ -1143,7 +1161,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
>  
>  	mi->data_off = m->data_off;
>  	mi->data_len = m->data_len;
> -	mi->buf_iova = m->buf_iova;
> +	rte_mbuf_iova_set(mi, rte_mbuf_iova_get(m));
>  	mi->buf_addr = m->buf_addr;
>  	mi->buf_len = m->buf_len;
>  
> @@ -1245,7 +1263,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
>  
>  	m->priv_size = priv_size;
>  	m->buf_addr = (char *)m + mbuf_size;
> -	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
> +	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
>  	m->buf_len = (uint16_t)buf_len;
>  	rte_pktmbuf_reset_headroom(m);
>  	m->data_len = 0;
> diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> index e672d59b36..51a12a1fb9 100644
> --- a/lib/mbuf/rte_mbuf_core.h
> +++ b/lib/mbuf/rte_mbuf_core.h
> @@ -736,8 +736,7 @@ struct rte_mbuf_ext_shared_info {
>   * @param o
>   *   The offset into the data to calculate address from.
>   */
> -#define rte_pktmbuf_iova_offset(m, o) \
> -	(rte_iova_t)((m)->buf_iova + (m)->data_off + (o))
> +#define rte_pktmbuf_iova_offset(m, o) (rte_iova_t)(rte_mbuf_iova_get(m) + (m)->data_off + (o))
>  
>  /**
>   * A macro that returns the IO address that points to the start of the
> diff --git a/lib/pipeline/rte_table_action.c b/lib/pipeline/rte_table_action.c
> index b1310be565..06a8cdcf05 100644
> --- a/lib/pipeline/rte_table_action.c
> +++ b/lib/pipeline/rte_table_action.c
> @@ -1929,7 +1929,7 @@ pkt_work_sym_crypto(struct rte_mbuf *mbuf, struct sym_crypto_data *data,
>  
>  	op->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
>  	op->sess_type = RTE_CRYPTO_OP_WITH_SESSION;
> -	op->phys_addr = mbuf->buf_iova + cfg->op_offset - sizeof(*mbuf);
> +	op->phys_addr = rte_mbuf_iova_get(mbuf) + cfg->op_offset - sizeof(*mbuf);
>  	op->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
>  	sym->m_src = mbuf;
>  	sym->m_dst = NULL;
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> index eeeda681cc..ef211ed519 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -971,7 +971,7 @@ restore_mbuf(struct rte_mbuf *m)
>  		/* start of buffer is after mbuf structure and priv data */
>  
>  		m->buf_addr = (char *)m + mbuf_size;
> -		m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
> +		rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
>  		m = m->next;
>  	}
>  }
> diff --git a/lib/vhost/vhost_crypto.c b/lib/vhost/vhost_crypto.c
> index 54946f46d9..3077a21ae8 100644
> --- a/lib/vhost/vhost_crypto.c
> +++ b/lib/vhost/vhost_crypto.c
> @@ -823,11 +823,10 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
>  	switch (vcrypto->option) {
>  	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
>  		m_src->data_len = cipher->para.src_data_len;
> -		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
> -				cipher->para.src_data_len);
> +		rte_mbuf_iova_set(m_src,
> +				  gpa_to_hpa(vcrypto->dev, desc->addr, cipher->para.src_data_len));
>  		m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO);
> -		if (unlikely(m_src->buf_iova == 0 ||
> -				m_src->buf_addr == NULL)) {
> +		if (unlikely(rte_mbuf_iova_get(m_src) == 0 || m_src->buf_addr == NULL)) {
>  			VC_LOG_ERR("zero_copy may fail due to cross page data");
>  			ret = VIRTIO_CRYPTO_ERR;
>  			goto error_exit;
> @@ -867,10 +866,10 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
>  
>  	switch (vcrypto->option) {
>  	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
> -		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
> -				desc->addr, cipher->para.dst_data_len);
> +		rte_mbuf_iova_set(m_dst,
> +				  gpa_to_hpa(vcrypto->dev, desc->addr, cipher->para.dst_data_len));
>  		m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW);
> -		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
> +		if (unlikely(rte_mbuf_iova_get(m_dst) == 0 || m_dst->buf_addr == NULL)) {
>  			VC_LOG_ERR("zero_copy may fail due to cross page data");
>  			ret = VIRTIO_CRYPTO_ERR;
>  			goto error_exit;
> @@ -981,10 +980,10 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
>  		m_src->data_len = chain->para.src_data_len;
>  		m_dst->data_len = chain->para.dst_data_len;
>  
> -		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
> -				chain->para.src_data_len);
> +		rte_mbuf_iova_set(m_src,
> +				  gpa_to_hpa(vcrypto->dev, desc->addr, chain->para.src_data_len));
>  		m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO);
> -		if (unlikely(m_src->buf_iova == 0 || m_src->buf_addr == NULL)) {
> +		if (unlikely(rte_mbuf_iova_get(m_src) == 0 || m_src->buf_addr == NULL)) {
>  			VC_LOG_ERR("zero_copy may fail due to cross page data");
>  			ret = VIRTIO_CRYPTO_ERR;
>  			goto error_exit;
> @@ -1024,10 +1023,10 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
>  
>  	switch (vcrypto->option) {
>  	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
> -		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
> -				desc->addr, chain->para.dst_data_len);
> +		rte_mbuf_iova_set(m_dst,
> +				  gpa_to_hpa(vcrypto->dev, desc->addr, chain->para.dst_data_len));
>  		m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW);
> -		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
> +		if (unlikely(rte_mbuf_iova_get(m_dst) == 0 || m_dst->buf_addr == NULL)) {
>  			VC_LOG_ERR("zero_copy may fail due to cross page data");
>  			ret = VIRTIO_CRYPTO_ERR;
>  			goto error_exit;
> -- 
> 2.25.1
> 

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v4 2/7] test/dma: use API to get mbuf data physical address
  2022-10-07 19:30                   ` [PATCH v4 2/7] test/dma: use API to get mbuf data " Shijith Thotton
@ 2022-10-07 20:17                     ` Olivier Matz
  0 siblings, 0 replies; 88+ messages in thread
From: Olivier Matz @ 2022-10-07 20:17 UTC (permalink / raw)
  To: Shijith Thotton
  Cc: dev, Honnappa.Nagarahalli, bruce.richardson, jerinj, mb, stephen,
	thomas, ferruh.yigit, pbhagavatula, Chengwen Feng, Kevin Laatz

On Sat, Oct 08, 2022 at 01:00:24AM +0530, Shijith Thotton wrote:
> Used rte_mbuf_data_iova API to get the physical address of mbuf data.
> 
> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> ---
>  app/test/test_dmadev.c | 33 ++++++++++++++-------------------
>  1 file changed, 14 insertions(+), 19 deletions(-)
> 
> diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c
> index 9e8e101f40..fe62e98af8 100644
> --- a/app/test/test_dmadev.c
> +++ b/app/test/test_dmadev.c
> @@ -110,8 +110,8 @@ do_multi_copies(int16_t dev_id, uint16_t vchan,
>  		for (j = 0; j < COPY_LEN/sizeof(uint64_t); j++)
>  			src_data[j] = rte_rand();
>  
> -		if (rte_dma_copy(dev_id, vchan, srcs[i]->buf_iova + srcs[i]->data_off,
> -				dsts[i]->buf_iova + dsts[i]->data_off, COPY_LEN, 0) != id_count++)
> +		if (rte_dma_copy(dev_id, vchan, rte_mbuf_data_iova(srcs[i]),
> +				 rte_mbuf_data_iova(dsts[i]), COPY_LEN, 0) != id_count++)

This is not related to your patch, but for the record: I realize that there are
2 APIs for the same thing: rte_pktmbuf_iova() and rte_mbuf_data_iova().

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v3 4/5] drivers: mark Marvell cnxk PMDs work with IOVA as VA
  2022-09-21 13:56                 ` [PATCH v3 4/5] drivers: mark Marvell cnxk PMDs work with IOVA as VA Shijith Thotton
  2022-09-28 12:53                   ` Olivier Matz
@ 2022-10-07 20:17                   ` Olivier Matz
  2022-10-07 20:22                     ` [EXT] " Shijith Thotton
  1 sibling, 1 reply; 88+ messages in thread
From: Olivier Matz @ 2022-10-07 20:17 UTC (permalink / raw)
  To: Shijith Thotton
  Cc: dev, pbhagavatula, Honnappa.Nagarahalli, bruce.richardson,
	jerinj, mb, stephen, thomas, david.marchand, Ruifeng Wang,
	Jan Viktorin, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Ankur Dwivedi, Anoob Joseph, Tejasree Kondoj,
	Radha Mohan Chintakuntla, Veerasenareddy Burru,
	Ashwin Sekhar T K, Jakub Palider, Tomasz Duszynski

On Wed, Sep 21, 2022 at 07:26:20PM +0530, Shijith Thotton wrote:
> Enabled the flag pmd_iova_as_va in cnxk driver build files as they work
> with IOVA as VA. Updated cn9k and cn10k soc build configurations to
> enable the IOVA as VA build by default.
> 
> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> ---
>  config/arm/meson.build                   |  8 +++-
>  drivers/common/cnxk/meson.build          |  1 +
>  drivers/crypto/cnxk/cn10k_ipsec_la_ops.h |  4 +-
>  drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  |  2 +-
>  drivers/crypto/cnxk/meson.build          |  2 +
>  drivers/dma/cnxk/meson.build             |  1 +
>  drivers/event/cnxk/meson.build           |  1 +
>  drivers/mempool/cnxk/meson.build         |  1 +
>  drivers/net/cnxk/cn10k_tx.h              | 55 +++++++-----------------
>  drivers/net/cnxk/cn9k_tx.h               | 55 +++++++-----------------
>  drivers/net/cnxk/cnxk_ethdev.h           |  1 -
>  drivers/net/cnxk/meson.build             |  1 +
>  drivers/raw/cnxk_bphy/meson.build        |  1 +
>  drivers/raw/cnxk_gpio/meson.build        |  1 +
>  14 files changed, 50 insertions(+), 84 deletions(-)
> 
> diff --git a/config/arm/meson.build b/config/arm/meson.build
> index 9f1636e0d5..4e95e8b388 100644
> --- a/config/arm/meson.build
> +++ b/config/arm/meson.build
> @@ -294,7 +294,8 @@ soc_cn10k = {
>      'flags': [
>          ['RTE_MAX_LCORE', 24],
>          ['RTE_MAX_NUMA_NODES', 1],
> -        ['RTE_MEMPOOL_ALIGN', 128]
> +        ['RTE_MEMPOOL_ALIGN', 128],
> +        ['RTE_IOVA_AS_VA', 1]
>      ],
>      'part_number': '0xd49',
>      'extra_march_features': ['crypto'],
> @@ -370,7 +371,10 @@ soc_cn9k = {
>      'description': 'Marvell OCTEON 9',
>      'implementer': '0x43',
>      'part_number': '0xb2',
> -    'numa': false
> +    'numa': false,
> +    'flags': [
> +        ['RTE_IOVA_AS_VA', 1]
> +    ]
>  }
>  
>  soc_stingray = {
> diff --git a/drivers/common/cnxk/meson.build b/drivers/common/cnxk/meson.build
> index 6f808271d1..d019cfa8d1 100644
> --- a/drivers/common/cnxk/meson.build
> +++ b/drivers/common/cnxk/meson.build
> @@ -86,3 +86,4 @@ sources += files('cnxk_telemetry_bphy.c',
>  )
>  
>  deps += ['bus_pci', 'net', 'telemetry']
> +pmd_iova_as_va = true
> diff --git a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
> index 66cfe6ca98..16db14344d 100644
> --- a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
> +++ b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
> @@ -85,7 +85,7 @@ process_outb_sa(struct roc_cpt_lf *lf, struct rte_crypto_op *cop,
>  
>  	/* Prepare CPT instruction */
>  	inst->w4.u64 = inst_w4_u64 | rte_pktmbuf_pkt_len(m_src);
> -	dptr = rte_pktmbuf_iova(m_src);
> +	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
>  	inst->dptr = dptr;
>  	inst->rptr = dptr;
>  
> @@ -102,7 +102,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct cn10k_ipsec_sa *sa,
>  
>  	/* Prepare CPT instruction */
>  	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
> -	dptr = rte_pktmbuf_iova(m_src);
> +	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
>  	inst->dptr = dptr;
>  	inst->rptr = dptr;
>  
> diff --git a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
> index e469596756..8b68e4c728 100644
> --- a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
> +++ b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
> @@ -99,7 +99,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct cn9k_ipsec_sa *sa,
>  
>  	/* Prepare CPT instruction */
>  	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
> -	inst->dptr = inst->rptr = rte_pktmbuf_iova(m_src);
> +	inst->dptr = inst->rptr = rte_pktmbuf_mtod(m_src, uint64_t);
>  	inst->w7.u64 = sa->inst.w7;
>  }
>  #endif /* __CN9K_IPSEC_LA_OPS_H__ */
> diff --git a/drivers/crypto/cnxk/meson.build b/drivers/crypto/cnxk/meson.build
> index 23a1cc3aac..764e7bb99a 100644
> --- a/drivers/crypto/cnxk/meson.build
> +++ b/drivers/crypto/cnxk/meson.build
> @@ -31,3 +31,5 @@ if get_option('buildtype').contains('debug')
>  else
>      cflags += [ '-ULA_IPSEC_DEBUG' ]
>  endif
> +
> +pmd_iova_as_va = true
> diff --git a/drivers/dma/cnxk/meson.build b/drivers/dma/cnxk/meson.build
> index d4be4ee860..ef0e3db109 100644
> --- a/drivers/dma/cnxk/meson.build
> +++ b/drivers/dma/cnxk/meson.build
> @@ -3,3 +3,4 @@
>  
>  deps += ['bus_pci', 'common_cnxk', 'dmadev']
>  sources = files('cnxk_dmadev.c')
> +pmd_iova_as_va = true
> diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
> index b27bae7b12..650d0d4256 100644
> --- a/drivers/event/cnxk/meson.build
> +++ b/drivers/event/cnxk/meson.build
> @@ -479,3 +479,4 @@ foreach flag: extra_flags
>  endforeach
>  
>  deps += ['bus_pci', 'common_cnxk', 'net_cnxk', 'crypto_cnxk']
> +pmd_iova_as_va = true
> diff --git a/drivers/mempool/cnxk/meson.build b/drivers/mempool/cnxk/meson.build
> index d5d1978569..a328176457 100644
> --- a/drivers/mempool/cnxk/meson.build
> +++ b/drivers/mempool/cnxk/meson.build
> @@ -17,3 +17,4 @@ sources = files(
>  )
>  
>  deps += ['eal', 'mbuf', 'kvargs', 'bus_pci', 'common_cnxk', 'mempool']
> +pmd_iova_as_va = true
> diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
> index ea13866b20..2ef62da132 100644
> --- a/drivers/net/cnxk/cn10k_tx.h
> +++ b/drivers/net/cnxk/cn10k_tx.h
> @@ -1775,14 +1775,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
>  		mbuf2 = (uint64_t *)tx_pkts[2];
>  		mbuf3 = (uint64_t *)tx_pkts[3];
>  
> -		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
> -				     offsetof(struct rte_mbuf, buf_iova));
>  		/*
>  		 * Get mbuf's, olflags, iova, pktlen, dataoff
>  		 * dataoff_iovaX.D[0] = iova,
> @@ -1790,28 +1782,24 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
>  		 * len_olflagsX.D[0] = ol_flags,
>  		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
>  		 */
> -		dataoff_iova0 = vld1q_u64(mbuf0);
> -		len_olflags0 = vld1q_u64(mbuf0 + 2);
> -		dataoff_iova1 = vld1q_u64(mbuf1);
> -		len_olflags1 = vld1q_u64(mbuf1 + 2);
> -		dataoff_iova2 = vld1q_u64(mbuf2);
> -		len_olflags2 = vld1q_u64(mbuf2 + 2);
> -		dataoff_iova3 = vld1q_u64(mbuf3);
> -		len_olflags3 = vld1q_u64(mbuf3 + 2);
> +		dataoff_iova0 =
> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf0), 1);
> +		len_olflags0 = vld1q_u64(mbuf0 + 3);
> +		dataoff_iova1 =
> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf1), 1);
> +		len_olflags1 = vld1q_u64(mbuf1 + 3);
> +		dataoff_iova2 =
> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf2), 1);
> +		len_olflags2 = vld1q_u64(mbuf2 + 3);
> +		dataoff_iova3 =
> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf3), 1);
> +		len_olflags3 = vld1q_u64(mbuf3 + 3);
>  
>  		/* Move mbufs to point pool */
> -		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
> -				     offsetof(struct rte_mbuf, pool) -
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
> -				     offsetof(struct rte_mbuf, pool) -
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
> -				     offsetof(struct rte_mbuf, pool) -
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
> -				     offsetof(struct rte_mbuf, pool) -
> -				     offsetof(struct rte_mbuf, buf_iova));
> +		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct rte_mbuf, pool));
> +		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct rte_mbuf, pool));
> +		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct rte_mbuf, pool));
> +		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct rte_mbuf, pool));
>  
>  		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
>  			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
> @@ -1861,17 +1849,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
>  		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
>  		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
>  
> -		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
> -		const uint64x2_t and_mask0 = {
> -			0xFFFFFFFFFFFFFFFF,
> -			0x000000000000FFFF,
> -		};
> -
> -		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
> -		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
> -		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
> -		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
> -
>  		/*
>  		 * Pick only 16 bits of pktlen preset at bits 63:32
>  		 * and place them at bits 15:0.
> diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
> index 6ce81f5c96..f5d99ccb5a 100644
> --- a/drivers/net/cnxk/cn9k_tx.h
> +++ b/drivers/net/cnxk/cn9k_tx.h
> @@ -1005,14 +1005,6 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  		mbuf2 = (uint64_t *)tx_pkts[2];
>  		mbuf3 = (uint64_t *)tx_pkts[3];
>  
> -		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
> -				     offsetof(struct rte_mbuf, buf_iova));
>  		/*
>  		 * Get mbuf's, olflags, iova, pktlen, dataoff
>  		 * dataoff_iovaX.D[0] = iova,
> @@ -1020,28 +1012,24 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  		 * len_olflagsX.D[0] = ol_flags,
>  		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
>  		 */
> -		dataoff_iova0 = vld1q_u64(mbuf0);
> -		len_olflags0 = vld1q_u64(mbuf0 + 2);
> -		dataoff_iova1 = vld1q_u64(mbuf1);
> -		len_olflags1 = vld1q_u64(mbuf1 + 2);
> -		dataoff_iova2 = vld1q_u64(mbuf2);
> -		len_olflags2 = vld1q_u64(mbuf2 + 2);
> -		dataoff_iova3 = vld1q_u64(mbuf3);
> -		len_olflags3 = vld1q_u64(mbuf3 + 2);
> +		dataoff_iova0 =
> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf0), 1);
> +		len_olflags0 = vld1q_u64(mbuf0 + 3);
> +		dataoff_iova1 =
> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf1)->data_off, vld1q_u64(mbuf1), 1);
> +		len_olflags1 = vld1q_u64(mbuf1 + 3);
> +		dataoff_iova2 =
> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf2)->data_off, vld1q_u64(mbuf2), 1);
> +		len_olflags2 = vld1q_u64(mbuf2 + 3);
> +		dataoff_iova3 =
> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf3)->data_off, vld1q_u64(mbuf3), 1);
> +		len_olflags3 = vld1q_u64(mbuf3 + 3);
>  
>  		/* Move mbufs to point pool */
> -		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
> -				     offsetof(struct rte_mbuf, pool) -
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
> -				     offsetof(struct rte_mbuf, pool) -
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
> -				     offsetof(struct rte_mbuf, pool) -
> -				     offsetof(struct rte_mbuf, buf_iova));
> -		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
> -				     offsetof(struct rte_mbuf, pool) -
> -				     offsetof(struct rte_mbuf, buf_iova));
> +		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct rte_mbuf, pool));
> +		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct rte_mbuf, pool));
> +		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct rte_mbuf, pool));
> +		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct rte_mbuf, pool));
>  
>  		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
>  			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
> @@ -1091,17 +1079,6 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
>  		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
>  
> -		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
> -		const uint64x2_t and_mask0 = {
> -			0xFFFFFFFFFFFFFFFF,
> -			0x000000000000FFFF,
> -		};
> -
> -		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
> -		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
> -		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
> -		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
> -
>  		/*
>  		 * Pick only 16 bits of pktlen preset at bits 63:32
>  		 * and place them at bits 15:0.
> diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
> index 4cb7c9e90c..abf1e4215f 100644
> --- a/drivers/net/cnxk/cnxk_ethdev.h
> +++ b/drivers/net/cnxk/cnxk_ethdev.h
> @@ -690,7 +690,6 @@ cnxk_pktmbuf_detach(struct rte_mbuf *m)
>  
>  	m->priv_size = priv_size;
>  	m->buf_addr = (char *)m + mbuf_size;
> -	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
>  	m->buf_len = (uint16_t)buf_len;
>  	rte_pktmbuf_reset_headroom(m);
>  	m->data_len = 0;

I missed it during the previous review, but shouldn't the accessor be used
instead?  I mean, if the build is done with PA enabled and another
driver accesses m->buf_iova, the value has to be correct.


> diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
> index f347e98fce..01489b3a36 100644
> --- a/drivers/net/cnxk/meson.build
> +++ b/drivers/net/cnxk/meson.build
> @@ -194,3 +194,4 @@ foreach flag: extra_flags
>  endforeach
>  
>  headers = files('rte_pmd_cnxk.h')
> +pmd_iova_as_va = true
> diff --git a/drivers/raw/cnxk_bphy/meson.build b/drivers/raw/cnxk_bphy/meson.build
> index 14147feaf4..781ed63e05 100644
> --- a/drivers/raw/cnxk_bphy/meson.build
> +++ b/drivers/raw/cnxk_bphy/meson.build
> @@ -10,3 +10,4 @@ sources = files(
>          'cnxk_bphy_irq.c',
>  )
>  headers = files('rte_pmd_bphy.h')
> +pmd_iova_as_va = true
> diff --git a/drivers/raw/cnxk_gpio/meson.build b/drivers/raw/cnxk_gpio/meson.build
> index a75a5b9084..f9aed173b6 100644
> --- a/drivers/raw/cnxk_gpio/meson.build
> +++ b/drivers/raw/cnxk_gpio/meson.build
> @@ -9,3 +9,4 @@ sources = files(
>          'cnxk_gpio_selftest.c',
>  )
>  headers = files('rte_pmd_cnxk_gpio.h')
> +pmd_iova_as_va = true
> -- 
> 2.25.1
> 

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v4 0/7] mbuf dynamic field expansion
  2022-10-07 19:30                 ` [PATCH v4 0/7] " Shijith Thotton
                                     ` (6 preceding siblings ...)
  2022-10-07 19:30                   ` [PATCH v4 7/7] drivers: mark software " Shijith Thotton
@ 2022-10-07 20:19                   ` Olivier Matz
  2022-10-07 21:02                   ` [PATCH v5 " Shijith Thotton
  8 siblings, 0 replies; 88+ messages in thread
From: Olivier Matz @ 2022-10-07 20:19 UTC (permalink / raw)
  To: Shijith Thotton
  Cc: dev, Honnappa.Nagarahalli, bruce.richardson, jerinj, mb, stephen,
	thomas, ferruh.yigit, pbhagavatula, david.marchand

Hi Shijith,

On Sat, Oct 08, 2022 at 01:00:22AM +0530, Shijith Thotton wrote:
> This is a continuation of the discussions[1] to add mbuf physical address field to dynamic field.
> Previous version was to add PA field to dynamic field area based on the EAL IOVA mode option. It was
> deemed unsafe as some components could still use the PA field without checking IOVA mode and there
> are drivers which need PA to work. One suggestion was to make the IOVA mode check at compile time so
> that drivers which need PA can be disabled during build. This series adds this new meson build
> options. Fourth patch adds mbuf PA field to dynamic field on such builds. Last two patches enable
> Marvell cnxk PMDs and software PMDs in IOVA as PA disabled build as they work without PA field.
> 
> 1. https://inbox.dpdk.org/dev/57d2ab7fff672716d37ba4078e2e3bb2db126607.1656605763.git.sthotton@marvell.com/.
> 
> v4:
>  * Restructured changes to multiple patches.
>  * Moved to #if scheme instead of union.
>  * Updated release notes.
> 
> v3:
>  * Cleared use of buf_iova from cnxk PMD.
> 
> v2:
>  * Used RTE_IOVA_AS_VA instead of rte_is_iova_as_va_build().
>  * Moved mbuf next pointer to first cacheline if RTE_IOVA_AS_VA = 1.
> 
> Shijith Thotton (7):
>   mbuf: add API to get and set mbuf physical address
>   test/dma: use API to get mbuf data physical address
>   build: add meson option to configure IOVA mode as PA
>   mbuf: add second dynamic field member
>   lib: move mbuf next pointer to first cache line
>   drivers: mark cnxk PMDs work with IOVA as PA disabled
>   drivers: mark software PMDs work with IOVA as PA disabled
> 
>  app/test-bbdev/test_bbdev_perf.c         |  2 +-
>  app/test-crypto-perf/cperf_test_common.c |  5 +--
>  app/test/test_bpf.c                      |  2 +-
>  app/test/test_dmadev.c                   | 33 ++++++--------
>  app/test/test_mbuf.c                     | 12 +++---
>  app/test/test_pcapng.c                   |  2 +-
>  config/arm/meson.build                   |  8 +++-
>  config/meson.build                       |  1 +
>  doc/guides/platform/cnxk.rst             |  3 +-
>  doc/guides/rel_notes/release_22_11.rst   |  3 ++
>  drivers/common/cnxk/meson.build          |  1 +
>  drivers/crypto/armv8/meson.build         |  1 +
>  drivers/crypto/cnxk/cn10k_ipsec_la_ops.h |  4 +-
>  drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  |  2 +-
>  drivers/crypto/cnxk/meson.build          |  2 +
>  drivers/crypto/ipsec_mb/meson.build      |  1 +
>  drivers/crypto/null/meson.build          |  1 +
>  drivers/crypto/openssl/meson.build       |  1 +
>  drivers/dma/cnxk/meson.build             |  1 +
>  drivers/dma/skeleton/meson.build         |  1 +
>  drivers/event/cnxk/meson.build           |  1 +
>  drivers/event/dsw/meson.build            |  1 +
>  drivers/event/opdl/meson.build           |  1 +
>  drivers/event/skeleton/meson.build       |  1 +
>  drivers/event/sw/meson.build             |  1 +
>  drivers/mempool/bucket/meson.build       |  1 +
>  drivers/mempool/cnxk/meson.build         |  1 +
>  drivers/mempool/ring/meson.build         |  1 +
>  drivers/mempool/stack/meson.build        |  1 +
>  drivers/meson.build                      |  6 +++
>  drivers/net/af_packet/meson.build        |  1 +
>  drivers/net/af_xdp/meson.build           |  2 +
>  drivers/net/bonding/meson.build          |  1 +
>  drivers/net/cnxk/cn10k_ethdev.c          |  4 +-
>  drivers/net/cnxk/cn10k_tx.h              | 55 +++++++-----------------
>  drivers/net/cnxk/cn9k_ethdev.c           |  4 +-
>  drivers/net/cnxk/cn9k_tx.h               | 55 +++++++-----------------
>  drivers/net/cnxk/cnxk_ethdev.h           |  1 -
>  drivers/net/cnxk/meson.build             |  1 +
>  drivers/net/failsafe/meson.build         |  1 +
>  drivers/net/hns3/meson.build             |  6 +++
>  drivers/net/memif/meson.build            |  1 +
>  drivers/net/null/meson.build             |  1 +
>  drivers/net/pcap/meson.build             |  1 +
>  drivers/net/ring/meson.build             |  1 +
>  drivers/net/tap/meson.build              |  1 +
>  drivers/raw/cnxk_bphy/meson.build        |  1 +
>  drivers/raw/cnxk_gpio/meson.build        |  1 +
>  drivers/raw/skeleton/meson.build         |  1 +
>  lib/eal/linux/eal.c                      |  6 +++
>  lib/kni/rte_kni.c                        |  3 +-
>  lib/mbuf/rte_mbuf.c                      | 12 +++---
>  lib/mbuf/rte_mbuf.h                      | 37 +++++++++++++---
>  lib/mbuf/rte_mbuf_core.h                 | 26 +++++++++--
>  lib/mbuf/rte_mbuf_dyn.c                  |  3 ++
>  lib/meson.build                          |  3 ++
>  lib/pipeline/rte_table_action.c          |  2 +-
>  lib/vhost/vhost.h                        |  2 +-
>  lib/vhost/vhost_crypto.c                 | 25 ++++++-----
>  meson_options.txt                        |  2 +
>  60 files changed, 210 insertions(+), 151 deletions(-)
> 
> -- 
> 2.25.1
> 

for the series:
Acked-by: Olivier Matz <olivier.matz@6wind.com>

There are a few minor comments but I think it can go in rc1 anyway.

^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [EXT] Re: [PATCH v4 1/7] mbuf: add API to get and set mbuf physical address
  2022-10-07 20:16                     ` Olivier Matz
@ 2022-10-07 20:20                       ` Shijith Thotton
  0 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 20:20 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Honnappa.Nagarahalli, bruce.richardson,
	Jerin Jacob Kollanukkaran, mb, stephen, thomas, ferruh.yigit,
	Pavan Nikhilesh Bhagavatula, Nicolas Chautru, Ciara Power,
	Konstantin Ananyev, Reshma Pattan, Cristian Dumitrescu,
	Maxime Coquelin, Chenbo Xia

Hi Olivier,

>On Sat, Oct 08, 2022 at 01:00:23AM +0530, Shijith Thotton wrote:
>> Added APIs rte_mbuf_iova_set and rte_mbuf_iova_get to set and get the
>> physical address of an mbuf respectively. Updated applications and
>> library to use the same.
>>
>> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
>> ---
>>  app/test-bbdev/test_bbdev_perf.c         |  2 +-
>>  app/test-crypto-perf/cperf_test_common.c |  5 ++---
>>  app/test/test_bpf.c                      |  2 +-
>>  app/test/test_mbuf.c                     |  2 +-
>>  app/test/test_pcapng.c                   |  2 +-
>>  lib/kni/rte_kni.c                        |  3 +--
>>  lib/mbuf/rte_mbuf.c                      | 12 +++++-----
>>  lib/mbuf/rte_mbuf.h                      | 28 +++++++++++++++++++-----
>>  lib/mbuf/rte_mbuf_core.h                 |  3 +--
>>  lib/pipeline/rte_table_action.c          |  2 +-
>>  lib/vhost/vhost.h                        |  2 +-
>>  lib/vhost/vhost_crypto.c                 | 25 ++++++++++-----------
>>  12 files changed, 51 insertions(+), 37 deletions(-)
>>
>> diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-
>bbdev/test_bbdev_perf.c
>> index 311e5d1a96..e7fbf71f6d 100644
>> --- a/app/test-bbdev/test_bbdev_perf.c
>> +++ b/app/test-bbdev/test_bbdev_perf.c
>> @@ -1002,7 +1002,7 @@ init_op_data_objs(struct rte_bbdev_op_data *bufs,
>>  					seg->length);
>>  				memcpy(data, seg->addr, seg->length);
>>  				m_head->buf_addr = data;
>> -				m_head->buf_iova = rte_malloc_virt2iova(data);
>> +				rte_mbuf_iova_set(m_head,
>rte_malloc_virt2iova(data));
>>  				m_head->data_off = 0;
>>  				m_head->data_len = seg->length;
>>  			} else {
>> diff --git a/app/test-crypto-perf/cperf_test_common.c b/app/test-crypto-
>perf/cperf_test_common.c
>> index 00aadc9a47..27646cd619 100644
>> --- a/app/test-crypto-perf/cperf_test_common.c
>> +++ b/app/test-crypto-perf/cperf_test_common.c
>> @@ -26,8 +26,7 @@ fill_single_seg_mbuf(struct rte_mbuf *m, struct
>rte_mempool *mp,
>>  	/* start of buffer is after mbuf structure and priv data */
>>  	m->priv_size = 0;
>>  	m->buf_addr = (char *)m + mbuf_hdr_size;
>> -	m->buf_iova = rte_mempool_virt2iova(obj) +
>> -		mbuf_offset + mbuf_hdr_size;
>> +	rte_mbuf_iova_set(m, rte_mempool_virt2iova(obj) + mbuf_offset +
>mbuf_hdr_size);
>>  	m->buf_len = segment_sz;
>>  	m->data_len = data_len;
>>  	m->pkt_len = data_len;
>> @@ -58,7 +57,7 @@ fill_multi_seg_mbuf(struct rte_mbuf *m, struct
>rte_mempool *mp,
>>  		/* start of buffer is after mbuf structure and priv data */
>>  		m->priv_size = 0;
>>  		m->buf_addr = (char *)m + mbuf_hdr_size;
>> -		m->buf_iova = next_seg_phys_addr;
>> +		rte_mbuf_iova_set(m, next_seg_phys_addr);
>>  		next_seg_phys_addr += mbuf_hdr_size + segment_sz;
>>  		m->buf_len = segment_sz;
>>  		m->data_len = data_len;
>> diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c
>> index 97f500809e..f5af5e8a3f 100644
>> --- a/app/test/test_bpf.c
>> +++ b/app/test/test_bpf.c
>> @@ -2600,7 +2600,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t
>buf[], uint32_t buf_len,
>>  	uint8_t *db;
>>
>>  	mb->buf_addr = buf;
>> -	mb->buf_iova = (uintptr_t)buf;
>> +	rte_mbuf_iova_set(mb, (uintptr_t)buf);
>>  	mb->buf_len = buf_len;
>>  	rte_mbuf_refcnt_set(mb, 1);
>>
>> diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
>> index e09b2549ca..22e45e66c1 100644
>> --- a/app/test/test_mbuf.c
>> +++ b/app/test/test_mbuf.c
>> @@ -1233,7 +1233,7 @@ test_failing_mbuf_sanity_check(struct rte_mempool
>*pktmbuf_pool)
>>  	}
>>
>>  	badbuf = *buf;
>> -	badbuf.buf_iova = 0;
>> +	rte_mbuf_iova_set(&badbuf, 0);
>>  	if (verify_mbuf_check_panics(&badbuf)) {
>>  		printf("Error with bad-physaddr mbuf test\n");
>>  		return -1;
>> diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c
>> index 320dacea34..abbf00f6da 100644
>> --- a/app/test/test_pcapng.c
>> +++ b/app/test/test_pcapng.c
>> @@ -40,7 +40,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[],
>uint32_t buf_len,
>>  	uint8_t *db;
>>
>>  	mb->buf_addr = buf;
>> -	mb->buf_iova = (uintptr_t)buf;
>> +	rte_mbuf_iova_set(mb, (uintptr_t)buf);
>>  	mb->buf_len = buf_len;
>>  	rte_mbuf_refcnt_set(mb, 1);
>>
>> diff --git a/lib/kni/rte_kni.c b/lib/kni/rte_kni.c
>> index 7971c56bb4..3737a91de7 100644
>> --- a/lib/kni/rte_kni.c
>> +++ b/lib/kni/rte_kni.c
>> @@ -357,8 +357,7 @@ static void *
>>  va2pa(struct rte_mbuf *m)
>>  {
>>  	return (void *)((unsigned long)m -
>> -			((unsigned long)m->buf_addr -
>> -			 (unsigned long)m->buf_iova));
>> +			((unsigned long)m->buf_addr - (unsigned
>long)rte_mbuf_iova_get(m)));
>>  }
>>
>>  static void *
>> diff --git a/lib/mbuf/rte_mbuf.c b/lib/mbuf/rte_mbuf.c
>> index a2307cebe6..16f6ed6731 100644
>> --- a/lib/mbuf/rte_mbuf.c
>> +++ b/lib/mbuf/rte_mbuf.c
>> @@ -89,7 +89,7 @@ rte_pktmbuf_init(struct rte_mempool *mp,
>>  	/* start of buffer is after mbuf structure and priv data */
>>  	m->priv_size = priv_size;
>>  	m->buf_addr = (char *)m + mbuf_size;
>> -	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
>> +	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
>>  	m->buf_len = (uint16_t)buf_len;
>>
>>  	/* keep some headroom between start of buffer and data */
>> @@ -187,8 +187,8 @@ __rte_pktmbuf_init_extmem(struct rte_mempool *mp,
>>  	RTE_ASSERT(ctx->off + ext_mem->elt_size <= ext_mem->buf_len);
>>
>>  	m->buf_addr = RTE_PTR_ADD(ext_mem->buf_ptr, ctx->off);
>> -	m->buf_iova = ext_mem->buf_iova == RTE_BAD_IOVA ?
>> -		      RTE_BAD_IOVA : (ext_mem->buf_iova + ctx->off);
>> +	rte_mbuf_iova_set(m, ext_mem->buf_iova == RTE_BAD_IOVA ?
>RTE_BAD_IOVA :
>> +								 (ext_mem-
>>buf_iova + ctx->off));
>>
>>  	ctx->off += ext_mem->elt_size;
>>  	if (ctx->off + ext_mem->elt_size > ext_mem->buf_len) {
>> @@ -388,7 +388,7 @@ int rte_mbuf_check(const struct rte_mbuf *m, int
>is_header,
>>  		*reason = "bad mbuf pool";
>>  		return -1;
>>  	}
>> -	if (m->buf_iova == 0) {
>> +	if (rte_mbuf_iova_get(m) == 0) {
>>  		*reason = "bad IO addr";
>>  		return -1;
>>  	}
>> @@ -669,8 +669,8 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m,
>unsigned dump_len)
>>
>>  	__rte_mbuf_sanity_check(m, 1);
>>
>> -	fprintf(f, "dump mbuf at %p, iova=%#"PRIx64", buf_len=%u\n",
>> -		m, m->buf_iova, m->buf_len);
>> +	fprintf(f, "dump mbuf at %p, iova=%#" PRIx64 ", buf_len=%u\n", m,
>rte_mbuf_iova_get(m),
>> +		m->buf_len);
>>  	fprintf(f, "  pkt_len=%u, ol_flags=%#"PRIx64", nb_segs=%u, port=%u",
>>  		m->pkt_len, m->ol_flags, m->nb_segs, m->port);
>>
>> diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h
>> index 9811e8c760..c552dfbcac 100644
>> --- a/lib/mbuf/rte_mbuf.h
>> +++ b/lib/mbuf/rte_mbuf.h
>> @@ -135,6 +135,24 @@ rte_mbuf_prefetch_part2(struct rte_mbuf *m)
>>
>>  static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
>>
>> +/**
>> + * Get the mbuf physical address.
>> + */
>
>Not blocking for the rc1, but the comment should be clarified.
>
>"Get the IOVA address of the mbuf data buffer." looks more accurate
>to me.
>
>Also, even if it does not bring a big added-value, it's better to
>have documentation for @param and @return.
>
 
I will update.

>> +static inline rte_iova_t
>> +rte_mbuf_iova_get(const struct rte_mbuf *m)
>> +{
>> +	return m->buf_iova;
>> +}
>> +
>> +/**
>> + * Set the mbuf physical address.
>> + */
>
>Same
>
 
Ack.

>> +static inline void
>> +rte_mbuf_iova_set(struct rte_mbuf *m, rte_iova_t iova)
>> +{
>> +	m->buf_iova = iova;
>> +}
>> +
>>  /**
>>   * Return the IO address of the beginning of the mbuf data
>>   *
>> @@ -146,7 +164,7 @@ static inline uint16_t rte_pktmbuf_priv_size(struct
>rte_mempool *mp);
>>  static inline rte_iova_t
>>  rte_mbuf_data_iova(const struct rte_mbuf *mb)
>>  {
>> -	return mb->buf_iova + mb->data_off;
>> +	return rte_mbuf_iova_get(mb) + mb->data_off;
>>  }
>>
>>  /**
>> @@ -164,7 +182,7 @@ rte_mbuf_data_iova(const struct rte_mbuf *mb)
>>  static inline rte_iova_t
>>  rte_mbuf_data_iova_default(const struct rte_mbuf *mb)
>>  {
>> -	return mb->buf_iova + RTE_PKTMBUF_HEADROOM;
>> +	return rte_mbuf_iova_get(mb) + RTE_PKTMBUF_HEADROOM;
>>  }
>>
>>  /**
>> @@ -1056,7 +1074,7 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void
>*buf_addr,
>>  	RTE_ASSERT(shinfo->free_cb != NULL);
>>
>>  	m->buf_addr = buf_addr;
>> -	m->buf_iova = buf_iova;
>> +	rte_mbuf_iova_set(m, buf_iova);
>>  	m->buf_len = buf_len;
>>
>>  	m->data_len = 0;
>> @@ -1143,7 +1161,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf
>*mi, struct rte_mbuf *m)
>>
>>  	mi->data_off = m->data_off;
>>  	mi->data_len = m->data_len;
>> -	mi->buf_iova = m->buf_iova;
>> +	rte_mbuf_iova_set(mi, rte_mbuf_iova_get(m));
>>  	mi->buf_addr = m->buf_addr;
>>  	mi->buf_len = m->buf_len;
>>
>> @@ -1245,7 +1263,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf
>*m)
>>
>>  	m->priv_size = priv_size;
>>  	m->buf_addr = (char *)m + mbuf_size;
>> -	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
>> +	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
>>  	m->buf_len = (uint16_t)buf_len;
>>  	rte_pktmbuf_reset_headroom(m);
>>  	m->data_len = 0;
>> diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
>> index e672d59b36..51a12a1fb9 100644
>> --- a/lib/mbuf/rte_mbuf_core.h
>> +++ b/lib/mbuf/rte_mbuf_core.h
>> @@ -736,8 +736,7 @@ struct rte_mbuf_ext_shared_info {
>>   * @param o
>>   *   The offset into the data to calculate address from.
>>   */
>> -#define rte_pktmbuf_iova_offset(m, o) \
>> -	(rte_iova_t)((m)->buf_iova + (m)->data_off + (o))
>> +#define rte_pktmbuf_iova_offset(m, o) (rte_iova_t)(rte_mbuf_iova_get(m) +
>(m)->data_off + (o))
>>
>>  /**
>>   * A macro that returns the IO address that points to the start of the
>> diff --git a/lib/pipeline/rte_table_action.c b/lib/pipeline/rte_table_action.c
>> index b1310be565..06a8cdcf05 100644
>> --- a/lib/pipeline/rte_table_action.c
>> +++ b/lib/pipeline/rte_table_action.c
>> @@ -1929,7 +1929,7 @@ pkt_work_sym_crypto(struct rte_mbuf *mbuf, struct
>sym_crypto_data *data,
>>
>>  	op->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
>>  	op->sess_type = RTE_CRYPTO_OP_WITH_SESSION;
>> -	op->phys_addr = mbuf->buf_iova + cfg->op_offset - sizeof(*mbuf);
>> +	op->phys_addr = rte_mbuf_iova_get(mbuf) + cfg->op_offset -
>sizeof(*mbuf);
>>  	op->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
>>  	sym->m_src = mbuf;
>>  	sym->m_dst = NULL;
>> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
>> index eeeda681cc..ef211ed519 100644
>> --- a/lib/vhost/vhost.h
>> +++ b/lib/vhost/vhost.h
>> @@ -971,7 +971,7 @@ restore_mbuf(struct rte_mbuf *m)
>>  		/* start of buffer is after mbuf structure and priv data */
>>
>>  		m->buf_addr = (char *)m + mbuf_size;
>> -		m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
>> +		rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
>>  		m = m->next;
>>  	}
>>  }
>> diff --git a/lib/vhost/vhost_crypto.c b/lib/vhost/vhost_crypto.c
>> index 54946f46d9..3077a21ae8 100644
>> --- a/lib/vhost/vhost_crypto.c
>> +++ b/lib/vhost/vhost_crypto.c
>> @@ -823,11 +823,10 @@ prepare_sym_cipher_op(struct vhost_crypto
>*vcrypto, struct rte_crypto_op *op,
>>  	switch (vcrypto->option) {
>>  	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
>>  		m_src->data_len = cipher->para.src_data_len;
>> -		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
>> -				cipher->para.src_data_len);
>> +		rte_mbuf_iova_set(m_src,
>> +				  gpa_to_hpa(vcrypto->dev, desc->addr, cipher-
>>para.src_data_len));
>>  		m_src->buf_addr = get_data_ptr(vc_req, desc,
>VHOST_ACCESS_RO);
>> -		if (unlikely(m_src->buf_iova == 0 ||
>> -				m_src->buf_addr == NULL)) {
>> +		if (unlikely(rte_mbuf_iova_get(m_src) == 0 || m_src->buf_addr
>== NULL)) {
>>  			VC_LOG_ERR("zero_copy may fail due to cross page
>data");
>>  			ret = VIRTIO_CRYPTO_ERR;
>>  			goto error_exit;
>> @@ -867,10 +866,10 @@ prepare_sym_cipher_op(struct vhost_crypto
>*vcrypto, struct rte_crypto_op *op,
>>
>>  	switch (vcrypto->option) {
>>  	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
>> -		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
>> -				desc->addr, cipher->para.dst_data_len);
>> +		rte_mbuf_iova_set(m_dst,
>> +				  gpa_to_hpa(vcrypto->dev, desc->addr, cipher-
>>para.dst_data_len));
>>  		m_dst->buf_addr = get_data_ptr(vc_req, desc,
>VHOST_ACCESS_RW);
>> -		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
>> +		if (unlikely(rte_mbuf_iova_get(m_dst) == 0 || m_dst->buf_addr
>== NULL)) {
>>  			VC_LOG_ERR("zero_copy may fail due to cross page
>data");
>>  			ret = VIRTIO_CRYPTO_ERR;
>>  			goto error_exit;
>> @@ -981,10 +980,10 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto,
>struct rte_crypto_op *op,
>>  		m_src->data_len = chain->para.src_data_len;
>>  		m_dst->data_len = chain->para.dst_data_len;
>>
>> -		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
>> -				chain->para.src_data_len);
>> +		rte_mbuf_iova_set(m_src,
>> +				  gpa_to_hpa(vcrypto->dev, desc->addr, chain-
>>para.src_data_len));
>>  		m_src->buf_addr = get_data_ptr(vc_req, desc,
>VHOST_ACCESS_RO);
>> -		if (unlikely(m_src->buf_iova == 0 || m_src->buf_addr == NULL)) {
>> +		if (unlikely(rte_mbuf_iova_get(m_src) == 0 || m_src->buf_addr
>== NULL)) {
>>  			VC_LOG_ERR("zero_copy may fail due to cross page
>data");
>>  			ret = VIRTIO_CRYPTO_ERR;
>>  			goto error_exit;
>> @@ -1024,10 +1023,10 @@ prepare_sym_chain_op(struct vhost_crypto
>*vcrypto, struct rte_crypto_op *op,
>>
>>  	switch (vcrypto->option) {
>>  	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
>> -		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
>> -				desc->addr, chain->para.dst_data_len);
>> +		rte_mbuf_iova_set(m_dst,
>> +				  gpa_to_hpa(vcrypto->dev, desc->addr, chain-
>>para.dst_data_len));
>>  		m_dst->buf_addr = get_data_ptr(vc_req, desc,
>VHOST_ACCESS_RW);
>> -		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
>> +		if (unlikely(rte_mbuf_iova_get(m_dst) == 0 || m_dst->buf_addr
>== NULL)) {
>>  			VC_LOG_ERR("zero_copy may fail due to cross page
>data");
>>  			ret = VIRTIO_CRYPTO_ERR;
>>  			goto error_exit;
>> --
>> 2.25.1
>>

^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [EXT] Re: [PATCH v3 4/5] drivers: mark Marvell cnxk PMDs work with IOVA as VA
  2022-10-07 20:17                   ` Olivier Matz
@ 2022-10-07 20:22                     ` Shijith Thotton
  0 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 20:22 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Pavan Nikhilesh Bhagavatula, Honnappa.Nagarahalli,
	bruce.richardson, Jerin Jacob Kollanukkaran, mb, stephen, thomas,
	david.marchand, Ruifeng Wang, Jan Viktorin,
	Nithin Kumar Dabilpuram, Kiran Kumar Kokkilagadda,
	Sunil Kumar Kori, Satha Koteswara Rao Kottidi, Ankur Dwivedi,
	Anoob Joseph, Tejasree Kondoj, Radha Chintakuntla,
	Veerasenareddy Burru, Ashwin Sekhar T K, Jakub Palider,
	Tomasz Duszynski

>> Enabled the flag pmd_iova_as_va in cnxk driver build files as they work
>> with IOVA as VA. Updated cn9k and cn10k soc build configurations to
>> enable the IOVA as VA build by default.
>>
>> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
>> ---
>>  config/arm/meson.build                   |  8 +++-
>>  drivers/common/cnxk/meson.build          |  1 +
>>  drivers/crypto/cnxk/cn10k_ipsec_la_ops.h |  4 +-
>>  drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  |  2 +-
>>  drivers/crypto/cnxk/meson.build          |  2 +
>>  drivers/dma/cnxk/meson.build             |  1 +
>>  drivers/event/cnxk/meson.build           |  1 +
>>  drivers/mempool/cnxk/meson.build         |  1 +
>>  drivers/net/cnxk/cn10k_tx.h              | 55 +++++++-----------------
>>  drivers/net/cnxk/cn9k_tx.h               | 55 +++++++-----------------
>>  drivers/net/cnxk/cnxk_ethdev.h           |  1 -
>>  drivers/net/cnxk/meson.build             |  1 +
>>  drivers/raw/cnxk_bphy/meson.build        |  1 +
>>  drivers/raw/cnxk_gpio/meson.build        |  1 +
>>  14 files changed, 50 insertions(+), 84 deletions(-)
>>
>> diff --git a/config/arm/meson.build b/config/arm/meson.build
>> index 9f1636e0d5..4e95e8b388 100644
>> --- a/config/arm/meson.build
>> +++ b/config/arm/meson.build
>> @@ -294,7 +294,8 @@ soc_cn10k = {
>>      'flags': [
>>          ['RTE_MAX_LCORE', 24],
>>          ['RTE_MAX_NUMA_NODES', 1],
>> -        ['RTE_MEMPOOL_ALIGN', 128]
>> +        ['RTE_MEMPOOL_ALIGN', 128],
>> +        ['RTE_IOVA_AS_VA', 1]
>>      ],
>>      'part_number': '0xd49',
>>      'extra_march_features': ['crypto'],
>> @@ -370,7 +371,10 @@ soc_cn9k = {
>>      'description': 'Marvell OCTEON 9',
>>      'implementer': '0x43',
>>      'part_number': '0xb2',
>> -    'numa': false
>> +    'numa': false,
>> +    'flags': [
>> +        ['RTE_IOVA_AS_VA', 1]
>> +    ]
>>  }
>>
>>  soc_stingray = {
>> diff --git a/drivers/common/cnxk/meson.build
>b/drivers/common/cnxk/meson.build
>> index 6f808271d1..d019cfa8d1 100644
>> --- a/drivers/common/cnxk/meson.build
>> +++ b/drivers/common/cnxk/meson.build
>> @@ -86,3 +86,4 @@ sources += files('cnxk_telemetry_bphy.c',
>>  )
>>
>>  deps += ['bus_pci', 'net', 'telemetry']
>> +pmd_iova_as_va = true
>> diff --git a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
>b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
>> index 66cfe6ca98..16db14344d 100644
>> --- a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
>> +++ b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
>> @@ -85,7 +85,7 @@ process_outb_sa(struct roc_cpt_lf *lf, struct rte_crypto_op
>*cop,
>>
>>  	/* Prepare CPT instruction */
>>  	inst->w4.u64 = inst_w4_u64 | rte_pktmbuf_pkt_len(m_src);
>> -	dptr = rte_pktmbuf_iova(m_src);
>> +	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
>>  	inst->dptr = dptr;
>>  	inst->rptr = dptr;
>>
>> @@ -102,7 +102,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct
>cn10k_ipsec_sa *sa,
>>
>>  	/* Prepare CPT instruction */
>>  	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
>> -	dptr = rte_pktmbuf_iova(m_src);
>> +	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
>>  	inst->dptr = dptr;
>>  	inst->rptr = dptr;
>>
>> diff --git a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
>b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
>> index e469596756..8b68e4c728 100644
>> --- a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
>> +++ b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
>> @@ -99,7 +99,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct
>cn9k_ipsec_sa *sa,
>>
>>  	/* Prepare CPT instruction */
>>  	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
>> -	inst->dptr = inst->rptr = rte_pktmbuf_iova(m_src);
>> +	inst->dptr = inst->rptr = rte_pktmbuf_mtod(m_src, uint64_t);
>>  	inst->w7.u64 = sa->inst.w7;
>>  }
>>  #endif /* __CN9K_IPSEC_LA_OPS_H__ */
>> diff --git a/drivers/crypto/cnxk/meson.build b/drivers/crypto/cnxk/meson.build
>> index 23a1cc3aac..764e7bb99a 100644
>> --- a/drivers/crypto/cnxk/meson.build
>> +++ b/drivers/crypto/cnxk/meson.build
>> @@ -31,3 +31,5 @@ if get_option('buildtype').contains('debug')
>>  else
>>      cflags += [ '-ULA_IPSEC_DEBUG' ]
>>  endif
>> +
>> +pmd_iova_as_va = true
>> diff --git a/drivers/dma/cnxk/meson.build b/drivers/dma/cnxk/meson.build
>> index d4be4ee860..ef0e3db109 100644
>> --- a/drivers/dma/cnxk/meson.build
>> +++ b/drivers/dma/cnxk/meson.build
>> @@ -3,3 +3,4 @@
>>
>>  deps += ['bus_pci', 'common_cnxk', 'dmadev']
>>  sources = files('cnxk_dmadev.c')
>> +pmd_iova_as_va = true
>> diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
>> index b27bae7b12..650d0d4256 100644
>> --- a/drivers/event/cnxk/meson.build
>> +++ b/drivers/event/cnxk/meson.build
>> @@ -479,3 +479,4 @@ foreach flag: extra_flags
>>  endforeach
>>
>>  deps += ['bus_pci', 'common_cnxk', 'net_cnxk', 'crypto_cnxk']
>> +pmd_iova_as_va = true
>> diff --git a/drivers/mempool/cnxk/meson.build
>b/drivers/mempool/cnxk/meson.build
>> index d5d1978569..a328176457 100644
>> --- a/drivers/mempool/cnxk/meson.build
>> +++ b/drivers/mempool/cnxk/meson.build
>> @@ -17,3 +17,4 @@ sources = files(
>>  )
>>
>>  deps += ['eal', 'mbuf', 'kvargs', 'bus_pci', 'common_cnxk', 'mempool']
>> +pmd_iova_as_va = true
>> diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
>> index ea13866b20..2ef62da132 100644
>> --- a/drivers/net/cnxk/cn10k_tx.h
>> +++ b/drivers/net/cnxk/cn10k_tx.h
>> @@ -1775,14 +1775,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue,
>uint64_t *ws,
>>  		mbuf2 = (uint64_t *)tx_pkts[2];
>>  		mbuf3 = (uint64_t *)tx_pkts[3];
>>
>> -		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
>> -				     offsetof(struct rte_mbuf, buf_iova));
>>  		/*
>>  		 * Get mbuf's, olflags, iova, pktlen, dataoff
>>  		 * dataoff_iovaX.D[0] = iova,
>> @@ -1790,28 +1782,24 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue,
>uint64_t *ws,
>>  		 * len_olflagsX.D[0] = ol_flags,
>>  		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
>>  		 */
>> -		dataoff_iova0 = vld1q_u64(mbuf0);
>> -		len_olflags0 = vld1q_u64(mbuf0 + 2);
>> -		dataoff_iova1 = vld1q_u64(mbuf1);
>> -		len_olflags1 = vld1q_u64(mbuf1 + 2);
>> -		dataoff_iova2 = vld1q_u64(mbuf2);
>> -		len_olflags2 = vld1q_u64(mbuf2 + 2);
>> -		dataoff_iova3 = vld1q_u64(mbuf3);
>> -		len_olflags3 = vld1q_u64(mbuf3 + 2);
>> +		dataoff_iova0 =
>> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off,
>vld1q_u64(mbuf0), 1);
>> +		len_olflags0 = vld1q_u64(mbuf0 + 3);
>> +		dataoff_iova1 =
>> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off,
>vld1q_u64(mbuf1), 1);
>> +		len_olflags1 = vld1q_u64(mbuf1 + 3);
>> +		dataoff_iova2 =
>> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off,
>vld1q_u64(mbuf2), 1);
>> +		len_olflags2 = vld1q_u64(mbuf2 + 3);
>> +		dataoff_iova3 =
>> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off,
>vld1q_u64(mbuf3), 1);
>> +		len_olflags3 = vld1q_u64(mbuf3 + 3);
>>
>>  		/* Move mbufs to point pool */
>> -		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
>> -				     offsetof(struct rte_mbuf, pool) -
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
>> -				     offsetof(struct rte_mbuf, pool) -
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
>> -				     offsetof(struct rte_mbuf, pool) -
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
>> -				     offsetof(struct rte_mbuf, pool) -
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> +		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct rte_mbuf,
>pool));
>> +		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct rte_mbuf,
>pool));
>> +		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct rte_mbuf,
>pool));
>> +		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct rte_mbuf,
>pool));
>>
>>  		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
>>  			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
>> @@ -1861,17 +1849,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue,
>uint64_t *ws,
>>  		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
>>  		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
>>
>> -		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
>> -		const uint64x2_t and_mask0 = {
>> -			0xFFFFFFFFFFFFFFFF,
>> -			0x000000000000FFFF,
>> -		};
>> -
>> -		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
>> -		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
>> -		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
>> -		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
>> -
>>  		/*
>>  		 * Pick only 16 bits of pktlen preset at bits 63:32
>>  		 * and place them at bits 15:0.
>> diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
>> index 6ce81f5c96..f5d99ccb5a 100644
>> --- a/drivers/net/cnxk/cn9k_tx.h
>> +++ b/drivers/net/cnxk/cn9k_tx.h
>> @@ -1005,14 +1005,6 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct
>rte_mbuf **tx_pkts,
>>  		mbuf2 = (uint64_t *)tx_pkts[2];
>>  		mbuf3 = (uint64_t *)tx_pkts[3];
>>
>> -		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
>> -				     offsetof(struct rte_mbuf, buf_iova));
>>  		/*
>>  		 * Get mbuf's, olflags, iova, pktlen, dataoff
>>  		 * dataoff_iovaX.D[0] = iova,
>> @@ -1020,28 +1012,24 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct
>rte_mbuf **tx_pkts,
>>  		 * len_olflagsX.D[0] = ol_flags,
>>  		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
>>  		 */
>> -		dataoff_iova0 = vld1q_u64(mbuf0);
>> -		len_olflags0 = vld1q_u64(mbuf0 + 2);
>> -		dataoff_iova1 = vld1q_u64(mbuf1);
>> -		len_olflags1 = vld1q_u64(mbuf1 + 2);
>> -		dataoff_iova2 = vld1q_u64(mbuf2);
>> -		len_olflags2 = vld1q_u64(mbuf2 + 2);
>> -		dataoff_iova3 = vld1q_u64(mbuf3);
>> -		len_olflags3 = vld1q_u64(mbuf3 + 2);
>> +		dataoff_iova0 =
>> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off,
>vld1q_u64(mbuf0), 1);
>> +		len_olflags0 = vld1q_u64(mbuf0 + 3);
>> +		dataoff_iova1 =
>> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf1)->data_off,
>vld1q_u64(mbuf1), 1);
>> +		len_olflags1 = vld1q_u64(mbuf1 + 3);
>> +		dataoff_iova2 =
>> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf2)->data_off,
>vld1q_u64(mbuf2), 1);
>> +		len_olflags2 = vld1q_u64(mbuf2 + 3);
>> +		dataoff_iova3 =
>> +			vsetq_lane_u64(((struct rte_mbuf *)mbuf3)->data_off,
>vld1q_u64(mbuf3), 1);
>> +		len_olflags3 = vld1q_u64(mbuf3 + 3);
>>
>>  		/* Move mbufs to point pool */
>> -		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
>> -				     offsetof(struct rte_mbuf, pool) -
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
>> -				     offsetof(struct rte_mbuf, pool) -
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
>> -				     offsetof(struct rte_mbuf, pool) -
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> -		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
>> -				     offsetof(struct rte_mbuf, pool) -
>> -				     offsetof(struct rte_mbuf, buf_iova));
>> +		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct rte_mbuf,
>pool));
>> +		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct rte_mbuf,
>pool));
>> +		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct rte_mbuf,
>pool));
>> +		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct rte_mbuf,
>pool));
>>
>>  		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
>>  			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
>> @@ -1091,17 +1079,6 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct
>rte_mbuf **tx_pkts,
>>  		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
>>  		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
>>
>> -		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
>> -		const uint64x2_t and_mask0 = {
>> -			0xFFFFFFFFFFFFFFFF,
>> -			0x000000000000FFFF,
>> -		};
>> -
>> -		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
>> -		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
>> -		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
>> -		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
>> -
>>  		/*
>>  		 * Pick only 16 bits of pktlen preset at bits 63:32
>>  		 * and place them at bits 15:0.
>> diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
>> index 4cb7c9e90c..abf1e4215f 100644
>> --- a/drivers/net/cnxk/cnxk_ethdev.h
>> +++ b/drivers/net/cnxk/cnxk_ethdev.h
>> @@ -690,7 +690,6 @@ cnxk_pktmbuf_detach(struct rte_mbuf *m)
>>
>>  	m->priv_size = priv_size;
>>  	m->buf_addr = (char *)m + mbuf_size;
>> -	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
>>  	m->buf_len = (uint16_t)buf_len;
>>  	rte_pktmbuf_reset_headroom(m);
>>  	m->data_len = 0;
>
>I missed it during previous review, but shouldn't the accessor be used
>instead?  I mean, if the build is done with PA enabled, and another
>driver accesses to m->buf_iova, it has to be correct.
>

Thanks, I will change.

>> diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
>> index f347e98fce..01489b3a36 100644
>> --- a/drivers/net/cnxk/meson.build
>> +++ b/drivers/net/cnxk/meson.build
>> @@ -194,3 +194,4 @@ foreach flag: extra_flags
>>  endforeach
>>
>>  headers = files('rte_pmd_cnxk.h')
>> +pmd_iova_as_va = true
>> diff --git a/drivers/raw/cnxk_bphy/meson.build
>b/drivers/raw/cnxk_bphy/meson.build
>> index 14147feaf4..781ed63e05 100644
>> --- a/drivers/raw/cnxk_bphy/meson.build
>> +++ b/drivers/raw/cnxk_bphy/meson.build
>> @@ -10,3 +10,4 @@ sources = files(
>>          'cnxk_bphy_irq.c',
>>  )
>>  headers = files('rte_pmd_bphy.h')
>> +pmd_iova_as_va = true
>> diff --git a/drivers/raw/cnxk_gpio/meson.build
>b/drivers/raw/cnxk_gpio/meson.build
>> index a75a5b9084..f9aed173b6 100644
>> --- a/drivers/raw/cnxk_gpio/meson.build
>> +++ b/drivers/raw/cnxk_gpio/meson.build
>> @@ -9,3 +9,4 @@ sources = files(
>>          'cnxk_gpio_selftest.c',
>>  )
>>  headers = files('rte_pmd_cnxk_gpio.h')
>> +pmd_iova_as_va = true
>> --
>> 2.25.1
>>

^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v5 0/7] mbuf dynamic field expansion
  2022-10-07 19:30                 ` [PATCH v4 0/7] " Shijith Thotton
                                     ` (7 preceding siblings ...)
  2022-10-07 20:19                   ` [PATCH v4 0/7] mbuf dynamic field expansion Olivier Matz
@ 2022-10-07 21:02                   ` Shijith Thotton
  2022-10-07 21:02                     ` [PATCH v5 1/7] mbuf: add API to get and set mbuf physical address Shijith Thotton
                                       ` (7 more replies)
  8 siblings, 8 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 21:02 UTC (permalink / raw)
  To: dev
  Cc: Shijith Thotton, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	mb, olivier.matz, stephen, thomas, ferruh.yigit, pbhagavatula,
	david.marchand

This is a continuation of the discussion[1] on adding the mbuf physical address field to the
dynamic field area. The previous version added the PA field to the dynamic field area based on the
EAL IOVA mode option. It was deemed unsafe, as some components could still use the PA field without
checking the IOVA mode and there are drivers which need PA to work. One suggestion was to make the
IOVA mode check at compile time, so that drivers which need PA can be disabled during build. This
series adds a new meson build option for that. The fourth patch adds the mbuf PA field to the
dynamic field area on such builds. The last two patches enable Marvell cnxk PMDs and software PMDs
in builds with IOVA as PA disabled, as they work without the PA field.

1. https://inbox.dpdk.org/dev/57d2ab7fff672716d37ba4078e2e3bb2db126607.1656605763.git.sthotton@marvell.com/.

v5:
 * Fixed setting mbuf in cnxk PMD.

v4:
 * Restructured changes to multiple patches.
 * Moved to #if scheme instead of union.
 * Updated release notes.

v3:
 * Cleared use of buf_iova from cnxk PMD.

v2:
 * Used RTE_IOVA_AS_VA instead of rte_is_iova_as_va_build().
 * Moved mbuf next pointer to first cacheline if RTE_IOVA_AS_VA = 1.

Shijith Thotton (7):
  mbuf: add API to get and set mbuf physical address
  test/dma: use API to get mbuf data physical address
  build: add meson option to configure IOVA mode as PA
  mbuf: add second dynamic field member
  lib: move mbuf next pointer to first cache line
  drivers: mark cnxk PMDs work with IOVA as PA disabled
  drivers: mark software PMDs work with IOVA as PA disabled

 app/test-bbdev/test_bbdev_perf.c         |  2 +-
 app/test-crypto-perf/cperf_test_common.c |  5 +--
 app/test/test_bpf.c                      |  2 +-
 app/test/test_dmadev.c                   | 33 ++++++--------
 app/test/test_mbuf.c                     | 12 +++---
 app/test/test_pcapng.c                   |  2 +-
 config/arm/meson.build                   |  8 +++-
 config/meson.build                       |  1 +
 doc/guides/platform/cnxk.rst             |  3 +-
 doc/guides/rel_notes/release_22_11.rst   |  3 ++
 drivers/common/cnxk/meson.build          |  1 +
 drivers/crypto/armv8/meson.build         |  1 +
 drivers/crypto/cnxk/cn10k_ipsec_la_ops.h |  4 +-
 drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  |  2 +-
 drivers/crypto/cnxk/meson.build          |  2 +
 drivers/crypto/ipsec_mb/meson.build      |  1 +
 drivers/crypto/null/meson.build          |  1 +
 drivers/crypto/openssl/meson.build       |  1 +
 drivers/dma/cnxk/meson.build             |  1 +
 drivers/dma/skeleton/meson.build         |  1 +
 drivers/event/cnxk/meson.build           |  1 +
 drivers/event/dsw/meson.build            |  1 +
 drivers/event/opdl/meson.build           |  1 +
 drivers/event/skeleton/meson.build       |  1 +
 drivers/event/sw/meson.build             |  1 +
 drivers/mempool/bucket/meson.build       |  1 +
 drivers/mempool/cnxk/meson.build         |  1 +
 drivers/mempool/ring/meson.build         |  1 +
 drivers/mempool/stack/meson.build        |  1 +
 drivers/meson.build                      |  6 +++
 drivers/net/af_packet/meson.build        |  1 +
 drivers/net/af_xdp/meson.build           |  2 +
 drivers/net/bonding/meson.build          |  1 +
 drivers/net/cnxk/cn10k_ethdev.c          |  4 +-
 drivers/net/cnxk/cn10k_tx.h              | 55 +++++++-----------------
 drivers/net/cnxk/cn9k_ethdev.c           |  4 +-
 drivers/net/cnxk/cn9k_tx.h               | 55 +++++++-----------------
 drivers/net/cnxk/cnxk_ethdev.h           |  2 +-
 drivers/net/cnxk/meson.build             |  1 +
 drivers/net/failsafe/meson.build         |  1 +
 drivers/net/hns3/meson.build             |  6 +++
 drivers/net/memif/meson.build            |  1 +
 drivers/net/null/meson.build             |  1 +
 drivers/net/pcap/meson.build             |  1 +
 drivers/net/ring/meson.build             |  1 +
 drivers/net/tap/meson.build              |  1 +
 drivers/raw/cnxk_bphy/meson.build        |  1 +
 drivers/raw/cnxk_gpio/meson.build        |  1 +
 drivers/raw/skeleton/meson.build         |  1 +
 lib/eal/linux/eal.c                      |  6 +++
 lib/kni/rte_kni.c                        |  3 +-
 lib/mbuf/rte_mbuf.c                      | 12 +++---
 lib/mbuf/rte_mbuf.h                      | 47 +++++++++++++++++---
 lib/mbuf/rte_mbuf_core.h                 | 26 +++++++++--
 lib/mbuf/rte_mbuf_dyn.c                  |  3 ++
 lib/meson.build                          |  3 ++
 lib/pipeline/rte_table_action.c          |  2 +-
 lib/vhost/vhost.h                        |  2 +-
 lib/vhost/vhost_crypto.c                 | 25 ++++++-----
 meson_options.txt                        |  2 +
 60 files changed, 221 insertions(+), 151 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v5 1/7] mbuf: add API to get and set mbuf physical address
  2022-10-07 21:02                   ` [PATCH v5 " Shijith Thotton
@ 2022-10-07 21:02                     ` Shijith Thotton
  2022-10-07 21:20                       ` Stephen Hemminger
  2022-10-07 21:02                     ` [PATCH v5 2/7] test/dma: use API to get mbuf data " Shijith Thotton
                                       ` (6 subsequent siblings)
  7 siblings, 1 reply; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 21:02 UTC (permalink / raw)
  To: dev
  Cc: Shijith Thotton, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	mb, olivier.matz, stephen, thomas, Nicolas Chautru, Ciara Power,
	Konstantin Ananyev, Reshma Pattan, Cristian Dumitrescu,
	Maxime Coquelin, Chenbo Xia

Added APIs rte_mbuf_iova_set and rte_mbuf_iova_get to set and get the
physical address of an mbuf respectively. Updated applications and
library to use the same.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
Acked-by: Olivier Matz <olivier.matz@6wind.com>
---
 app/test-bbdev/test_bbdev_perf.c         |  2 +-
 app/test-crypto-perf/cperf_test_common.c |  5 ++--
 app/test/test_bpf.c                      |  2 +-
 app/test/test_mbuf.c                     |  2 +-
 app/test/test_pcapng.c                   |  2 +-
 lib/kni/rte_kni.c                        |  3 +-
 lib/mbuf/rte_mbuf.c                      | 12 ++++----
 lib/mbuf/rte_mbuf.h                      | 38 ++++++++++++++++++++----
 lib/mbuf/rte_mbuf_core.h                 |  3 +-
 lib/pipeline/rte_table_action.c          |  2 +-
 lib/vhost/vhost.h                        |  2 +-
 lib/vhost/vhost_crypto.c                 | 25 ++++++++--------
 12 files changed, 61 insertions(+), 37 deletions(-)

diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index 311e5d1a96..e7fbf71f6d 100644
--- a/app/test-bbdev/test_bbdev_perf.c
+++ b/app/test-bbdev/test_bbdev_perf.c
@@ -1002,7 +1002,7 @@ init_op_data_objs(struct rte_bbdev_op_data *bufs,
 					seg->length);
 				memcpy(data, seg->addr, seg->length);
 				m_head->buf_addr = data;
-				m_head->buf_iova = rte_malloc_virt2iova(data);
+				rte_mbuf_iova_set(m_head, rte_malloc_virt2iova(data));
 				m_head->data_off = 0;
 				m_head->data_len = seg->length;
 			} else {
diff --git a/app/test-crypto-perf/cperf_test_common.c b/app/test-crypto-perf/cperf_test_common.c
index 00aadc9a47..27646cd619 100644
--- a/app/test-crypto-perf/cperf_test_common.c
+++ b/app/test-crypto-perf/cperf_test_common.c
@@ -26,8 +26,7 @@ fill_single_seg_mbuf(struct rte_mbuf *m, struct rte_mempool *mp,
 	/* start of buffer is after mbuf structure and priv data */
 	m->priv_size = 0;
 	m->buf_addr = (char *)m + mbuf_hdr_size;
-	m->buf_iova = rte_mempool_virt2iova(obj) +
-		mbuf_offset + mbuf_hdr_size;
+	rte_mbuf_iova_set(m, rte_mempool_virt2iova(obj) + mbuf_offset + mbuf_hdr_size);
 	m->buf_len = segment_sz;
 	m->data_len = data_len;
 	m->pkt_len = data_len;
@@ -58,7 +57,7 @@ fill_multi_seg_mbuf(struct rte_mbuf *m, struct rte_mempool *mp,
 		/* start of buffer is after mbuf structure and priv data */
 		m->priv_size = 0;
 		m->buf_addr = (char *)m + mbuf_hdr_size;
-		m->buf_iova = next_seg_phys_addr;
+		rte_mbuf_iova_set(m, next_seg_phys_addr);
 		next_seg_phys_addr += mbuf_hdr_size + segment_sz;
 		m->buf_len = segment_sz;
 		m->data_len = data_len;
diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c
index 97f500809e..f5af5e8a3f 100644
--- a/app/test/test_bpf.c
+++ b/app/test/test_bpf.c
@@ -2600,7 +2600,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len,
 	uint8_t *db;
 
 	mb->buf_addr = buf;
-	mb->buf_iova = (uintptr_t)buf;
+	rte_mbuf_iova_set(mb, (uintptr_t)buf);
 	mb->buf_len = buf_len;
 	rte_mbuf_refcnt_set(mb, 1);
 
diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
index be4c3ff970..f94d33f212 100644
--- a/app/test/test_mbuf.c
+++ b/app/test/test_mbuf.c
@@ -1233,7 +1233,7 @@ test_failing_mbuf_sanity_check(struct rte_mempool *pktmbuf_pool)
 	}
 
 	badbuf = *buf;
-	badbuf.buf_iova = 0;
+	rte_mbuf_iova_set(&badbuf, 0);
 	if (verify_mbuf_check_panics(&badbuf)) {
 		printf("Error with bad-physaddr mbuf test\n");
 		return -1;
diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c
index 320dacea34..abbf00f6da 100644
--- a/app/test/test_pcapng.c
+++ b/app/test/test_pcapng.c
@@ -40,7 +40,7 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len,
 	uint8_t *db;
 
 	mb->buf_addr = buf;
-	mb->buf_iova = (uintptr_t)buf;
+	rte_mbuf_iova_set(mb, (uintptr_t)buf);
 	mb->buf_len = buf_len;
 	rte_mbuf_refcnt_set(mb, 1);
 
diff --git a/lib/kni/rte_kni.c b/lib/kni/rte_kni.c
index 7971c56bb4..3737a91de7 100644
--- a/lib/kni/rte_kni.c
+++ b/lib/kni/rte_kni.c
@@ -357,8 +357,7 @@ static void *
 va2pa(struct rte_mbuf *m)
 {
 	return (void *)((unsigned long)m -
-			((unsigned long)m->buf_addr -
-			 (unsigned long)m->buf_iova));
+			((unsigned long)m->buf_addr - (unsigned long)rte_mbuf_iova_get(m)));
 }
 
 static void *
diff --git a/lib/mbuf/rte_mbuf.c b/lib/mbuf/rte_mbuf.c
index a2307cebe6..16f6ed6731 100644
--- a/lib/mbuf/rte_mbuf.c
+++ b/lib/mbuf/rte_mbuf.c
@@ -89,7 +89,7 @@ rte_pktmbuf_init(struct rte_mempool *mp,
 	/* start of buffer is after mbuf structure and priv data */
 	m->priv_size = priv_size;
 	m->buf_addr = (char *)m + mbuf_size;
-	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
+	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
 	m->buf_len = (uint16_t)buf_len;
 
 	/* keep some headroom between start of buffer and data */
@@ -187,8 +187,8 @@ __rte_pktmbuf_init_extmem(struct rte_mempool *mp,
 	RTE_ASSERT(ctx->off + ext_mem->elt_size <= ext_mem->buf_len);
 
 	m->buf_addr = RTE_PTR_ADD(ext_mem->buf_ptr, ctx->off);
-	m->buf_iova = ext_mem->buf_iova == RTE_BAD_IOVA ?
-		      RTE_BAD_IOVA : (ext_mem->buf_iova + ctx->off);
+	rte_mbuf_iova_set(m, ext_mem->buf_iova == RTE_BAD_IOVA ? RTE_BAD_IOVA :
+								 (ext_mem->buf_iova + ctx->off));
 
 	ctx->off += ext_mem->elt_size;
 	if (ctx->off + ext_mem->elt_size > ext_mem->buf_len) {
@@ -388,7 +388,7 @@ int rte_mbuf_check(const struct rte_mbuf *m, int is_header,
 		*reason = "bad mbuf pool";
 		return -1;
 	}
-	if (m->buf_iova == 0) {
+	if (rte_mbuf_iova_get(m) == 0) {
 		*reason = "bad IO addr";
 		return -1;
 	}
@@ -669,8 +669,8 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
 
 	__rte_mbuf_sanity_check(m, 1);
 
-	fprintf(f, "dump mbuf at %p, iova=%#"PRIx64", buf_len=%u\n",
-		m, m->buf_iova, m->buf_len);
+	fprintf(f, "dump mbuf at %p, iova=%#" PRIx64 ", buf_len=%u\n", m, rte_mbuf_iova_get(m),
+		m->buf_len);
 	fprintf(f, "  pkt_len=%u, ol_flags=%#"PRIx64", nb_segs=%u, port=%u",
 		m->pkt_len, m->ol_flags, m->nb_segs, m->port);
 
diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h
index b6e23d98ce..c9f74586c3 100644
--- a/lib/mbuf/rte_mbuf.h
+++ b/lib/mbuf/rte_mbuf.h
@@ -135,6 +135,34 @@ rte_mbuf_prefetch_part2(struct rte_mbuf *m)
 
 static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
 
+/**
+ * Get the IOVA address of the mbuf data buffer.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ * @return
+ *   The IOVA address of the mbuf.
+ */
+static inline rte_iova_t
+rte_mbuf_iova_get(const struct rte_mbuf *m)
+{
+	return m->buf_iova;
+}
+
+/**
+ * Set the IOVA address of the mbuf data buffer
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ * @param iova
+ *   Value to set as IOVA address of the mbuf.
+ */
+static inline void
+rte_mbuf_iova_set(struct rte_mbuf *m, rte_iova_t iova)
+{
+	m->buf_iova = iova;
+}
+
 /**
  * Return the IO address of the beginning of the mbuf data
  *
@@ -146,7 +174,7 @@ static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
 static inline rte_iova_t
 rte_mbuf_data_iova(const struct rte_mbuf *mb)
 {
-	return mb->buf_iova + mb->data_off;
+	return rte_mbuf_iova_get(mb) + mb->data_off;
 }
 
 /**
@@ -164,7 +192,7 @@ rte_mbuf_data_iova(const struct rte_mbuf *mb)
 static inline rte_iova_t
 rte_mbuf_data_iova_default(const struct rte_mbuf *mb)
 {
-	return mb->buf_iova + RTE_PKTMBUF_HEADROOM;
+	return rte_mbuf_iova_get(mb) + RTE_PKTMBUF_HEADROOM;
 }
 
 /**
@@ -1053,7 +1081,7 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
 	RTE_ASSERT(shinfo->free_cb != NULL);
 
 	m->buf_addr = buf_addr;
-	m->buf_iova = buf_iova;
+	rte_mbuf_iova_set(m, buf_iova);
 	m->buf_len = buf_len;
 
 	m->data_len = 0;
@@ -1140,7 +1168,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
 
 	mi->data_off = m->data_off;
 	mi->data_len = m->data_len;
-	mi->buf_iova = m->buf_iova;
+	rte_mbuf_iova_set(mi, rte_mbuf_iova_get(m));
 	mi->buf_addr = m->buf_addr;
 	mi->buf_len = m->buf_len;
 
@@ -1242,7 +1270,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
 
 	m->priv_size = priv_size;
 	m->buf_addr = (char *)m + mbuf_size;
-	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
+	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
 	m->buf_len = (uint16_t)buf_len;
 	rte_pktmbuf_reset_headroom(m);
 	m->data_len = 0;
diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index e672d59b36..51a12a1fb9 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -736,8 +736,7 @@ struct rte_mbuf_ext_shared_info {
  * @param o
  *   The offset into the data to calculate address from.
  */
-#define rte_pktmbuf_iova_offset(m, o) \
-	(rte_iova_t)((m)->buf_iova + (m)->data_off + (o))
+#define rte_pktmbuf_iova_offset(m, o) (rte_iova_t)(rte_mbuf_iova_get(m) + (m)->data_off + (o))
 
 /**
  * A macro that returns the IO address that points to the start of the
diff --git a/lib/pipeline/rte_table_action.c b/lib/pipeline/rte_table_action.c
index cb792bbe0d..de5da0b695 100644
--- a/lib/pipeline/rte_table_action.c
+++ b/lib/pipeline/rte_table_action.c
@@ -1923,7 +1923,7 @@ pkt_work_sym_crypto(struct rte_mbuf *mbuf, struct sym_crypto_data *data,
 
 	op->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
 	op->sess_type = RTE_CRYPTO_OP_WITH_SESSION;
-	op->phys_addr = mbuf->buf_iova + cfg->op_offset - sizeof(*mbuf);
+	op->phys_addr = rte_mbuf_iova_get(mbuf) + cfg->op_offset - sizeof(*mbuf);
 	op->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
 	sym->m_src = mbuf;
 	sym->m_dst = NULL;
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index eeeda681cc..ef211ed519 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -971,7 +971,7 @@ restore_mbuf(struct rte_mbuf *m)
 		/* start of buffer is after mbuf structure and priv data */
 
 		m->buf_addr = (char *)m + mbuf_size;
-		m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
+		rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
 		m = m->next;
 	}
 }
diff --git a/lib/vhost/vhost_crypto.c b/lib/vhost/vhost_crypto.c
index 7321da21b7..b448b6685d 100644
--- a/lib/vhost/vhost_crypto.c
+++ b/lib/vhost/vhost_crypto.c
@@ -807,11 +807,10 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 	switch (vcrypto->option) {
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
 		m_src->data_len = cipher->para.src_data_len;
-		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
-				cipher->para.src_data_len);
+		rte_mbuf_iova_set(m_src,
+				  gpa_to_hpa(vcrypto->dev, desc->addr, cipher->para.src_data_len));
 		m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO);
-		if (unlikely(m_src->buf_iova == 0 ||
-				m_src->buf_addr == NULL)) {
+		if (unlikely(rte_mbuf_iova_get(m_src) == 0 || m_src->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
@@ -851,10 +850,10 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 
 	switch (vcrypto->option) {
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
-		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
-				desc->addr, cipher->para.dst_data_len);
+		rte_mbuf_iova_set(m_dst,
+				  gpa_to_hpa(vcrypto->dev, desc->addr, cipher->para.dst_data_len));
 		m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW);
-		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
+		if (unlikely(rte_mbuf_iova_get(m_dst) == 0 || m_dst->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
@@ -965,10 +964,10 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 		m_src->data_len = chain->para.src_data_len;
 		m_dst->data_len = chain->para.dst_data_len;
 
-		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
-				chain->para.src_data_len);
+		rte_mbuf_iova_set(m_src,
+				  gpa_to_hpa(vcrypto->dev, desc->addr, chain->para.src_data_len));
 		m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO);
-		if (unlikely(m_src->buf_iova == 0 || m_src->buf_addr == NULL)) {
+		if (unlikely(rte_mbuf_iova_get(m_src) == 0 || m_src->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
@@ -1008,10 +1007,10 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 
 	switch (vcrypto->option) {
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
-		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
-				desc->addr, chain->para.dst_data_len);
+		rte_mbuf_iova_set(m_dst,
+				  gpa_to_hpa(vcrypto->dev, desc->addr, chain->para.dst_data_len));
 		m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW);
-		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
+		if (unlikely(rte_mbuf_iova_get(m_dst) == 0 || m_dst->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v5 2/7] test/dma: use API to get mbuf data physical address
  2022-10-07 21:02                   ` [PATCH v5 " Shijith Thotton
  2022-10-07 21:02                     ` [PATCH v5 1/7] mbuf: add API to get and set mbuf physical address Shijith Thotton
@ 2022-10-07 21:02                     ` Shijith Thotton
  2022-10-07 21:02                     ` [PATCH v5 3/7] build: add meson option to configure IOVA mode as PA Shijith Thotton
                                       ` (5 subsequent siblings)
  7 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 21:02 UTC (permalink / raw)
  To: dev
  Cc: Shijith Thotton, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	mb, olivier.matz, stephen, thomas, Chengwen Feng, Kevin Laatz

Used rte_mbuf_data_iova API to get the physical address of mbuf data.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
Acked-by: Olivier Matz <olivier.matz@6wind.com>
---
 app/test/test_dmadev.c | 33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c
index 9e8e101f40..fe62e98af8 100644
--- a/app/test/test_dmadev.c
+++ b/app/test/test_dmadev.c
@@ -110,8 +110,8 @@ do_multi_copies(int16_t dev_id, uint16_t vchan,
 		for (j = 0; j < COPY_LEN/sizeof(uint64_t); j++)
 			src_data[j] = rte_rand();
 
-		if (rte_dma_copy(dev_id, vchan, srcs[i]->buf_iova + srcs[i]->data_off,
-				dsts[i]->buf_iova + dsts[i]->data_off, COPY_LEN, 0) != id_count++)
+		if (rte_dma_copy(dev_id, vchan, rte_mbuf_data_iova(srcs[i]),
+				 rte_mbuf_data_iova(dsts[i]), COPY_LEN, 0) != id_count++)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", i);
 	}
 	rte_dma_submit(dev_id, vchan);
@@ -317,9 +317,8 @@ test_failure_in_full_burst(int16_t dev_id, uint16_t vchan, bool fence,
 	rte_dma_stats_get(dev_id, vchan, &baseline); /* get a baseline set of stats */
 	for (i = 0; i < COMP_BURST_SZ; i++) {
 		int id = rte_dma_copy(dev_id, vchan,
-				(i == fail_idx ? 0 : (srcs[i]->buf_iova + srcs[i]->data_off)),
-				dsts[i]->buf_iova + dsts[i]->data_off,
-				COPY_LEN, OPT_FENCE(i));
+				      (i == fail_idx ? 0 : rte_mbuf_data_iova(srcs[i])),
+				      rte_mbuf_data_iova(dsts[i]), COPY_LEN, OPT_FENCE(i));
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", i);
 		if (i == fail_idx)
@@ -407,9 +406,8 @@ test_individual_status_query_with_failure(int16_t dev_id, uint16_t vchan, bool f
 
 	for (j = 0; j < COMP_BURST_SZ; j++) {
 		int id = rte_dma_copy(dev_id, vchan,
-				(j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]->data_off)),
-				dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, OPT_FENCE(j));
+				      (j == fail_idx ? 0 : rte_mbuf_data_iova(srcs[j])),
+				      rte_mbuf_data_iova(dsts[j]), COPY_LEN, OPT_FENCE(j));
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 		if (j == fail_idx)
@@ -470,9 +468,8 @@ test_single_item_status_query_with_failure(int16_t dev_id, uint16_t vchan,
 
 	for (j = 0; j < COMP_BURST_SZ; j++) {
 		int id = rte_dma_copy(dev_id, vchan,
-				(j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]->data_off)),
-				dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, 0);
+				      (j == fail_idx ? 0 : rte_mbuf_data_iova(srcs[j])),
+				      rte_mbuf_data_iova(dsts[j]), COPY_LEN, 0);
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 		if (j == fail_idx)
@@ -529,15 +526,14 @@ test_multi_failure(int16_t dev_id, uint16_t vchan, struct rte_mbuf **srcs, struc
 
 	/* enqueue and gather completions in one go */
 	for (j = 0; j < COMP_BURST_SZ; j++) {
-		uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
+		uintptr_t src = rte_mbuf_data_iova(srcs[j]);
 		/* set up for failure if the current index is anywhere is the fails array */
 		for (i = 0; i < num_fail; i++)
 			if (j == fail[i])
 				src = 0;
 
-		int id = rte_dma_copy(dev_id, vchan,
-				src, dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, 0);
+		int id = rte_dma_copy(dev_id, vchan, src, rte_mbuf_data_iova(dsts[j]),
+				      COPY_LEN, 0);
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 	}
@@ -565,15 +561,14 @@ test_multi_failure(int16_t dev_id, uint16_t vchan, struct rte_mbuf **srcs, struc
 
 	/* enqueue and gather completions in bursts, but getting errors one at a time */
 	for (j = 0; j < COMP_BURST_SZ; j++) {
-		uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
+		uintptr_t src = rte_mbuf_data_iova(srcs[j]);
 		/* set up for failure if the current index is anywhere is the fails array */
 		for (i = 0; i < num_fail; i++)
 			if (j == fail[i])
 				src = 0;
 
-		int id = rte_dma_copy(dev_id, vchan,
-				src, dsts[j]->buf_iova + dsts[j]->data_off,
-				COPY_LEN, 0);
+		int id = rte_dma_copy(dev_id, vchan, src, rte_mbuf_data_iova(dsts[j]),
+				      COPY_LEN, 0);
 		if (id < 0)
 			ERR_RETURN("Error with rte_dma_copy for buffer %u\n", j);
 	}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v5 3/7] build: add meson option to configure IOVA mode as PA
  2022-10-07 21:02                   ` [PATCH v5 " Shijith Thotton
  2022-10-07 21:02                     ` [PATCH v5 1/7] mbuf: add API to get and set mbuf physical address Shijith Thotton
  2022-10-07 21:02                     ` [PATCH v5 2/7] test/dma: use API to get mbuf data " Shijith Thotton
@ 2022-10-07 21:02                     ` Shijith Thotton
  2022-10-07 21:02                     ` [PATCH v5 4/7] mbuf: add second dynamic field member Shijith Thotton
                                       ` (4 subsequent siblings)
  7 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 21:02 UTC (permalink / raw)
  To: dev
  Cc: Shijith Thotton, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	mb, olivier.matz, stephen, thomas, Dongdong Liu, Yisen Zhuang

IOVA mode in DPDK is either PA or VA. The new build option
enable_iova_as_pa configures the mode to PA at compile time. By default,
this option is enabled. If the option is disabled, only drivers which
support it are enabled during build. A supporting driver can set the
flag pmd_supports_disable_iova_as_pa in its build file.

The mbuf structure holds the physical (PA) and virtual (VA) addresses of
a buffer. If IOVA as PA is disabled at compile time, the PA field
(buf_iova) of mbuf is redundant, as it is the same as the VA, and is
replaced by a dummy field.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
Acked-by: Olivier Matz <olivier.matz@6wind.com>
---
 app/test/test_mbuf.c         | 12 +++++++-----
 config/meson.build           |  1 +
 drivers/meson.build          |  6 ++++++
 drivers/net/hns3/meson.build |  6 ++++++
 lib/eal/linux/eal.c          |  6 ++++++
 lib/mbuf/rte_mbuf.c          |  2 +-
 lib/mbuf/rte_mbuf.h          |  9 +++++++++
 lib/mbuf/rte_mbuf_core.h     |  6 ++++++
 lib/meson.build              |  3 +++
 meson_options.txt            |  2 ++
 10 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
index f94d33f212..53fe898a38 100644
--- a/app/test/test_mbuf.c
+++ b/app/test/test_mbuf.c
@@ -1232,11 +1232,13 @@ test_failing_mbuf_sanity_check(struct rte_mempool *pktmbuf_pool)
 		return -1;
 	}
 
-	badbuf = *buf;
-	rte_mbuf_iova_set(&badbuf, 0);
-	if (verify_mbuf_check_panics(&badbuf)) {
-		printf("Error with bad-physaddr mbuf test\n");
-		return -1;
+	if (RTE_IOVA_AS_PA) {
+		badbuf = *buf;
+		rte_mbuf_iova_set(&badbuf, 0);
+		if (verify_mbuf_check_panics(&badbuf)) {
+			printf("Error with bad-physaddr mbuf test\n");
+			return -1;
+		}
 	}
 
 	badbuf = *buf;
diff --git a/config/meson.build b/config/meson.build
index 7f7b6c92fd..0fc209db01 100644
--- a/config/meson.build
+++ b/config/meson.build
@@ -309,6 +309,7 @@ endif
 if get_option('mbuf_refcnt_atomic')
     dpdk_conf.set('RTE_MBUF_REFCNT_ATOMIC', true)
 endif
+dpdk_conf.set10('RTE_IOVA_AS_PA', get_option('enable_iova_as_pa'))
 
 compile_time_cpuflags = []
 subdir(arch_subdir)
diff --git a/drivers/meson.build b/drivers/meson.build
index f6ba5ba4fb..2a29c210b5 100644
--- a/drivers/meson.build
+++ b/drivers/meson.build
@@ -106,6 +106,7 @@ foreach subpath:subdirs
         ext_deps = []
         pkgconfig_extra_libs = []
         testpmd_sources = []
+        pmd_supports_disable_iova_as_pa = false
 
         if not enable_drivers.contains(drv_path)
             build = false
@@ -123,6 +124,11 @@ foreach subpath:subdirs
             # pull in driver directory which should update all the local variables
             subdir(drv_path)
 
+            if dpdk_conf.get('RTE_IOVA_AS_PA') == 0 and not pmd_supports_disable_iova_as_pa and not always_enable.contains(drv_path)
+                build = false
+                reason = 'driver does not support disabling IOVA as PA mode'
+            endif
+
             # get dependency objs from strings
             shared_deps = ext_deps
             static_deps = ext_deps
diff --git a/drivers/net/hns3/meson.build b/drivers/net/hns3/meson.build
index f2aede94ed..39d426f232 100644
--- a/drivers/net/hns3/meson.build
+++ b/drivers/net/hns3/meson.build
@@ -13,6 +13,12 @@ if arch_subdir != 'x86' and arch_subdir != 'arm' or not dpdk_conf.get('RTE_ARCH_
     subdir_done()
 endif
 
+if dpdk_conf.get('RTE_IOVA_AS_PA') == 0
+    build = false
+    reason = 'driver does not support disabling IOVA as PA mode'
+    subdir_done()
+endif
+
 sources = files(
         'hns3_cmd.c',
         'hns3_dcb.c',
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index e74542fc71..8c118d0d9f 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -1128,6 +1128,12 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
+	if (rte_eal_iova_mode() == RTE_IOVA_PA && !RTE_IOVA_AS_PA) {
+		rte_eal_init_alert("Cannot use IOVA as 'PA' as it is disabled during build");
+		rte_errno = EINVAL;
+		return -1;
+	}
+
 	RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
 		rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");
 
diff --git a/lib/mbuf/rte_mbuf.c b/lib/mbuf/rte_mbuf.c
index 16f6ed6731..cfd8062f1e 100644
--- a/lib/mbuf/rte_mbuf.c
+++ b/lib/mbuf/rte_mbuf.c
@@ -388,7 +388,7 @@ int rte_mbuf_check(const struct rte_mbuf *m, int is_header,
 		*reason = "bad mbuf pool";
 		return -1;
 	}
-	if (rte_mbuf_iova_get(m) == 0) {
+	if (RTE_IOVA_AS_PA && rte_mbuf_iova_get(m) == 0) {
 		*reason = "bad IO addr";
 		return -1;
 	}
diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h
index c9f74586c3..ea0c5e16a0 100644
--- a/lib/mbuf/rte_mbuf.h
+++ b/lib/mbuf/rte_mbuf.h
@@ -146,7 +146,11 @@ static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
 static inline rte_iova_t
 rte_mbuf_iova_get(const struct rte_mbuf *m)
 {
+#if RTE_IOVA_AS_PA
 	return m->buf_iova;
+#else
+	return (rte_iova_t)m->buf_addr;
+#endif
 }
 
 /**
@@ -160,7 +164,12 @@ rte_mbuf_iova_get(const struct rte_mbuf *m)
 static inline void
 rte_mbuf_iova_set(struct rte_mbuf *m, rte_iova_t iova)
 {
+#if RTE_IOVA_AS_PA
 	m->buf_iova = iova;
+#else
+	RTE_SET_USED(m);
+	RTE_SET_USED(iova);
+#endif
 }
 
 /**
diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index 51a12a1fb9..91c2211b44 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -467,13 +467,19 @@ struct rte_mbuf {
 	RTE_MARKER cacheline0;
 
 	void *buf_addr;           /**< Virtual address of segment buffer. */
+#if RTE_IOVA_AS_PA
 	/**
 	 * Physical address of segment buffer.
+	 * This field is undefined if the build is configured to use only
+	 * virtual address as IOVA (i.e. RTE_IOVA_AS_PA is 0).
 	 * Force alignment to 8-bytes, so as to ensure we have the exact
 	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
 	 * working on vector drivers easier.
 	 */
 	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
+#else
+	uint64_t dummy;
+#endif
 
 	/* next 8 bytes are initialised on RX descriptor rearm */
 	RTE_MARKER64 rearm_data;
diff --git a/lib/meson.build b/lib/meson.build
index c648f7d800..c071a6c8e0 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -88,6 +88,9 @@ optional_libs = [
 disabled_libs = []
 opt_disabled_libs = run_command(list_dir_globs, get_option('disable_libs'),
         check: true).stdout().split()
+if dpdk_conf.get('RTE_IOVA_AS_PA') == 0
+    opt_disabled_libs += ['kni']
+endif
 foreach l:opt_disabled_libs
     if not optional_libs.contains(l)
         warning('Cannot disable mandatory library "@0@"'.format(l))
diff --git a/meson_options.txt b/meson_options.txt
index 8640f599ae..54d504fab2 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -44,6 +44,8 @@ option('platform', type: 'string', value: 'native', description:
        'Platform to build, either "native", "generic" or a SoC. Please refer to the Linux build guide for more information.')
 option('enable_trace_fp', type: 'boolean', value: false, description:
        'enable fast path trace points.')
+option('enable_iova_as_pa', type: 'boolean', value: true, description:
+       'Enable or disable support for IOVA as PA mode. Disabling this option removes the buf_iova field of mbuf.')
 option('tests', type: 'boolean', value: true, description:
        'build unit tests')
 option('use_hpet', type: 'boolean', value: false, description:
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v5 4/7] mbuf: add second dynamic field member
  2022-10-07 21:02                   ` [PATCH v5 " Shijith Thotton
                                       ` (2 preceding siblings ...)
  2022-10-07 21:02                     ` [PATCH v5 3/7] build: add meson option to configure IOVA mode as PA Shijith Thotton
@ 2022-10-07 21:02                     ` Shijith Thotton
  2022-10-07 21:02                     ` [PATCH v5 5/7] lib: move mbuf next pointer to first cache line Shijith Thotton
                                       ` (3 subsequent siblings)
  7 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 21:02 UTC (permalink / raw)
  To: dev
  Cc: Shijith Thotton, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	mb, olivier.matz, stephen, thomas

If IOVA as PA is disabled during build, mbuf physical address field is
undefined. This space is used to add the second dynamic field.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
Acked-by: Olivier Matz <olivier.matz@6wind.com>
---
 lib/mbuf/rte_mbuf_core.h | 6 +++++-
 lib/mbuf/rte_mbuf_dyn.c  | 3 +++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index 91c2211b44..dc6c54015e 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -478,7 +478,11 @@ struct rte_mbuf {
 	 */
 	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
 #else
-	uint64_t dummy;
+	/**
+	 * Reserved for dynamic field in builds where physical address
+	 * field is undefined.
+	 */
+	uint64_t dynfield2;
 #endif
 
 	/* next 8 bytes are initialised on RX descriptor rearm */
diff --git a/lib/mbuf/rte_mbuf_dyn.c b/lib/mbuf/rte_mbuf_dyn.c
index 4ae79383b5..35839e938c 100644
--- a/lib/mbuf/rte_mbuf_dyn.c
+++ b/lib/mbuf/rte_mbuf_dyn.c
@@ -128,6 +128,9 @@ init_shared_mem(void)
 		 */
 		memset(shm, 0, sizeof(*shm));
 		mark_free(dynfield1);
+#if !RTE_IOVA_AS_PA
+		mark_free(dynfield2);
+#endif
 
 		/* init free_flags */
 		for (mask = RTE_MBUF_F_FIRST_FREE; mask <= RTE_MBUF_F_LAST_FREE; mask <<= 1)
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v5 5/7] lib: move mbuf next pointer to first cache line
  2022-10-07 21:02                   ` [PATCH v5 " Shijith Thotton
                                       ` (3 preceding siblings ...)
  2022-10-07 21:02                     ` [PATCH v5 4/7] mbuf: add second dynamic field member Shijith Thotton
@ 2022-10-07 21:02                     ` Shijith Thotton
  2022-10-07 21:22                       ` Stephen Hemminger
  2022-10-07 21:02                     ` [PATCH v5 6/7] drivers: mark cnxk PMDs work with IOVA as PA disabled Shijith Thotton
                                       ` (2 subsequent siblings)
  7 siblings, 1 reply; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 21:02 UTC (permalink / raw)
  To: dev
  Cc: Shijith Thotton, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	mb, olivier.matz, stephen, thomas

Swapped the positions of the mbuf next pointer and the second dynamic field
(dynfield2) when the build is configured to disable IOVA as PA. This moves
the mbuf next pointer to the first cache line.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
Acked-by: Olivier Matz <olivier.matz@6wind.com>
---
 doc/guides/rel_notes/release_22_11.rst |  3 +++
 lib/mbuf/rte_mbuf_core.h               | 19 ++++++++++++++-----
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/doc/guides/rel_notes/release_22_11.rst b/doc/guides/rel_notes/release_22_11.rst
index 7431dda461..ab69db8d70 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -385,6 +385,9 @@ ABI Changes
 * eventdev: Added ``weight`` and ``affinity`` fields
   to ``rte_event_queue_conf`` structure.
 
+* mbuf: Replaced ``buf_iova`` field with ``next`` field and added a new field
+  ``dynfield2`` at its place in second cacheline if ``RTE_IOVA_AS_PA`` is 0.
+
 
 Known Issues
 ------------
diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index dc6c54015e..37d3fcc3b8 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -479,10 +479,11 @@ struct rte_mbuf {
 	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));
 #else
 	/**
-	 * Reserved for dynamic field in builds where physical address
-	 * field is undefined.
+	 * Next segment of scattered packet.
+	 * This field is valid when physical address field is undefined.
+	 * Otherwise next pointer in the second cache line will be used.
 	 */
-	uint64_t dynfield2;
+	struct rte_mbuf *next;
 #endif
 
 	/* next 8 bytes are initialised on RX descriptor rearm */
@@ -599,11 +600,19 @@ struct rte_mbuf {
 	/* second cache line - fields only used in slow path or on TX */
 	RTE_MARKER cacheline1 __rte_cache_min_aligned;
 
+#if RTE_IOVA_AS_PA
 	/**
-	 * Next segment of scattered packet. Must be NULL in the last segment or
-	 * in case of non-segmented packet.
+	 * Next segment of scattered packet. Must be NULL in the last
+	 * segment or in case of non-segmented packet.
 	 */
 	struct rte_mbuf *next;
+#else
+	/**
+	 * Reserved for dynamic field when the next pointer is in first
+	 * cache line (i.e. RTE_IOVA_AS_PA is 0).
+	 */
+	uint64_t dynfield2;
+#endif
 
 	/* fields to support TX offloads */
 	RTE_STD_C11
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v5 6/7] drivers: mark cnxk PMDs work with IOVA as PA disabled
  2022-10-07 21:02                   ` [PATCH v5 " Shijith Thotton
                                       ` (4 preceding siblings ...)
  2022-10-07 21:02                     ` [PATCH v5 5/7] lib: move mbuf next pointer to first cache line Shijith Thotton
@ 2022-10-07 21:02                     ` Shijith Thotton
  2022-10-07 21:02                     ` [PATCH v5 7/7] drivers: mark software " Shijith Thotton
  2022-10-09  9:34                     ` [PATCH v5 0/7] mbuf dynamic field expansion Thomas Monjalon
  7 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 21:02 UTC (permalink / raw)
  To: dev
  Cc: Shijith Thotton, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	mb, olivier.matz, stephen, thomas, Ruifeng Wang,
	Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao,
	Ankur Dwivedi, Anoob Joseph, Tejasree Kondoj,
	Radha Mohan Chintakuntla, Veerasenareddy Burru, Pavan Nikhilesh,
	Ashwin Sekhar T K, Jakub Palider, Tomasz Duszynski

Enabled the flag pmd_supports_disable_iova_as_pa in cnxk driver build
files as they work with IOVA as VA. Updated cn9k and cn10k soc build
configurations to disable the IOVA as PA build by default.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
Acked-by: Olivier Matz <olivier.matz@6wind.com>
---
 config/arm/meson.build                   |  8 +++-
 doc/guides/platform/cnxk.rst             |  3 +-
 drivers/common/cnxk/meson.build          |  1 +
 drivers/crypto/cnxk/cn10k_ipsec_la_ops.h |  4 +-
 drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  |  2 +-
 drivers/crypto/cnxk/meson.build          |  2 +
 drivers/dma/cnxk/meson.build             |  1 +
 drivers/event/cnxk/meson.build           |  1 +
 drivers/mempool/cnxk/meson.build         |  1 +
 drivers/net/cnxk/cn10k_ethdev.c          |  4 +-
 drivers/net/cnxk/cn10k_tx.h              | 55 +++++++-----------------
 drivers/net/cnxk/cn9k_ethdev.c           |  4 +-
 drivers/net/cnxk/cn9k_tx.h               | 55 +++++++-----------------
 drivers/net/cnxk/cnxk_ethdev.h           |  2 +-
 drivers/net/cnxk/meson.build             |  1 +
 drivers/raw/cnxk_bphy/meson.build        |  1 +
 drivers/raw/cnxk_gpio/meson.build        |  1 +
 17 files changed, 57 insertions(+), 89 deletions(-)

diff --git a/config/arm/meson.build b/config/arm/meson.build
index 9f1636e0d5..6f55a36b56 100644
--- a/config/arm/meson.build
+++ b/config/arm/meson.build
@@ -294,7 +294,8 @@ soc_cn10k = {
     'flags': [
         ['RTE_MAX_LCORE', 24],
         ['RTE_MAX_NUMA_NODES', 1],
-        ['RTE_MEMPOOL_ALIGN', 128]
+        ['RTE_MEMPOOL_ALIGN', 128],
+        ['RTE_IOVA_AS_PA', 0]
     ],
     'part_number': '0xd49',
     'extra_march_features': ['crypto'],
@@ -370,7 +371,10 @@ soc_cn9k = {
     'description': 'Marvell OCTEON 9',
     'implementer': '0x43',
     'part_number': '0xb2',
-    'numa': false
+    'numa': false,
+    'flags': [
+        ['RTE_IOVA_AS_PA', 0]
+    ]
 }
 
 soc_stingray = {
diff --git a/doc/guides/platform/cnxk.rst b/doc/guides/platform/cnxk.rst
index 97b2be5c37..d922e83f26 100644
--- a/doc/guides/platform/cnxk.rst
+++ b/doc/guides/platform/cnxk.rst
@@ -574,7 +574,8 @@ Compile DPDK
 ------------
 
 DPDK may be compiled either natively on OCTEON CN9K/CN10K platform or cross-compiled on
-an x86 based platform.
+an x86 based platform. Meson build option ``enable_iova_as_pa`` is disabled on cnxk
+platforms. So only PMDs supporting this option are enabled on cnxk platform builds.
 
 Native Compilation
 ~~~~~~~~~~~~~~~~~~
diff --git a/drivers/common/cnxk/meson.build b/drivers/common/cnxk/meson.build
index 127fcbcdc5..849735921c 100644
--- a/drivers/common/cnxk/meson.build
+++ b/drivers/common/cnxk/meson.build
@@ -87,3 +87,4 @@ sources += files('cnxk_telemetry_bphy.c',
 )
 
 deps += ['bus_pci', 'net', 'telemetry']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
index e220863799..21502e0eb2 100644
--- a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
+++ b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h
@@ -86,7 +86,7 @@ process_outb_sa(struct roc_cpt_lf *lf, struct rte_crypto_op *cop,
 
 	/* Prepare CPT instruction */
 	inst->w4.u64 = inst_w4_u64 | rte_pktmbuf_pkt_len(m_src);
-	dptr = rte_pktmbuf_iova(m_src);
+	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
 	inst->dptr = dptr;
 	inst->rptr = dptr;
 
@@ -103,7 +103,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct cn10k_ipsec_sa *sa,
 
 	/* Prepare CPT instruction */
 	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
-	dptr = rte_pktmbuf_iova(m_src);
+	dptr = rte_pktmbuf_mtod(m_src, uint64_t);
 	inst->dptr = dptr;
 	inst->rptr = dptr;
 
diff --git a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
index e469596756..8b68e4c728 100644
--- a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
+++ b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
@@ -99,7 +99,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct cn9k_ipsec_sa *sa,
 
 	/* Prepare CPT instruction */
 	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
-	inst->dptr = inst->rptr = rte_pktmbuf_iova(m_src);
+	inst->dptr = inst->rptr = rte_pktmbuf_mtod(m_src, uint64_t);
 	inst->w7.u64 = sa->inst.w7;
 }
 #endif /* __CN9K_IPSEC_LA_OPS_H__ */
diff --git a/drivers/crypto/cnxk/meson.build b/drivers/crypto/cnxk/meson.build
index 8db861f908..a5acabab2b 100644
--- a/drivers/crypto/cnxk/meson.build
+++ b/drivers/crypto/cnxk/meson.build
@@ -31,3 +31,5 @@ if get_option('buildtype').contains('debug')
 else
     cflags += [ '-ULA_IPSEC_DEBUG','-UCNXK_CRYPTODEV_DEBUG' ]
 endif
+
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/dma/cnxk/meson.build b/drivers/dma/cnxk/meson.build
index d4be4ee860..252e5ff78b 100644
--- a/drivers/dma/cnxk/meson.build
+++ b/drivers/dma/cnxk/meson.build
@@ -3,3 +3,4 @@
 
 deps += ['bus_pci', 'common_cnxk', 'dmadev']
 sources = files('cnxk_dmadev.c')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index b27bae7b12..aa42ab3a90 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -479,3 +479,4 @@ foreach flag: extra_flags
 endforeach
 
 deps += ['bus_pci', 'common_cnxk', 'net_cnxk', 'crypto_cnxk']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/mempool/cnxk/meson.build b/drivers/mempool/cnxk/meson.build
index d5d1978569..d8bcc41ca0 100644
--- a/drivers/mempool/cnxk/meson.build
+++ b/drivers/mempool/cnxk/meson.build
@@ -17,3 +17,4 @@ sources = files(
 )
 
 deps += ['eal', 'mbuf', 'kvargs', 'bus_pci', 'common_cnxk', 'mempool']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c
index e8faeebe1f..0b33b3a496 100644
--- a/drivers/net/cnxk/cn10k_ethdev.c
+++ b/drivers/net/cnxk/cn10k_ethdev.c
@@ -67,9 +67,9 @@ nix_tx_offload_flags(struct rte_eth_dev *eth_dev)
 	RTE_BUILD_BUG_ON(RTE_MBUF_OUTL2_LEN_BITS != 7);
 	RTE_BUILD_BUG_ON(RTE_MBUF_OUTL3_LEN_BITS != 9);
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
-			 offsetof(struct rte_mbuf, buf_iova) + 8);
+			 offsetof(struct rte_mbuf, buf_addr) + 16);
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
-			 offsetof(struct rte_mbuf, buf_iova) + 16);
+			 offsetof(struct rte_mbuf, buf_addr) + 24);
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
 			 offsetof(struct rte_mbuf, ol_flags) + 12);
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, tx_offload) !=
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 492942de15..36fa96f83f 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -1900,14 +1900,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 		mbuf2 = (uint64_t *)tx_pkts[2];
 		mbuf3 = (uint64_t *)tx_pkts[3];
 
-		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
-				     offsetof(struct rte_mbuf, buf_iova));
 		/*
 		 * Get mbuf's, olflags, iova, pktlen, dataoff
 		 * dataoff_iovaX.D[0] = iova,
@@ -1915,28 +1907,24 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 		 * len_olflagsX.D[0] = ol_flags,
 		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
 		 */
-		dataoff_iova0 = vld1q_u64(mbuf0);
-		len_olflags0 = vld1q_u64(mbuf0 + 2);
-		dataoff_iova1 = vld1q_u64(mbuf1);
-		len_olflags1 = vld1q_u64(mbuf1 + 2);
-		dataoff_iova2 = vld1q_u64(mbuf2);
-		len_olflags2 = vld1q_u64(mbuf2 + 2);
-		dataoff_iova3 = vld1q_u64(mbuf3);
-		len_olflags3 = vld1q_u64(mbuf3 + 2);
+		dataoff_iova0 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf0), 1);
+		len_olflags0 = vld1q_u64(mbuf0 + 3);
+		dataoff_iova1 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf1)->data_off, vld1q_u64(mbuf1), 1);
+		len_olflags1 = vld1q_u64(mbuf1 + 3);
+		dataoff_iova2 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf2)->data_off, vld1q_u64(mbuf2), 1);
+		len_olflags2 = vld1q_u64(mbuf2 + 3);
+		dataoff_iova3 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf3)->data_off, vld1q_u64(mbuf3), 1);
+		len_olflags3 = vld1q_u64(mbuf3 + 3);
 
 		/* Move mbufs to point pool */
-		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
+		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct rte_mbuf, pool));
+		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct rte_mbuf, pool));
+		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct rte_mbuf, pool));
+		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct rte_mbuf, pool));
 
 		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
 			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
@@ -1986,17 +1974,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
 		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
 
-		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
-		const uint64x2_t and_mask0 = {
-			0xFFFFFFFFFFFFFFFF,
-			0x000000000000FFFF,
-		};
-
-		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
-		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
-		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
-		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
-
 		/*
 		 * Pick only 16 bits of pktlen preset at bits 63:32
 		 * and place them at bits 15:0.
diff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c
index 4fb0e2d94e..3b702d9696 100644
--- a/drivers/net/cnxk/cn9k_ethdev.c
+++ b/drivers/net/cnxk/cn9k_ethdev.c
@@ -67,9 +67,9 @@ nix_tx_offload_flags(struct rte_eth_dev *eth_dev)
 	RTE_BUILD_BUG_ON(RTE_MBUF_OUTL2_LEN_BITS != 7);
 	RTE_BUILD_BUG_ON(RTE_MBUF_OUTL3_LEN_BITS != 9);
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
-			 offsetof(struct rte_mbuf, buf_iova) + 8);
+			 offsetof(struct rte_mbuf, buf_addr) + 16);
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
-			 offsetof(struct rte_mbuf, buf_iova) + 16);
+			 offsetof(struct rte_mbuf, buf_addr) + 24);
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
 			 offsetof(struct rte_mbuf, ol_flags) + 12);
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, tx_offload) !=
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index a609814dfb..404edd6aed 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -1005,14 +1005,6 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		mbuf2 = (uint64_t *)tx_pkts[2];
 		mbuf3 = (uint64_t *)tx_pkts[3];
 
-		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
-				     offsetof(struct rte_mbuf, buf_iova));
 		/*
 		 * Get mbuf's, olflags, iova, pktlen, dataoff
 		 * dataoff_iovaX.D[0] = iova,
@@ -1020,28 +1012,24 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		 * len_olflagsX.D[0] = ol_flags,
 		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
 		 */
-		dataoff_iova0 = vld1q_u64(mbuf0);
-		len_olflags0 = vld1q_u64(mbuf0 + 2);
-		dataoff_iova1 = vld1q_u64(mbuf1);
-		len_olflags1 = vld1q_u64(mbuf1 + 2);
-		dataoff_iova2 = vld1q_u64(mbuf2);
-		len_olflags2 = vld1q_u64(mbuf2 + 2);
-		dataoff_iova3 = vld1q_u64(mbuf3);
-		len_olflags3 = vld1q_u64(mbuf3 + 2);
+		dataoff_iova0 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf0), 1);
+		len_olflags0 = vld1q_u64(mbuf0 + 3);
+		dataoff_iova1 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf1)->data_off, vld1q_u64(mbuf1), 1);
+		len_olflags1 = vld1q_u64(mbuf1 + 3);
+		dataoff_iova2 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf2)->data_off, vld1q_u64(mbuf2), 1);
+		len_olflags2 = vld1q_u64(mbuf2 + 3);
+		dataoff_iova3 =
+			vsetq_lane_u64(((struct rte_mbuf *)mbuf3)->data_off, vld1q_u64(mbuf3), 1);
+		len_olflags3 = vld1q_u64(mbuf3 + 3);
 
 		/* Move mbufs to point pool */
-		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
-		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
-				     offsetof(struct rte_mbuf, pool) -
-				     offsetof(struct rte_mbuf, buf_iova));
+		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct rte_mbuf, pool));
+		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct rte_mbuf, pool));
+		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct rte_mbuf, pool));
+		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct rte_mbuf, pool));
 
 		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
 			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
@@ -1091,17 +1079,6 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
 		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
 
-		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
-		const uint64x2_t and_mask0 = {
-			0xFFFFFFFFFFFFFFFF,
-			0x000000000000FFFF,
-		};
-
-		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
-		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
-		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
-		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
-
 		/*
 		 * Pick only 16 bits of pktlen preset at bits 63:32
 		 * and place them at bits 15:0.
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 5204c46244..dd0946912f 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -697,7 +697,7 @@ cnxk_pktmbuf_detach(struct rte_mbuf *m)
 
 	m->priv_size = priv_size;
 	m->buf_addr = (char *)m + mbuf_size;
-	m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
+	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
 	m->buf_len = (uint16_t)buf_len;
 	rte_pktmbuf_reset_headroom(m);
 	m->data_len = 0;
diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
index f347e98fce..5efb2000cf 100644
--- a/drivers/net/cnxk/meson.build
+++ b/drivers/net/cnxk/meson.build
@@ -194,3 +194,4 @@ foreach flag: extra_flags
 endforeach
 
 headers = files('rte_pmd_cnxk.h')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/raw/cnxk_bphy/meson.build b/drivers/raw/cnxk_bphy/meson.build
index 14147feaf4..ffb0ee6b7e 100644
--- a/drivers/raw/cnxk_bphy/meson.build
+++ b/drivers/raw/cnxk_bphy/meson.build
@@ -10,3 +10,4 @@ sources = files(
         'cnxk_bphy_irq.c',
 )
 headers = files('rte_pmd_bphy.h')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/raw/cnxk_gpio/meson.build b/drivers/raw/cnxk_gpio/meson.build
index a75a5b9084..f52a7be9eb 100644
--- a/drivers/raw/cnxk_gpio/meson.build
+++ b/drivers/raw/cnxk_gpio/meson.build
@@ -9,3 +9,4 @@ sources = files(
         'cnxk_gpio_selftest.c',
 )
 headers = files('rte_pmd_cnxk_gpio.h')
+pmd_supports_disable_iova_as_pa = true
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* [PATCH v5 7/7] drivers: mark software PMDs work with IOVA as PA disabled
  2022-10-07 21:02                   ` [PATCH v5 " Shijith Thotton
                                       ` (5 preceding siblings ...)
  2022-10-07 21:02                     ` [PATCH v5 6/7] drivers: mark cnxk PMDs work with IOVA as PA disabled Shijith Thotton
@ 2022-10-07 21:02                     ` Shijith Thotton
  2022-10-09  9:34                     ` [PATCH v5 0/7] mbuf dynamic field expansion Thomas Monjalon
  7 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 21:02 UTC (permalink / raw)
  To: dev
  Cc: Shijith Thotton, Honnappa.Nagarahalli, bruce.richardson, jerinj,
	mb, olivier.matz, stephen, thomas, Ruifeng Wang, Kai Ji,
	Pablo de Lara, Chengwen Feng, Kevin Laatz, Mattias Rönnblom,
	Liang Ma, Peter Mccarthy, Harry van Haaren, Artem V. Andreev,
	Andrew Rybchenko, John W. Linville, Ciara Loftus, Qi Zhang,
	Chas Williams, Min Hu (Connor),
	Gaetan Rivet, Jakub Grajciar, Tetsuya Mukawa, Sachin Saxena,
	Hemant Agrawal

Enabled software PMDs in builds with IOVA as PA disabled, as they work
with IOVA as VA.

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
Acked-by: Olivier Matz <olivier.matz@6wind.com>
---
 drivers/crypto/armv8/meson.build    | 1 +
 drivers/crypto/ipsec_mb/meson.build | 1 +
 drivers/crypto/null/meson.build     | 1 +
 drivers/crypto/openssl/meson.build  | 1 +
 drivers/dma/skeleton/meson.build    | 1 +
 drivers/event/dsw/meson.build       | 1 +
 drivers/event/opdl/meson.build      | 1 +
 drivers/event/skeleton/meson.build  | 1 +
 drivers/event/sw/meson.build        | 1 +
 drivers/mempool/bucket/meson.build  | 1 +
 drivers/mempool/ring/meson.build    | 1 +
 drivers/mempool/stack/meson.build   | 1 +
 drivers/net/af_packet/meson.build   | 1 +
 drivers/net/af_xdp/meson.build      | 2 ++
 drivers/net/bonding/meson.build     | 1 +
 drivers/net/failsafe/meson.build    | 1 +
 drivers/net/memif/meson.build       | 1 +
 drivers/net/null/meson.build        | 1 +
 drivers/net/pcap/meson.build        | 1 +
 drivers/net/ring/meson.build        | 1 +
 drivers/net/tap/meson.build         | 1 +
 drivers/raw/skeleton/meson.build    | 1 +
 22 files changed, 23 insertions(+)

diff --git a/drivers/crypto/armv8/meson.build b/drivers/crypto/armv8/meson.build
index 5effba8bbc..700fb80eb2 100644
--- a/drivers/crypto/armv8/meson.build
+++ b/drivers/crypto/armv8/meson.build
@@ -17,3 +17,4 @@ endif
 ext_deps += dep
 deps += ['bus_vdev']
 sources = files('rte_armv8_pmd.c', 'rte_armv8_pmd_ops.c')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/crypto/ipsec_mb/meson.build b/drivers/crypto/ipsec_mb/meson.build
index 64fc22611d..ec147d2110 100644
--- a/drivers/crypto/ipsec_mb/meson.build
+++ b/drivers/crypto/ipsec_mb/meson.build
@@ -41,3 +41,4 @@ sources = files(
         'pmd_zuc.c',
 )
 deps += ['bus_vdev', 'net', 'security']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/crypto/null/meson.build b/drivers/crypto/null/meson.build
index acc16e7d81..59a7508f18 100644
--- a/drivers/crypto/null/meson.build
+++ b/drivers/crypto/null/meson.build
@@ -9,3 +9,4 @@ endif
 
 deps += 'bus_vdev'
 sources = files('null_crypto_pmd.c', 'null_crypto_pmd_ops.c')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/crypto/openssl/meson.build b/drivers/crypto/openssl/meson.build
index cd962da1d6..d165c32ae8 100644
--- a/drivers/crypto/openssl/meson.build
+++ b/drivers/crypto/openssl/meson.build
@@ -15,3 +15,4 @@ endif
 deps += 'bus_vdev'
 sources = files('rte_openssl_pmd.c', 'rte_openssl_pmd_ops.c')
 ext_deps += dep
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/dma/skeleton/meson.build b/drivers/dma/skeleton/meson.build
index 8871b80956..2b0422ce61 100644
--- a/drivers/dma/skeleton/meson.build
+++ b/drivers/dma/skeleton/meson.build
@@ -5,3 +5,4 @@ deps += ['dmadev', 'kvargs', 'ring', 'bus_vdev']
 sources = files(
         'skeleton_dmadev.c',
 )
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/event/dsw/meson.build b/drivers/event/dsw/meson.build
index 2df0fac4ff..e6808c0f71 100644
--- a/drivers/event/dsw/meson.build
+++ b/drivers/event/dsw/meson.build
@@ -6,3 +6,4 @@ if cc.has_argument('-Wno-format-nonliteral')
     cflags += '-Wno-format-nonliteral'
 endif
 sources = files('dsw_evdev.c', 'dsw_event.c', 'dsw_xstats.c')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/event/opdl/meson.build b/drivers/event/opdl/meson.build
index 786d2f4e82..7abef44609 100644
--- a/drivers/event/opdl/meson.build
+++ b/drivers/event/opdl/meson.build
@@ -9,3 +9,4 @@ sources = files(
         'opdl_test.c',
 )
 deps += ['bus_vdev']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/event/skeleton/meson.build b/drivers/event/skeleton/meson.build
index acfe156532..fa6a5e0a9f 100644
--- a/drivers/event/skeleton/meson.build
+++ b/drivers/event/skeleton/meson.build
@@ -3,3 +3,4 @@
 
 sources = files('skeleton_eventdev.c')
 deps += ['bus_pci', 'bus_vdev']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/event/sw/meson.build b/drivers/event/sw/meson.build
index 6f81567efb..8d815dfa84 100644
--- a/drivers/event/sw/meson.build
+++ b/drivers/event/sw/meson.build
@@ -9,3 +9,4 @@ sources = files(
         'sw_evdev.c',
 )
 deps += ['hash', 'bus_vdev']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/mempool/bucket/meson.build b/drivers/mempool/bucket/meson.build
index 0051b6ac3c..94c060904b 100644
--- a/drivers/mempool/bucket/meson.build
+++ b/drivers/mempool/bucket/meson.build
@@ -12,3 +12,4 @@ if is_windows
 endif
 
 sources = files('rte_mempool_bucket.c')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/mempool/ring/meson.build b/drivers/mempool/ring/meson.build
index a021e908cf..65d203d4b7 100644
--- a/drivers/mempool/ring/meson.build
+++ b/drivers/mempool/ring/meson.build
@@ -2,3 +2,4 @@
 # Copyright(c) 2017 Intel Corporation
 
 sources = files('rte_mempool_ring.c')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/mempool/stack/meson.build b/drivers/mempool/stack/meson.build
index 580dde79eb..961e90fc04 100644
--- a/drivers/mempool/stack/meson.build
+++ b/drivers/mempool/stack/meson.build
@@ -4,3 +4,4 @@
 sources = files('rte_mempool_stack.c')
 
 deps += ['stack']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/af_packet/meson.build b/drivers/net/af_packet/meson.build
index c014e9b61b..bab008d083 100644
--- a/drivers/net/af_packet/meson.build
+++ b/drivers/net/af_packet/meson.build
@@ -6,3 +6,4 @@ if not is_linux
     reason = 'only supported on Linux'
 endif
 sources = files('rte_eth_af_packet.c')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build
index 1e0de23705..7bbab52d8b 100644
--- a/drivers/net/af_xdp/meson.build
+++ b/drivers/net/af_xdp/meson.build
@@ -55,3 +55,5 @@ else
     build = false
     reason = 'missing header, "linux/if_xdp.h"'
 endif
+
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/bonding/meson.build b/drivers/net/bonding/meson.build
index 18ad7e21f3..29022712cb 100644
--- a/drivers/net/bonding/meson.build
+++ b/drivers/net/bonding/meson.build
@@ -22,3 +22,4 @@ deps += 'sched' # needed for rte_bitmap.h
 deps += ['ip_frag']
 
 headers = files('rte_eth_bond.h', 'rte_eth_bond_8023ad.h')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/failsafe/meson.build b/drivers/net/failsafe/meson.build
index b8e5bf70f8..bf8f791984 100644
--- a/drivers/net/failsafe/meson.build
+++ b/drivers/net/failsafe/meson.build
@@ -27,3 +27,4 @@ sources = files(
         'failsafe_ops.c',
         'failsafe_rxtx.c',
 )
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/memif/meson.build b/drivers/net/memif/meson.build
index 680bc8631c..28416a982f 100644
--- a/drivers/net/memif/meson.build
+++ b/drivers/net/memif/meson.build
@@ -12,3 +12,4 @@ sources = files(
 )
 
 deps += ['hash']
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/null/meson.build b/drivers/net/null/meson.build
index 0251578aab..4a483955a7 100644
--- a/drivers/net/null/meson.build
+++ b/drivers/net/null/meson.build
@@ -8,3 +8,4 @@ if is_windows
 endif
 
 sources = files('rte_eth_null.c')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/pcap/meson.build b/drivers/net/pcap/meson.build
index ed7864eb9d..a5a2971f0e 100644
--- a/drivers/net/pcap/meson.build
+++ b/drivers/net/pcap/meson.build
@@ -15,3 +15,4 @@ ext_deps += pcap_dep
 if is_windows
     ext_deps += cc.find_library('iphlpapi', required: true)
 endif
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/ring/meson.build b/drivers/net/ring/meson.build
index 0156b37aad..72792e26b0 100644
--- a/drivers/net/ring/meson.build
+++ b/drivers/net/ring/meson.build
@@ -9,3 +9,4 @@ endif
 
 sources = files('rte_eth_ring.c')
 headers = files('rte_eth_ring.h')
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/net/tap/meson.build b/drivers/net/tap/meson.build
index c09713a67b..4c9a9eac2b 100644
--- a/drivers/net/tap/meson.build
+++ b/drivers/net/tap/meson.build
@@ -35,3 +35,4 @@ foreach arg:args
     config.set(arg[0], cc.has_header_symbol(arg[1], arg[2]))
 endforeach
 configure_file(output : 'tap_autoconf.h', configuration : config)
+pmd_supports_disable_iova_as_pa = true
diff --git a/drivers/raw/skeleton/meson.build b/drivers/raw/skeleton/meson.build
index 950a33cc20..bfb8fd8bcc 100644
--- a/drivers/raw/skeleton/meson.build
+++ b/drivers/raw/skeleton/meson.build
@@ -6,3 +6,4 @@ sources = files(
         'skeleton_rawdev.c',
         'skeleton_rawdev_test.c',
 )
+pmd_supports_disable_iova_as_pa = true
-- 
2.25.1


^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v5 1/7] mbuf: add API to get and set mbuf physical address
  2022-10-07 21:02                     ` [PATCH v5 1/7] mbuf: add API to get and set mbuf physical address Shijith Thotton
@ 2022-10-07 21:20                       ` Stephen Hemminger
  0 siblings, 0 replies; 88+ messages in thread
From: Stephen Hemminger @ 2022-10-07 21:20 UTC (permalink / raw)
  To: Shijith Thotton
  Cc: dev, Honnappa.Nagarahalli, bruce.richardson, jerinj, mb,
	olivier.matz, thomas, Nicolas Chautru, Ciara Power,
	Konstantin Ananyev, Reshma Pattan, Cristian Dumitrescu,
	Maxime Coquelin, Chenbo Xia

On Sat, 8 Oct 2022 02:32:05 +0530
Shijith Thotton <sthotton@marvell.com> wrote:

>  
> +/**
> + * Get the IOVA address of the mbuf data buffer.
> + *
> + * @param m
> + *   The pointer to the mbuf.
> + * @return
> + *   The IOVA address of the mbuf.
> + */
> +static inline rte_iova_t
> +rte_mbuf_iova_get(const struct rte_mbuf *m)
> +{
> +	return m->buf_iova;
> +}
> +
> +/**
> + * Set the IOVA address of the mbuf data buffer
> + *
> + * @param m
> + *   The pointer to the mbuf.
> + * @param iova
> + *   Value to set as IOVA address of the mbuf.
> + */
> +static inline void
> +rte_mbuf_iova_set(struct rte_mbuf *m, rte_iova_t iova)
> +{
> +	m->buf_iova = iova;
> +}

If this were a macro, it could be used on either side of an expression.


^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v5 5/7] lib: move mbuf next pointer to first cache line
  2022-10-07 21:02                     ` [PATCH v5 5/7] lib: move mbuf next pointer to first cache line Shijith Thotton
@ 2022-10-07 21:22                       ` Stephen Hemminger
  2022-10-07 21:30                         ` [EXT] " Shijith Thotton
  0 siblings, 1 reply; 88+ messages in thread
From: Stephen Hemminger @ 2022-10-07 21:22 UTC (permalink / raw)
  To: Shijith Thotton
  Cc: dev, Honnappa.Nagarahalli, bruce.richardson, jerinj, mb,
	olivier.matz, thomas

On Sat, 8 Oct 2022 02:32:09 +0530
Shijith Thotton <sthotton@marvell.com> wrote:

> Swapped position of mbuf next pointer and second dynamic field (dynfield2)
> if the build is configured to disable IOVA as PA. This is to move the
> mbuf next pointer to first cache line.
> 
> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
> Acked-by: Olivier Matz <olivier.matz@6wind.com>

Why not always move it?
Having things on different cache lines based on config options
could cause surprising performance impacts.

^ permalink raw reply	[flat|nested] 88+ messages in thread

* RE: [EXT] Re: [PATCH v5 5/7] lib: move mbuf next pointer to first cache line
  2022-10-07 21:22                       ` Stephen Hemminger
@ 2022-10-07 21:30                         ` Shijith Thotton
  0 siblings, 0 replies; 88+ messages in thread
From: Shijith Thotton @ 2022-10-07 21:30 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, Honnappa.Nagarahalli, bruce.richardson,
	Jerin Jacob Kollanukkaran, mb, olivier.matz, thomas

>> Swapped position of mbuf next pointer and second dynamic field (dynfield2)
>> if the build is configured to disable IOVA as PA. This is to move the
>> mbuf next pointer to first cache line.
>>
>> Signed-off-by: Shijith Thotton <sthotton@marvell.com>
>> Acked-by: Olivier Matz <olivier.matz@6wind.com>
>
>Why not always move it?
>Having things on different cache lines based on config options
>could cause surprising performance impacts.

Some drivers are using the offset of buf_iova.

^ permalink raw reply	[flat|nested] 88+ messages in thread

* Re: [PATCH v5 0/7] mbuf dynamic field expansion
  2022-10-07 21:02                   ` [PATCH v5 " Shijith Thotton
                                       ` (6 preceding siblings ...)
  2022-10-07 21:02                     ` [PATCH v5 7/7] drivers: mark software " Shijith Thotton
@ 2022-10-09  9:34                     ` Thomas Monjalon
  7 siblings, 0 replies; 88+ messages in thread
From: Thomas Monjalon @ 2022-10-09  9:34 UTC (permalink / raw)
  To: Shijith Thotton
  Cc: dev, Honnappa.Nagarahalli, bruce.richardson, jerinj, mb,
	olivier.matz, stephen, ferruh.yigit, pbhagavatula,
	david.marchand

07/10/2022 23:02, Shijith Thotton:
> This is a continuation of the discussions[1] to add mbuf physical address field to dynamic field.
> Previous version was to add PA field to dynamic field area based on the EAL IOVA mode option. It was
> deemed unsafe as some components could still use the PA field without checking IOVA mode and there
> are drivers which need PA to work. One suggestion was to make the IOVA mode check at compile time so
> that drivers which need PA can be disabled during build. This series adds this new meson build
> option. The fourth patch adds the mbuf PA field to the dynamic field on such builds. The last two patches enable
> Marvell cnxk PMDs and software PMDs in IOVA as PA disabled build as they work without PA field.

Applied, thanks.



^ permalink raw reply	[flat|nested] 88+ messages in thread

end of thread, other threads:[~2022-10-09  9:34 UTC | newest]

Thread overview: 88+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-30 16:25 [PATCH] mbuf: add mbuf physical address field to dynamic field Shijith Thotton
2022-06-30 16:45 ` Stephen Hemminger
2022-07-01 12:16   ` Shijith Thotton
2022-07-01 12:24   ` Shijith Thotton
2022-07-03  7:31     ` Morten Brørup
2022-07-04 14:00       ` Bruce Richardson
2022-08-03 15:34         ` [EXT] " Shijith Thotton
2022-08-29 15:16           ` [PATCH v1 0/4] mbuf dynamic field expansion Shijith Thotton
2022-09-07 13:43             ` [PATCH v2 0/5] " Shijith Thotton
2022-09-21  9:43               ` David Marchand
2022-09-21 14:01                 ` [EXT] " Shijith Thotton
2022-09-21 13:56               ` [PATCH v3 " Shijith Thotton
2022-09-21 13:56                 ` [PATCH v3 1/5] build: add meson option to configure IOVA mode as VA Shijith Thotton
2022-09-28 12:52                   ` Olivier Matz
2022-09-29  5:48                     ` [EXT] " Shijith Thotton
2022-09-21 13:56                 ` [PATCH v3 2/5] mbuf: add second dynamic field member for VA only build Shijith Thotton
2022-09-28  7:24                   ` Thomas Monjalon
2022-09-28 12:52                     ` Olivier Matz
2022-09-28 19:33                       ` Thomas Monjalon
2022-09-28 19:48                       ` Stephen Hemminger
2022-09-29  6:13                         ` [EXT] " Shijith Thotton
2022-09-28 12:52                   ` Olivier Matz
2022-09-21 13:56                 ` [PATCH v3 3/5] lib: move mbuf next pointer to first cache line Shijith Thotton
2022-09-21 14:07                   ` Morten Brørup
2022-09-28 12:52                   ` Olivier Matz
2022-09-29  6:14                     ` [EXT] " Shijith Thotton
2022-09-21 13:56                 ` [PATCH v3 4/5] drivers: mark Marvell cnxk PMDs work with IOVA as VA Shijith Thotton
2022-09-28 12:53                   ` Olivier Matz
2022-09-29  6:19                     ` [EXT] " Shijith Thotton
2022-09-29  7:44                       ` Olivier Matz
2022-09-29  8:10                         ` Shijith Thotton
2022-10-07 20:17                   ` Olivier Matz
2022-10-07 20:22                     ` [EXT] " Shijith Thotton
2022-09-21 13:56                 ` [PATCH v3 5/5] drivers: mark software " Shijith Thotton
2022-09-28  5:41                 ` [PATCH v3 0/5] mbuf dynamic field expansion Shijith Thotton
2022-09-28 12:52                 ` Olivier Matz
2022-09-29  4:51                   ` [EXT] " Shijith Thotton
2022-10-07 13:50                 ` Thomas Monjalon
2022-10-07 19:35                   ` [EXT] " Shijith Thotton
2022-10-07 19:30                 ` [PATCH v4 0/7] " Shijith Thotton
2022-10-07 19:30                   ` [PATCH v4 1/7] mbuf: add API to get and set mbuf physical address Shijith Thotton
2022-10-07 20:16                     ` Olivier Matz
2022-10-07 20:20                       ` [EXT] " Shijith Thotton
2022-10-07 19:30                   ` [PATCH v4 2/7] test/dma: use API to get mbuf data " Shijith Thotton
2022-10-07 20:17                     ` Olivier Matz
2022-10-07 19:30                   ` [PATCH v4 3/7] build: add meson option to configure IOVA mode as PA Shijith Thotton
2022-10-07 19:30                   ` [PATCH v4 4/7] mbuf: add second dynamic field member Shijith Thotton
2022-10-07 19:30                   ` [PATCH v4 5/7] lib: move mbuf next pointer to first cache line Shijith Thotton
2022-10-07 19:30                   ` [PATCH v4 6/7] drivers: mark cnxk PMDs work with IOVA as PA disabled Shijith Thotton
2022-10-07 19:30                   ` [PATCH v4 7/7] drivers: mark software " Shijith Thotton
2022-10-07 20:19                   ` [PATCH v4 0/7] mbuf dynamic field expansion Olivier Matz
2022-10-07 21:02                   ` [PATCH v5 " Shijith Thotton
2022-10-07 21:02                     ` [PATCH v5 1/7] mbuf: add API to get and set mbuf physical address Shijith Thotton
2022-10-07 21:20                       ` Stephen Hemminger
2022-10-07 21:02                     ` [PATCH v5 2/7] test/dma: use API to get mbuf data " Shijith Thotton
2022-10-07 21:02                     ` [PATCH v5 3/7] build: add meson option to configure IOVA mode as PA Shijith Thotton
2022-10-07 21:02                     ` [PATCH v5 4/7] mbuf: add second dynamic field member Shijith Thotton
2022-10-07 21:02                     ` [PATCH v5 5/7] lib: move mbuf next pointer to first cache line Shijith Thotton
2022-10-07 21:22                       ` Stephen Hemminger
2022-10-07 21:30                         ` [EXT] " Shijith Thotton
2022-10-07 21:02                     ` [PATCH v5 6/7] drivers: mark cnxk PMDs work with IOVA as PA disabled Shijith Thotton
2022-10-07 21:02                     ` [PATCH v5 7/7] drivers: mark software " Shijith Thotton
2022-10-09  9:34                     ` [PATCH v5 0/7] mbuf dynamic field expansion Thomas Monjalon
2022-09-07 13:43             ` [PATCH v2 1/5] build: add meson option to configure IOVA mode as VA Shijith Thotton
2022-09-07 15:31               ` Stephen Hemminger
2022-09-07 15:38                 ` Bruce Richardson
2022-09-07 21:33                   ` Morten Brørup
2022-09-07 13:43             ` [PATCH v2 2/5] mbuf: add second dynamic field member for VA only build Shijith Thotton
2022-09-07 13:43             ` [PATCH v2 3/5] lib: move mbuf next pointer to first cache line Shijith Thotton
2022-09-07 13:43             ` [PATCH v2 4/5] drivers: mark Marvell cnxk PMDs work with IOVA as VA Shijith Thotton
2022-09-07 13:43             ` [PATCH v2 5/5] drivers: mark software " Shijith Thotton
2022-08-29 15:16           ` [PATCH v1 1/4] build: add meson option to configure IOVA mode " Shijith Thotton
2022-08-29 18:18             ` Morten Brørup
2022-08-30  8:32               ` Bruce Richardson
2022-08-29 15:16           ` [PATCH v1 2/4] mbuf: add second dynamic field member for VA only build Shijith Thotton
2022-08-29 18:32             ` Morten Brørup
2022-08-30  8:35               ` Bruce Richardson
2022-08-30  8:41                 ` [EXT] " Pavan Nikhilesh Bhagavatula
2022-08-30 13:22                   ` Honnappa Nagarahalli
2022-09-07 13:55                     ` Shijith Thotton
2022-08-29 15:16           ` [PATCH v1 3/4] drivers: mark Marvell cnxk PMDs work with IOVA as VA Shijith Thotton
2022-08-29 15:16           ` [PATCH v1 4/4] drivers: mark software " Shijith Thotton
2022-08-30 13:07     ` [PATCH] mbuf: add mbuf physical address field to dynamic field Ferruh Yigit
2022-09-12 13:19       ` [EXT] " Shijith Thotton
2022-06-30 16:55 ` Bruce Richardson
2022-07-01  9:48   ` Olivier Matz
2022-07-01 11:53     ` Slava Ovsiienko
2022-07-01 12:01     ` [EXT] " Shijith Thotton

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).