DPDK patches and discussions
 help / color / Atom feed
* [dpdk-dev] [RFC PATCH] kni: properly translate pa2va for cloned mbuf
@ 2019-09-10 18:58 Junxiao Shi
  2020-01-28 16:16 ` [dpdk-dev] [RFC PATCH v2] " Junxiao Shi
  0 siblings, 1 reply; 5+ messages in thread
From: Junxiao Shi @ 2019-09-10 18:58 UTC (permalink / raw)
  To: dev

Previously, KNI kernel module uses the difference between m->buf_addr
and m->buf_iova to calculate userspace virtual address from physical
address. This works for direct mbufs, but does not work for indirect
(cloned) mbufs that come from another mempool. Transmitting a cloned
mbuf may cause segmentation fault in userspace.

Now, userspace KNI library writes the virtual address of each mbuf
in m->userdata field, and KNI kernel module uses this field to restore
virtual address before putting mbuf into free_q. This approach works
for both direct and indirect mbufs.

NOTE TO REVIEWER - DO NOT MERGE
The idea of this change is at https://bugs.dpdk.org/show_bug.cgi?id=183#c4
Test case is at https://bugs.dpdk.org/show_bug.cgi?id=183#c5
I only modified kni_net_rx_normal function.
If this approach is acceptable, I will modify kni_net_rx_lo_fifo,
kni_net_rx_lo_fifo_skb, and kni_fifo_trans_pa2va(rx_q) as well.

Bugzilla ID: 183

Signed-off-by: Junxiao Shi <git@mail1.yoursunny.com>
---
 kernel/linux/kni/kni_net.c                        | 4 ++--
 lib/librte_eal/linux/eal/include/rte_kni_common.h | 3 ++-
 lib/librte_kni/rte_kni.c                          | 8 ++++++--
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index 7bd3a9f1e..b34ed2ed4 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -341,7 +341,7 @@ kni_net_rx_normal(struct kni_dev *kni)
 		kva = pa2kva(kni->pa[i]);
 		len = kva->pkt_len;
 		data_kva = kva2data_kva(kva);
-		kni->va[i] = pa2va(kni->pa[i], kva);
+		kni->va[i] = kva->va;
 
 		skb = netdev_alloc_skb(dev, len);
 		if (!skb) {
@@ -367,7 +367,7 @@ kni_net_rx_normal(struct kni_dev *kni)
 				kva = pa2kva(kva->next);
 				data_kva = kva2data_kva(kva);
 				/* Convert physical address to virtual address */
-				prev_kva->next = pa2va(prev_kva->next, kva);
+				prev_kva->next = kva->va;
 			}
 		}
 
diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h b/lib/librte_eal/linux/eal/include/rte_kni_common.h
index 37d9ee8f0..bc92c0067 100644
--- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
+++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
@@ -84,7 +84,8 @@ struct rte_kni_mbuf {
 	uint16_t data_len;      /**< Amount of data in segment buffer. */
 
 	/* fields on second cache line */
-	char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
+	void *va __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
+	         /**< Virtual address of this mbuf in userspace (overwrites userdata). */
 	void *pool;
 	void *next;             /**< Physical address of next mbuf in kernel. */
 };
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 4b51fb4fe..96a6f6af2 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -352,13 +352,15 @@ va2pa(struct rte_mbuf *m)
 static void *
 va2pa_all(struct rte_mbuf *mbuf)
 {
-	void *phy_mbuf = va2pa(mbuf);
+	void *phy_mbuf = (void*)rte_mempool_virt2iova(mbuf);
 	struct rte_mbuf *next = mbuf->next;
 	while (next) {
-		mbuf->next = va2pa(next);
+		mbuf->userdata = mbuf;
+		mbuf->next = (void*)rte_mempool_virt2iova(next);
 		mbuf = next;
 		next = mbuf->next;
 	}
+	mbuf->userdata = mbuf;
 	return phy_mbuf;
 }
 
@@ -609,6 +611,8 @@ kni_allocate_mbufs(struct rte_kni *kni)
 			 offsetof(struct rte_kni_mbuf, buf_addr));
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, next) !=
 			 offsetof(struct rte_kni_mbuf, next));
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, userdata) !=
+			 offsetof(struct rte_kni_mbuf, va));
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
 			 offsetof(struct rte_kni_mbuf, data_off));
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
-- 
2.17.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [dpdk-dev] [RFC PATCH v2] kni: properly translate pa2va for cloned mbuf
  2019-09-10 18:58 [dpdk-dev] [RFC PATCH] kni: properly translate pa2va for cloned mbuf Junxiao Shi
@ 2020-01-28 16:16 ` " Junxiao Shi
  2020-02-04 16:17   ` Ferruh Yigit
  0 siblings, 1 reply; 5+ messages in thread
From: Junxiao Shi @ 2020-01-28 16:16 UTC (permalink / raw)
  To: dev; +Cc: sunnylandh

Previously, KNI kernel module uses the difference between m->buf_addr
and m->buf_iova to calculate userspace virtual address from physical
address. This works for direct mbufs, but does not work for indirect
(cloned) mbufs that come from another mempool. Transmitting a cloned
mbuf may cause segmentation fault in userspace.

Now, userspace KNI library writes the virtual address of each mbuf
in m->userdata field, and KNI kernel module uses this field to restore
virtual address before putting mbuf into free_q. This approach works
for both direct and indirect mbufs.

NOTE TO REVIEWER - DO NOT MERGE
The idea of this change is at https://bugs.dpdk.org/show_bug.cgi?id=183#c4
Test case is at https://bugs.dpdk.org/show_bug.cgi?id=183#c5
I only modified kni_net_rx_normal function.
If this approach is acceptable, I will modify kni_net_rx_lo_fifo,
kni_net_rx_lo_fifo_skb, and kni_fifo_trans_pa2va(rx_q) as well.

Bugzilla ID: 183

Signed-off-by: Junxiao Shi <git@mail1.yoursunny.com>
---
 kernel/linux/kni/kni_net.c                        | 4 ++--
 lib/librte_eal/linux/eal/include/rte_kni_common.h | 3 ++-
 lib/librte_kni/rte_kni.c                          | 8 ++++++--
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index 97fe85b..d783545 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -377,7 +377,7 @@ kni_net_rx_normal(struct kni_dev *kni)
 		kva = get_kva(kni, kni->pa[i]);
 		len = kva->pkt_len;
 		data_kva = get_data_kva(kni, kva);
-		kni->va[i] = pa2va(kni->pa[i], kva);
+		kni->va[i] = kva->va;
 
 		skb = netdev_alloc_skb(dev, len);
 		if (!skb) {
@@ -403,7 +403,7 @@ kni_net_rx_normal(struct kni_dev *kni)
 				kva = pa2kva(kva->next);
 				data_kva = kva2data_kva(kva);
 				/* Convert physical address to virtual address */
-				prev_kva->next = pa2va(prev_kva->next, kva);
+				prev_kva->next = kva->va;
 			}
 		}
 
diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h b/lib/librte_eal/linux/eal/include/rte_kni_common.h
index 7313ef5..c694a1d 100644
--- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
+++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
@@ -86,7 +86,8 @@ struct rte_kni_mbuf {
 	uint16_t data_len;      /**< Amount of data in segment buffer. */
 
 	/* fields on second cache line */
-	char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
+	void *va __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
+	         /**< Virtual address of this mbuf in userspace (overwrites userdata). */
 	void *pool;
 	void *next;             /**< Physical address of next mbuf in kernel. */
 };
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index e388751..463485f 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -359,13 +359,15 @@ va2pa(struct rte_mbuf *m)
 static void *
 va2pa_all(struct rte_mbuf *mbuf)
 {
-	void *phy_mbuf = va2pa(mbuf);
+	void *phy_mbuf = (void*)rte_mempool_virt2iova(mbuf);
 	struct rte_mbuf *next = mbuf->next;
 	while (next) {
-		mbuf->next = va2pa(next);
+		mbuf->userdata = mbuf;
+		mbuf->next = (void*)rte_mempool_virt2iova(next);
 		mbuf = next;
 		next = mbuf->next;
 	}
+	mbuf->userdata = mbuf;
 	return phy_mbuf;
 }
 
@@ -652,6 +654,8 @@ kni_allocate_mbufs(struct rte_kni *kni)
 			 offsetof(struct rte_kni_mbuf, buf_addr));
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, next) !=
 			 offsetof(struct rte_kni_mbuf, next));
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, userdata) !=
+			 offsetof(struct rte_kni_mbuf, va));
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
 			 offsetof(struct rte_kni_mbuf, data_off));
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
-- 
2.7.4


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v2] kni: properly translate pa2va for cloned mbuf
  2020-01-28 16:16 ` [dpdk-dev] [RFC PATCH v2] " Junxiao Shi
@ 2020-02-04 16:17   ` Ferruh Yigit
  2020-02-04 16:49     ` yoursunny
  0 siblings, 1 reply; 5+ messages in thread
From: Ferruh Yigit @ 2020-02-04 16:17 UTC (permalink / raw)
  To: Junxiao Shi, dev
  Cc: sunnylandh, Olivier MATZ, Jerin Jacob, Igor Ryzhov, Vamsi Attunuru

On 1/28/2020 4:16 PM, Junxiao Shi wrote:
> Previously, KNI kernel module uses the difference between m->buf_addr
> and m->buf_iova to calculate userspace virtual address from physical
> address. This works for direct mbufs, but does not work for indirect
> (cloned) mbufs that come from another mempool. Transmitting a cloned
> mbuf may cause segmentation fault in userspace.

Yes, indirect buffers from another mempool won't work, I think first thing to do
is detect these mbufs and return error to prevent the crash.

> 
> Now, userspace KNI library writes the virtual address of each mbuf
> in m->userdata field, and KNI kernel module uses this field to restore
> virtual address before putting mbuf into free_q. This approach works
> for both direct and indirect mbufs.

Overwriting the mbuf metadata is not ideal and in some cases it may be
unacceptable so I think we can't make it default behavior.


But why not have this as option, need to find a way to pass this option to kni
library.

And kni library may add a flag to mbuf to say the 'userdata' is used to as 'va'
before sending packet to kernel, this can make kernel code unified which can use
'pa2va()' when there is no flag and 'kva->va' when there is.
Adding this flag to mbuf.h prevents the any possible conflict in the future if
@Oliver accepts this.

And in mbuf, not sure if there is flag for mbuf that marks the 'userdata' have
valid value or not. But detecting it in userspace and not overwriting the value
but returning error in that case can be better option.

What do you think?

> 
> NOTE TO REVIEWER - DO NOT MERGE
> The idea of this change is at https://bugs.dpdk.org/show_bug.cgi?id=183#c4
> Test case is at https://bugs.dpdk.org/show_bug.cgi?id=183#c5
> I only modified kni_net_rx_normal function.
> If this approach is acceptable, I will modify kni_net_rx_lo_fifo,
> kni_net_rx_lo_fifo_skb, and kni_fifo_trans_pa2va(rx_q) as well.
> 
> Bugzilla ID: 183
> 
> Signed-off-by: Junxiao Shi <git@mail1.yoursunny.com>
> ---
>  kernel/linux/kni/kni_net.c                        | 4 ++--
>  lib/librte_eal/linux/eal/include/rte_kni_common.h | 3 ++-
>  lib/librte_kni/rte_kni.c                          | 8 ++++++--
>  3 files changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
> index 97fe85b..d783545 100644
> --- a/kernel/linux/kni/kni_net.c
> +++ b/kernel/linux/kni/kni_net.c
> @@ -377,7 +377,7 @@ kni_net_rx_normal(struct kni_dev *kni)
>  		kva = get_kva(kni, kni->pa[i]);
>  		len = kva->pkt_len;
>  		data_kva = get_data_kva(kni, kva);
> -		kni->va[i] = pa2va(kni->pa[i], kva);
> +		kni->va[i] = kva->va;
>  
>  		skb = netdev_alloc_skb(dev, len);
>  		if (!skb) {
> @@ -403,7 +403,7 @@ kni_net_rx_normal(struct kni_dev *kni)
>  				kva = pa2kva(kva->next);
>  				data_kva = kva2data_kva(kva);
>  				/* Convert physical address to virtual address */
> -				prev_kva->next = pa2va(prev_kva->next, kva);
> +				prev_kva->next = kva->va;
>  			}
>  		}
>  
> diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h b/lib/librte_eal/linux/eal/include/rte_kni_common.h
> index 7313ef5..c694a1d 100644
> --- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
> +++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
> @@ -86,7 +86,8 @@ struct rte_kni_mbuf {
>  	uint16_t data_len;      /**< Amount of data in segment buffer. */
>  
>  	/* fields on second cache line */
> -	char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
> +	void *va __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
> +	         /**< Virtual address of this mbuf in userspace (overwrites userdata). */
>  	void *pool;
>  	void *next;             /**< Physical address of next mbuf in kernel. */
>  };
> diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
> index e388751..463485f 100644
> --- a/lib/librte_kni/rte_kni.c
> +++ b/lib/librte_kni/rte_kni.c
> @@ -359,13 +359,15 @@ va2pa(struct rte_mbuf *m)
>  static void *
>  va2pa_all(struct rte_mbuf *mbuf)
>  {
> -	void *phy_mbuf = va2pa(mbuf);
> +	void *phy_mbuf = (void*)rte_mempool_virt2iova(mbuf);
>  	struct rte_mbuf *next = mbuf->next;
>  	while (next) {
> -		mbuf->next = va2pa(next);
> +		mbuf->userdata = mbuf;
> +		mbuf->next = (void*)rte_mempool_virt2iova(next);
>  		mbuf = next;
>  		next = mbuf->next;
>  	}
> +	mbuf->userdata = mbuf;
>  	return phy_mbuf;
>  }
>  
> @@ -652,6 +654,8 @@ kni_allocate_mbufs(struct rte_kni *kni)
>  			 offsetof(struct rte_kni_mbuf, buf_addr));
>  	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, next) !=
>  			 offsetof(struct rte_kni_mbuf, next));
> +	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, userdata) !=
> +			 offsetof(struct rte_kni_mbuf, va));
>  	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
>  			 offsetof(struct rte_kni_mbuf, data_off));
>  	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
> 


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v2] kni: properly translate pa2va for cloned mbuf
  2020-02-04 16:17   ` Ferruh Yigit
@ 2020-02-04 16:49     ` yoursunny
  2020-02-04 17:58       ` Ferruh Yigit
  0 siblings, 1 reply; 5+ messages in thread
From: yoursunny @ 2020-02-04 16:49 UTC (permalink / raw)
  To: Ferruh Yigit
  Cc: Junxiao Shi, dev, Olivier MATZ, Jerin Jacob, Igor Ryzhov, Vamsi Attunuru

Hi Ferruh

Thanks for your review.

On Tue, Feb 4, 2020 at 11:17 AM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>
> On 1/28/2020 4:16 PM, Junxiao Shi wrote:
> > Previously, KNI kernel module uses the difference between m->buf_addr
> > and m->buf_iova to calculate userspace virtual address from physical
> > address. This works for direct mbufs, but does not work for indirect
> > (cloned) mbufs that come from another mempool. Transmitting a cloned
> > mbuf may cause segmentation fault in userspace.
>
> Yes, indirect buffers from another mempool won't work, I think first thing to do
> is detect these mbufs and return error to prevent the crash.
>
> >
> > Now, userspace KNI library writes the virtual address of each mbuf
> > in m->userdata field, and KNI kernel module uses this field to restore
> > virtual address before putting mbuf into free_q. This approach works
> > for both direct and indirect mbufs.
>
> Overwriting the mbuf metadata is not ideal and in some cases it may be
> unacceptable so I think we can't make it default behavior.
>

I believe that after an mbuf has been passed to ethdev for
transmission, it is owned by ethdev, and the user application should
not use it anymore.
This is because, the mbuf could be free'd by ethdev after
transmission, so that it is unsafe for the user application to
dereference its pointer.
Notice that in case the user application has created indirect mbufs
pointing to the same buffer, only the mbuf passed to ethdev would have
its m->userdata field overwritten; other mbufs still held by the
application are unaffected.
This is why I decided to overwrite the m->userdata field.

Can you explain, in what case overwriting mbuf metadata would be unacceptable?

>
> But why not have this as option, need to find a way to pass this option to kni
> library.
>
> And kni library may add a flag to mbuf to say the 'userdata' is used to as 'va'
> before sending packet to kernel, this can make kernel code unified which can use
> 'pa2va()' when there is no flag and 'kva->va' when there is.
> Adding this flag to mbuf.h prevents the any possible conflict in the future if
> @Oliver accepts this.
>
> And in mbuf, not sure if there is flag for mbuf that marks the 'userdata' have
> valid value or not. But detecting it in userspace and not overwriting the value
> but returning error in that case can be better option.
>
> What do you think?
>
> >
> > NOTE TO REVIEWER - DO NOT MERGE
> > The idea of this change is at https://bugs.dpdk.org/show_bug.cgi?id=183#c4
> > Test case is at https://bugs.dpdk.org/show_bug.cgi?id=183#c5
> > I only modified kni_net_rx_normal function.
> > If this approach is acceptable, I will modify kni_net_rx_lo_fifo,
> > kni_net_rx_lo_fifo_skb, and kni_fifo_trans_pa2va(rx_q) as well.
> >
> > Bugzilla ID: 183
> >
> > Signed-off-by: Junxiao Shi <git@mail1.yoursunny.com>
> > ---
> >  kernel/linux/kni/kni_net.c                        | 4 ++--
> >  lib/librte_eal/linux/eal/include/rte_kni_common.h | 3 ++-
> >  lib/librte_kni/rte_kni.c                          | 8 ++++++--
> >  3 files changed, 10 insertions(+), 5 deletions(-)
> >
> > diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
> > index 97fe85b..d783545 100644
> > --- a/kernel/linux/kni/kni_net.c
> > +++ b/kernel/linux/kni/kni_net.c
> > @@ -377,7 +377,7 @@ kni_net_rx_normal(struct kni_dev *kni)
> >               kva = get_kva(kni, kni->pa[i]);
> >               len = kva->pkt_len;
> >               data_kva = get_data_kva(kni, kva);
> > -             kni->va[i] = pa2va(kni->pa[i], kva);
> > +             kni->va[i] = kva->va;
> >
> >               skb = netdev_alloc_skb(dev, len);
> >               if (!skb) {
> > @@ -403,7 +403,7 @@ kni_net_rx_normal(struct kni_dev *kni)
> >                               kva = pa2kva(kva->next);
> >                               data_kva = kva2data_kva(kva);
> >                               /* Convert physical address to virtual address */
> > -                             prev_kva->next = pa2va(prev_kva->next, kva);
> > +                             prev_kva->next = kva->va;
> >                       }
> >               }
> >
> > diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h b/lib/librte_eal/linux/eal/include/rte_kni_common.h
> > index 7313ef5..c694a1d 100644
> > --- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
> > +++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
> > @@ -86,7 +86,8 @@ struct rte_kni_mbuf {
> >       uint16_t data_len;      /**< Amount of data in segment buffer. */
> >
> >       /* fields on second cache line */
> > -     char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
> > +     void *va __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
> > +              /**< Virtual address of this mbuf in userspace (overwrites userdata). */
> >       void *pool;
> >       void *next;             /**< Physical address of next mbuf in kernel. */
> >  };
> > diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
> > index e388751..463485f 100644
> > --- a/lib/librte_kni/rte_kni.c
> > +++ b/lib/librte_kni/rte_kni.c
> > @@ -359,13 +359,15 @@ va2pa(struct rte_mbuf *m)
> >  static void *
> >  va2pa_all(struct rte_mbuf *mbuf)
> >  {
> > -     void *phy_mbuf = va2pa(mbuf);
> > +     void *phy_mbuf = (void*)rte_mempool_virt2iova(mbuf);
> >       struct rte_mbuf *next = mbuf->next;
> >       while (next) {
> > -             mbuf->next = va2pa(next);
> > +             mbuf->userdata = mbuf;
> > +             mbuf->next = (void*)rte_mempool_virt2iova(next);
> >               mbuf = next;
> >               next = mbuf->next;
> >       }
> > +     mbuf->userdata = mbuf;
> >       return phy_mbuf;
> >  }
> >
> > @@ -652,6 +654,8 @@ kni_allocate_mbufs(struct rte_kni *kni)
> >                        offsetof(struct rte_kni_mbuf, buf_addr));
> >       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, next) !=
> >                        offsetof(struct rte_kni_mbuf, next));
> > +     RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, userdata) !=
> > +                      offsetof(struct rte_kni_mbuf, va));
> >       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
> >                        offsetof(struct rte_kni_mbuf, data_off));
> >       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
> >
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v2] kni: properly translate pa2va for cloned mbuf
  2020-02-04 16:49     ` yoursunny
@ 2020-02-04 17:58       ` Ferruh Yigit
  0 siblings, 0 replies; 5+ messages in thread
From: Ferruh Yigit @ 2020-02-04 17:58 UTC (permalink / raw)
  To: yoursunny
  Cc: Junxiao Shi, dev, Olivier MATZ, Jerin Jacob, Igor Ryzhov, Vamsi Attunuru

On 2/4/2020 4:49 PM, yoursunny wrote:
> Hi Ferruh
> 
> Thanks for your review.
> 
> On Tue, Feb 4, 2020 at 11:17 AM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>>
>> On 1/28/2020 4:16 PM, Junxiao Shi wrote:
>>> Previously, KNI kernel module uses the difference between m->buf_addr
>>> and m->buf_iova to calculate userspace virtual address from physical
>>> address. This works for direct mbufs, but does not work for indirect
>>> (cloned) mbufs that come from another mempool. Transmitting a cloned
>>> mbuf may cause segmentation fault in userspace.
>>
>> Yes, indirect buffers from another mempool won't work, I think first thing to do
>> is detect these mbufs and return error to prevent the crash.
>>
>>>
>>> Now, userspace KNI library writes the virtual address of each mbuf
>>> in m->userdata field, and KNI kernel module uses this field to restore
>>> virtual address before putting mbuf into free_q. This approach works
>>> for both direct and indirect mbufs.
>>
>> Overwriting the mbuf metadata is not ideal and in some cases it may be
>> unacceptable so I think we can't make it default behavior.
>>
> 
> I believe that after an mbuf has been passed to ethdev for
> transmission, it is owned by ethdev, and the user application should
> not use it anymore.
> This is because, the mbuf could be free'd by ethdev after
> transmission, so that it is unsafe for the user application to
> dereference its pointer.
> Notice that in case the user application has created indirect mbufs
> pointing to the same buffer, only the mbuf passed to ethdev would have
> its m->userdata field overwritten; other mbufs still held by the
> application are unaffected.
> This is why I decided to overwrite the m->userdata field.
> 
> Can you explain, in what case overwriting mbuf metadata would be unacceptable?

If the mbuf sent to kernel space returns back to DPDK application and if next
hope was expecting that metadata field.

'kni_net_rx_lo_fifo()' can cause that but that is mainly for debug, not sure if
that counts really.

I am not quite sure if any packet sent to kernel can return back to circulation,
but trying to be more defensive here, and protect the mbuf metada as much as
possible and not overwrite it via KNI data. That is why suggesting to enable it
via an option instead of making it default.

And 'userdata' seems not really used much as of now, but what if it has been
changed to dynamic field, which was mentioned in the past, and that field end up
being something required by kernel side?
Long shot? Probably. In that case we can make that option default, but I think
better than risking breaking some possible usage.

> 
>>
>> But why not have this as option, need to find a way to pass this option to kni
>> library.
>>
>> And kni library may add a flag to mbuf to say the 'userdata' is used to as 'va'
>> before sending packet to kernel, this can make kernel code unified which can use
>> 'pa2va()' when there is no flag and 'kva->va' when there is.
>> Adding this flag to mbuf.h prevents the any possible conflict in the future if
>> @Oliver accepts this.
>>
>> And in mbuf, not sure if there is flag for mbuf that marks the 'userdata' have
>> valid value or not. But detecting it in userspace and not overwriting the value
>> but returning error in that case can be better option.
>>
>> What do you think?
>>
>>>
>>> NOTE TO REVIEWER - DO NOT MERGE
>>> The idea of this change is at https://bugs.dpdk.org/show_bug.cgi?id=183#c4
>>> Test case is at https://bugs.dpdk.org/show_bug.cgi?id=183#c5
>>> I only modified kni_net_rx_normal function.
>>> If this approach is acceptable, I will modify kni_net_rx_lo_fifo,
>>> kni_net_rx_lo_fifo_skb, and kni_fifo_trans_pa2va(rx_q) as well.
>>>
>>> Bugzilla ID: 183
>>>
>>> Signed-off-by: Junxiao Shi <git@mail1.yoursunny.com>
>>> ---
>>>  kernel/linux/kni/kni_net.c                        | 4 ++--
>>>  lib/librte_eal/linux/eal/include/rte_kni_common.h | 3 ++-
>>>  lib/librte_kni/rte_kni.c                          | 8 ++++++--
>>>  3 files changed, 10 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
>>> index 97fe85b..d783545 100644
>>> --- a/kernel/linux/kni/kni_net.c
>>> +++ b/kernel/linux/kni/kni_net.c
>>> @@ -377,7 +377,7 @@ kni_net_rx_normal(struct kni_dev *kni)
>>>               kva = get_kva(kni, kni->pa[i]);
>>>               len = kva->pkt_len;
>>>               data_kva = get_data_kva(kni, kva);
>>> -             kni->va[i] = pa2va(kni->pa[i], kva);
>>> +             kni->va[i] = kva->va;
>>>
>>>               skb = netdev_alloc_skb(dev, len);
>>>               if (!skb) {
>>> @@ -403,7 +403,7 @@ kni_net_rx_normal(struct kni_dev *kni)
>>>                               kva = pa2kva(kva->next);
>>>                               data_kva = kva2data_kva(kva);
>>>                               /* Convert physical address to virtual address */
>>> -                             prev_kva->next = pa2va(prev_kva->next, kva);
>>> +                             prev_kva->next = kva->va;
>>>                       }
>>>               }
>>>
>>> diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h b/lib/librte_eal/linux/eal/include/rte_kni_common.h
>>> index 7313ef5..c694a1d 100644
>>> --- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
>>> +++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
>>> @@ -86,7 +86,8 @@ struct rte_kni_mbuf {
>>>       uint16_t data_len;      /**< Amount of data in segment buffer. */
>>>
>>>       /* fields on second cache line */
>>> -     char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
>>> +     void *va __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
>>> +              /**< Virtual address of this mbuf in userspace (overwrites userdata). */
>>>       void *pool;
>>>       void *next;             /**< Physical address of next mbuf in kernel. */
>>>  };
>>> diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
>>> index e388751..463485f 100644
>>> --- a/lib/librte_kni/rte_kni.c
>>> +++ b/lib/librte_kni/rte_kni.c
>>> @@ -359,13 +359,15 @@ va2pa(struct rte_mbuf *m)
>>>  static void *
>>>  va2pa_all(struct rte_mbuf *mbuf)
>>>  {
>>> -     void *phy_mbuf = va2pa(mbuf);
>>> +     void *phy_mbuf = (void*)rte_mempool_virt2iova(mbuf);
>>>       struct rte_mbuf *next = mbuf->next;
>>>       while (next) {
>>> -             mbuf->next = va2pa(next);
>>> +             mbuf->userdata = mbuf;
>>> +             mbuf->next = (void*)rte_mempool_virt2iova(next);
>>>               mbuf = next;
>>>               next = mbuf->next;
>>>       }
>>> +     mbuf->userdata = mbuf;
>>>       return phy_mbuf;
>>>  }
>>>
>>> @@ -652,6 +654,8 @@ kni_allocate_mbufs(struct rte_kni *kni)
>>>                        offsetof(struct rte_kni_mbuf, buf_addr));
>>>       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, next) !=
>>>                        offsetof(struct rte_kni_mbuf, next));
>>> +     RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, userdata) !=
>>> +                      offsetof(struct rte_kni_mbuf, va));
>>>       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
>>>                        offsetof(struct rte_kni_mbuf, data_off));
>>>       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
>>>
>>


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, back to index

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-10 18:58 [dpdk-dev] [RFC PATCH] kni: properly translate pa2va for cloned mbuf Junxiao Shi
2020-01-28 16:16 ` [dpdk-dev] [RFC PATCH v2] " Junxiao Shi
2020-02-04 16:17   ` Ferruh Yigit
2020-02-04 16:49     ` yoursunny
2020-02-04 17:58       ` Ferruh Yigit

DPDK patches and discussions

Archives are clonable:
	git clone --mirror http://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ http://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev


Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/ public-inbox