DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] kni: fix possible kernel crash with va2pa
@ 2019-02-28  7:30 Yangchao Zhou
  2019-03-06 17:31 ` Ferruh Yigit
  2019-03-12  9:22 ` [dpdk-dev] [PATCH v2] " Yangchao Zhou
  0 siblings, 2 replies; 19+ messages in thread
From: Yangchao Zhou @ 2019-02-28  7:30 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

va2pa depends on the physical address and virtual address offset of
current mbuf. It may get the wrong physical address of next mbuf which
allocated in another hugepage segment.

Signed-off-by: Yangchao Zhou <zhouyates@gmail.com>
---
 kernel/linux/kni/kni_net.c                       | 16 ++--------------
 .../eal/include/exec-env/rte_kni_common.h        |  4 ++++
 lib/librte_kni/rte_kni.c                         | 15 ++++++++++++++-
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index 7371b6d58..caef8754f 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -61,18 +61,6 @@ kva2data_kva(struct rte_kni_mbuf *m)
 	return phys_to_virt(m->buf_physaddr + m->data_off);
 }
 
-/* virtual address to physical address */
-static void *
-va2pa(void *va, struct rte_kni_mbuf *m)
-{
-	void *pa;
-
-	pa = (void *)((unsigned long)va -
-			((unsigned long)m->buf_addr -
-			 (unsigned long)m->buf_physaddr));
-	return pa;
-}
-
 /*
  * It can be called to process the request.
  */
@@ -363,7 +351,7 @@ kni_net_rx_normal(struct kni_dev *kni)
 				if (!kva->next)
 					break;
 
-				kva = pa2kva(va2pa(kva->next, kva));
+				kva = pa2kva(kva->next_pa);
 				data_kva = kva2data_kva(kva);
 			}
 		}
@@ -545,7 +533,7 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 				if (!kva->next)
 					break;
 
-				kva = pa2kva(va2pa(kva->next, kva));
+				kva = pa2kva(kva->next_pa);
 				data_kva = kva2data_kva(kva);
 			}
 		}
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
index 5afa08713..608f5c13f 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
@@ -87,6 +87,10 @@ struct rte_kni_mbuf {
 	char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
 	void *pool;
 	void *next;
+	union {
+		uint64_t tx_offload;
+		void *next_pa;      /**< Physical address of next mbuf. */
+	};
 };
 
 /*
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 73aeccccf..1aaebcfa1 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -353,6 +353,17 @@ va2pa(struct rte_mbuf *m)
 			 (unsigned long)m->buf_iova));
 }
 
+static void *
+va2pa_all(struct rte_mbuf *m)
+{
+	struct rte_kni_mbuf *mbuf = (struct rte_kni_mbuf *)m;
+	while (mbuf->next) {
+		mbuf->next_pa = va2pa(mbuf->next);
+		mbuf = mbuf->next;
+	}
+	return va2pa(m);
+}
+
 static void
 obj_free(struct rte_mempool *mp __rte_unused, void *opaque, void *obj,
 		unsigned obj_idx __rte_unused)
@@ -550,7 +561,7 @@ rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
 	unsigned int i;
 
 	for (i = 0; i < num; i++)
-		phy_mbufs[i] = va2pa(mbufs[i]);
+		phy_mbufs[i] = va2pa_all(mbufs[i]);
 
 	ret = kni_fifo_put(kni->rx_q, phy_mbufs, num);
 
@@ -607,6 +618,8 @@ kni_allocate_mbufs(struct rte_kni *kni)
 			 offsetof(struct rte_kni_mbuf, pkt_len));
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
 			 offsetof(struct rte_kni_mbuf, ol_flags));
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, tx_offload) !=
+			 offsetof(struct rte_kni_mbuf, tx_offload));
 
 	/* Check if pktmbuf pool has been configured */
 	if (kni->pktmbuf_pool == NULL) {
-- 
2.19.1.windows.1

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dpdk-dev] [PATCH] kni: fix possible kernel crash with va2pa
  2019-02-28  7:30 [dpdk-dev] [PATCH] kni: fix possible kernel crash with va2pa Yangchao Zhou
@ 2019-03-06 17:31 ` Ferruh Yigit
  2019-06-14 18:41   ` Dey, Souvik
  2019-03-12  9:22 ` [dpdk-dev] [PATCH v2] " Yangchao Zhou
  1 sibling, 1 reply; 19+ messages in thread
From: Ferruh Yigit @ 2019-03-06 17:31 UTC (permalink / raw)
  To: Yangchao Zhou, dev

On 2/28/2019 7:30 AM, Yangchao Zhou wrote:
> va2pa depends on the physical address and virtual address offset of
> current mbuf. It may get the wrong physical address of next mbuf which
> allocated in another hugepage segment.

Hi Yangchao,

The problem you described seems valid, when current mbuf and the mbuf pointed bu
next pointer from different (huge)pages, address calculation will be wrong.

Can you able to reproduce the issue, or recognized the problem theoretically?

> 
> Signed-off-by: Yangchao Zhou <zhouyates@gmail.com>
> ---
>  kernel/linux/kni/kni_net.c                       | 16 ++--------------
>  .../eal/include/exec-env/rte_kni_common.h        |  4 ++++
>  lib/librte_kni/rte_kni.c                         | 15 ++++++++++++++-
>  3 files changed, 20 insertions(+), 15 deletions(-)
> 
> diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
> index 7371b6d58..caef8754f 100644
> --- a/kernel/linux/kni/kni_net.c
> +++ b/kernel/linux/kni/kni_net.c
> @@ -61,18 +61,6 @@ kva2data_kva(struct rte_kni_mbuf *m)
>  	return phys_to_virt(m->buf_physaddr + m->data_off);
>  }
>  
> -/* virtual address to physical address */
> -static void *
> -va2pa(void *va, struct rte_kni_mbuf *m)
> -{
> -	void *pa;
> -
> -	pa = (void *)((unsigned long)va -
> -			((unsigned long)m->buf_addr -
> -			 (unsigned long)m->buf_physaddr));
> -	return pa;
> -}
> -
>  /*
>   * It can be called to process the request.
>   */
> @@ -363,7 +351,7 @@ kni_net_rx_normal(struct kni_dev *kni)
>  				if (!kva->next)
>  					break;
>  
> -				kva = pa2kva(va2pa(kva->next, kva));
> +				kva = pa2kva(kva->next_pa);
>  				data_kva = kva2data_kva(kva);
>  			}
>  		}
> @@ -545,7 +533,7 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
>  				if (!kva->next)
>  					break;
>  
> -				kva = pa2kva(va2pa(kva->next, kva));
> +				kva = pa2kva(kva->next_pa);
>  				data_kva = kva2data_kva(kva);
>  			}
>  		}
> diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> index 5afa08713..608f5c13f 100644
> --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> @@ -87,6 +87,10 @@ struct rte_kni_mbuf {
>  	char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
>  	void *pool;
>  	void *next;
> +	union {
> +		uint64_t tx_offload;
> +		void *next_pa;      /**< Physical address of next mbuf. */
> +	};

This will cause overwrite the 'tx_offload' via 'next_pa', we don't use
tx_offload in KNI but not sure about removing potential use for future.

What do you think about converting 'm->next' to physical address before putting
them into 'rx_q', and in kernel side after data copied to skb convert 'm->next'
back to virtual address before putting it into 'free_q' ?
I think both address conversion can be possible to do, a little tricky because
address conversion should be calculated in next mbuf and previous mbuf->next in
the chain should be updated.

>  };
>  
>  /*
> diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
> index 73aeccccf..1aaebcfa1 100644
> --- a/lib/librte_kni/rte_kni.c
> +++ b/lib/librte_kni/rte_kni.c
> @@ -353,6 +353,17 @@ va2pa(struct rte_mbuf *m)
>  			 (unsigned long)m->buf_iova));
>  }
>  
> +static void *
> +va2pa_all(struct rte_mbuf *m)
> +{
> +	struct rte_kni_mbuf *mbuf = (struct rte_kni_mbuf *)m;
> +	while (mbuf->next) {
> +		mbuf->next_pa = va2pa(mbuf->next);
> +		mbuf = mbuf->next;
> +	}
> +	return va2pa(m);
> +}
> +
>  static void
>  obj_free(struct rte_mempool *mp __rte_unused, void *opaque, void *obj,
>  		unsigned obj_idx __rte_unused)
> @@ -550,7 +561,7 @@ rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
>  	unsigned int i;
>  
>  	for (i = 0; i < num; i++)
> -		phy_mbufs[i] = va2pa(mbufs[i]);
> +		phy_mbufs[i] = va2pa_all(mbufs[i]);
>  
>  	ret = kni_fifo_put(kni->rx_q, phy_mbufs, num);
>  
> @@ -607,6 +618,8 @@ kni_allocate_mbufs(struct rte_kni *kni)
>  			 offsetof(struct rte_kni_mbuf, pkt_len));
>  	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
>  			 offsetof(struct rte_kni_mbuf, ol_flags));
> +	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, tx_offload) !=
> +			 offsetof(struct rte_kni_mbuf, tx_offload));
>  
>  	/* Check if pktmbuf pool has been configured */
>  	if (kni->pktmbuf_pool == NULL) {
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [dpdk-dev] [PATCH v2] kni: fix possible kernel crash with va2pa
  2019-02-28  7:30 [dpdk-dev] [PATCH] kni: fix possible kernel crash with va2pa Yangchao Zhou
  2019-03-06 17:31 ` Ferruh Yigit
@ 2019-03-12  9:22 ` Yangchao Zhou
  2019-03-19 18:35   ` Ferruh Yigit
                     ` (3 more replies)
  1 sibling, 4 replies; 19+ messages in thread
From: Yangchao Zhou @ 2019-03-12  9:22 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

va2pa depends on the physical address and virtual address offset of
current mbuf. It may get the wrong physical address of next mbuf which
allocated in another hugepage segment.

In rte_mempool_populate_default(), trying to allocate whole block of
contiguous memory could be failed. Then, it would reserve memory in
several memzones that have different physical address and virtual address
offsets. The rte_mempool_populate_default() is used by
rte_pktmbuf_pool_create().

Signed-off-by: Yangchao Zhou <zhouyates@gmail.com>
---
v2: Add an explanation that causes this problem.
    Use m->next to store physical address.
---
 kernel/linux/kni/kni_net.c                    | 42 +++++++++++--------
 .../eal/include/exec-env/rte_kni_common.h     |  2 +-
 lib/librte_kni/rte_kni.c                      | 15 ++++++-
 3 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index 7371b6d58..106b5153f 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -61,18 +61,6 @@ kva2data_kva(struct rte_kni_mbuf *m)
 	return phys_to_virt(m->buf_physaddr + m->data_off);
 }
 
-/* virtual address to physical address */
-static void *
-va2pa(void *va, struct rte_kni_mbuf *m)
-{
-	void *pa;
-
-	pa = (void *)((unsigned long)va -
-			((unsigned long)m->buf_addr -
-			 (unsigned long)m->buf_physaddr));
-	return pa;
-}
-
 /*
  * It can be called to process the request.
  */
@@ -173,7 +161,10 @@ kni_fifo_trans_pa2va(struct kni_dev *kni,
 	struct rte_kni_fifo *src_pa, struct rte_kni_fifo *dst_va)
 {
 	uint32_t ret, i, num_dst, num_rx;
-	void *kva;
+	struct rte_kni_mbuf *kva, *_kva;
+	int nb_segs;
+	int kva_nb_segs;
+
 	do {
 		num_dst = kni_fifo_free_count(dst_va);
 		if (num_dst == 0)
@@ -188,6 +179,17 @@ kni_fifo_trans_pa2va(struct kni_dev *kni,
 		for (i = 0; i < num_rx; i++) {
 			kva = pa2kva(kni->pa[i]);
 			kni->va[i] = pa2va(kni->pa[i], kva);
+
+			kva_nb_segs = kva->nb_segs;
+			for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
+				if (!kva->next)
+					break;
+
+				_kva = kva;
+				kva = pa2kva(kva->next);
+				/* Convert physical address to virtual address */
+				_kva->next = pa2va(_kva->next, kva);
+			}
 		}
 
 		ret = kni_fifo_put(dst_va, kni->va, num_rx);
@@ -313,7 +315,7 @@ kni_net_rx_normal(struct kni_dev *kni)
 	uint32_t ret;
 	uint32_t len;
 	uint32_t i, num_rx, num_fq;
-	struct rte_kni_mbuf *kva;
+	struct rte_kni_mbuf *kva, *_kva;
 	void *data_kva;
 	struct sk_buff *skb;
 	struct net_device *dev = kni->net_dev;
@@ -363,8 +365,11 @@ kni_net_rx_normal(struct kni_dev *kni)
 				if (!kva->next)
 					break;
 
-				kva = pa2kva(va2pa(kva->next, kva));
+				_kva = kva;
+				kva = pa2kva(kva->next);
 				data_kva = kva2data_kva(kva);
+				/* Convert physical address to virtual address */
+				_kva->next = pa2va(_kva->next, kva);
 			}
 		}
 
@@ -481,7 +486,7 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 	uint32_t ret;
 	uint32_t len;
 	uint32_t i, num_rq, num_fq, num;
-	struct rte_kni_mbuf *kva;
+	struct rte_kni_mbuf *kva, *_kva;
 	void *data_kva;
 	struct sk_buff *skb;
 	struct net_device *dev = kni->net_dev;
@@ -545,8 +550,11 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 				if (!kva->next)
 					break;
 
-				kva = pa2kva(va2pa(kva->next, kva));
+				_kva = kva;
+				kva = pa2kva(kva->next);
 				data_kva = kva2data_kva(kva);
+				/* Convert physical address to virtual address */
+				_kva->next = pa2va(_kva->next, kva);
 			}
 		}
 
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
index 5afa08713..688db9758 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
@@ -86,7 +86,7 @@ struct rte_kni_mbuf {
 	/* fields on second cache line */
 	char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
 	void *pool;
-	void *next;
+	void *next;             /**< Physical address of next mbuf in kernel. */
 };
 
 /*
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 73aeccccf..74b1ff5b6 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -353,6 +353,19 @@ va2pa(struct rte_mbuf *m)
 			 (unsigned long)m->buf_iova));
 }
 
+static void *
+va2pa_all(struct rte_mbuf *mbuf)
+{
+	void *phy_mbuf = va2pa(mbuf);
+	struct rte_mbuf *next = mbuf->next;
+	while (next) {
+		mbuf->next = va2pa(next);
+		mbuf = next;
+		next = mbuf->next;
+	}
+	return phy_mbuf;
+}
+
 static void
 obj_free(struct rte_mempool *mp __rte_unused, void *opaque, void *obj,
 		unsigned obj_idx __rte_unused)
@@ -550,7 +563,7 @@ rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
 	unsigned int i;
 
 	for (i = 0; i < num; i++)
-		phy_mbufs[i] = va2pa(mbufs[i]);
+		phy_mbufs[i] = va2pa_all(mbufs[i]);
 
 	ret = kni_fifo_put(kni->rx_q, phy_mbufs, num);
 
-- 
2.19.1.windows.1

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dpdk-dev] [PATCH v2] kni: fix possible kernel crash with va2pa
  2019-03-12  9:22 ` [dpdk-dev] [PATCH v2] " Yangchao Zhou
@ 2019-03-19 18:35   ` Ferruh Yigit
  2019-03-19 18:35     ` Ferruh Yigit
  2019-03-22 20:49   ` Ferruh Yigit
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 19+ messages in thread
From: Ferruh Yigit @ 2019-03-19 18:35 UTC (permalink / raw)
  To: Yangchao Zhou, dev

On 3/12/2019 9:22 AM, Yangchao Zhou wrote:
> va2pa depends on the physical address and virtual address offset of
> current mbuf. It may get the wrong physical address of next mbuf which
> allocated in another hugepage segment.
> 
> In rte_mempool_populate_default(), trying to allocate whole block of
> contiguous memory could be failed. Then, it would reserve memory in
> several memzones that have different physical address and virtual address
> offsets. The rte_mempool_populate_default() is used by
> rte_pktmbuf_pool_create().
> 
> Signed-off-by: Yangchao Zhou <zhouyates@gmail.com>
> ---
> v2: Add an explanation that causes this problem.
>     Use m->next to store physical address.

Overall code looks good to me, thanks for the work, it looks pretty clean.
I would like to do some test before giving an ack.

Meanwhile, can you please update kni documentation, to document for the mbufs
sent to kernel has mbuf->next fields as physical address. It is OK to send as a
new version of the patch with documentation.

Thanks,
ferruh

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dpdk-dev] [PATCH v2] kni: fix possible kernel crash with va2pa
  2019-03-19 18:35   ` Ferruh Yigit
@ 2019-03-19 18:35     ` Ferruh Yigit
  0 siblings, 0 replies; 19+ messages in thread
From: Ferruh Yigit @ 2019-03-19 18:35 UTC (permalink / raw)
  To: Yangchao Zhou, dev

On 3/12/2019 9:22 AM, Yangchao Zhou wrote:
> va2pa depends on the physical address and virtual address offset of
> current mbuf. It may get the wrong physical address of next mbuf which
> allocated in another hugepage segment.
> 
> In rte_mempool_populate_default(), trying to allocate whole block of
> contiguous memory could be failed. Then, it would reserve memory in
> several memzones that have different physical address and virtual address
> offsets. The rte_mempool_populate_default() is used by
> rte_pktmbuf_pool_create().
> 
> Signed-off-by: Yangchao Zhou <zhouyates@gmail.com>
> ---
> v2: Add an explanation that causes this problem.
>     Use m->next to store physical address.

Overall code looks good to me, thanks for the work, it looks pretty clean.
I would like to do some test before giving an ack.

Meanwhile, can you please update kni documentation, to document for the mbufs
sent to kernel has mbuf->next fields as physical address. It is OK to send as a
new version of the patch with documentation.

Thanks,
ferruh

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dpdk-dev] [PATCH v2] kni: fix possible kernel crash with va2pa
  2019-03-12  9:22 ` [dpdk-dev] [PATCH v2] " Yangchao Zhou
  2019-03-19 18:35   ` Ferruh Yigit
@ 2019-03-22 20:49   ` Ferruh Yigit
  2019-03-22 20:49     ` Ferruh Yigit
  2019-06-18  4:06     ` Dey, Souvik
  2019-06-18 15:48   ` Stephen Hemminger
  2019-06-25 15:04   ` [dpdk-dev] [PATCH v3] " Yangchao Zhou
  3 siblings, 2 replies; 19+ messages in thread
From: Ferruh Yigit @ 2019-03-22 20:49 UTC (permalink / raw)
  To: Yangchao Zhou, dev

On 3/12/2019 9:22 AM, Yangchao Zhou wrote:
> va2pa depends on the physical address and virtual address offset of
> current mbuf. It may get the wrong physical address of next mbuf which
> allocated in another hugepage segment.
> 
> In rte_mempool_populate_default(), trying to allocate whole block of
> contiguous memory could be failed. Then, it would reserve memory in
> several memzones that have different physical address and virtual address
> offsets. The rte_mempool_populate_default() is used by
> rte_pktmbuf_pool_create().
> 
> Signed-off-by: Yangchao Zhou <zhouyates@gmail.com>
> ---
> v2: Add an explanation that causes this problem.
>     Use m->next to store physical address.

<...>

> @@ -481,7 +486,7 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
>  	uint32_t ret;
>  	uint32_t len;
>  	uint32_t i, num_rq, num_fq, num;
> -	struct rte_kni_mbuf *kva;
> +	struct rte_kni_mbuf *kva, *_kva;
>  	void *data_kva;
>  	struct sk_buff *skb;
>  	struct net_device *dev = kni->net_dev;
> @@ -545,8 +550,11 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
>  				if (!kva->next)
>  					break;
>  
> -				kva = pa2kva(va2pa(kva->next, kva));
> +				_kva = kva;
> +				kva = pa2kva(kva->next);
>  				data_kva = kva2data_kva(kva);
> +				/* Convert physical address to virtual address */
> +				_kva->next = pa2va(_kva->next, kva);
>  			}
>  		}
>  

Also need to update 'kni_net_rx_lo_fifo()', at worst to update 'next' fields
because it fills 'kni->free_q', without conversion userspace will crash.

<...>

> @@ -550,7 +563,7 @@ rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
>  	unsigned int i;
>  
>  	for (i = 0; i < num; i++)
> -		phy_mbufs[i] = va2pa(mbufs[i]);
> +		phy_mbufs[i] = va2pa_all(mbufs[i]);
>  
>  	ret = kni_fifo_put(kni->rx_q, phy_mbufs, num);

There is a problem here.

When fifo 'kni->rx_q' is full, 'rte_kni_tx_burst' will send less mbuf than
requested, than the application needs to handle the remaining packages, most
probably will free them, but now some packages has physical address in their
'next' field, which will cause app to crash.

I don't know really how to solve this.
Perhaps getting free count from 'kni->rx_q' and only convert that much
(va2pa_all) to physical address can work, but I can't estimate performance
effect of it.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dpdk-dev] [PATCH v2] kni: fix possible kernel crash with va2pa
  2019-03-22 20:49   ` Ferruh Yigit
@ 2019-03-22 20:49     ` Ferruh Yigit
  2019-06-18  4:06     ` Dey, Souvik
  1 sibling, 0 replies; 19+ messages in thread
From: Ferruh Yigit @ 2019-03-22 20:49 UTC (permalink / raw)
  To: Yangchao Zhou, dev

On 3/12/2019 9:22 AM, Yangchao Zhou wrote:
> va2pa depends on the physical address and virtual address offset of
> current mbuf. It may get the wrong physical address of next mbuf which
> allocated in another hugepage segment.
> 
> In rte_mempool_populate_default(), trying to allocate whole block of
> contiguous memory could be failed. Then, it would reserve memory in
> several memzones that have different physical address and virtual address
> offsets. The rte_mempool_populate_default() is used by
> rte_pktmbuf_pool_create().
> 
> Signed-off-by: Yangchao Zhou <zhouyates@gmail.com>
> ---
> v2: Add an explanation that causes this problem.
>     Use m->next to store physical address.

<...>

> @@ -481,7 +486,7 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
>  	uint32_t ret;
>  	uint32_t len;
>  	uint32_t i, num_rq, num_fq, num;
> -	struct rte_kni_mbuf *kva;
> +	struct rte_kni_mbuf *kva, *_kva;
>  	void *data_kva;
>  	struct sk_buff *skb;
>  	struct net_device *dev = kni->net_dev;
> @@ -545,8 +550,11 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
>  				if (!kva->next)
>  					break;
>  
> -				kva = pa2kva(va2pa(kva->next, kva));
> +				_kva = kva;
> +				kva = pa2kva(kva->next);
>  				data_kva = kva2data_kva(kva);
> +				/* Convert physical address to virtual address */
> +				_kva->next = pa2va(_kva->next, kva);
>  			}
>  		}
>  

Also need to update 'kni_net_rx_lo_fifo()', at worst to update 'next' fields
because it fills 'kni->free_q', without conversion userspace will crash.

<...>

> @@ -550,7 +563,7 @@ rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
>  	unsigned int i;
>  
>  	for (i = 0; i < num; i++)
> -		phy_mbufs[i] = va2pa(mbufs[i]);
> +		phy_mbufs[i] = va2pa_all(mbufs[i]);
>  
>  	ret = kni_fifo_put(kni->rx_q, phy_mbufs, num);

There is a problem here.

When fifo 'kni->rx_q' is full, 'rte_kni_tx_burst' will send less mbuf than
requested, than the application needs to handle the remaining packages, most
probably will free them, but now some packages has physical address in their
'next' field, which will cause app to crash.

I don't know really how to solve this.
Perhaps getting free count from 'kni->rx_q' and only convert that much
(va2pa_all) to physical address can work, but I can't estimate performance
effect of it.



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dpdk-dev] [PATCH] kni: fix possible kernel crash with va2pa
  2019-03-06 17:31 ` Ferruh Yigit
@ 2019-06-14 18:41   ` Dey, Souvik
  0 siblings, 0 replies; 19+ messages in thread
From: Dey, Souvik @ 2019-06-14 18:41 UTC (permalink / raw)
  To: Ferruh Yigit, Yangchao Zhou, dev

Was there any update to this patch , I am also seeing kernel crash in kni_net_rx_normal dueing skb_put which is happening for chained mbufs.

--
Regards,
Souvik


From: dev <dev-bounces@dpdk.org> On Behalf Of Ferruh Yigit
Sent: Wednesday, March 6, 2019 12:31 PM
To: Yangchao Zhou <zhouyates@gmail.com>; dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH] kni: fix possible kernel crash with va2pa

________________________________
NOTICE: This email was received from an EXTERNAL sender
________________________________

On 2/28/2019 7:30 AM, Yangchao Zhou wrote:
> va2pa depends on the physical address and virtual address offset of
> current mbuf. It may get the wrong physical address of next mbuf which
> allocated in another hugepage segment.

Hi Yangchao,

The problem you described seems valid, when current mbuf and the mbuf pointed bu
next pointer from different (huge)pages, address calculation will be wrong.

Can you able to reproduce the issue, or recognized the problem theoretically?

>
> Signed-off-by: Yangchao Zhou <zhouyates@gmail.com<mailto:zhouyates@gmail.com>>
> ---
> kernel/linux/kni/kni_net.c | 16 ++--------------
> .../eal/include/exec-env/rte_kni_common.h | 4 ++++
> lib/librte_kni/rte_kni.c | 15 ++++++++++++++-
> 3 files changed, 20 insertions(+), 15 deletions(-)
>
> diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
> index 7371b6d58..caef8754f 100644
> --- a/kernel/linux/kni/kni_net.c
> +++ b/kernel/linux/kni/kni_net.c
> @@ -61,18 +61,6 @@ kva2data_kva(struct rte_kni_mbuf *m)
> return phys_to_virt(m->buf_physaddr + m->data_off);
> }
>
> -/* virtual address to physical address */
> -static void *
> -va2pa(void *va, struct rte_kni_mbuf *m)
> -{
> - void *pa;
> -
> - pa = (void *)((unsigned long)va -
> - ((unsigned long)m->buf_addr -
> - (unsigned long)m->buf_physaddr));
> - return pa;
> -}
> -
> /*
> * It can be called to process the request.
> */
> @@ -363,7 +351,7 @@ kni_net_rx_normal(struct kni_dev *kni)
> if (!kva->next)
> break;
>
> - kva = pa2kva(va2pa(kva->next, kva));
> + kva = pa2kva(kva->next_pa);
> data_kva = kva2data_kva(kva);
> }
> }
> @@ -545,7 +533,7 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
> if (!kva->next)
> break;
>
> - kva = pa2kva(va2pa(kva->next, kva));
> + kva = pa2kva(kva->next_pa);
> data_kva = kva2data_kva(kva);
> }
> }
> diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> index 5afa08713..608f5c13f 100644
> --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> @@ -87,6 +87,10 @@ struct rte_kni_mbuf {
> char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
> void *pool;
> void *next;
> + union {
> + uint64_t tx_offload;
> + void *next_pa; /**< Physical address of next mbuf. */
> + };

This will cause overwrite the 'tx_offload' via 'next_pa', we don't use
tx_offload in KNI but not sure about removing potential use for future.

What do you think about converting 'm->next' to physical address before putting
them into 'rx_q', and in kernel side after data copied to skb convert 'm->next'
back to virtual address before putting it into 'free_q' ?
I think both address conversion can be possible to do, a little tricky because
address conversion should be calculated in next mbuf and previous mbuf->next in
the chain should be updated.

> };
>
> /*
> diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
> index 73aeccccf..1aaebcfa1 100644
> --- a/lib/librte_kni/rte_kni.c
> +++ b/lib/librte_kni/rte_kni.c
> @@ -353,6 +353,17 @@ va2pa(struct rte_mbuf *m)
> (unsigned long)m->buf_iova));
> }
>
> +static void *
> +va2pa_all(struct rte_mbuf *m)
> +{
> + struct rte_kni_mbuf *mbuf = (struct rte_kni_mbuf *)m;
> + while (mbuf->next) {
> + mbuf->next_pa = va2pa(mbuf->next);
> + mbuf = mbuf->next;
> + }
> + return va2pa(m);
> +}
> +
> static void
> obj_free(struct rte_mempool *mp __rte_unused, void *opaque, void *obj,
> unsigned obj_idx __rte_unused)
> @@ -550,7 +561,7 @@ rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
> unsigned int i;
>
> for (i = 0; i < num; i++)
> - phy_mbufs[i] = va2pa(mbufs[i]);
> + phy_mbufs[i] = va2pa_all(mbufs[i]);
>
> ret = kni_fifo_put(kni->rx_q, phy_mbufs, num);
>
> @@ -607,6 +618,8 @@ kni_allocate_mbufs(struct rte_kni *kni)
> offsetof(struct rte_kni_mbuf, pkt_len));
> RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
> offsetof(struct rte_kni_mbuf, ol_flags));
> + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, tx_offload) !=
> + offsetof(struct rte_kni_mbuf, tx_offload));
>
> /* Check if pktmbuf pool has been configured */
> if (kni->pktmbuf_pool == NULL) {
>


-----------------------------------------------------------------------------------------------------------------------
Notice: This e-mail together with any attachments may contain information of Ribbon Communications Inc. that
is confidential and/or proprietary for the sole use of the intended recipient.  Any review, disclosure, reliance or
distribution by others or forwarding without express permission is strictly prohibited.  If you are not the intended
recipient, please notify the sender immediately and then delete all copies, including any attachments.
-----------------------------------------------------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dpdk-dev] [PATCH v2] kni: fix possible kernel crash with va2pa
  2019-03-22 20:49   ` Ferruh Yigit
  2019-03-22 20:49     ` Ferruh Yigit
@ 2019-06-18  4:06     ` Dey, Souvik
  1 sibling, 0 replies; 19+ messages in thread
From: Dey, Souvik @ 2019-06-18  4:06 UTC (permalink / raw)
  To: Ferruh Yigit, Yangchao Zhou, dev

Hi Yigit,
              I was facing the kernel crash issue, at skb_over_put(), using dpdk using 18.11 on 4.19.28 kernel. On checking I did find this  patch and this patch is solving the issue also. But then I saw you comment in the patch and that’s looks scary to have the patch. Is there any improvements/fixes planned for this issue and in which version? is there any performance impact of the below patch ? As this issues is blocking our release any inputs to this asap will be really appreciated.

--
Regards,
Souvik

From: dev <dev-bounces@dpdk.org> On Behalf Of Ferruh Yigit
Sent: Friday, March 22, 2019 4:49 PM
To: Yangchao Zhou <zhouyates@gmail.com>; dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH v2] kni: fix possible kernel crash with va2pa

________________________________
NOTICE: This email was received from an EXTERNAL sender
________________________________

On 3/12/2019 9:22 AM, Yangchao Zhou wrote:
> va2pa depends on the physical address and virtual address offset of
> current mbuf. It may get the wrong physical address of next mbuf which
> allocated in another hugepage segment.
>
> In rte_mempool_populate_default(), trying to allocate whole block of
> contiguous memory could be failed. Then, it would reserve memory in
> several memzones that have different physical address and virtual address
> offsets. The rte_mempool_populate_default() is used by
> rte_pktmbuf_pool_create().
>
> Signed-off-by: Yangchao Zhou <zhouyates@gmail.com<mailto:zhouyates@gmail.com>>
> ---
> v2: Add an explanation that causes this problem.
> Use m->next to store physical address.

<...>

> @@ -481,7 +486,7 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
> uint32_t ret;
> uint32_t len;
> uint32_t i, num_rq, num_fq, num;
> - struct rte_kni_mbuf *kva;
> + struct rte_kni_mbuf *kva, *_kva;
> void *data_kva;
> struct sk_buff *skb;
> struct net_device *dev = kni->net_dev;
> @@ -545,8 +550,11 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
> if (!kva->next)
> break;
>
> - kva = pa2kva(va2pa(kva->next, kva));
> + _kva = kva;
> + kva = pa2kva(kva->next);
> data_kva = kva2data_kva(kva);
> + /* Convert physical address to virtual address */
> + _kva->next = pa2va(_kva->next, kva);
> }
> }
>

Also need to update 'kni_net_rx_lo_fifo()', at worst to update 'next' fields
because it fills 'kni->free_q', without conversion userspace will crash.

<...>

> @@ -550,7 +563,7 @@ rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
> unsigned int i;
>
> for (i = 0; i < num; i++)
> - phy_mbufs[i] = va2pa(mbufs[i]);
> + phy_mbufs[i] = va2pa_all(mbufs[i]);
>
> ret = kni_fifo_put(kni->rx_q, phy_mbufs, num);

There is a problem here.

When fifo 'kni->rx_q' is full, 'rte_kni_tx_burst' will send less mbuf than
requested, than the application needs to handle the remaining packages, most
probably will free them, but now some packages has physical address in their
'next' field, which will cause app to crash.

I don't know really how to solve this.
Perhaps getting free count from 'kni->rx_q' and only convert that much
(va2pa_all) to physical address can work, but I can't estimate performance
effect of it.


-----------------------------------------------------------------------------------------------------------------------
Notice: This e-mail together with any attachments may contain information of Ribbon Communications Inc. that
is confidential and/or proprietary for the sole use of the intended recipient.  Any review, disclosure, reliance or
distribution by others or forwarding without express permission is strictly prohibited.  If you are not the intended
recipient, please notify the sender immediately and then delete all copies, including any attachments.
-----------------------------------------------------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dpdk-dev] [PATCH v2] kni: fix possible kernel crash with va2pa
  2019-03-12  9:22 ` [dpdk-dev] [PATCH v2] " Yangchao Zhou
  2019-03-19 18:35   ` Ferruh Yigit
  2019-03-22 20:49   ` Ferruh Yigit
@ 2019-06-18 15:48   ` Stephen Hemminger
  2019-06-25 15:04   ` [dpdk-dev] [PATCH v3] " Yangchao Zhou
  3 siblings, 0 replies; 19+ messages in thread
From: Stephen Hemminger @ 2019-06-18 15:48 UTC (permalink / raw)
  To: Yangchao Zhou; +Cc: dev, ferruh.yigit

On Tue, 12 Mar 2019 17:22:32 +0800
Yangchao Zhou <zhouyates@gmail.com> wrote:

> va2pa depends on the physical address and virtual address offset of
> current mbuf. It may get the wrong physical address of next mbuf which
> allocated in another hugepage segment.
> 
> In rte_mempool_populate_default(), trying to allocate whole block of
> contiguous memory could be failed. Then, it would reserve memory in
> several memzones that have different physical address and virtual address
> offsets. The rte_mempool_populate_default() is used by
> rte_pktmbuf_pool_create().
> 
> Signed-off-by: Yangchao Zhou <zhouyates@gmail.com>

Could you add a Fixes tag?


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [dpdk-dev] [PATCH v3] kni: fix possible kernel crash with va2pa
  2019-03-12  9:22 ` [dpdk-dev] [PATCH v2] " Yangchao Zhou
                     ` (2 preceding siblings ...)
  2019-06-18 15:48   ` Stephen Hemminger
@ 2019-06-25 15:04   ` Yangchao Zhou
  2019-07-02 20:07     ` [dpdk-dev] [v3] " Junxiao Shi
  2019-07-10 20:09     ` [dpdk-dev] [PATCH v3] " Ferruh Yigit
  3 siblings, 2 replies; 19+ messages in thread
From: Yangchao Zhou @ 2019-06-25 15:04 UTC (permalink / raw)
  To: dev; +Cc: stephen, ferruh.yigit, sodey

va2pa depends on the physical address and virtual address offset of
current mbuf. It may get the wrong physical address of next mbuf which
allocated in another hugepage segment.

In rte_mempool_populate_default(), trying to allocate whole block of
contiguous memory could be failed. Then, it would reserve memory in
several memzones that have different physical address and virtual address
offsets. The rte_mempool_populate_default() is used by
rte_pktmbuf_pool_create().

Fixes: 8451269e6d7b ("kni: remove continuous memory restriction")

Signed-off-by: Yangchao Zhou <zhouyates@gmail.com>
---
 .../prog_guide/kernel_nic_interface.rst       |  6 ++-
 kernel/linux/kni/kni_net.c                    | 51 ++++++++++++-------
 .../linux/eal/include/rte_kni_common.h        |  2 +-
 lib/librte_kni/rte_kni.c                      | 16 +++++-
 lib/librte_kni/rte_kni_fifo.h                 | 12 +++++
 5 files changed, 65 insertions(+), 22 deletions(-)

diff --git a/doc/guides/prog_guide/kernel_nic_interface.rst b/doc/guides/prog_guide/kernel_nic_interface.rst
index daf87f4a8..2fd3b7983 100644
--- a/doc/guides/prog_guide/kernel_nic_interface.rst
+++ b/doc/guides/prog_guide/kernel_nic_interface.rst
@@ -268,12 +268,14 @@ Use Case: Ingress
 -----------------
 
 On the DPDK RX side, the mbuf is allocated by the PMD in the RX thread context.
-This thread will enqueue the mbuf in the rx_q FIFO.
+This thread will enqueue the mbuf in the rx_q FIFO, and the next pointers in mbuf-chain will convert to physical address.
 The KNI thread will poll all KNI active devices for the rx_q.
 If an mbuf is dequeued, it will be converted to a sk_buff and sent to the net stack via netif_rx().
-The dequeued mbuf must be freed, so the same pointer is sent back in the free_q FIFO.
+The dequeued mbuf must be freed, so the same pointer is sent back in the free_q FIFO,
+and next pointers must convert back to virtual address if exists before put in the free_q FIFO.
 
 The RX thread, in the same main loop, polls this FIFO and frees the mbuf after dequeuing it.
+The address conversion of the next pointer is to prevent the chained mbuf in different hugepage segments from causing kernel crash.
 
 Use Case: Egress
 ----------------
diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index ad8365877..f65233aaa 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -61,18 +61,6 @@ kva2data_kva(struct rte_kni_mbuf *m)
 	return phys_to_virt(m->buf_physaddr + m->data_off);
 }
 
-/* virtual address to physical address */
-static void *
-va2pa(void *va, struct rte_kni_mbuf *m)
-{
-	void *pa;
-
-	pa = (void *)((unsigned long)va -
-			((unsigned long)m->buf_addr -
-			 (unsigned long)m->buf_physaddr));
-	return pa;
-}
-
 /*
  * It can be called to process the request.
  */
@@ -173,7 +161,10 @@ kni_fifo_trans_pa2va(struct kni_dev *kni,
 	struct rte_kni_fifo *src_pa, struct rte_kni_fifo *dst_va)
 {
 	uint32_t ret, i, num_dst, num_rx;
-	void *kva;
+	struct rte_kni_mbuf *kva, *prev_kva;
+	int nb_segs;
+	int kva_nb_segs;
+
 	do {
 		num_dst = kni_fifo_free_count(dst_va);
 		if (num_dst == 0)
@@ -188,6 +179,17 @@ kni_fifo_trans_pa2va(struct kni_dev *kni,
 		for (i = 0; i < num_rx; i++) {
 			kva = pa2kva(kni->pa[i]);
 			kni->va[i] = pa2va(kni->pa[i], kva);
+
+			kva_nb_segs = kva->nb_segs;
+			for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
+				if (!kva->next)
+					break;
+
+				prev_kva = kva;
+				kva = pa2kva(kva->next);
+				/* Convert physical address to virtual address */
+				prev_kva->next = pa2va(prev_kva->next, kva);
+			}
 		}
 
 		ret = kni_fifo_put(dst_va, kni->va, num_rx);
@@ -313,7 +315,7 @@ kni_net_rx_normal(struct kni_dev *kni)
 	uint32_t ret;
 	uint32_t len;
 	uint32_t i, num_rx, num_fq;
-	struct rte_kni_mbuf *kva;
+	struct rte_kni_mbuf *kva, *prev_kva;
 	void *data_kva;
 	struct sk_buff *skb;
 	struct net_device *dev = kni->net_dev;
@@ -363,8 +365,11 @@ kni_net_rx_normal(struct kni_dev *kni)
 				if (!kva->next)
 					break;
 
-				kva = pa2kva(va2pa(kva->next, kva));
+				prev_kva = kva;
+				kva = pa2kva(kva->next);
 				data_kva = kva2data_kva(kva);
+				/* Convert physical address to virtual address */
+				prev_kva->next = pa2va(prev_kva->next, kva);
 			}
 		}
 
@@ -396,7 +401,7 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
 	uint32_t ret;
 	uint32_t len;
 	uint32_t i, num, num_rq, num_tq, num_aq, num_fq;
-	struct rte_kni_mbuf *kva;
+	struct rte_kni_mbuf *kva, *next_kva;
 	void *data_kva;
 	struct rte_kni_mbuf *alloc_kva;
 	void *alloc_data_kva;
@@ -439,6 +444,13 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
 			data_kva = kva2data_kva(kva);
 			kni->va[i] = pa2va(kni->pa[i], kva);
 
+			while (kva->next) {
+				next_kva = pa2kva(kva->next);
+				/* Convert physical address to virtual address */
+				kva->next = pa2va(kva->next, next_kva);
+				kva = next_kva;
+			}
+
 			alloc_kva = pa2kva(kni->alloc_pa[i]);
 			alloc_data_kva = kva2data_kva(alloc_kva);
 			kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);
@@ -481,7 +493,7 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 	uint32_t ret;
 	uint32_t len;
 	uint32_t i, num_rq, num_fq, num;
-	struct rte_kni_mbuf *kva;
+	struct rte_kni_mbuf *kva, *prev_kva;
 	void *data_kva;
 	struct sk_buff *skb;
 	struct net_device *dev = kni->net_dev;
@@ -545,8 +557,11 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 				if (!kva->next)
 					break;
 
-				kva = pa2kva(va2pa(kva->next, kva));
+				prev_kva = kva;
+				kva = pa2kva(kva->next);
 				data_kva = kva2data_kva(kva);
+				/* Convert physical address to virtual address */
+				prev_kva->next = pa2va(prev_kva->next, kva);
 			}
 		}
 
diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h b/lib/librte_eal/linux/eal/include/rte_kni_common.h
index 91a1c1408..37d9ee8f0 100644
--- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
+++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
@@ -86,7 +86,7 @@ struct rte_kni_mbuf {
 	/* fields on second cache line */
 	char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
 	void *pool;
-	void *next;
+	void *next;             /**< Physical address of next mbuf in kernel. */
 };
 
 /*
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index e29d0cc7d..6fc36f744 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -344,6 +344,19 @@ va2pa(struct rte_mbuf *m)
 			 (unsigned long)m->buf_iova));
 }
 
+static void *
+va2pa_all(struct rte_mbuf *mbuf)
+{
+	void *phy_mbuf = va2pa(mbuf);
+	struct rte_mbuf *next = mbuf->next;
+	while (next) {
+		mbuf->next = va2pa(next);
+		mbuf = next;
+		next = mbuf->next;
+	}
+	return phy_mbuf;
+}
+
 static void
 obj_free(struct rte_mempool *mp __rte_unused, void *opaque, void *obj,
 		unsigned obj_idx __rte_unused)
@@ -536,12 +549,13 @@ rte_kni_handle_request(struct rte_kni *kni)
 unsigned
 rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
 {
+	num = RTE_MIN(kni_fifo_free_count(kni->rx_q), num);
 	void *phy_mbufs[num];
 	unsigned int ret;
 	unsigned int i;
 
 	for (i = 0; i < num; i++)
-		phy_mbufs[i] = va2pa(mbufs[i]);
+		phy_mbufs[i] = va2pa_all(mbufs[i]);
 
 	ret = kni_fifo_put(kni->rx_q, phy_mbufs, num);
 
diff --git a/lib/librte_kni/rte_kni_fifo.h b/lib/librte_kni/rte_kni_fifo.h
index 287d7deb2..11d7fd6bd 100644
--- a/lib/librte_kni/rte_kni_fifo.h
+++ b/lib/librte_kni/rte_kni_fifo.h
@@ -104,3 +104,15 @@ kni_fifo_count(struct rte_kni_fifo *fifo)
 	unsigned fifo_read = __KNI_LOAD_ACQUIRE(&fifo->read);
 	return (fifo->len + fifo_write - fifo_read) & (fifo->len - 1);
 }
+
+/**
+ * Get the num of available elements in the fifo
+ */
+static inline uint32_t
+kni_fifo_free_count(struct rte_kni_fifo *fifo)
+{
+	uint32_t fifo_write = __KNI_LOAD_ACQUIRE(&fifo->write);
+	uint32_t fifo_read = __KNI_LOAD_ACQUIRE(&fifo->read);
+	return (fifo_read - fifo_write - 1) & (fifo->len - 1);
+}
+
-- 
2.17.1


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dpdk-dev] [v3] kni: fix possible kernel crash with va2pa
  2019-06-25 15:04   ` [dpdk-dev] [PATCH v3] " Yangchao Zhou
@ 2019-07-02 20:07     ` Junxiao Shi
  2019-07-10 20:11       ` Ferruh Yigit
  2019-07-10 20:09     ` [dpdk-dev] [PATCH v3] " Ferruh Yigit
  1 sibling, 1 reply; 19+ messages in thread
From: Junxiao Shi @ 2019-07-02 20:07 UTC (permalink / raw)
  To: dev; +Cc: Yangchao Zhou, stephen, ferruh.yigit, sodey

I am battling a related problem as reported on
https://bugs.dpdk.org/show_bug.cgi?id=183 and this patch seems
relevant, so I applied this patch on 196a46fab6eeb3ce2039e3bcaca80f8ba43ffc8d

However, this patch does not work for me:
with CONFIG_RTE_LIBRTE_MBUF_DEBUG enabled, kni_free_mbufs's invocation of
rte_pktmbuf_free throws "bad mbuf pool" error.

While all mbufs and segments in kni->rx_q now have physical addresses,
the mbufs and segments placed back to kni->free_q still have (mis-)calculated
virtual address. The pa2va function is not working properly.

Consequently, userspace side is passing wrong pointer to rte_pktmbuf_free,
so that application crashes with CONFIG_RTE_LIBRTE_MBUF_DEBUG enabled.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dpdk-dev] [PATCH v3] kni: fix possible kernel crash with va2pa
  2019-06-25 15:04   ` [dpdk-dev] [PATCH v3] " Yangchao Zhou
  2019-07-02 20:07     ` [dpdk-dev] [v3] " Junxiao Shi
@ 2019-07-10 20:09     ` Ferruh Yigit
  2019-07-11  7:46       ` Ferruh Yigit
  1 sibling, 1 reply; 19+ messages in thread
From: Ferruh Yigit @ 2019-07-10 20:09 UTC (permalink / raw)
  To: Yangchao Zhou, dev; +Cc: stephen, sodey, Junxiao Shi

On 6/25/2019 4:04 PM, Yangchao Zhou wrote:
> va2pa depends on the physical address and virtual address offset of
> current mbuf. It may get the wrong physical address of next mbuf which
> allocated in another hugepage segment.
> 
> In rte_mempool_populate_default(), trying to allocate whole block of
> contiguous memory could be failed. Then, it would reserve memory in
> several memzones that have different physical address and virtual address
> offsets. The rte_mempool_populate_default() is used by
> rte_pktmbuf_pool_create().
> 
> Fixes: 8451269e6d7b ("kni: remove continuous memory restriction")
> 
> Signed-off-by: Yangchao Zhou <zhouyates@gmail.com>

Overall looks good to me, not from this patch but can you please check below
comment too.
Also there is a comment from Junxiao, lets clear it before the ack.

<...>

> @@ -396,7 +401,7 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
>  	uint32_t ret;
>  	uint32_t len;
>  	uint32_t i, num, num_rq, num_tq, num_aq, num_fq;
> -	struct rte_kni_mbuf *kva;
> +	struct rte_kni_mbuf *kva, *next_kva;
>  	void *data_kva;
>  	struct rte_kni_mbuf *alloc_kva;
>  	void *alloc_data_kva;
> @@ -439,6 +444,13 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
>  			data_kva = kva2data_kva(kva);
>  			kni->va[i] = pa2va(kni->pa[i], kva);
>  
> +			while (kva->next) {
> +				next_kva = pa2kva(kva->next);
> +				/* Convert physical address to virtual address */
> +				kva->next = pa2va(kva->next, next_kva);
> +				kva = next_kva;
> +			}

Not done in this patch, but in 'kni_net_rx_lo_fifo()' the len calculated as
'len = kva->pkt_len;'

But while copying 'data' to 'alloc_data' the segmentation is not taken into
account and 'len' is used:
memcpy(alloc_data_kva, data_kva, len);

This may lead overflow 'alloc_data_kva' for some 'pkt_len' values.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dpdk-dev] [v3] kni: fix possible kernel crash with va2pa
  2019-07-02 20:07     ` [dpdk-dev] [v3] " Junxiao Shi
@ 2019-07-10 20:11       ` Ferruh Yigit
  2019-07-10 20:40         ` yoursunny
  0 siblings, 1 reply; 19+ messages in thread
From: Ferruh Yigit @ 2019-07-10 20:11 UTC (permalink / raw)
  To: Junxiao Shi, dev; +Cc: Yangchao Zhou, stephen, sodey

On 7/2/2019 9:07 PM, Junxiao Shi wrote:
> I am battling a related problem as reported on
> https://bugs.dpdk.org/show_bug.cgi?id=183 and this patch seems
> relevant, so I applied this patch on 196a46fab6eeb3ce2039e3bcaca80f8ba43ffc8d
> 
> However, this patch does not work for me:
> with CONFIG_RTE_LIBRTE_MBUF_DEBUG enabled, kni_free_mbufs's invocation of
> rte_pktmbuf_free throws "bad mbuf pool" error.
> 
> While all mbufs and segments in kni->rx_q now have physical addresses,
> the mbufs and segments placed back to kni->free_q still have (mis-)calculated
> virtual address. The pa2va function is not working properly.
> 
> Consequently, userspace side is passing wrong pointer to rte_pktmbuf_free,
> so that application crashes with CONFIG_RTE_LIBRTE_MBUF_DEBUG enabled.
> 

Hi Junxiao,

I don't see any issue in the code, and as far as I tested not seeing any issue.

Can you please provide more information how to reproduce the issue, is it only
enabling "CONFIG_RTE_LIBRTE_MBUF_DEBUG" config?

Thanks,
ferruh

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dpdk-dev] [v3] kni: fix possible kernel crash with va2pa
  2019-07-10 20:11       ` Ferruh Yigit
@ 2019-07-10 20:40         ` yoursunny
  2019-07-10 21:23           ` Ferruh Yigit
  0 siblings, 1 reply; 19+ messages in thread
From: yoursunny @ 2019-07-10 20:40 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: dev, Yangchao Zhou, stephen, sodey

Hi Ferruh

I've uploaded my test case to Bug 183.
You can determine whether it is a bug that should be covered by this
patch, or it should be handled separately.

Yours, Junxiao

On Wed, Jul 10, 2019 at 4:11 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>
> On 7/2/2019 9:07 PM, Junxiao Shi wrote:
> > I am battling a related problem as reported on
> > https://bugs.dpdk.org/show_bug.cgi?id=183 and this patch seems
> > relevant, so I applied this patch on 196a46fab6eeb3ce2039e3bcaca80f8ba43ffc8d
> >
> > However, this patch does not work for me:
> > with CONFIG_RTE_LIBRTE_MBUF_DEBUG enabled, kni_free_mbufs's invocation of
> > rte_pktmbuf_free throws "bad mbuf pool" error.
> >
> > While all mbufs and segments in kni->rx_q now have physical addresses,
> > the mbufs and segments placed back to kni->free_q still have (mis-)calculated
> > virtual address. The pa2va function is not working properly.
> >
> > Consequently, userspace side is passing wrong pointer to rte_pktmbuf_free,
> > so that application crashes with CONFIG_RTE_LIBRTE_MBUF_DEBUG enabled.
> >
>
> Hi Junxiao,
>
> I don't see any issue in the code, and as far as I tested not seeing any issue.
>
> Can you please provide more information how to reproduce the issue, is it only
> enabling "CONFIG_RTE_LIBRTE_MBUF_DEBUG" config?
>
> Thanks,
> ferruh

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dpdk-dev] [v3] kni: fix possible kernel crash with va2pa
  2019-07-10 20:40         ` yoursunny
@ 2019-07-10 21:23           ` Ferruh Yigit
  2019-07-10 23:52             ` yoursunny
  0 siblings, 1 reply; 19+ messages in thread
From: Ferruh Yigit @ 2019-07-10 21:23 UTC (permalink / raw)
  To: yoursunny; +Cc: dev, Yangchao Zhou, stephen, sodey

On 7/10/2019 9:40 PM, yoursunny wrote:
> Hi Ferruh
> 
> I've uploaded my test case to Bug 183.
> You can determine whether it is a bug that should be covered by this
> patch, or it should be handled separately.

Thanks for clarification,
your use case is about indirect mbufs, I can see why it is not working from your
explanation in the bug report.
I think previously when all mbufs were coming from same physically continuous
memory, indirect mbufs should work. Right now I don't know how to support them.

This defect fixes an issue for multi segment packages, caused by wrong address
calculation for next segments.

So I am for continuing with this patch, independent from indirect mbufs issue.

Do you have any other issue, except than the indirect mbuf issue, if you have
other KNI use cases?


> 
> Yours, Junxiao
> 
> On Wed, Jul 10, 2019 at 4:11 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>>
>> On 7/2/2019 9:07 PM, Junxiao Shi wrote:
>>> I am battling a related problem as reported on
>>> https://bugs.dpdk.org/show_bug.cgi?id=183 and this patch seems
>>> relevant, so I applied this patch on 196a46fab6eeb3ce2039e3bcaca80f8ba43ffc8d
>>>
>>> However, this patch does not work for me:
>>> with CONFIG_RTE_LIBRTE_MBUF_DEBUG enabled, kni_free_mbufs's invocation of
>>> rte_pktmbuf_free throws "bad mbuf pool" error.
>>>
>>> While all mbufs and segments in kni->rx_q now have physical addresses,
>>> the mbufs and segments placed back to kni->free_q still have (mis-)calculated
>>> virtual address. The pa2va function is not working properly.
>>>
>>> Consequently, userspace side is passing wrong pointer to rte_pktmbuf_free,
>>> so that application crashes with CONFIG_RTE_LIBRTE_MBUF_DEBUG enabled.
>>>
>>
>> Hi Junxiao,
>>
>> I don't see any issue in the code, and as far as I tested not seeing any issue.
>>
>> Can you please provide more information how to reproduce the issue, is it only
>> enabling "CONFIG_RTE_LIBRTE_MBUF_DEBUG" config?
>>
>> Thanks,
>> ferruh


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dpdk-dev] [v3] kni: fix possible kernel crash with va2pa
  2019-07-10 21:23           ` Ferruh Yigit
@ 2019-07-10 23:52             ` yoursunny
  0 siblings, 0 replies; 19+ messages in thread
From: yoursunny @ 2019-07-10 23:52 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: dev, Yangchao Zhou, stephen, sodey

Hi Ferruh

I have no objection of merging the current commit.
I do have some ideas on how to support indirect mbufs. Please review
at https://bugs.dpdk.org/show_bug.cgi?id=183#c4

Yours, Junxiao

On Wed, Jul 10, 2019 at 5:23 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>
> your use case is about indirect mbufs, I can see why it is not working from your
> explanation in the bug report.
> I think previously when all mbufs were coming from same physically continuous
> memory, indirect mbufs should work. Right now I don't know how to support them.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dpdk-dev] [PATCH v3] kni: fix possible kernel crash with va2pa
  2019-07-10 20:09     ` [dpdk-dev] [PATCH v3] " Ferruh Yigit
@ 2019-07-11  7:46       ` Ferruh Yigit
  2019-07-15 20:50         ` Thomas Monjalon
  0 siblings, 1 reply; 19+ messages in thread
From: Ferruh Yigit @ 2019-07-11  7:46 UTC (permalink / raw)
  To: Yangchao Zhou, dev; +Cc: stephen, sodey, Junxiao Shi

On 7/10/2019 9:09 PM, Ferruh Yigit wrote:
> On 6/25/2019 4:04 PM, Yangchao Zhou wrote:
>> va2pa depends on the physical address and virtual address offset of
>> current mbuf. It may get the wrong physical address of next mbuf which
>> allocated in another hugepage segment.
>>
>> In rte_mempool_populate_default(), trying to allocate whole block of
>> contiguous memory could be failed. Then, it would reserve memory in
>> several memzones that have different physical address and virtual address
>> offsets. The rte_mempool_populate_default() is used by
>> rte_pktmbuf_pool_create().
>>
>> Fixes: 8451269e6d7b ("kni: remove continuous memory restriction")
>>
>> Signed-off-by: Yangchao Zhou <zhouyates@gmail.com>
> 
> Overall looks good to me, not from this patch but can you please check below
> comment too.
> Also there is a comment from Junxiao, lets clear it before the ack.
> 

Acked-by: Ferruh Yigit <ferruh.yigit@intel.com>

> <...>
> 
>> @@ -396,7 +401,7 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
>>  	uint32_t ret;
>>  	uint32_t len;
>>  	uint32_t i, num, num_rq, num_tq, num_aq, num_fq;
>> -	struct rte_kni_mbuf *kva;
>> +	struct rte_kni_mbuf *kva, *next_kva;
>>  	void *data_kva;
>>  	struct rte_kni_mbuf *alloc_kva;
>>  	void *alloc_data_kva;
>> @@ -439,6 +444,13 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
>>  			data_kva = kva2data_kva(kva);
>>  			kni->va[i] = pa2va(kni->pa[i], kva);
>>  
>> +			while (kva->next) {
>> +				next_kva = pa2kva(kva->next);
>> +				/* Convert physical address to virtual address */
>> +				kva->next = pa2va(kva->next, next_kva);
>> +				kva = next_kva;
>> +			}
> 
> Not done in this patch, but in 'kni_net_rx_lo_fifo()' the len calculated as
> 'len = kva->pkt_len;'
> 
> But while copying 'data' to 'alloc_data' the segmentation is not taken into
> account and 'len' is used:
> memcpy(alloc_data_kva, data_kva, len);
> 
> This may lead overflow 'alloc_data_kva' for some 'pkt_len' values.
> 

I will send separate patch for this.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dpdk-dev] [PATCH v3] kni: fix possible kernel crash with va2pa
  2019-07-11  7:46       ` Ferruh Yigit
@ 2019-07-15 20:50         ` Thomas Monjalon
  0 siblings, 0 replies; 19+ messages in thread
From: Thomas Monjalon @ 2019-07-15 20:50 UTC (permalink / raw)
  To: Yangchao Zhou; +Cc: dev, Ferruh Yigit, stephen, sodey, Junxiao Shi

11/07/2019 09:46, Ferruh Yigit:
> On 7/10/2019 9:09 PM, Ferruh Yigit wrote:
> > On 6/25/2019 4:04 PM, Yangchao Zhou wrote:
> >> va2pa depends on the physical address and virtual address offset of
> >> current mbuf. It may get the wrong physical address of next mbuf which
> >> allocated in another hugepage segment.
> >>
> >> In rte_mempool_populate_default(), trying to allocate whole block of
> >> contiguous memory could be failed. Then, it would reserve memory in
> >> several memzones that have different physical address and virtual address
> >> offsets. The rte_mempool_populate_default() is used by
> >> rte_pktmbuf_pool_create().
> >>
> >> Fixes: 8451269e6d7b ("kni: remove continuous memory restriction")
> >>
> >> Signed-off-by: Yangchao Zhou <zhouyates@gmail.com>
> > 
> > Overall looks good to me, not from this patch but can you please check below
> > comment too.
> > Also there is a comment from Junxiao, lets clear it before the ack.
> > 
> 
> Acked-by: Ferruh Yigit <ferruh.yigit@intel.com>

The commit log does not really explained neither the use case
nor the solution. But as you acked it...
Applied, thanks




^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2019-07-15 20:50 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-28  7:30 [dpdk-dev] [PATCH] kni: fix possible kernel crash with va2pa Yangchao Zhou
2019-03-06 17:31 ` Ferruh Yigit
2019-06-14 18:41   ` Dey, Souvik
2019-03-12  9:22 ` [dpdk-dev] [PATCH v2] " Yangchao Zhou
2019-03-19 18:35   ` Ferruh Yigit
2019-03-19 18:35     ` Ferruh Yigit
2019-03-22 20:49   ` Ferruh Yigit
2019-03-22 20:49     ` Ferruh Yigit
2019-06-18  4:06     ` Dey, Souvik
2019-06-18 15:48   ` Stephen Hemminger
2019-06-25 15:04   ` [dpdk-dev] [PATCH v3] " Yangchao Zhou
2019-07-02 20:07     ` [dpdk-dev] [v3] " Junxiao Shi
2019-07-10 20:11       ` Ferruh Yigit
2019-07-10 20:40         ` yoursunny
2019-07-10 21:23           ` Ferruh Yigit
2019-07-10 23:52             ` yoursunny
2019-07-10 20:09     ` [dpdk-dev] [PATCH v3] " Ferruh Yigit
2019-07-11  7:46       ` Ferruh Yigit
2019-07-15 20:50         ` Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).