* [dpdk-dev] [PATCH v15 1/2] kni: support userspace VA
2019-11-17 15:12 [dpdk-dev] [PATCH v15 0/2] kni: support IOVA mode David Marchand
@ 2019-11-17 15:12 ` David Marchand
2019-11-18 14:04 ` Jerin Jacob
2019-11-20 10:47 ` Ferruh Yigit
2019-11-17 15:12 ` [dpdk-dev] [PATCH v15 2/2] eal/linux: remove KNI restriction on IOVA David Marchand
2019-11-18 15:03 ` [dpdk-dev] [PATCH v15 0/2] kni: support IOVA mode David Marchand
2 siblings, 2 replies; 7+ messages in thread
From: David Marchand @ 2019-11-17 15:12 UTC (permalink / raw)
To: dev
Cc: thomas, kirankumark, olivier.matz, ferruh.yigit, anatoly.burakov,
arybchenko, stephen, vattunuru
From: Vamsi Attunuru <vattunuru@marvell.com>
Patch adds support for kernel module to work in IOVA = VA mode by
providing address translation routines to convert userspace VA to
kernel VA.
KNI performance using PA is not changed by this patch.
But comparing KNI using PA to KNI using VA, the latter will have lower
performance due to the cost of the added translation.
This translation is implemented only with kernel versions starting 4.6.0.
Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
Signed-off-by: Kiran Kumar K <kirankumark@marvell.com>
---
Changelog since v14:
- reworded commitlog,
---
kernel/linux/kni/compat.h | 14 +++++
kernel/linux/kni/kni_dev.h | 42 +++++++++++++
kernel/linux/kni/kni_misc.c | 39 +++++++++---
kernel/linux/kni/kni_net.c | 62 +++++++++++++++----
.../linux/eal/include/rte_kni_common.h | 1 +
5 files changed, 136 insertions(+), 22 deletions(-)
diff --git a/kernel/linux/kni/compat.h b/kernel/linux/kni/compat.h
index 562d8bf94..062b170ef 100644
--- a/kernel/linux/kni/compat.h
+++ b/kernel/linux/kni/compat.h
@@ -121,3 +121,17 @@
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
#define HAVE_SIGNAL_FUNCTIONS_OWN_HEADER
#endif
+
+#if KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE
+
+#define HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+
+#if KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE
+#define GET_USER_PAGES_REMOTE_API_V1
+#elif KERNEL_VERSION(4, 9, 0) == LINUX_VERSION_CODE
+#define GET_USER_PAGES_REMOTE_API_V2
+#else
+#define GET_USER_PAGES_REMOTE_API_V3
+#endif
+
+#endif
diff --git a/kernel/linux/kni/kni_dev.h b/kernel/linux/kni/kni_dev.h
index c1ca6789c..fb641b696 100644
--- a/kernel/linux/kni/kni_dev.h
+++ b/kernel/linux/kni/kni_dev.h
@@ -41,6 +41,8 @@ struct kni_dev {
/* kni list */
struct list_head list;
+ uint8_t iova_mode;
+
uint32_t core_id; /* Core ID to bind */
char name[RTE_KNI_NAMESIZE]; /* Network device name */
struct task_struct *pthread;
@@ -84,8 +86,48 @@ struct kni_dev {
void *va[MBUF_BURST_SZ];
void *alloc_pa[MBUF_BURST_SZ];
void *alloc_va[MBUF_BURST_SZ];
+
+ struct task_struct *usr_tsk;
};
+#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+static inline phys_addr_t iova_to_phys(struct task_struct *tsk,
+ unsigned long iova)
+{
+ phys_addr_t offset, phys_addr;
+ struct page *page = NULL;
+ long ret;
+
+ offset = iova & (PAGE_SIZE - 1);
+
+ /* Read one page struct info */
+#ifdef GET_USER_PAGES_REMOTE_API_V3
+ ret = get_user_pages_remote(tsk, tsk->mm, iova, 1,
+ FOLL_TOUCH, &page, NULL, NULL);
+#endif
+#ifdef GET_USER_PAGES_REMOTE_API_V2
+ ret = get_user_pages_remote(tsk, tsk->mm, iova, 1,
+ FOLL_TOUCH, &page, NULL);
+#endif
+#ifdef GET_USER_PAGES_REMOTE_API_V1
+ ret = get_user_pages_remote(tsk, tsk->mm, iova, 1,
+ 0, 0, &page, NULL);
+#endif
+ if (ret < 0)
+ return 0;
+
+ phys_addr = page_to_phys(page) | offset;
+ put_page(page);
+
+ return phys_addr;
+}
+
+static inline void *iova_to_kva(struct task_struct *tsk, unsigned long iova)
+{
+ return phys_to_virt(iova_to_phys(tsk, iova));
+}
+#endif
+
void kni_net_release_fifo_phy(struct kni_dev *kni);
void kni_net_rx(struct kni_dev *kni);
void kni_net_init(struct net_device *dev);
diff --git a/kernel/linux/kni/kni_misc.c b/kernel/linux/kni/kni_misc.c
index 84ef03b5f..cda71bde0 100644
--- a/kernel/linux/kni/kni_misc.c
+++ b/kernel/linux/kni/kni_misc.c
@@ -348,15 +348,36 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
/* Translate user space info into kernel space info */
- kni->tx_q = phys_to_virt(dev_info.tx_phys);
- kni->rx_q = phys_to_virt(dev_info.rx_phys);
- kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
- kni->free_q = phys_to_virt(dev_info.free_phys);
-
- kni->req_q = phys_to_virt(dev_info.req_phys);
- kni->resp_q = phys_to_virt(dev_info.resp_phys);
- kni->sync_va = dev_info.sync_va;
- kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+ if (dev_info.iova_mode) {
+#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+ kni->tx_q = iova_to_kva(current, dev_info.tx_phys);
+ kni->rx_q = iova_to_kva(current, dev_info.rx_phys);
+ kni->alloc_q = iova_to_kva(current, dev_info.alloc_phys);
+ kni->free_q = iova_to_kva(current, dev_info.free_phys);
+
+ kni->req_q = iova_to_kva(current, dev_info.req_phys);
+ kni->resp_q = iova_to_kva(current, dev_info.resp_phys);
+ kni->sync_va = dev_info.sync_va;
+ kni->sync_kva = iova_to_kva(current, dev_info.sync_phys);
+ kni->usr_tsk = current;
+ kni->iova_mode = 1;
+#else
+ pr_err("KNI module does not support IOVA to VA translation\n");
+ return -EINVAL;
+#endif
+ } else {
+
+ kni->tx_q = phys_to_virt(dev_info.tx_phys);
+ kni->rx_q = phys_to_virt(dev_info.rx_phys);
+ kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
+ kni->free_q = phys_to_virt(dev_info.free_phys);
+
+ kni->req_q = phys_to_virt(dev_info.req_phys);
+ kni->resp_q = phys_to_virt(dev_info.resp_phys);
+ kni->sync_va = dev_info.sync_va;
+ kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+ kni->iova_mode = 0;
+ }
kni->mbuf_size = dev_info.mbuf_size;
diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index f25b1277b..1ba9b1b99 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -36,6 +36,22 @@ static void kni_net_rx_normal(struct kni_dev *kni);
/* kni rx function pointer, with default to normal rx */
static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;
+#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+/* iova to kernel virtual address */
+static inline void *
+iova2kva(struct kni_dev *kni, void *iova)
+{
+ return phys_to_virt(iova_to_phys(kni->usr_tsk, (unsigned long)iova));
+}
+
+static inline void *
+iova2data_kva(struct kni_dev *kni, struct rte_kni_mbuf *m)
+{
+ return phys_to_virt(iova_to_phys(kni->usr_tsk, m->buf_physaddr) +
+ m->data_off);
+}
+#endif
+
/* physical address to kernel virtual address */
static void *
pa2kva(void *pa)
@@ -62,6 +78,26 @@ kva2data_kva(struct rte_kni_mbuf *m)
return phys_to_virt(m->buf_physaddr + m->data_off);
}
+static inline void *
+get_kva(struct kni_dev *kni, void *pa)
+{
+#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+ if (kni->iova_mode == 1)
+ return iova2kva(kni, pa);
+#endif
+ return pa2kva(pa);
+}
+
+static inline void *
+get_data_kva(struct kni_dev *kni, void *pkt_kva)
+{
+#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+ if (kni->iova_mode == 1)
+ return iova2data_kva(kni, pkt_kva);
+#endif
+ return kva2data_kva(pkt_kva);
+}
+
/*
* It can be called to process the request.
*/
@@ -178,7 +214,7 @@ kni_fifo_trans_pa2va(struct kni_dev *kni,
return;
for (i = 0; i < num_rx; i++) {
- kva = pa2kva(kni->pa[i]);
+ kva = get_kva(kni, kni->pa[i]);
kni->va[i] = pa2va(kni->pa[i], kva);
kva_nb_segs = kva->nb_segs;
@@ -266,8 +302,8 @@ kni_net_tx(struct sk_buff *skb, struct net_device *dev)
if (likely(ret == 1)) {
void *data_kva;
- pkt_kva = pa2kva(pkt_pa);
- data_kva = kva2data_kva(pkt_kva);
+ pkt_kva = get_kva(kni, pkt_pa);
+ data_kva = get_data_kva(kni, pkt_kva);
pkt_va = pa2va(pkt_pa, pkt_kva);
len = skb->len;
@@ -338,9 +374,9 @@ kni_net_rx_normal(struct kni_dev *kni)
/* Transfer received packets to netif */
for (i = 0; i < num_rx; i++) {
- kva = pa2kva(kni->pa[i]);
+ kva = get_kva(kni, kni->pa[i]);
len = kva->pkt_len;
- data_kva = kva2data_kva(kva);
+ data_kva = get_data_kva(kni, kva);
kni->va[i] = pa2va(kni->pa[i], kva);
skb = netdev_alloc_skb(dev, len);
@@ -437,9 +473,9 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
num = ret;
/* Copy mbufs */
for (i = 0; i < num; i++) {
- kva = pa2kva(kni->pa[i]);
+ kva = get_kva(kni, kni->pa[i]);
len = kva->data_len;
- data_kva = kva2data_kva(kva);
+ data_kva = get_data_kva(kni, kva);
kni->va[i] = pa2va(kni->pa[i], kva);
while (kva->next) {
@@ -449,8 +485,8 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
kva = next_kva;
}
- alloc_kva = pa2kva(kni->alloc_pa[i]);
- alloc_data_kva = kva2data_kva(alloc_kva);
+ alloc_kva = get_kva(kni, kni->alloc_pa[i]);
+ alloc_data_kva = get_data_kva(kni, alloc_kva);
kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);
memcpy(alloc_data_kva, data_kva, len);
@@ -517,9 +553,9 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
/* Copy mbufs to sk buffer and then call tx interface */
for (i = 0; i < num; i++) {
- kva = pa2kva(kni->pa[i]);
+ kva = get_kva(kni, kni->pa[i]);
len = kva->pkt_len;
- data_kva = kva2data_kva(kva);
+ data_kva = get_data_kva(kni, kva);
kni->va[i] = pa2va(kni->pa[i], kva);
skb = netdev_alloc_skb(dev, len);
@@ -550,8 +586,8 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
break;
prev_kva = kva;
- kva = pa2kva(kva->next);
- data_kva = kva2data_kva(kva);
+ kva = get_kva(kni, kva->next);
+ data_kva = get_data_kva(kni, kva);
/* Convert physical address to virtual address */
prev_kva->next = pa2va(prev_kva->next, kva);
}
diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h b/lib/librte_eal/linux/eal/include/rte_kni_common.h
index 46f75a710..2427a965c 100644
--- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
+++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
@@ -125,6 +125,7 @@ struct rte_kni_device_info {
unsigned int min_mtu;
unsigned int max_mtu;
uint8_t mac_addr[6];
+ uint8_t iova_mode;
};
#define KNI_DEVICE "kni"
--
2.23.0
^ permalink raw reply [flat|nested] 7+ messages in thread
* [dpdk-dev] [PATCH v15 2/2] eal/linux: remove KNI restriction on IOVA
2019-11-17 15:12 [dpdk-dev] [PATCH v15 0/2] kni: support IOVA mode David Marchand
2019-11-17 15:12 ` [dpdk-dev] [PATCH v15 1/2] kni: support userspace VA David Marchand
@ 2019-11-17 15:12 ` David Marchand
2019-11-18 14:04 ` Jerin Jacob
2019-11-18 15:03 ` [dpdk-dev] [PATCH v15 0/2] kni: support IOVA mode David Marchand
2 siblings, 1 reply; 7+ messages in thread
From: David Marchand @ 2019-11-17 15:12 UTC (permalink / raw)
To: dev
Cc: thomas, kirankumark, olivier.matz, ferruh.yigit, anatoly.burakov,
arybchenko, stephen, vattunuru
From: Vamsi Attunuru <vattunuru@marvell.com>
Now that KNI supports VA (with kernel versions starting 4.6.0), we can
accept IOVA as VA, but KNI must be configured for this.
Pass iova_mode when creating KNI netdevs.
So far, IOVA detection policy forced IOVA as PA when KNI is loaded,
whatever the buses IOVA requirements were.
We can now use IOVA as VA, but this comes with a cost in KNI.
When no constraint is expressed by the buses, keep the current behavior
of choosing PA.
Note: this change supposes that dpdk is built on the same kernel as
the target system kernel; no objection has been expressed on this topic.
Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
Signed-off-by: Kiran Kumar K <kirankumark@marvell.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changelog since v14:
- reworded commitlog,
- added note on kernel version check,
- updated EAL documentation,
- fixed broken LTO link in release note update,
- s/eal/EAL/g,
- inverted kernel version check in KNI,
---
doc/guides/prog_guide/env_abstraction_layer.rst | 3 +++
doc/guides/prog_guide/kernel_nic_interface.rst | 14 ++++++++++++++
doc/guides/rel_notes/release_19_11.rst | 11 +++++++++++
lib/librte_eal/linux/eal/eal.c | 11 +++++++++--
lib/librte_kni/rte_kni.c | 5 +++++
5 files changed, 42 insertions(+), 2 deletions(-)
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index cd8e3003e..6e7c2080a 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -475,6 +475,9 @@ devices would fail anyway.
``RTE_PCI_DRV_NEED_IOVA_AS_VA`` flag is used to dictate that this PCI
driver can only work in RTE_IOVA_VA mode.
+ When the KNI kernel module is detected, RTE_IOVA_PA mode is preferred as a
+ performance penalty is expected in RTE_IOVA_VA mode.
+
IOVA Mode Configuration
~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/guides/prog_guide/kernel_nic_interface.rst b/doc/guides/prog_guide/kernel_nic_interface.rst
index e12634ddc..c4479ffbf 100644
--- a/doc/guides/prog_guide/kernel_nic_interface.rst
+++ b/doc/guides/prog_guide/kernel_nic_interface.rst
@@ -300,6 +300,20 @@ The sk_buff is then freed and the mbuf sent in the tx_q FIFO.
The DPDK TX thread dequeues the mbuf and sends it to the PMD via ``rte_eth_tx_burst()``.
It then puts the mbuf back in the cache.
+IOVA = VA: Support
+------------------
+
+KNI operates in IOVA_VA scheme when
+
+- LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0) and
+- EAL option `iova-mode=va` is passed or bus IOVA scheme in the DPDK is selected
+ as RTE_IOVA_VA.
+
+Due to IOVA to KVA address translations, based on the KNI use case there
+can be a performance impact. For mitigation, forcing IOVA to PA via EAL
+"--iova-mode=pa" option can be used, IOVA_DC bus iommu scheme can also
+result in IOVA as PA.
+
Ethtool
-------
diff --git a/doc/guides/rel_notes/release_19_11.rst b/doc/guides/rel_notes/release_19_11.rst
index c0045a91f..21be600ab 100644
--- a/doc/guides/rel_notes/release_19_11.rst
+++ b/doc/guides/rel_notes/release_19_11.rst
@@ -294,6 +294,17 @@ New Features
See :doc:`../prog_guide/lto` for more information:
+* **Added IOVA as VA support for KNI.**
+
+ * Added IOVA = VA support for KNI, KNI can operate in IOVA = VA mode when
+ `iova-mode=va` EAL option is passed to the application or when bus IOVA
+ scheme is selected as RTE_IOVA_VA. This mode only works on Linux Kernel
+ versions 4.6.0 and above.
+
+ * Due to IOVA to KVA address translations, based on the KNI use case there
+ can be a performance impact. For mitigation, forcing IOVA to PA via EAL
+ "--iova-mode=pa" option can be used, IOVA_DC bus iommu scheme can also
+ result in IOVA as PA.
Removed Items
diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c
index 9e2d50cfb..b5b71500c 100644
--- a/lib/librte_eal/linux/eal/eal.c
+++ b/lib/librte_eal/linux/eal/eal.c
@@ -1073,6 +1073,11 @@ rte_eal_init(int argc, char **argv)
*/
iova_mode = RTE_IOVA_VA;
RTE_LOG(DEBUG, EAL, "Physical addresses are unavailable, selecting IOVA as VA mode.\n");
+#if defined(RTE_LIBRTE_KNI) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
+ } else if (rte_eal_check_module("rte_kni") == 1) {
+ iova_mode = RTE_IOVA_PA;
+ RTE_LOG(DEBUG, EAL, "KNI is loaded, selecting IOVA as PA mode for better KNI performance.\n");
+#endif
} else if (is_iommu_enabled()) {
/* we have an IOMMU, pick IOVA as VA mode */
iova_mode = RTE_IOVA_VA;
@@ -1085,8 +1090,10 @@ rte_eal_init(int argc, char **argv)
RTE_LOG(DEBUG, EAL, "IOMMU is not available, selecting IOVA as PA mode.\n");
}
}
-#ifdef RTE_LIBRTE_KNI
- /* Workaround for KNI which requires physical address to work */
+#if defined(RTE_LIBRTE_KNI) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+ /* Workaround for KNI which requires physical address to work
+ * in kernels < 4.6
+ */
if (iova_mode == RTE_IOVA_VA &&
rte_eal_check_module("rte_kni") == 1) {
if (phys_addrs) {
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 7fbcf2201..86995fc81 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -10,6 +10,7 @@
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
+#include <linux/version.h>
#include <rte_spinlock.h>
#include <rte_string_fns.h>
@@ -97,10 +98,12 @@ static volatile int kni_fd = -1;
int
rte_kni_init(unsigned int max_kni_ifaces __rte_unused)
{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
if (rte_eal_iova_mode() != RTE_IOVA_PA) {
RTE_LOG(ERR, KNI, "KNI requires IOVA as PA\n");
return -1;
}
+#endif
/* Check FD and open */
if (kni_fd < 0) {
@@ -302,6 +305,8 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
kni->group_id = conf->group_id;
kni->mbuf_size = conf->mbuf_size;
+ dev_info.iova_mode = (rte_eal_iova_mode() == RTE_IOVA_VA) ? 1 : 0;
+
ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
if (ret < 0)
goto ioctl_fail;
--
2.23.0
^ permalink raw reply [flat|nested] 7+ messages in thread