DPDK patches and discussions
 help / color / mirror / Atom feed
From: Huawei Xie <huawei.xie@intel.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH 2/2] vhost: realloc virtio_net and virtqueue to the same node of vring desc table
Date: Fri,  5 Jun 2015 11:13:25 +0800	[thread overview]
Message-ID: <1433474005-597-3-git-send-email-huawei.xie@intel.com> (raw)
In-Reply-To: <1433474005-597-1-git-send-email-huawei.xie@intel.com>

When we receive the address of the vring descriptor table in the
VHOST_SET_VRING_ADDR message, we try to reallocate the virtio_net device and
the virtqueue onto the same NUMA node as that descriptor table.

Signed-off-by: Huawei Xie <huawei.xie@intel.com>
---
 config/common_linuxapp        |  1 +
 lib/librte_vhost/Makefile     |  4 ++
 lib/librte_vhost/virtio-net.c | 93 +++++++++++++++++++++++++++++++++++++++++++
 mk/rte.app.mk                 |  3 ++
 4 files changed, 101 insertions(+)

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 0078dc9..4ace24e 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -421,6 +421,7 @@ CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
 #
 CONFIG_RTE_LIBRTE_VHOST=n
 CONFIG_RTE_LIBRTE_VHOST_USER=y
+CONFIG_RTE_LIBRTE_VHOST_NUMA=n
 CONFIG_RTE_LIBRTE_VHOST_DEBUG=n
 
 #
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index a8645a6..6681f22 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -46,6 +46,10 @@ CFLAGS += -I vhost_cuse -lfuse
 LDFLAGS += -lfuse
 endif
 
+ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
+LDFLAGS += -lnuma
+endif
+
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := virtio-net.c vhost_rxtx.c
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y)
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 19b74d6..8a80f5e 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -38,6 +38,9 @@
 #include <stdlib.h>
 #include <sys/mman.h>
 #include <unistd.h>
+#ifdef RTE_LIBRTE_VHOST_NUMA
+#include <numaif.h>
+#endif
 
 #include <sys/socket.h>
 
@@ -481,6 +484,93 @@ set_vring_num(struct vhost_device_ctx ctx, struct vhost_vring_state *state)
 }
 
 /*
+ * Reallocate the virtio_net and vhost_virtqueue data structures so they live
+ * on the same NUMA node as the memory of the vring descriptor table.
+ */
+#ifdef RTE_LIBRTE_VHOST_NUMA
+/* Returns the device to use afterwards: the relocated copy, else 'dev'. */
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index)
+{
+	int oldnode, newnode, ret;
+	struct virtio_net_config_ll *old_ll_dev, *new_ll_dev = NULL;
+	struct vhost_virtqueue *old_vq, *new_vq = NULL;
+	int realloc_dev, realloc_vq;
+
+	/* NOTE(review): assumes 'dev' is the first member of the list entry. */
+	old_ll_dev = (struct virtio_net_config_ll *)dev;
+	old_vq = dev->virtqueue[index];
+
+	/* MPOL_F_NODE | MPOL_F_ADDR: report the node backing each address. */
+	ret  = get_mempolicy(&newnode, NULL, 0, old_vq->desc,
+			MPOL_F_NODE | MPOL_F_ADDR);
+	ret = ret | get_mempolicy(&oldnode, NULL, 0, old_ll_dev,
+			MPOL_F_NODE | MPOL_F_ADDR);
+	if (ret) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Unable to get vring desc or dev numa information.\n");
+		return dev;
+	}
+	realloc_dev = (oldnode != newnode);
+
+	ret = get_mempolicy(&oldnode, NULL, 0, old_vq,
+			MPOL_F_NODE | MPOL_F_ADDR);
+	if (ret) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Unable to get vq numa information.\n");
+		return dev;
+	}
+	realloc_vq = (oldnode != newnode);
+
+	if (realloc_dev == 0 && realloc_vq == 0)
+		return dev;
+
+	if (realloc_dev)
+		new_ll_dev = rte_malloc_socket(NULL,
+			sizeof(struct virtio_net_config_ll), 0, newnode);
+	if (realloc_vq)
+		new_vq = rte_malloc_socket(NULL,
+			sizeof(struct vhost_virtqueue), 0, newnode);
+	/* A requested allocation failed: undo and keep everything in place. */
+	if ((realloc_dev && !new_ll_dev) || (realloc_vq && !new_vq)) {
+		rte_free(new_ll_dev);
+		rte_free(new_vq);
+		return dev;
+	}
+
+	if (realloc_vq)
+		memcpy(new_vq, old_vq, sizeof(*new_vq));
+	if (realloc_dev)
+		memcpy(new_ll_dev, old_ll_dev, sizeof(*new_ll_dev));
+	(new_ll_dev ? new_ll_dev : old_ll_dev)->dev.virtqueue[index] =
+		new_vq ? new_vq : old_vq;
+	if (realloc_vq)
+		rte_free(old_vq);
+	if (realloc_dev) {
+		/* Swap the copy into the device list. */
+		if (ll_root == old_ll_dev)
+			ll_root = new_ll_dev;
+		else {
+			struct virtio_net_config_ll *prev = ll_root;
+			while (prev->next != old_ll_dev)
+				prev = prev->next;
+			prev->next = new_ll_dev;
+			new_ll_dev->next = old_ll_dev->next;
+		}
+		rte_free(old_ll_dev);
+	}
+
+	return realloc_dev ? &new_ll_dev->dev : dev;
+}
+#else
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index __rte_unused)
+{
+	return dev; /* NUMA support compiled out: nothing to relocate */
+}
+#endif
+
+/*
  * Called from CUSE IOCTL: VHOST_SET_VRING_ADDR
  * The virtio device sends us the desc, used and avail ring addresses.
  * This function then converts these to our address space.
@@ -508,6 +598,9 @@ set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr)
 		return -1;
 	}
 
+	dev = numa_realloc(dev, addr->index);
+	vq = dev->virtqueue[addr->index];
+
 	vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev,
 			addr->avail_user_addr);
 	if (vq->avail == 0) {
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 1a2043a..5aba56a 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -92,6 +92,9 @@ endif # ! CONFIG_RTE_BUILD_COMBINE_LIBS
 
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP)       += -lpcap
 
+ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
+_LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST)          += -lnuma
+endif # CONFIG_RTE_LIBRTE_VHOST_NUMA
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),n)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST)          += -lfuse
 endif
-- 
1.8.1.4

  parent reply	other threads:[~2015-06-05  3:13 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-28  2:04 [dpdk-dev] [PATCH RFC 0/2] vhost: numa aware allocation of virtio_net device and vhost virt queue Huawei Xie
2015-05-28  2:04 ` [dpdk-dev] [PATCH RFC 1/2] vhost: malloc -> rte_malloc for virtio_net and virt queue allocation Huawei Xie
2015-05-28  2:04 ` [dpdk-dev] [PATCH RFC 2/2] vhost: realloc virtio_net and virtqueue to the same node of vring desc table Huawei Xie
2015-06-03 23:38 ` [dpdk-dev] [PATCH RFC 0/2] vhost: numa aware allocation of virtio_net device and vhost virt queue Long, Thomas
2015-06-05  3:13 ` [dpdk-dev] [PATCH " Huawei Xie
2015-06-05  3:13   ` [dpdk-dev] [PATCH 1/2] vhost: malloc -> rte_malloc for virtio_net and virt queue allocation Huawei Xie
2015-06-17 17:06     ` Thomas Monjalon
2015-06-05  3:13   ` Huawei Xie [this message]
2015-06-17 16:47     ` [dpdk-dev] [PATCH 2/2] vhost: realloc virtio_net and virtqueue to the same node of vring desc table Thomas Monjalon
2015-06-17 17:02       ` Thomas Monjalon
2015-06-09  4:05   ` [dpdk-dev] [PATCH 0/2] vhost: numa aware allocation of virtio_net device and vhost virt queue Long, Thomas
2015-06-18 16:59   ` [dpdk-dev] [PATCH v2 0/2] vhost: numa aware allocation of vhost device and queues Huawei Xie
2015-06-18 16:59     ` [dpdk-dev] [PATCH v2 1/2] vhost: use rte_malloc to allocate " Huawei Xie
2015-06-18 16:59     ` [dpdk-dev] [PATCH v2 2/2] vhost: realloc vhost device and queues to the same numa node of vring desc table Huawei Xie
2015-06-25  5:47     ` [dpdk-dev] [PATCH v3 0/2] vhost: numa aware allocation of vhost device and queues Huawei Xie
2015-06-25  5:47       ` [dpdk-dev] [PATCH v3 1/2] vhost: use rte_malloc to allocate " Huawei Xie
2015-06-25  5:47       ` [dpdk-dev] [PATCH v3 2/2] vhost: realloc vhost device and queues to the same numa node of vring desc table Huawei Xie
2015-06-29 17:04       ` [dpdk-dev] [PATCH v3 0/2] vhost: numa aware allocation of vhost device and queues Thomas Monjalon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1433474005-597-3-git-send-email-huawei.xie@intel.com \
    --to=huawei.xie@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).