DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH v3] eal_interrupts: add option for pending callback unregister
@ 2018-12-10  9:14 Jakub Grajciar
  2018-12-10  9:42 ` [dpdk-dev] [RFC] /net: memory interface (memif) Jakub Grajciar
  2018-12-10 10:06 ` [dpdk-dev] [RFC v2] " Jakub Grajciar
  0 siblings, 2 replies; 13+ messages in thread
From: Jakub Grajciar @ 2018-12-10  9:14 UTC (permalink / raw)
  To: dev; +Cc: Jakub Grajciar

use case: if callback is used to receive message from socket,
and the message received is disconnect/error, this callback needs
to be unregistered, but cannot because it is still active.

With this patch it is possible to mark the callback to be
unregistered once the interrupt process is done with this
interrupt source.

Signed-off-by: Jakub Grajciar <jgrajcia@cisco.com>
---
 .../common/include/rte_interrupts.h           | 30 +++++++
 lib/librte_eal/linuxapp/eal/eal_interrupts.c  | 85 ++++++++++++++++++-
 2 files changed, 113 insertions(+), 2 deletions(-)

diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h
index d751a6378..3946742ad 100644
--- a/lib/librte_eal/common/include/rte_interrupts.h
+++ b/lib/librte_eal/common/include/rte_interrupts.h
@@ -24,6 +24,13 @@ struct rte_intr_handle;
 /** Function to be registered for the specific interrupt */
 typedef void (*rte_intr_callback_fn)(void *cb_arg);
 
+/**
+ * Function to call after a callback is unregistered.
+ * Can be used to close fd and free cb_arg.
+ */
+typedef void (*rte_intr_unregister_callback_fn)(struct rte_intr_handle *intr_handle,
+						void *cb_arg);
+
 #include "rte_eal_interrupts.h"
 
 /**
@@ -61,6 +68,29 @@ int rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
 int rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
 				rte_intr_callback_fn cb, void *cb_arg);
 
+/**
+ * Mark the matching callbacks of the specified interrupt handle for removal.
+ * They are unregistered once the interrupt source is no longer active.
+ * Fails if the source is not active.
+ *
+ * @param intr_handle
+ *  pointer to the interrupt handle.
+ * @param cb_fn
+ *  callback address.
+ * @param cb_arg
+ *  address of parameter for callback, (void *)-1 means to remove all
+ *  registered which has the same callback address.
+ * @param ucb_fn
+ *  callback to call before cb_fn is unregistered (optional).
+ *  can be used to close fd and free cb_arg.
+ *
+ * @return
+ *  - On success, return the number of callback entities marked for remove.
+ *  - On failure, a negative value.
+ */
+int rte_intr_callback_unregister_pending(const struct rte_intr_handle *intr_handle,
+				rte_intr_callback_fn cb_fn, void *cb_arg,
+				rte_intr_unregister_callback_fn ucb_fn);
+
 /**
  * It enables the interrupt for the specified handle.
  *
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index cbac451e1..79ad5e8d7 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -76,6 +76,8 @@ struct rte_intr_callback {
 	TAILQ_ENTRY(rte_intr_callback) next;
 	rte_intr_callback_fn cb_fn;  /**< callback address */
 	void *cb_arg;                /**< parameter for callback */
+	uint8_t pending_delete;      /**< delete after callback is called */
+	rte_intr_unregister_callback_fn ucb_fn; /**< fn to call before cb is deleted */
 };
 
 struct rte_intr_source {
@@ -472,6 +474,8 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
 	}
 	callback->cb_fn = cb;
 	callback->cb_arg = cb_arg;
+	callback->pending_delete = 0;
+	callback->ucb_fn = NULL;
 
 	rte_spinlock_lock(&intr_lock);
 
@@ -518,6 +522,57 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
 	return ret;
 }
 
+/*
+ * Flag every callback of an active interrupt source that matches
+ * cb_fn/cb_arg so that it is removed after the source has been processed.
+ * cb_arg == (void *)-1 matches any argument. Returns the number of
+ * callbacks flagged, -EINVAL on a bad handle, -ENOENT when no source is
+ * registered for the fd and -EAGAIN when the source is not active.
+ */
+int
+rte_intr_callback_unregister_pending(const struct rte_intr_handle *intr_handle,
+				rte_intr_callback_fn cb_fn, void *cb_arg,
+				rte_intr_unregister_callback_fn ucb_fn)
+{
+	struct rte_intr_source *source;
+	struct rte_intr_callback *entry;
+	int marked;
+
+	/* reject a missing handle or an invalid fd before taking the lock */
+	if (intr_handle == NULL || intr_handle->fd < 0) {
+		RTE_LOG(ERR, EAL,
+		"Unregistering with invalid input parameter\n");
+		return -EINVAL;
+	}
+
+	rte_spinlock_lock(&intr_lock);
+
+	/* look up the interrupt source registered for this fd */
+	TAILQ_FOREACH(source, &intr_sources, next) {
+		if (source->intr_handle.fd == intr_handle->fd)
+			break;
+	}
+
+	if (source == NULL) {
+		/* no interrupt source registered for the fd */
+		marked = -ENOENT;
+	} else if (source->active == 0) {
+		/* deferred removal only makes sense while the source is active */
+		marked = -EAGAIN;
+	} else {
+		marked = 0;
+
+		/* nothing is unlinked here, so a plain forward walk is enough */
+		TAILQ_FOREACH(entry, &source->callbacks, next) {
+			if (entry->cb_fn != cb_fn)
+				continue;
+			if (cb_arg != (void *)-1 && entry->cb_arg != cb_arg)
+				continue;
+			entry->pending_delete = 1;
+			entry->ucb_fn = ucb_fn;
+			marked++;
+		}
+	}
+
+	rte_spinlock_unlock(&intr_lock);
+
+	return marked;
+}
+
+
 int
 rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
 			rte_intr_callback_fn cb_fn, void *cb_arg)
@@ -698,7 +753,7 @@ static int
 eal_intr_process_interrupts(struct epoll_event *events, int nfds)
 {
 	bool call = false;
-	int n, bytes_read;
+	int n, bytes_read, rv;
 	struct rte_intr_source *src;
 	struct rte_intr_callback *cb, *next;
 	union rte_intr_read_buffer buf;
@@ -823,9 +878,35 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds)
 				rte_spinlock_lock(&intr_lock);
 			}
 		}
-
 		/* we done with that interrupt source, release it. */
 		src->active = 0;
+
+		rv = 0;
+
+		/* check if any callback are supposed to be removed */
+		for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
+			next = TAILQ_NEXT(cb, next);
+			if (cb->pending_delete) {
+				TAILQ_REMOVE(&src->callbacks, cb, next);
+				if (cb->ucb_fn)
+					cb->ucb_fn(&src->intr_handle, cb->cb_arg);
+				free(cb);
+				rv++;
+			}
+		}
+
+		/* all callbacks for that source are removed. */
+		if (TAILQ_EMPTY(&src->callbacks)) {
+			TAILQ_REMOVE(&intr_sources, src, next);
+			free(src);
+		}
+
+		/* notify the pipe fd waited by epoll_wait to rebuild the wait list */
+		if (rv >= 0 && write(intr_pipe.writefd, "1", 1) < 0) {
+			rte_spinlock_unlock(&intr_lock);
+			return -EPIPE;
+		}
+
 		rte_spinlock_unlock(&intr_lock);
 	}
 
-- 
2.17.1

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [dpdk-dev] [RFC] /net: memory interface (memif)
  2018-12-10  9:14 [dpdk-dev] [PATCH v3] eal_interrupts: add option for pending callback unregister Jakub Grajciar
@ 2018-12-10  9:42 ` Jakub Grajciar
  2018-12-10 10:06 ` [dpdk-dev] [RFC v2] " Jakub Grajciar
  1 sibling, 0 replies; 13+ messages in thread
From: Jakub Grajciar @ 2018-12-10  9:42 UTC (permalink / raw)
  To: dev; +Cc: Jakub Grajciar

Signed-off-by: Jakub Grajciar <jgrajcia@cisco.com>
---
 config/common_base                          |    5 +
 config/common_linuxapp                      |    1 +
 drivers/net/Makefile                        |    1 +
 drivers/net/memif/Makefile                  |   29 +
 drivers/net/memif/memif.h                   |  171 +++
 drivers/net/memif/memif_socket.c            | 1070 +++++++++++++++++
 drivers/net/memif/memif_socket.h            |   57 +
 drivers/net/memif/meson.build               |    8 +
 drivers/net/memif/rte_eth_memif.c           | 1159 +++++++++++++++++++
 drivers/net/memif/rte_eth_memif.h           |  191 +++
 drivers/net/memif/rte_pmd_memif_version.map |    4 +
 drivers/net/meson.build                     |    1 +
 mk/rte.app.mk                               |    1 +
 13 files changed, 2698 insertions(+)
 create mode 100644 drivers/net/memif/Makefile
 create mode 100644 drivers/net/memif/memif.h
 create mode 100644 drivers/net/memif/memif_socket.c
 create mode 100644 drivers/net/memif/memif_socket.h
 create mode 100644 drivers/net/memif/meson.build
 create mode 100644 drivers/net/memif/rte_eth_memif.c
 create mode 100644 drivers/net/memif/rte_eth_memif.h
 create mode 100644 drivers/net/memif/rte_pmd_memif_version.map

diff --git a/config/common_base b/config/common_base
index d12ae98bc..b8ed10ae5 100644
--- a/config/common_base
+++ b/config/common_base
@@ -403,6 +403,11 @@ CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_TX_FREE=n
 #
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=n
 
+#
+# Compile Memory Interface PMD driver (Linux only)
+#
+CONFIG_RTE_LIBRTE_PMD_MEMIF=n
+
 #
 # Compile link bonding PMD library
 #
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 6c1c8d0f4..42cbde8f5 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -18,6 +18,7 @@ CONFIG_RTE_LIBRTE_VHOST_POSTCOPY=n
 CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_IFC_PMD=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
+CONFIG_RTE_LIBRTE_PMD_MEMIF=y
 CONFIG_RTE_LIBRTE_PMD_SOFTNIC=y
 CONFIG_RTE_LIBRTE_PMD_TAP=y
 CONFIG_RTE_LIBRTE_AVP_PMD=y
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index c0386feb9..0feab5241 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -32,6 +32,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
 DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
 DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
 DIRS-$(CONFIG_RTE_LIBRTE_LIO_PMD) += liquidio
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += memif
 DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
 DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
 DIRS-$(CONFIG_RTE_LIBRTE_MVNETA_PMD) += mvneta
diff --git a/drivers/net/memif/Makefile b/drivers/net/memif/Makefile
new file mode 100644
index 000000000..a82448423
--- /dev/null
+++ b/drivers/net/memif/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018 Cisco Systems, Inc.  All rights reserved.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_memif.a
+
+EXPORT_MAP := rte_pmd_memif_version.map
+
+LIBABIVER := 1
+
+CFLAGS += -O3
+CFLAGS += -I$(SRCDIR)
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -Wno-pointer-arith
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_vdev
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += rte_eth_memif.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += memif_socket.c
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/memif/memif.h b/drivers/net/memif/memif.h
new file mode 100644
index 000000000..b2e993ac7
--- /dev/null
+++ b/drivers/net/memif/memif.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
+ */
+
+#ifndef _MEMIF_H_
+#define _MEMIF_H_
+
+#ifndef MEMIF_CACHELINE_SIZE
+#define MEMIF_CACHELINE_SIZE 64
+#endif
+
+#define MEMIF_COOKIE		0x3E31F20
+#define MEMIF_VERSION_MAJOR	2
+#define MEMIF_VERSION_MINOR	0
+#define MEMIF_VERSION		((MEMIF_VERSION_MAJOR << 8) | MEMIF_VERSION_MINOR)
+
+/*
+ *  Type definitions
+ */
+
+typedef enum memif_msg_type
+{
+  MEMIF_MSG_TYPE_NONE = 0,
+  MEMIF_MSG_TYPE_ACK = 1,
+  MEMIF_MSG_TYPE_HELLO = 2,
+  MEMIF_MSG_TYPE_INIT = 3,
+  MEMIF_MSG_TYPE_ADD_REGION = 4,
+  MEMIF_MSG_TYPE_ADD_RING = 5,
+  MEMIF_MSG_TYPE_CONNECT = 6,
+  MEMIF_MSG_TYPE_CONNECTED = 7,
+  MEMIF_MSG_TYPE_DISCONNECT = 8,
+} memif_msg_type_t;
+
+typedef enum
+{
+  MEMIF_RING_S2M = 0,
+  MEMIF_RING_M2S = 1
+} memif_ring_type_t;
+
+typedef enum
+{
+  MEMIF_INTERFACE_MODE_ETHERNET = 0,
+  MEMIF_INTERFACE_MODE_IP = 1,
+  MEMIF_INTERFACE_MODE_PUNT_INJECT = 2,
+} memif_interface_mode_t;
+
+typedef uint16_t memif_region_index_t;
+typedef uint32_t memif_region_offset_t;
+typedef uint64_t memif_region_size_t;
+typedef uint16_t memif_ring_index_t;
+typedef uint32_t memif_interface_id_t;
+typedef uint16_t memif_version_t;
+typedef uint8_t memif_log2_ring_size_t;
+
+/*
+ *  Socket messages
+ */
+
+typedef struct __attribute__ ((packed))
+{
+  uint8_t name[32];
+  memif_version_t min_version;
+  memif_version_t max_version;
+  memif_region_index_t max_region;
+  memif_ring_index_t max_m2s_ring;
+  memif_ring_index_t max_s2m_ring;
+  memif_log2_ring_size_t max_log2_ring_size;
+} memif_msg_hello_t;
+
+typedef struct __attribute__ ((packed))
+{
+  memif_version_t version;
+  memif_interface_id_t id;
+  memif_interface_mode_t mode:8;
+  uint8_t secret[24];
+  uint8_t name[32];
+} memif_msg_init_t;
+
+typedef struct __attribute__ ((packed))
+{
+  memif_region_index_t index;
+  memif_region_size_t size;
+} memif_msg_add_region_t;
+
+typedef struct __attribute__ ((packed))
+{
+  uint16_t flags;
+#define MEMIF_MSG_ADD_RING_FLAG_S2M	(1 << 0)
+  memif_ring_index_t index;
+  memif_region_index_t region;
+  memif_region_offset_t offset;
+  memif_log2_ring_size_t log2_ring_size;
+  uint16_t private_hdr_size;	/* used for private metadata */
+} memif_msg_add_ring_t;
+
+typedef struct __attribute__ ((packed))
+{
+  uint8_t if_name[32];
+} memif_msg_connect_t;
+
+typedef struct __attribute__ ((packed))
+{
+  uint8_t if_name[32];
+} memif_msg_connected_t;
+
+typedef struct __attribute__ ((packed))
+{
+  uint32_t code;
+  uint8_t string[96];
+} memif_msg_disconnect_t;
+
+typedef struct __attribute__ ((packed, aligned (128)))
+{
+  memif_msg_type_t type:16;
+  union
+  {
+    memif_msg_hello_t hello;
+    memif_msg_init_t init;
+    memif_msg_add_region_t add_region;
+    memif_msg_add_ring_t add_ring;
+    memif_msg_connect_t connect;
+    memif_msg_connected_t connected;
+    memif_msg_disconnect_t disconnect;
+  };
+} memif_msg_t;
+
+_Static_assert (sizeof (memif_msg_t) == 128,
+		"Size of memif_msg_t must be 128");
+
+/*
+ *  Ring and Descriptor Layout
+ */
+
+typedef struct __attribute__ ((packed))
+{
+  uint16_t flags;
+#define MEMIF_DESC_FLAG_NEXT (1 << 0)
+  memif_region_index_t region;
+  uint32_t length;
+  memif_region_offset_t offset;
+  uint32_t metadata;
+} memif_desc_t;
+
+/* The descriptor layout is fixed at 16 bytes; fail the build if it drifts. */
+_Static_assert (sizeof (memif_desc_t) == 16,
+		"Size of memif_desc_t must be 16 bytes");
+
+#define MEMIF_CACHELINE_ALIGN_MARK(mark) \
+  uint8_t mark[0] __attribute__((aligned(MEMIF_CACHELINE_SIZE)))
+
+typedef struct
+{
+  MEMIF_CACHELINE_ALIGN_MARK (cacheline0);
+  uint32_t cookie;
+  uint16_t flags;
+#define MEMIF_RING_FLAG_MASK_INT 1
+  volatile uint16_t head;
+    MEMIF_CACHELINE_ALIGN_MARK (cacheline1);
+  volatile uint16_t tail;
+    MEMIF_CACHELINE_ALIGN_MARK (cacheline2);
+  memif_desc_t desc[0];
+} memif_ring_t;
+
+#endif /* _MEMIF_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/drivers/net/memif/memif_socket.c b/drivers/net/memif/memif_socket.c
new file mode 100644
index 000000000..718386ae4
--- /dev/null
+++ b/drivers/net/memif/memif_socket.c
@@ -0,0 +1,1070 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+
+#include <rte_version.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ethdev_driver.h>
+#include <rte_ethdev_vdev.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+#include <rte_bus_vdev.h>
+#include <rte_hash.h>
+#include <rte_jhash.h>
+
+#include <rte_eth_memif.h>
+#include <memif_socket.h>
+
+static void memif_intr_handler(void *arg);
+
+/*
+ * Send one control message over fd. When afd is greater than zero it is
+ * attached to the message as SCM_RIGHTS ancillary data.
+ * Returns the value returned by sendmsg().
+ * NOTE(review): fd 0 is never passed along because of the "afd > 0" test;
+ * confirm that callers never need to transfer descriptor 0.
+ */
+static inline ssize_t
+memif_msg_send(int fd, memif_msg_t *msg, int afd)
+{
+	char cbuf[CMSG_SPACE (sizeof (int))];
+	struct iovec vec[1];
+	struct msghdr hdr = { 0 };
+
+	/* the payload is always exactly one memif_msg_t */
+	vec[0].iov_base = (void *) msg;
+	vec[0].iov_len = sizeof (memif_msg_t);
+	hdr.msg_iov = vec;
+	hdr.msg_iovlen = 1;
+
+	if (afd > 0) {
+		struct cmsghdr *chdr;
+
+		memset (&cbuf, 0, sizeof (cbuf));
+		hdr.msg_control = cbuf;
+		hdr.msg_controllen = sizeof (cbuf);
+		chdr = CMSG_FIRSTHDR (&hdr);
+		chdr->cmsg_level = SOL_SOCKET;
+		chdr->cmsg_type = SCM_RIGHTS;
+		chdr->cmsg_len = CMSG_LEN (sizeof (int));
+		rte_memcpy (CMSG_DATA (chdr), &afd, sizeof (int));
+	}
+
+	return sendmsg(fd, &hdr, 0);
+}
+
+/*
+ * Send the message at the head of the control channel queue, then drop it
+ * from the queue whether or not the send succeeded.
+ * Returns 0 when the queue is empty or the message was sent, -1 otherwise.
+ */
+static inline int
+memif_msg_send_from_queue(struct memif_control_channel *cc)
+{
+	struct memif_msg_queue_elt *head = TAILQ_FIRST(&cc->msg_queue);
+	ssize_t sent;
+	int status;
+
+	/* nothing queued, nothing to do */
+	if (head == NULL)
+		return 0;
+
+	sent = memif_msg_send(cc->intr_handle.fd, &head->msg, head->fd);
+	if (sent == sizeof(memif_msg_t)) {
+		MIF_LOG(DEBUG, "%s: Sent msg type %u.", (cc->pmd != NULL) ? rte_vdev_device_name(cc->pmd->vdev) : "memif_driver", head->msg.type);
+		status = 0;
+	} else {
+		MIF_LOG(ERR, "sendmsg fail: %s.", strerror(errno));
+		status = -1;
+	}
+
+	/* the element is consumed in both cases */
+	TAILQ_REMOVE(&cc->msg_queue, head, next);
+	rte_free(head);
+
+	return status;
+}
+
+/*
+ * Allocate a zeroed control message, mark it as carrying no fd (-1) and
+ * append it to the control channel queue.
+ * Returns the new element, or NULL when the allocation fails.
+ */
+static inline struct memif_msg_queue_elt *
+memif_msg_enq(struct memif_control_channel *cc)
+{
+	struct memif_msg_queue_elt *elt;
+
+	elt = rte_zmalloc("memif_msg", sizeof(struct memif_msg_queue_elt), 0);
+	if (elt != NULL) {
+		elt->fd = -1;
+		TAILQ_INSERT_TAIL(&cc->msg_queue, elt, next);
+		return elt;
+	}
+
+	MIF_LOG(ERR, "Failed to allocate control message.");
+	return NULL;
+}
+
+/*
+ * Queue a DISCONNECT message on the control channel.
+ *
+ * reason (optional) is copied into the message and, when a device is
+ * attached to the channel, into the device's local_disc_string.
+ * err_code is stored as the disconnect code.
+ * On allocation failure a warning is logged and nothing is queued.
+ */
+void
+memif_msg_enq_disconnect(struct memif_control_channel *cc, const char *reason, int err_code)
+{
+	struct memif_msg_queue_elt *e = memif_msg_enq(cc);
+	if (e == NULL) {
+		MIF_LOG(WARNING, "%s: Failed to enqueue disconnect message.", (cc->pmd != NULL) ? rte_vdev_device_name(cc->pmd->vdev) : "memif_driver");
+		return;
+	}
+
+	memif_msg_disconnect_t *d = &e->msg.disconnect;
+
+	e->msg.type = MEMIF_MSG_TYPE_DISCONNECT;
+	d->code = err_code;
+
+	if (reason != NULL) {
+		/* bound by the destination, not by the length of the source */
+		const size_t max_len = sizeof(d->string) - 1;
+
+		/* e is zero-allocated, so the last byte stays a terminator */
+		strncpy((char *) d->string, reason, max_len);
+		if (cc->pmd != NULL) {
+			/*
+			 * NOTE(review): assumes local_disc_string holds at
+			 * least sizeof(d->string) bytes, matching the 96-byte
+			 * memset in memif_msg_receive_disconnect - confirm.
+			 */
+			strncpy(cc->pmd->local_disc_string, reason, max_len);
+			cc->pmd->local_disc_string[max_len] = '\0';
+		}
+	}
+}
+
+/*
+ * Queue a HELLO message advertising the supported protocol version and
+ * the maximum ring/region limits of this driver.
+ * Returns 0 on success, -1 when the message cannot be allocated.
+ */
+static int
+memif_msg_enq_hello(struct memif_control_channel *cc)
+{
+	struct memif_msg_queue_elt *e = memif_msg_enq(cc);
+	if (e == NULL)
+		return -1;
+
+	memif_msg_hello_t *h = &e->msg.hello;
+
+	e->msg.type = MEMIF_MSG_TYPE_HELLO;
+	h->min_version = MEMIF_VERSION;
+	h->max_version = MEMIF_VERSION;
+	h->max_s2m_ring = ETH_MEMIF_MAX_NUM_Q_PAIRS;
+	h->max_m2s_ring = ETH_MEMIF_MAX_NUM_Q_PAIRS;
+	h->max_region = ETH_MEMIF_MAX_REGION_IDX;
+	h->max_log2_ring_size = ETH_MEMIF_MAX_LOG2_RING_SIZE;
+
+	/*
+	 * Bound the copy by the field size; the message is zero-allocated,
+	 * so leaving the last byte untouched keeps the name terminated.
+	 */
+	strncpy((char *) h->name, rte_version(), sizeof(h->name) - 1);
+
+	return 0;
+}
+
+/*
+ * Handle a HELLO message: verify that the peer's version range covers
+ * MEMIF_VERSION and derive the run-time ring parameters from the peer's
+ * limits and the local configuration.
+ * Returns 0 on success; on version mismatch a disconnect message is
+ * queued and -1 is returned.
+ */
+static int
+memif_msg_receive_hello(struct pmd_internals *pmd, memif_msg_t *msg)
+{
+	memif_msg_hello_t *h = &msg->hello;
+
+	if (h->min_version > MEMIF_VERSION || h->max_version < MEMIF_VERSION) {
+		memif_msg_enq_disconnect(pmd->cc, "Incompatible memif version", 0);
+		return -1;
+	}
+
+	/* Set parameters for active connection */
+	pmd->run.num_s2m_rings = memif_min(h->max_s2m_ring + 1,
+						pmd->cfg.num_s2m_rings);
+	pmd->run.num_m2s_rings = memif_min(h->max_m2s_ring + 1,
+						pmd->cfg.num_m2s_rings);
+	pmd->run.log2_ring_size = memif_min(h->max_log2_ring_size,
+						pmd->cfg.log2_ring_size);
+	pmd->run.buffer_size = pmd->cfg.buffer_size;
+
+	/*
+	 * h->name is supplied by the peer and may lack a terminator, so the
+	 * scan must not run past the end of the field.
+	 * NOTE(review): as before, no terminator is written to remote_name;
+	 * this presumably relies on the destination being pre-zeroed - confirm.
+	 */
+	memcpy(pmd->remote_name, h->name,
+		strnlen((char *)h->name, sizeof(h->name)));
+
+	MIF_LOG(DEBUG, "%s: Connecting to %s.",
+		rte_vdev_device_name(pmd->vdev), pmd->remote_name);
+
+	return 0;
+}
+
+/*
+ * Handle an INIT message: find the enabled device with the requested id
+ * on the channel's socket, validate mode and secret, bind the control
+ * channel to the device and flag the device as connecting.
+ * Returns 0 on success; on any failure a disconnect message is queued
+ * and -1 is returned.
+ */
+static int
+memif_msg_receive_init(struct memif_control_channel *cc, memif_msg_t *msg)
+{
+	memif_msg_init_t *i = &msg->init;
+	struct memif_socket_pmd_list_elt *elt;
+	struct pmd_internals *pmd;
+
+	if (i->version != MEMIF_VERSION) {
+		memif_msg_enq_disconnect(cc, "Incompatible memif version", 0);
+		return -1;
+	}
+
+	if (cc->socket == NULL) {
+		memif_msg_enq_disconnect(cc, "Device error", 0);
+		return -1;
+	}
+
+	/* Find device with requested ID */
+	TAILQ_FOREACH(elt, &cc->socket->pmd_queue, next) {
+		pmd = elt->pmd;
+		if (((pmd->flags & ETH_MEMIF_FLAG_DISABLED) == 0) && (pmd->id == i->id)) {
+			/*
+			 * Test the connection bits with a bitwise AND, and do
+			 * it before touching pmd->cc so that a second client
+			 * cannot replace the control channel of a device that
+			 * is already in use.
+			 */
+			if (pmd->flags & (ETH_MEMIF_FLAG_CONNECTING |
+					ETH_MEMIF_FLAG_CONNECTED)) {
+				memif_msg_enq_disconnect(cc, "Already connected", 0);
+				return -1;
+			}
+
+			/* assign control channel to device */
+			cc->pmd = pmd;
+			pmd->cc = cc;
+
+			if (i->mode != MEMIF_INTERFACE_MODE_ETHERNET) {
+				memif_msg_enq_disconnect(pmd->cc, "Only ethernet mode supported", 0);
+				return -1;
+			}
+
+			/* i->name comes from the peer: never scan past the field */
+			memcpy(pmd->remote_name, i->name,
+				strnlen((char *)i->name, sizeof(i->name)));
+
+			if (*pmd->secret != '\0') {
+				if (*i->secret == '\0') {
+					memif_msg_enq_disconnect(pmd->cc, "Secret required", 0);
+					return -1;
+				}
+				/* i->secret may be unterminated: compare at most the field size */
+				if (strncmp(pmd->secret, (char *) i->secret,
+						sizeof(i->secret)) != 0) {
+					memif_msg_enq_disconnect(pmd->cc, "Incorrect secret", 0);
+					return -1;
+				}
+			}
+
+			pmd->flags |= ETH_MEMIF_FLAG_CONNECTING;
+			return 0;
+		}
+	}
+
+	/* ID not found on this socket */
+	MIF_LOG(DEBUG, "ID %u not found.", i->id);
+	memif_msg_enq_disconnect(cc, "ID not found", 0);
+	return -1;
+}
+
+/*
+ * Handle an ADD_REGION message: record the shared memory region
+ * described by the message together with the fd received alongside it.
+ *
+ * Regions must arrive in index order (index == number of regions already
+ * known). An out-of-order index would shrink the array on realloc and a
+ * repeated index would over-count regions_num.
+ * Returns 0 on success; on failure a disconnect message is queued, the
+ * received fd is closed and -1 is returned.
+ */
+static int
+memif_msg_receive_add_region(struct pmd_internals *pmd, memif_msg_t *msg,
+			     int fd)
+{
+	memif_msg_add_region_t *ar = &msg->add_region;
+	struct memif_region *mr;
+
+	if (fd < 0) {
+		memif_msg_enq_disconnect(pmd->cc, "Missing region fd", 0);
+		return -1;
+	}
+
+	if (ar->index > ETH_MEMIF_MAX_REGION_IDX ||
+	    ar->index != pmd->regions_num) {
+		memif_msg_enq_disconnect(pmd->cc, "Invalid region index", 0);
+		close(fd);
+		return -1;
+	}
+
+	/* keep the old array if the reallocation fails */
+	mr = rte_realloc(pmd->regions, sizeof(struct memif_region) *
+		(ar->index + 1), 0);
+	if (mr == NULL) {
+		memif_msg_enq_disconnect(pmd->cc, "Device error", 0);
+		close(fd);
+		return -1;
+	}
+
+	pmd->regions = mr;
+	pmd->regions[ar->index].fd = fd;
+	pmd->regions[ar->index].region_size = ar->size;
+	pmd->regions[ar->index].addr = NULL;
+	pmd->regions_num++;
+
+	return 0;
+}
+
+/*
+ * Handle an ADD_RING message: validate the ring index against the
+ * configured ring count for its direction, bump the run-time ring count
+ * and store the ring parameters and interrupt fd in the matching queue
+ * (S2M rings map to rx_queues, the others to tx_queues).
+ * Returns 0 on success; on failure a disconnect message is queued and
+ * -1 is returned.
+ */
+static int
+memif_msg_receive_add_ring(struct pmd_internals *pmd, memif_msg_t *msg,
+			   int fd)
+{
+	memif_msg_add_ring_t *ring = &msg->add_ring;
+	struct memif_queue *queue;
+
+	if (fd < 0) {
+		memif_msg_enq_disconnect(pmd->cc, "Missing interrupt fd", 0);
+		return -1;
+	}
+
+	/* check if we have enough queues, then pick the queue to fill in */
+	if (ring->flags & MEMIF_MSG_ADD_RING_FLAG_S2M) {
+		if (ring->index >= pmd->cfg.num_s2m_rings) {
+			memif_msg_enq_disconnect(pmd->cc, "Invalid ring index", 0);
+			return -1;
+		}
+		pmd->run.num_s2m_rings++;
+		queue = &pmd->rx_queues[ring->index];
+	} else {
+		if (ring->index >= pmd->cfg.num_m2s_rings) {
+			memif_msg_enq_disconnect(pmd->cc, "Invalid ring index", 0);
+			return -1;
+		}
+		pmd->run.num_m2s_rings++;
+		queue = &pmd->tx_queues[ring->index];
+	}
+
+	queue->intr_handle.fd = fd;
+	queue->log2_ring_size = ring->log2_ring_size;
+	queue->region = ring->region;
+	queue->offset = ring->offset;
+
+	return 0;
+}
+
+/*
+ * Handle a CONNECT message: bring the connection up via memif_connect()
+ * and remember the peer's interface name.
+ * Returns 0 on success or the negative value returned by memif_connect().
+ */
+static int
+memif_msg_receive_connect(struct pmd_internals *pmd, memif_msg_t *msg)
+{
+	memif_msg_connect_t *c = &msg->connect;
+	int ret;
+
+	ret = memif_connect(pmd);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * c->if_name is supplied by the peer and may lack a terminator, so
+	 * the scan must not run past the end of the field.
+	 * NOTE(review): as before, no terminator is written to the
+	 * destination - presumably it is pre-zeroed; confirm.
+	 */
+	memcpy(pmd->remote_if_name, c->if_name,
+		strnlen((char *)c->if_name, sizeof(c->if_name)));
+	MIF_LOG(INFO, "%s: Remote interface %s connected.",
+		rte_vdev_device_name(pmd->vdev), pmd->remote_if_name);
+
+	return 0;
+}
+
+/*
+ * Handle a CONNECTED message: bring the connection up via
+ * memif_connect() and remember the peer's interface name.
+ * Returns 0 on success or the negative value returned by memif_connect().
+ */
+static int
+memif_msg_receive_connected(struct pmd_internals *pmd, memif_msg_t *msg)
+{
+	memif_msg_connected_t *c = &msg->connected;
+	int ret;
+
+	ret = memif_connect(pmd);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * c->if_name is supplied by the peer and may lack a terminator, so
+	 * the scan must not run past the end of the field.
+	 * NOTE(review): as before, no terminator is written to the
+	 * destination - presumably it is pre-zeroed; confirm.
+	 */
+	memcpy(pmd->remote_if_name, c->if_name,
+		strnlen((char *)c->if_name, sizeof(c->if_name)));
+	MIF_LOG(INFO, "%s: Remote interface %s connected.",
+		rte_vdev_device_name(pmd->vdev), pmd->remote_if_name);
+
+	return 0;
+}
+
+/*
+ * Handle a DISCONNECT message: store the peer's reason string, clear the
+ * local reason string and tear the connection down.
+ * Returns 0 on success, -1 when the ethdev for this device is not found.
+ */
+static int
+memif_msg_receive_disconnect(struct pmd_internals *pmd, memif_msg_t *msg)
+{
+	memif_msg_disconnect_t *d = &msg->disconnect;
+	struct rte_eth_dev *dev;
+	size_t len;
+
+	memset(pmd->remote_disc_string, 0,
+		sizeof(pmd->remote_disc_string));
+
+	/*
+	 * d->string comes from the peer and may lack a terminator: limit the
+	 * scan to the field and the copy to the destination, keeping its
+	 * last (already zeroed) byte as terminator. Like the memset above,
+	 * this treats remote_disc_string as an array.
+	 */
+	len = strnlen((char *)d->string, sizeof(d->string));
+	if (len > sizeof(pmd->remote_disc_string) - 1)
+		len = sizeof(pmd->remote_disc_string) - 1;
+	memcpy(pmd->remote_disc_string, d->string, len);
+
+	MIF_LOG(INFO, "%s: Disconnect received: %s",
+		rte_vdev_device_name(pmd->vdev), pmd->remote_disc_string);
+
+	memset(pmd->local_disc_string, 0, 96);
+
+	/* memif_disconnect() dereferences dev, so a failed lookup must stop here */
+	dev = rte_eth_dev_allocated(rte_vdev_device_name(pmd->vdev));
+	if (dev == NULL) {
+		MIF_LOG(WARNING, "%s: eth dev not allocated",
+			rte_vdev_device_name(pmd->vdev));
+		return -1;
+	}
+	memif_disconnect(dev);
+	return 0;
+}
+
+/*
+ * Queue an ACK message on the device's control channel.
+ * Returns 0 on success, -1 when the message cannot be allocated.
+ */
+static int
+memif_msg_enq_ack(struct pmd_internals *pmd)
+{
+	struct memif_msg_queue_elt *ack = memif_msg_enq(pmd->cc);
+
+	if (ack != NULL) {
+		ack->msg.type = MEMIF_MSG_TYPE_ACK;
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Queue an INIT message carrying the protocol version, the interface id,
+ * ethernet mode, this driver's name and the configured secret.
+ * Returns 0 on success, -1 when the message cannot be allocated.
+ */
+static int
+memif_msg_enq_init(struct pmd_internals *pmd)
+{
+	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
+	if (e == NULL)
+		return -1;
+
+	memif_msg_init_t *i = &e->msg.init;
+
+	e->msg.type = MEMIF_MSG_TYPE_INIT;
+	i->version = MEMIF_VERSION;
+	i->id = pmd->id;
+	i->mode = MEMIF_INTERFACE_MODE_ETHERNET;
+
+	/*
+	 * Bound the copy by the field size; the message is zero-allocated,
+	 * so leaving the last byte untouched keeps the name terminated.
+	 */
+	strncpy((char *) i->name, rte_version(), sizeof(i->name) - 1);
+
+	/*
+	 * NOTE(review): other code dereferences pmd->secret directly
+	 * (*pmd->secret); if it is an array this test is always true - confirm.
+	 */
+	if (pmd->secret) {
+		strncpy((char *) i->secret, pmd->secret,
+			sizeof(i->secret) - 1);
+	}
+
+	return 0;
+}
+
+/*
+ * Queue an ADD_REGION message for region idx; the region's fd is
+ * attached to the queued element so it travels with the message.
+ * Returns 0 on success, -1 when the message cannot be allocated.
+ */
+static int
+memif_msg_enq_add_region(struct pmd_internals *pmd, uint8_t idx)
+{
+	struct memif_msg_queue_elt *elt;
+	struct memif_region *region;
+	memif_msg_add_region_t *body;
+
+	elt = memif_msg_enq(pmd->cc);
+	if (elt == NULL)
+		return -1;
+
+	region = &pmd->regions[idx];
+	body = &elt->msg.add_region;
+
+	elt->msg.type = MEMIF_MSG_TYPE_ADD_REGION;
+	elt->fd = region->fd;
+	body->index = idx;
+	body->size = region->region_size;
+
+	return 0;
+}
+
+/*
+ * Queue an ADD_RING message for ring idx of the given type. S2M rings
+ * are taken from tx_queues, the others from rx_queues. The queue's
+ * interrupt fd is attached to the queued element.
+ * Returns 0 on success, -1 when the message cannot be allocated.
+ */
+static int
+memif_msg_enq_add_ring(struct pmd_internals *pmd, uint8_t idx,
+		       memif_ring_type_t type)
+{
+	struct memif_msg_queue_elt *elt;
+	struct memif_queue *queue;
+	memif_msg_add_ring_t *body;
+
+	elt = memif_msg_enq(pmd->cc);
+	if (elt == NULL)
+		return -1;
+
+	body = &elt->msg.add_ring;
+	elt->msg.type = MEMIF_MSG_TYPE_ADD_RING;
+
+	if (type == MEMIF_RING_S2M) {
+		queue = &pmd->tx_queues[idx];
+		body->flags = MEMIF_MSG_ADD_RING_FLAG_S2M;
+	} else {
+		queue = &pmd->rx_queues[idx];
+		body->flags = 0;
+	}
+
+	elt->fd = queue->intr_handle.fd;
+	body->index = idx;
+	body->offset = queue->offset;
+	body->region = queue->region;
+	body->log2_ring_size = queue->log2_ring_size;
+	body->private_hdr_size = 0;
+
+	return 0;
+}
+
+/*
+ * Queue a CONNECT message carrying this device's name.
+ * Returns 0 on success, -1 when the message cannot be allocated.
+ */
+static int
+memif_msg_enq_connect(struct pmd_internals *pmd)
+{
+	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
+	if (e == NULL)
+		return -1;
+
+	memif_msg_connect_t *c = &e->msg.connect;
+	const char *name = rte_vdev_device_name(pmd->vdev);
+
+	e->msg.type = MEMIF_MSG_TYPE_CONNECT;
+	/*
+	 * Bound the copy by the field size so a long device name is
+	 * truncated instead of overrunning if_name; the message is
+	 * zero-allocated, so the last byte remains a terminator.
+	 */
+	strncpy((char *) c->if_name, name, sizeof(c->if_name) - 1);
+
+	return 0;
+}
+
+/*
+ * Queue a CONNECTED message carrying this device's name.
+ * Returns 0 on success, -1 when the message cannot be allocated.
+ */
+static int
+memif_msg_enq_connected(struct pmd_internals *pmd)
+{
+	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
+	if (e == NULL)
+		return -1;
+
+	memif_msg_connected_t *c = &e->msg.connected;
+
+	const char *name = rte_vdev_device_name(pmd->vdev);
+
+	e->msg.type = MEMIF_MSG_TYPE_CONNECTED;
+	/*
+	 * Bound the copy by the field size so a long device name is
+	 * truncated instead of overrunning if_name; the message is
+	 * zero-allocated, so the last byte remains a terminator.
+	 */
+	strncpy((char *) c->if_name, name, sizeof(c->if_name) - 1);
+
+	return 0;
+}
+
+/*
+ * Cleanup hook run when the control channel callback is finally
+ * unregistered: closes the channel fd, drops every queued message and
+ * frees the control channel passed as arg.
+ */
+static void
+memif_intr_unregister_handler(struct rte_intr_handle *intr_handle,
+			      void *arg)
+{
+	struct memif_msg_queue_elt *elt;
+	struct memif_control_channel *cc = arg;
+	/* close control channel fd */
+	close(intr_handle->fd);
+	/* clear message queue */
+	while((elt = TAILQ_FIRST(&cc->msg_queue)) != NULL) {
+		TAILQ_REMOVE(&cc->msg_queue, elt, next);
+		/* elements come from rte_zmalloc(), so release with rte_free() */
+		rte_free(elt);
+	}
+	/* free control channel */
+	rte_free(cc);
+}
+
+/*
+ * Tear down the connection of a memif device.
+ *
+ * Drops every queued control message except a pending DISCONNECT, sends
+ * that one, unregisters the control channel callback (deferring the
+ * removal when the callback is currently running), closes the ring
+ * interrupt fds, frees the regions and marks the link down.
+ */
+void
+memif_disconnect(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_msg_queue_elt *elt, *next;
+	struct memif_queue *mq;
+	int i;
+	int ret;
+
+	if (pmd->cc != NULL) {
+		/*
+		 * Clear control message queue (except disconnect message if any).
+		 * Walk with a saved successor: always restarting from the head
+		 * would spin forever once the head is a disconnect message.
+		 */
+		for (elt = TAILQ_FIRST(&pmd->cc->msg_queue); elt != NULL;
+		     elt = next) {
+			next = TAILQ_NEXT(elt, next);
+			if (elt->msg.type != MEMIF_MSG_TYPE_DISCONNECT) {
+				TAILQ_REMOVE(&pmd->cc->msg_queue, elt, next);
+				/* allocated with rte_zmalloc() */
+				rte_free(elt);
+			}
+		}
+		/* send disconnect message (if there is any in queue) */
+		memif_msg_send_from_queue(pmd->cc);
+
+		/* at this point, there should be no more messages in queue */
+		if (TAILQ_FIRST(&pmd->cc->msg_queue) != NULL) {
+			MIF_LOG(WARNING, "%s: Unexpected message(s) in message queue.", rte_vdev_device_name(pmd->vdev));
+		}
+
+		if (pmd->cc->intr_handle.fd > 0) {
+			ret = rte_intr_callback_unregister(
+				&pmd->cc->intr_handle, memif_intr_handler, pmd->cc);
+			/*
+			 * If callback is active (disconnecting based on
+			 * received control message).
+			 */
+			if (ret == -EAGAIN) {
+				ret = rte_intr_callback_unregister_pending(
+					&pmd->cc->intr_handle,
+					memif_intr_handler, pmd->cc,
+					memif_intr_unregister_handler);
+			} else if (ret > 0) {
+				close(pmd->cc->intr_handle.fd);
+				rte_free(pmd->cc);
+			}
+			if (ret > 0) {
+				/*
+				 * The channel is freed, or will be freed by the
+				 * deferred unregister hook: drop our reference
+				 * so it cannot be used after free.
+				 */
+				pmd->cc = NULL;
+			} else {
+				MIF_LOG(WARNING, "%s: Failed to unregister control channel callback.", rte_vdev_device_name(pmd->vdev));
+			}
+		}
+	}
+
+	/* unconfig interrupts */
+	for (i = 0; i < pmd->cfg.num_s2m_rings; i++) {
+		mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
+			&pmd->tx_queues[i] : &pmd->rx_queues[i];
+		if (mq->intr_handle.fd > 0) {
+			rte_intr_disable(&mq->intr_handle);
+			close(mq->intr_handle.fd);
+			mq->intr_handle.fd = -1;
+		}
+		mq->ring = NULL;
+	}
+	for (i = 0; i < pmd->cfg.num_m2s_rings; i++) {
+		mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
+			&pmd->rx_queues[i] : &pmd->tx_queues[i];
+		if (mq->intr_handle.fd > 0) {
+			rte_intr_disable(&mq->intr_handle);
+			close(mq->intr_handle.fd);
+			mq->intr_handle.fd = -1;
+		}
+		mq->ring = NULL;
+	}
+
+	memif_free_regions(pmd);
+
+	dev->data->dev_link.link_status = ETH_LINK_DOWN;
+	pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
+	pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTED;
+	MIF_LOG(DEBUG, "%s: Disconnected.", rte_vdev_device_name(pmd->vdev));
+}
+
+/*
+ * Receive one control message from the channel and act on it.
+ *
+ * Reads exactly one memif_msg_t plus ancillary data, extracts an fd
+ * passed with SCM_RIGHTS (if any), and dispatches on the message type.
+ * Most handlers queue a reply that the caller is expected to send.
+ * Returns 0 on success and a negative value on failure; on the failure
+ * paths visible here a disconnect message has usually been queued.
+ */
+static int
+memif_msg_receive(struct memif_control_channel *cc)
+{
+	char ctl[CMSG_SPACE(sizeof(int)) +
+		CMSG_SPACE(sizeof(struct ucred))] = { 0 };
+	struct msghdr mh = { 0 };
+	struct iovec iov[1];
+	memif_msg_t msg = { 0 };
+	ssize_t size;
+	int ret = 0;
+	/* NOTE(review): cr is assigned below but never read in this function */
+	struct ucred *cr __rte_unused;
+	cr = 0;
+	struct cmsghdr *cmsg;
+	/* fd received with the message, -1 when none was attached */
+	int afd = -1;
+	int i;
+
+	iov[0].iov_base = (void *) &msg;
+	iov[0].iov_len = sizeof(memif_msg_t);
+	mh.msg_iov = iov;
+	mh.msg_iovlen = 1;
+	mh.msg_control = ctl;
+	mh.msg_controllen = sizeof(ctl);
+
+	/* anything other than one complete message is treated as an error */
+	size = recvmsg(cc->intr_handle.fd, &mh, 0);
+	if (size != sizeof(memif_msg_t)) {
+		MIF_LOG(DEBUG, "Invalid message size.");
+		memif_msg_enq_disconnect(cc, "Invalid message size", 0);
+		return -1;
+	}
+	MIF_LOG(DEBUG, "Received msg type: %u.", msg.type);
+
+	/* walk the ancillary data looking for credentials and a passed fd */
+	cmsg = CMSG_FIRSTHDR(&mh);
+	while (cmsg) {
+		if (cmsg->cmsg_level == SOL_SOCKET) {
+			if (cmsg->cmsg_type == SCM_CREDENTIALS) {
+				cr = (struct ucred *)CMSG_DATA(cmsg);
+			}
+			else if (cmsg->cmsg_type == SCM_RIGHTS) {
+				afd = *(int *)CMSG_DATA(cmsg);
+			}
+		}
+		cmsg = CMSG_NXTHDR(&mh,cmsg);
+	}
+
+	/*
+	 * Until a device has been assigned to the channel, only INIT is
+	 * acceptable.
+	 * NOTE(review): afd is not closed on this path, nor for message
+	 * types that do not consume an fd - looks like a descriptor leak.
+	 */
+	if ((cc->pmd == NULL) && msg.type != MEMIF_MSG_TYPE_INIT) {
+		MIF_LOG(DEBUG, "Unexpected message.");
+		memif_msg_enq_disconnect(cc, "Unexpected message", 0);
+		return -1;
+	}
+
+	/* dispatch on the message type */
+	switch(msg.type) {
+	case MEMIF_MSG_TYPE_ACK:
+		break;
+	case MEMIF_MSG_TYPE_HELLO:
+		/* answer HELLO with INIT, every region, every ring, then CONNECT */
+		ret = memif_msg_receive_hello(cc->pmd, &msg);
+		if (ret < 0)
+			goto exit;
+		ret = memif_init_regions_and_queues(cc->pmd);
+		if (ret < 0)
+			goto exit;
+		ret = memif_msg_enq_init(cc->pmd);
+		if (ret < 0)
+			goto exit;
+		for (i = 0; i < cc->pmd->regions_num; i++) {
+			ret = memif_msg_enq_add_region(cc->pmd, i);
+			if (ret < 0)
+				goto exit;
+		}
+		for (i = 0; i < cc->pmd->run.num_s2m_rings; i++) {
+			ret = memif_msg_enq_add_ring(cc->pmd, i,
+				MEMIF_RING_S2M);
+			if (ret < 0)
+				goto exit;
+		}
+		for (i = 0; i < cc->pmd->run.num_m2s_rings; i++) {
+			ret = memif_msg_enq_add_ring(cc->pmd, i,
+				MEMIF_RING_M2S);
+			if (ret < 0)
+				goto exit;
+		}
+		ret = memif_msg_enq_connect(cc->pmd);
+		if (ret < 0)
+			goto exit;
+		break;
+	case MEMIF_MSG_TYPE_INIT:
+		/* on success the handler has assigned cc->pmd */
+		ret = memif_msg_receive_init(cc, &msg);
+		if (ret < 0)
+			goto exit;
+		ret = memif_msg_enq_ack(cc->pmd);
+		if (ret < 0)
+			goto exit;
+		break;
+	case MEMIF_MSG_TYPE_ADD_REGION:
+		ret = memif_msg_receive_add_region(cc->pmd, &msg, afd);
+		if (ret < 0)
+			goto exit;
+		ret = memif_msg_enq_ack(cc->pmd);
+		if (ret < 0)
+			goto exit;
+		break;
+	case MEMIF_MSG_TYPE_ADD_RING:
+		ret = memif_msg_receive_add_ring(cc->pmd, &msg,	afd);
+		if (ret < 0)
+			goto exit;
+		ret = memif_msg_enq_ack(cc->pmd);
+		if (ret < 0)
+			goto exit;
+		break;
+	case MEMIF_MSG_TYPE_CONNECT:
+		ret = memif_msg_receive_connect(cc->pmd, &msg);
+		if (ret < 0)
+			goto exit;
+		ret = memif_msg_enq_connected(cc->pmd);
+		if (ret < 0)
+			goto exit;
+		break;
+	case MEMIF_MSG_TYPE_CONNECTED:
+		ret = memif_msg_receive_connected(cc->pmd, &msg);
+		break;
+	case MEMIF_MSG_TYPE_DISCONNECT:
+		ret = memif_msg_receive_disconnect(cc->pmd, &msg);
+		if (ret < 0)
+			goto exit;
+		break;
+	default:
+		memif_msg_enq_disconnect(cc, "Unknown message type", 0);
+		ret = -1;
+		goto exit;
+	}
+
+exit:
+	return ret;
+}
+
+/*
+ * Interrupt callback of a control channel socket.
+ *
+ * Receives one control message and then flushes the channel's message
+ * queue. If no device is attached to the channel after the receive, the
+ * callback is marked for removal. Any other receive or send failure
+ * disconnects the device owning the channel.
+ */
+static void
+memif_intr_handler(void *arg)
+{
+	struct memif_control_channel *chan = arg;
+	struct rte_eth_dev *eth_dev;
+	int rc;
+
+	rc = memif_msg_receive(chan);
+
+	/* driver failed to assign a device to this channel */
+	if (chan->pmd == NULL) {
+		rc = rte_intr_callback_unregister_pending(&chan->intr_handle,
+			memif_intr_handler, chan,
+			memif_intr_unregister_handler);
+		if (rc < 0)
+			MIF_LOG(WARNING, "Failed to unregister control channel callback.");
+		return;
+	}
+
+	/* only try to send when the receive went through */
+	if (rc >= 0) {
+		rc = memif_msg_send_from_queue(chan);
+		if (rc >= 0)
+			return;
+	}
+
+	/* receive or send failed: disconnect the device */
+	eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(chan->pmd->vdev));
+	if (eth_dev != NULL) {
+		memif_disconnect(eth_dev);
+		return;
+	}
+	MIF_LOG(WARNING, "%s: eth dev not allocated",
+		rte_vdev_device_name(chan->pmd->vdev));
+}
+
+/*
+ * Interrupt callback of a listener socket.
+ *
+ * Accepts one pending connection, wraps the new fd in a freshly allocated
+ * control channel (not yet bound to a device), registers
+ * memif_intr_handler() for it and sends the initial hello message.
+ *
+ * On failure the accepted fd is closed, the channel callback is
+ * unregistered if it had been registered, and the channel is freed.
+ *
+ * NOTE(review): messages still sitting in cc->msg_queue when the send fails
+ * are not released here - confirm who owns them.
+ */
+static void
+memif_listener_handler(void *arg)
+{
+	struct memif_socket *socket = arg;
+	struct memif_control_channel *cc = NULL;
+	struct sockaddr_un client;
+	/* accept() takes a socklen_t; do not alias an int through a cast */
+	socklen_t addr_len = sizeof(client);
+	int registered = 0;
+	int sockfd;
+	int ret;
+
+	sockfd = accept(socket->intr_handle.fd, (struct sockaddr *)&client,
+			&addr_len);
+	if (sockfd < 0) {
+		MIF_LOG(ERR, "Failed to accept connection request on socket fd %d",
+			socket->intr_handle.fd);
+		return;
+	}
+
+	MIF_LOG(DEBUG, "%s: Connection request accepted.",
+		socket->filename);
+
+	cc = rte_zmalloc("memif-cc",
+		sizeof(struct memif_control_channel), 0);
+	if (cc == NULL) {
+		MIF_LOG(ERR, "Failed to allocate control channel.");
+		goto error;
+	}
+
+	cc->intr_handle.fd = sockfd;
+	cc->intr_handle.type = RTE_INTR_HANDLE_EXT;
+	cc->socket = socket;
+	cc->pmd = NULL;
+	TAILQ_INIT(&cc->msg_queue);
+
+	ret = rte_intr_callback_register(&cc->intr_handle, memif_intr_handler, cc);
+	if (ret < 0) {
+		MIF_LOG(ERR, "Failed to register control channel callback.");
+		goto error;
+	}
+	registered = 1;
+
+	ret = memif_msg_enq_hello(cc);
+	if (ret < 0) {
+		MIF_LOG(ERR, "Failed to enqueue hello message.");
+		goto error;
+	}
+	ret = memif_msg_send_from_queue(cc);
+	if (ret < 0)
+		goto error;
+
+	return;
+
+error:
+	/*
+	 * Drop the callback before releasing its argument, otherwise the
+	 * interrupt thread could call memif_intr_handler() on freed memory.
+	 */
+	if (registered &&
+	    rte_intr_callback_unregister(&cc->intr_handle,
+					 memif_intr_handler, cc) < 0)
+		MIF_LOG(WARNING, "Failed to unregister control channel callback.");
+	/* sockfd is a valid descriptor on every path that gets here, 0 included */
+	close(sockfd);
+	rte_free(cc);
+}
+
+/*
+ * Allocate a memif socket descriptor for the 256-byte name in key.
+ *
+ * For a listener (listener != 0) a SOCK_SEQPACKET unix socket is created,
+ * bound to the filename, put into listening state and registered with
+ * memif_listener_handler() as interrupt callback. For a non-listener only
+ * the descriptor is allocated.
+ *
+ * Returns the new socket, or NULL on failure. On failure nothing is left
+ * behind: the fd is closed, a socket file created by bind() is removed and
+ * the descriptor is freed.
+ */
+static inline struct memif_socket *
+memif_socket_create(struct pmd_internals *pmd, char *key, uint8_t listener)
+{
+	struct memif_socket *sock;
+	struct sockaddr_un un;
+	int sockfd = -1;
+	int bound = 0;
+	int ret;
+	int on = 1;
+
+	sock = rte_zmalloc("memif-socket", sizeof(struct memif_socket), 0);
+	if (sock == NULL) {
+		MIF_LOG(ERR, "Failed to allocate memory for memif socket");
+		return NULL;
+	}
+
+	sock->listener = listener;
+	rte_memcpy(sock->filename, key, 256);
+	TAILQ_INIT(&sock->pmd_queue);
+
+	if (listener == 0)
+		return sock;
+
+	sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+	if (sockfd < 0)
+		goto error;
+
+	/* zero the address so that sun_path is always NUL terminated */
+	memset(&un, 0, sizeof(un));
+	un.sun_family = AF_UNIX;
+	strncpy(un.sun_path, sock->filename, sizeof(un.sun_path) - 1);
+
+	ret = setsockopt(sockfd, SOL_SOCKET, SO_PASSCRED, &on,
+		sizeof(on));
+	if (ret < 0)
+		goto error;
+	ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
+	if (ret < 0)
+		goto error;
+	/* from here on the socket file exists and belongs to us */
+	bound = 1;
+	ret = listen(sockfd, 1);
+	if (ret < 0)
+		goto error;
+
+	MIF_LOG(DEBUG, "%s: Memif listener socket %s created.",
+		rte_vdev_device_name(pmd->vdev), sock->filename);
+
+	sock->intr_handle.fd = sockfd;
+	sock->intr_handle.type = RTE_INTR_HANDLE_EXT;
+	ret = rte_intr_callback_register(&sock->intr_handle,
+		memif_listener_handler, sock);
+	if (ret < 0) {
+		MIF_LOG(ERR, "%s: Failed to register interrupt "
+			"callback for listener socket",
+			rte_vdev_device_name(pmd->vdev));
+		goto cleanup;
+	}
+
+	return sock;
+
+error:
+	/* log first: the cleanup calls below may overwrite errno */
+	MIF_LOG(ERR, "%s: Failed to setup socket %s: %s",
+		rte_vdev_device_name(pmd->vdev), key, strerror(errno));
+cleanup:
+	if (sockfd >= 0)
+		close(sockfd);
+	if (bound)
+		remove(un.sun_path);
+	rte_free(sock);
+	return NULL;
+}
+
+/*
+ * Create the hash that maps socket filenames to memif sockets: 256 entries,
+ * 256-byte keys, rte_jhash with initial value 0. All remaining parameters
+ * are left zeroed. Returns the result of rte_hash_create().
+ */
+static inline struct rte_hash *
+memif_create_socket_hash(void)
+{
+	struct rte_hash_parameters hash_params = {
+		.name = MEMIF_SOCKET_HASH_NAME,
+		.entries = 256,
+		.key_len = 256,
+		.hash_func = rte_jhash,
+		.hash_func_init_val = 0,
+	};
+
+	return rte_hash_create(&hash_params);
+}
+
+/*
+ * Attach the device to the memif socket named by socket_filename.
+ *
+ * The global socket hash is created on first use. If no socket with this
+ * name exists yet, one is created (as a listener unless the device is a
+ * slave) and added to the hash. The device is then appended to the socket's
+ * device list.
+ *
+ * Returns 0 on success and a negative value on failure: filename missing or
+ * too long for the 256-byte key, hash/socket creation failure, listener
+ * state not matching the device role, a device with the same id already on
+ * the socket, or out of memory.
+ */
+int
+memif_socket_init(struct rte_eth_dev *dev, const char *socket_filename)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_socket *socket = NULL;
+	struct memif_socket_pmd_list_elt *elt;
+	struct rte_hash *hash;
+	size_t len;
+	int ret;
+	char key[256];
+
+	if (socket_filename == NULL) {
+		MIF_LOG(ERR, "Socket filename not specified.");
+		return -1;
+	}
+	/* the key is a fixed, zero padded buffer; keep room for the NUL */
+	len = strlen(socket_filename);
+	if (len >= sizeof(key)) {
+		MIF_LOG(ERR, "Socket filename too long.");
+		return -1;
+	}
+
+	hash = rte_hash_find_existing(MEMIF_SOCKET_HASH_NAME);
+	if (hash == NULL) {
+		hash = memif_create_socket_hash();
+		if (hash == NULL) {
+			MIF_LOG(ERR, "Failed to create memif socket hash.");
+			return -1;
+		}
+	}
+
+	memset(key, 0, sizeof(key));
+	rte_memcpy(key, socket_filename, len);
+	ret = rte_hash_lookup_data(hash, key, (void **)&socket);
+	if (ret < 0) {
+		socket = memif_socket_create(pmd, key,
+			(pmd->role == MEMIF_ROLE_SLAVE) ? 0 : 1);
+		if (socket == NULL)
+			return -1;
+		ret = rte_hash_add_key_data(hash, key, socket);
+		if (ret < 0) {
+			MIF_LOG(ERR, "Failed to add socket to socket hash.");
+			/*
+			 * Nothing else references the new socket yet, so
+			 * release everything memif_socket_create() set up.
+			 */
+			if (socket->listener != 0) {
+				rte_intr_callback_unregister(
+					&socket->intr_handle,
+					memif_listener_handler, socket);
+				close(socket->intr_handle.fd);
+				remove(socket->filename);
+			}
+			rte_free(socket);
+			return ret;
+		}
+	}
+	pmd->socket_filename = socket->filename;
+
+	if ((socket->listener != 0) && (pmd->role == MEMIF_ROLE_SLAVE)) {
+		MIF_LOG(ERR, "Socket is a listener.");
+		return -1;
+	} else if ((socket->listener == 0) &&
+			(pmd->role == MEMIF_ROLE_MASTER)) {
+		MIF_LOG(ERR, "Socket is not a listener.");
+		return -1;
+	}
+
+	/* interface ids must be unique per socket */
+	TAILQ_FOREACH(elt, &socket->pmd_queue, next) {
+		if (elt->pmd->id == pmd->id) {
+			MIF_LOG(ERR, "Memif device with id %d already "
+				"exists on socket %s",
+				pmd->id, socket->filename);
+			return -1;
+		}
+	}
+
+	elt = rte_malloc("pmd-queue", sizeof(struct memif_socket_pmd_list_elt), 0);
+	if (elt == NULL) {
+		MIF_LOG(ERR, "%s: Failed to add device to socket device list.",
+			rte_vdev_device_name(pmd->vdev));
+		return -1;
+	}
+	elt->pmd = pmd;
+	TAILQ_INSERT_TAIL(&socket->pmd_queue, elt, next);
+
+	return 0;
+}
+
+/*
+ * Detach the device from its memif socket.
+ *
+ * Removes the device from the socket's device list and clears
+ * pmd->socket_filename. When the list becomes empty the socket is deleted
+ * from the hash and freed; for a listener the interrupt callback is
+ * unregistered, the fd closed and the socket file removed first.
+ *
+ * Does nothing if the device has no socket filename, or if the hash or the
+ * socket cannot be found.
+ */
+void
+memif_socket_remove_device(struct pmd_internals *pmd)
+{
+	struct memif_socket *socket = NULL;
+	struct memif_socket_pmd_list_elt *elt, *next;
+	struct rte_hash *hash;
+
+	/* a NULL key must not reach the hash lookup */
+	if (pmd->socket_filename == NULL)
+		return;
+
+	hash = rte_hash_find_existing(MEMIF_SOCKET_HASH_NAME);
+	if (hash == NULL)
+		return;
+
+	if (rte_hash_lookup_data(hash, pmd->socket_filename, (void **)&socket) < 0)
+		return;
+
+	for (elt = TAILQ_FIRST(&socket->pmd_queue); elt != NULL; elt = next) {
+		next = TAILQ_NEXT(elt, next);
+		if (elt->pmd == pmd) {
+			TAILQ_REMOVE(&socket->pmd_queue, elt, next);
+			/* list elements come from rte_malloc(), not malloc() */
+			rte_free(elt);
+		}
+	}
+	/* the string lives inside the socket, which may be freed below */
+	pmd->socket_filename = NULL;
+
+	/* remove socket, if this was the last device using it */
+	if (TAILQ_EMPTY(&socket->pmd_queue)) {
+		rte_hash_del_key(hash, socket->filename);
+		if (socket->listener) {
+			/* stop listening before the callback argument goes away */
+			if (rte_intr_callback_unregister(&socket->intr_handle,
+					memif_listener_handler, socket) < 0)
+				MIF_LOG(WARNING, "Failed to unregister listener socket callback.");
+			close(socket->intr_handle.fd);
+			/* remove listener socket file,
+			 * so we can create new one later.
+			 */
+			remove(socket->filename);
+		}
+		rte_free(socket);
+	}
+}
+
+/*
+ * Enable a master device.
+ *
+ * Requires rx queues, tx queues and a socket filename to be set. Clears both
+ * disconnect strings and drops the DISABLED flag.
+ * Returns 0 on success, -1 if the device is not configured.
+ */
+int
+memif_connect_master(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	int configured = pmd->rx_queues != NULL &&
+			 pmd->tx_queues != NULL &&
+			 pmd->socket_filename != NULL;
+
+	if (!configured) {
+		MIF_LOG(ERR, "%s: Device not configured!",
+			rte_vdev_device_name(pmd->vdev));
+		return -1;
+	}
+
+	memset(pmd->local_disc_string, 0, 96);
+	memset(pmd->remote_disc_string, 0, 96);
+	pmd->flags &= ~ETH_MEMIF_FLAG_DISABLED;
+
+	return 0;
+}
+
+/*
+ * Connect a slave device to the master listening on pmd->socket_filename.
+ *
+ * Requires rx queues, tx queues and a socket filename to be set. Opens a
+ * SOCK_SEQPACKET unix socket, connects it, allocates the control channel,
+ * stores it in pmd->cc and registers memif_intr_handler() for it.
+ *
+ * Returns 0 on success and -1 on failure. On failure the socket is closed
+ * and the channel allocated by this call is freed; a channel that was
+ * already stored in pmd->cc before the call is left untouched unless it has
+ * been replaced by the new one.
+ */
+int
+memif_connect_slave(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_control_channel *cc = NULL;
+	struct sockaddr_un sun;
+	int sockfd;
+	int ret;
+
+	if ((pmd->rx_queues == NULL) || (pmd->tx_queues == NULL) ||
+			(pmd->socket_filename == NULL)) {
+		MIF_LOG(ERR, "%s: Device not configured!",
+			rte_vdev_device_name(pmd->vdev));
+		return -1;
+	}
+
+	memset(pmd->local_disc_string, 0, 96);
+	memset(pmd->remote_disc_string, 0, 96);
+	pmd->flags &= ~ETH_MEMIF_FLAG_DISABLED;
+
+	sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+	if (sockfd < 0) {
+		MIF_LOG(ERR, "%s: Failed to open socket.", rte_vdev_device_name(pmd->vdev));
+		return -1;
+	}
+
+	/* zero the address so that sun_path is always NUL terminated */
+	memset(&sun, 0, sizeof(sun));
+	sun.sun_family = AF_UNIX;
+	strncpy(sun.sun_path, pmd->socket_filename,
+		sizeof(sun.sun_path) - 1);
+
+	ret = connect(sockfd, (struct sockaddr *)&sun,
+		sizeof(struct sockaddr_un));
+	if (ret < 0) {
+		MIF_LOG(ERR, "%s: Failed to connect socket: %s.",
+			rte_vdev_device_name(pmd->vdev),
+			pmd->socket_filename);
+		goto error;
+	}
+
+	MIF_LOG(DEBUG, "%s: Memif socket: %s connected.",
+		rte_vdev_device_name(pmd->vdev), pmd->socket_filename);
+
+	cc = rte_zmalloc("memif-cc",
+		sizeof(struct memif_control_channel), 0);
+	if (cc == NULL) {
+		MIF_LOG(ERR, "%s: Failed to allocate control channel.", rte_vdev_device_name(pmd->vdev));
+		goto error;
+	}
+
+	cc->intr_handle.fd = sockfd;
+	cc->intr_handle.type = RTE_INTR_HANDLE_EXT;
+	cc->socket = NULL;
+	cc->pmd = pmd;
+	TAILQ_INIT(&cc->msg_queue);
+
+	/*
+	 * pmd->cc is set before the callback is registered, so the channel is
+	 * already reachable from the device when the first interrupt arrives.
+	 */
+	pmd->cc = cc;
+
+	ret = rte_intr_callback_register(&cc->intr_handle,
+		memif_intr_handler, cc);
+	if (ret < 0) {
+		MIF_LOG(ERR, "%s: Failed to register interrupt callback "
+			"for controll fd", rte_vdev_device_name(pmd->vdev));
+		pmd->cc = NULL;
+		goto error;
+	}
+
+	return 0;
+
+error:
+	/* sockfd is a valid descriptor on every path that gets here, 0 included */
+	close(sockfd);
+	rte_free(cc);
+	return -1;
+}
diff --git a/drivers/net/memif/memif_socket.h b/drivers/net/memif/memif_socket.h
new file mode 100644
index 000000000..f48f3e606
--- /dev/null
+++ b/drivers/net/memif/memif_socket.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
+ */
+
+#ifndef _MEMIF_SOCKET_H_
+#define _MEMIF_SOCKET_H_
+
+#include <sys/queue.h>
+
+/*
+ * Remove device from socket device list. If no device is left on the socket,
+ * remove the socket as well.
+ */
+void memif_socket_remove_device(struct pmd_internals *pmd);
+/*
+ * NOTE(review): the definition is not visible next to this declaration;
+ * presumably queues a disconnect message carrying the reason string and the
+ * error code on the control channel - confirm against the implementation.
+ */
+void memif_msg_enq_disconnect(struct memif_control_channel *cc,
+			      const char *reason, int err_code);
+/*
+ * Attach the device to the socket named by socket_filename, creating the
+ * socket on first use. Returns 0 on success, a negative value on failure.
+ */
+int memif_socket_init(struct rte_eth_dev *dev, const char *socket_filename);
+/* NOTE(review): presumably tears down the device connection - confirm. */
+void memif_disconnect(struct rte_eth_dev *dev);
+
+/* Allow master to receive connection requests. */
+int memif_connect_master(struct rte_eth_dev *dev);
+
+/* Send connection request. */
+int memif_connect_slave(struct rte_eth_dev *dev);
+/* One entry of a socket's device list (memif_socket.pmd_queue). */
+struct memif_socket_pmd_list_elt {
+	TAILQ_ENTRY(memif_socket_pmd_list_elt) next;	/* list linkage */
+	struct pmd_internals *pmd;			/* device attached to the socket */
+};
+/* Name of the hash that maps socket filenames to struct memif_socket. */
+#define MEMIF_SOCKET_HASH_NAME			"memif-sh"
+struct memif_socket {
+	struct rte_intr_handle intr_handle;	/* listening fd; filled in for listeners only */
+	uint8_t listener;			/* non-zero when the socket accepts connections */
+	char filename[256];			/* socket path, also used as hash key */
+
+	TAILQ_HEAD(, memif_socket_pmd_list_elt) pmd_queue;	/* devices using this socket */
+};
+
+/* Control message queue element. */
+struct memif_msg_queue_elt {
+	TAILQ_ENTRY(memif_msg_queue_elt) next;	/* linkage in memif_control_channel.msg_queue */
+	memif_msg_t msg;			/* the queued control message */
+	int fd;					/* NOTE(review): presumably an fd sent along with msg - confirm */
+};
+/* State of one control connection between two memif endpoints. */
+struct memif_control_channel {
+	struct rte_intr_handle intr_handle;	/* connection fd, served by memif_intr_handler() */
+	TAILQ_HEAD(, memif_msg_queue_elt) msg_queue;	/* queued control messages */
+	struct memif_socket *socket;	/* listener that accepted the connection; NULL on the slave side */
+	struct pmd_internals *pmd;	/* device bound to the channel; NULL until one is assigned */
+};
+
+#endif /* _MEMIF_SOCKET_H_ */
diff --git a/drivers/net/memif/meson.build b/drivers/net/memif/meson.build
new file mode 100644
index 000000000..ea18394fd
--- /dev/null
+++ b/drivers/net/memif/meson.build
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018 Cisco Systems, Inc.  All rights reserved.
+
+if host_machine.system() != 'linux'  # the driver sources call memfd_create() and eventfd()
+        build = false
+endif
+sources = files('rte_eth_memif.c',
+		'memif_socket.c')
diff --git a/drivers/net/memif/rte_eth_memif.c b/drivers/net/memif/rte_eth_memif.c
new file mode 100644
index 000000000..d7b1295a7
--- /dev/null
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -0,0 +1,1159 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
+ */
+
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/if_ether.h>
+#include <errno.h>
+#include <sys/eventfd.h>
+
+#include <rte_version.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ethdev_driver.h>
+#include <rte_ethdev_vdev.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+#include <rte_bus_vdev.h>
+
+#include <rte_eth_memif.h>
+#include <memif_socket.h>
+
+#define ETH_MEMIF_ID_ARG		"id"
+#define ETH_MEMIF_ROLE_ARG		"role"
+#define ETH_MEMIF_BUFFER_SIZE_ARG	"bsize"
+#define ETH_MEMIF_RING_SIZE_ARG		"rsize"
+#define ETH_MEMIF_NRXQ_ARG		"nrxq"
+#define ETH_MEMIF_NTXQ_ARG		"ntxq"
+#define ETH_MEMIF_SOCKET_ARG		"socket"
+#define ETH_MEMIF_MAC_ARG		"mac"
+#define ETH_MEMIF_ZC_ARG		"zero-copy"
+#define ETH_MEMIF_SECRET_ARG		"secret"
+
+/* NULL terminated list of the devargs keys accepted by the driver. */
+static const char *valid_arguments[] = {
+	ETH_MEMIF_ID_ARG,
+	ETH_MEMIF_ROLE_ARG,
+	ETH_MEMIF_BUFFER_SIZE_ARG,
+	ETH_MEMIF_RING_SIZE_ARG,
+	ETH_MEMIF_NRXQ_ARG,
+	ETH_MEMIF_NTXQ_ARG,
+	ETH_MEMIF_SOCKET_ARG,
+	ETH_MEMIF_MAC_ARG,
+	ETH_MEMIF_ZC_ARG,
+	ETH_MEMIF_SECRET_ARG,
+	NULL
+};
+
+static struct rte_vdev_driver pmd_memif_drv;
+
+/*
+ * Return a string literal of the form "memif-<major>.<minor>", assembled at
+ * compile time from MEMIF_VERSION_MAJOR and MEMIF_VERSION_MINOR.
+ */
+const char *
+memif_version(void)
+{
+/* two levels, so the version macros are expanded before being stringified */
+#define MIF_VER_XSTR(v)	#v
+#define MIF_VER_STR(v)	MIF_VER_XSTR(v)
+	return "memif-" MIF_VER_STR(MEMIF_VERSION_MAJOR) "."
+	       MIF_VER_STR(MEMIF_VERSION_MINOR);
+#undef MIF_VER_STR
+#undef MIF_VER_XSTR
+}
+
+/*
+ * Fill dev_info with the device limits: one MAC address, a maximum rx packet
+ * length of ETH_FRAME_LEN and the queue limits taken from the configured
+ * ring counts.
+ */
+static void
+memif_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+
+	dev_info->if_index = pmd->if_index;
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
+
+	/* a slave receives on M2S rings and sends on S2M rings, a master the reverse */
+	if (pmd->role == MEMIF_ROLE_SLAVE) {
+		dev_info->max_rx_queues = pmd->cfg.num_m2s_rings;
+		dev_info->max_tx_queues = pmd->cfg.num_s2m_rings;
+	} else {
+		dev_info->max_rx_queues = pmd->cfg.num_s2m_rings;
+		dev_info->max_tx_queues = pmd->cfg.num_m2s_rings;
+	}
+
+	dev_info->min_rx_bufsize = 0;
+}
+
+/*
+ * Return the address of ring number ring_num of the given type.
+ *
+ * Rings are stored back to back in region 0; each one takes the ring header
+ * plus 2^log2_ring_size descriptors. The ring index is
+ * ring_num + type * num_s2m_rings.
+ * NOTE(review): this assumes MEMIF_RING_S2M == 0 and MEMIF_RING_M2S == 1,
+ * i.e. S2M rings come first - confirm against the enum definition.
+ */
+static inline memif_ring_t *
+memif_get_ring(struct pmd_internals *pmd, memif_ring_type_t type,
+	       uint16_t ring_num)
+{
+	/* rings only in region 0; byte pointer, as void * arithmetic is a GNU extension */
+	uint8_t *base = pmd->regions[0].addr;
+	size_t ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) *
+			   ((size_t)1 << pmd->run.log2_ring_size);
+	size_t index = (size_t)ring_num +
+		       (size_t)type * pmd->run.num_s2m_rings;
+
+	return (memif_ring_t *)(base + index * ring_size);
+}
+
+/*
+ * Return the address of the buffer a descriptor refers to: the start of
+ * region d->region plus d->offset bytes.
+ * NOTE(review): neither d->region nor d->offset is validated here although
+ * the descriptor lives in memory shared with the peer - confirm that callers
+ * can trust both values.
+ */
+static inline void *
+memif_get_buffer(struct pmd_internals *pmd, memif_desc_t *d)
+{
+	/* byte pointer, as arithmetic on void * is a GNU extension */
+	return (uint8_t *)pmd->regions[d->region].addr + d->offset;
+}
+/*
+ * Burst receive: copy up to nb_pkts packets from the queue's ring into
+ * mbufs allocated from mq->mempool and store them in bufs.
+ *
+ * Returns the number of packets stored in bufs, or 0 when the device is not
+ * connected or the queue has no ring. mq->n_pkts and mq->n_bytes are updated.
+ *
+ * On an S2M queue the slots between mq->last_head and ring->head are
+ * consumed and ring->tail is advanced. On an M2S queue the slots between
+ * mq->last_tail and ring->tail are consumed, then the ring is refilled by
+ * resetting descriptor lengths to the buffer size and advancing ring->head.
+ *
+ * NOTE(review): when the allocation of a chained mbuf fails, mbuf_head is
+ * neither freed nor returned - looks like a leak, confirm.
+ * NOTE(review): dst_off is reset at next_slot while mbuf is kept, so data of
+ * a follow-up descriptor seems to be copied to offset 0 again - confirm.
+ */
+static uint16_t
+eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct memif_queue *mq = queue;
+	struct pmd_internals *pmd = mq->pmd;
+	if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
+		return 0;
+	memif_ring_t *ring = mq->ring;
+	if (unlikely(ring == NULL))
+		return 0;
+	uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
+	uint16_t n_rx_pkts = 0;
+	uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
+			     RTE_PKTMBUF_HEADROOM;
+	uint16_t src_len, src_off, dst_len, dst_off, cp_len;
+	memif_ring_type_t type = mq->type;
+	memif_desc_t *d0;
+	struct rte_mbuf *mbuf;
+	struct rte_mbuf *mbuf_head = NULL;
+
+	/* consume interrupt: read the 8-byte counter from the queue's
+	 * interrupt fd unless interrupts are masked in the ring flags
+	 */
+	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
+		uint64_t b;
+		ssize_t size __rte_unused;
+		size = read (mq->intr_handle.fd, &b, sizeof (b));
+	}
+
+	ring_size = 1 << mq->log2_ring_size;
+	mask = ring_size - 1;
+
+	cur_slot = (type == MEMIF_RING_S2M) ? mq->last_head : mq->last_tail;
+	last_slot = (type == MEMIF_RING_S2M) ? ring->head : ring->tail;
+	if (cur_slot == last_slot)
+		goto refill; /* nothing new on the ring */
+	n_slots = last_slot - cur_slot;
+
+	while (n_slots && n_rx_pkts < nb_pkts) {
+		mbuf_head = rte_pktmbuf_alloc(mq->mempool);
+		if (unlikely(mbuf_head == NULL))
+			goto no_free_bufs;
+		mbuf = mbuf_head;
+		mbuf->port = mq->in_port;
+
+	next_slot:
+		s0 = cur_slot & mask; /* slot counters run freely, mask gives the ring index */
+		d0 = &ring->desc[s0];
+
+		src_len = d0->length;
+		dst_off = 0;
+		src_off = 0;
+
+		do {
+			dst_len = mbuf_size - dst_off;
+			if (dst_len == 0) { /* current mbuf is full: chain a new segment */
+				dst_off = 0;
+				dst_len = mbuf_size + RTE_PKTMBUF_HEADROOM;
+
+				mbuf = rte_pktmbuf_alloc(mq->mempool);
+				if (unlikely(mbuf == NULL))
+					goto no_free_bufs;
+				mbuf->port = mq->in_port;
+				rte_pktmbuf_chain(mbuf_head, mbuf);
+			}
+			cp_len = memif_min(dst_len, src_len);
+
+			rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf) += cp_len;
+
+			memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, dst_off),
+			       memif_get_buffer(pmd, d0) + src_off, cp_len);
+
+			mq->n_bytes += cp_len;
+			src_off += cp_len;
+			dst_off += cp_len;
+			src_len -= cp_len;
+		} while (src_len);
+
+		cur_slot++;
+		n_slots--;
+		if (d0->flags & MEMIF_DESC_FLAG_NEXT) { /* packet continues in the next descriptor */
+			goto next_slot;
+		}
+
+		*bufs++ = mbuf_head;
+		n_rx_pkts++;
+
+	}
+
+no_free_bufs:
+	if (type == MEMIF_RING_S2M) {
+		rte_mb(); /* full barrier before the new tail is published */
+		ring->tail = mq->last_head = cur_slot;
+	} else {
+		mq->last_tail = cur_slot;
+	}
+
+refill:
+	if (type == MEMIF_RING_M2S) {
+		uint16_t head = ring->head;
+		n_slots = ring_size - head + mq->last_tail; /* ring_size - (head - last_tail) */
+
+		while (n_slots--) {
+			s0 = head++ & mask;
+			d0 = &ring->desc[s0];
+			d0->length = pmd->run.buffer_size;
+		}
+		rte_mb(); /* full barrier before the new head is published */
+		ring->head = head;
+	}
+
+	mq->n_pkts += n_rx_pkts;
+	return n_rx_pkts;
+}
+/*
+ * Burst transmit: copy up to nb_pkts mbufs, following their segment chains,
+ * into ring buffers, publish the new ring position and free the mbufs that
+ * were copied.
+ *
+ * Returns the number of packets copied, or 0 when the device is not
+ * connected or the queue has no ring. Packets that were not copied are
+ * added to mq->n_err and are not freed here. A packet that runs out of
+ * slots midway is abandoned and slot is rewound to where that packet began.
+ *
+ * On an S2M queue ring->head is advanced, otherwise ring->tail. Unless
+ * interrupts are masked in the ring flags, the 8-byte value 1 is written to
+ * the queue's interrupt fd afterwards.
+ *
+ * NOTE(review): the failure log prints a pointer difference with %ld -
+ * confirm this is acceptable on all supported targets.
+ */
+static uint16_t
+eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct memif_queue *mq = queue;
+	struct pmd_internals *pmd = mq->pmd;
+	if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
+		return 0;
+	memif_ring_t *ring = mq->ring;
+	if (unlikely(ring == NULL))
+		return 0;
+	uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0;
+	uint16_t src_len, src_off, dst_len, dst_off, cp_len;
+	memif_ring_type_t type = mq->type;
+	memif_desc_t *d0;
+	struct rte_mbuf *mbuf;
+	struct rte_mbuf *mbuf_head;
+
+	ring_size = 1 << mq->log2_ring_size;
+	mask = ring_size - 1;
+
+	n_free = ring->tail - mq->last_tail;
+	mq->last_tail += n_free; /* same as mq->last_tail = ring->tail */
+	slot = (type == MEMIF_RING_S2M) ? ring->head : ring->tail;
+
+	if (type == MEMIF_RING_S2M)
+		n_free = ring_size - ring->head + mq->last_tail;
+	else
+		n_free = ring->head - ring->tail;
+
+	while (n_free && n_tx_pkts < nb_pkts) {
+		mbuf_head = *bufs++;
+		mbuf = mbuf_head;
+
+		saved_slot = slot; /* first slot of this packet, used to rewind */
+		d0 = &ring->desc[slot & mask];
+		dst_off = 0;
+		dst_len = (type == MEMIF_RING_S2M) ? pmd->run.buffer_size : d0->length;
+
+	next_in_chain:
+		src_off = 0;
+		src_len = rte_pktmbuf_data_len(mbuf);
+
+		while (src_len) {
+			if (dst_len == 0) { /* current buffer is full: move on to the next slot */
+				if (n_free) {
+					slot++;
+					n_free--;
+					d0->flags |= MEMIF_DESC_FLAG_NEXT;
+					d0 = &ring->desc[slot & mask];
+					dst_off = 0;
+					dst_len = (type == MEMIF_RING_S2M) ?
+						  pmd->run.buffer_size : d0->length;
+					d0->flags = 0;
+				} else {
+					slot = saved_slot;
+					goto no_free_slots;
+				}
+			}
+			cp_len = memif_min(dst_len, src_len);
+
+			memcpy(memif_get_buffer(pmd, d0) + dst_off,
+			       rte_pktmbuf_mtod_offset(mbuf, void *, src_off), cp_len);
+
+			mq->n_bytes += cp_len;
+			src_off += cp_len;
+			dst_off += cp_len;
+			src_len -= cp_len;
+			dst_len -= cp_len;
+
+			d0->length = dst_off;
+		}
+
+		if (rte_pktmbuf_is_contiguous(mbuf) == 0) { /* more segments follow */
+			mbuf = mbuf->next;
+			goto next_in_chain;
+		}
+
+		n_tx_pkts++;
+		slot++;
+		n_free--;
+		rte_pktmbuf_free(mbuf_head);
+	}
+
+no_free_slots:
+	rte_mb(); /* full barrier before the new ring position is published */
+	if (type == MEMIF_RING_S2M)
+		ring->head = slot;
+	else
+		ring->tail = slot;
+
+	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
+		uint64_t a = 1;
+		ssize_t size = write (mq->intr_handle.fd, &a, sizeof (a));
+		if (unlikely(size < 0)) {
+			MIF_LOG(WARNING, "%s: Failed to send interrupt on qid %ld: %s",
+			rte_vdev_device_name(pmd->vdev), mq - pmd->tx_queues, strerror(errno));
+		}
+	}
+
+	mq->n_err += nb_pkts - n_tx_pkts;
+	mq->n_pkts += n_tx_pkts;
+	return n_tx_pkts;
+}
+
+/*
+ * Release all shared memory regions of the device.
+ *
+ * Every mapped region is unmapped and every region fd is closed, then the
+ * region array is freed and pmd->regions / pmd->regions_num are reset, so
+ * calling the function again is harmless.
+ */
+void
+memif_free_regions(struct pmd_internals *pmd)
+{
+	int i;
+	struct memif_region *r;
+
+	if (pmd->regions == NULL)
+		return;
+
+	for (i = 0; i < pmd->regions_num; i++) {
+		r = &pmd->regions[i];
+		/*
+		 * A region may have an fd but no mapping, so skip only the
+		 * munmap() and keep going instead of leaving the loop.
+		 */
+		if (r->addr != NULL && r->addr != MAP_FAILED) {
+			munmap(r->addr, r->region_size);
+			r->addr = NULL;
+		}
+		/* entries of a zeroed array have fd 0, which must not be closed */
+		if (r->fd > 0) {
+			close(r->fd);
+			r->fd = -1;
+		}
+	}
+	rte_free(pmd->regions);
+	pmd->regions = NULL;
+	pmd->regions_num = 0;
+}
+
+/*
+ * Allocate and map the shared memory regions of the device.
+ *
+ * brn is the number of buffer regions; brn + 1 regions are created because
+ * region 0 holds the rings. Each region is backed by a sealed memfd that is
+ * sized with ftruncate() and mapped read/write.
+ *
+ * Returns 0 on success, -ENOMEM if the region array cannot be allocated and
+ * -1 on any other failure. On failure the regions set up so far stay in
+ * pmd->regions; a region whose mapping failed has addr == NULL.
+ */
+static int
+memif_alloc_regions(struct pmd_internals *pmd, uint8_t brn)
+{
+	struct memif_region *r;
+	char shm_name[32];
+	int i;
+	int ret = 0;
+
+	r = rte_zmalloc("memif_region", sizeof(struct memif_region) * (brn + 1), 0);
+	if (r == NULL) {
+		MIF_LOG(ERR, "%s: Failed to allocate regions.",
+			rte_vdev_device_name(pmd->vdev));
+		return -ENOMEM;
+	}
+
+	pmd->regions = r;
+	pmd->regions_num = brn + 1;
+
+	/*
+	 * Create shm for every region. Region 0 is reserved for descriptors.
+	 * Other regions contain buffers.
+	 */
+	for (i = 0; i < (brn + 1); i++) {
+		r = &pmd->regions[i];
+
+		r->buffer_offset = (i == 0) ? (pmd->run.num_s2m_rings +
+					pmd->run.num_m2s_rings) *
+					(sizeof (memif_ring_t) +
+					sizeof (memif_desc_t) *
+					(1 << pmd->run.log2_ring_size)) : 0;
+		r->region_size = (i == 0) ? r->buffer_offset :
+					(uint32_t)(pmd->run.buffer_size *
+					(1 << pmd->run.log2_ring_size) *
+					(pmd->run.num_s2m_rings +
+					pmd->run.num_m2s_rings));
+
+		/* bounded formatting; snprintf() always terminates the name */
+		snprintf(shm_name, sizeof(shm_name), "memif region %d", i);
+
+		r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING);
+		if (r->fd < 0) {
+			MIF_LOG(ERR, "%s: Failed to create shm file: %s.",
+				rte_vdev_device_name(pmd->vdev),
+				strerror(errno));
+			return -1;
+		}
+
+		ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK);
+		if (ret < 0) {
+			MIF_LOG(ERR, "%s: Failed to add seals to shm file: %s.",
+				rte_vdev_device_name(pmd->vdev),
+				strerror(errno));
+			return -1;
+		}
+
+		ret = ftruncate(r->fd, r->region_size);
+		if (ret < 0) {
+			MIF_LOG(ERR, "%s: Failed to truncate shm file: %s.",
+				rte_vdev_device_name(pmd->vdev),
+				strerror(errno));
+			return -1;
+		}
+
+		r->addr = mmap(NULL, r->region_size, PROT_READ |
+			PROT_WRITE, MAP_SHARED, r->fd, 0);
+		/* mmap() reports failure with MAP_FAILED, never with NULL */
+		if (r->addr == MAP_FAILED) {
+			MIF_LOG(ERR, "%s: Failed to mmap shm region: %s.",
+				rte_vdev_device_name(pmd->vdev),
+				strerror(errno));
+			/* keep NULL as the only "not mapped" marker */
+			r->addr = NULL;
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Reset one ring and point its descriptors at consecutive buffers of
+ * region 1, starting with buffer number first_slot.
+ */
+static void
+memif_init_one_ring(struct pmd_internals *pmd, memif_ring_t *ring,
+		    uint32_t first_slot)
+{
+	uint32_t ring_size = 1U << pmd->run.log2_ring_size;
+	uint32_t j;
+
+	ring->head = ring->tail = 0;
+	ring->cookie = MEMIF_COOKIE;
+	ring->flags = 0;
+	for (j = 0; j < ring_size; j++) {
+		ring->desc[j].region = 1;
+		ring->desc[j].offset = pmd->regions[1].buffer_offset +
+			(uint32_t)((first_slot + j) * pmd->run.buffer_size);
+		ring->desc[j].length = pmd->run.buffer_size;
+	}
+}
+
+/*
+ * Initialize all S2M and M2S rings of the device.
+ *
+ * Buffers are numbered across all rings, S2M rings first, so every
+ * descriptor gets its own buffer in region 1. The buffer number is kept in
+ * 32 bits: a 16-bit counter wraps once rings * ring size exceeds 65535 and
+ * would make descriptors of different rings share buffers.
+ */
+static void
+memif_init_rings(struct pmd_internals *pmd)
+{
+	uint32_t ring_size = 1U << pmd->run.log2_ring_size;
+	memif_ring_t *ring;
+	int i;
+
+	for (i = 0; i < pmd->run.num_s2m_rings; i++) {
+		ring = memif_get_ring(pmd, MEMIF_RING_S2M, i);
+		memif_init_one_ring(pmd, ring, (uint32_t)i * ring_size);
+	}
+
+	for (i = 0; i < pmd->run.num_m2s_rings; i++) {
+		ring = memif_get_ring(pmd, MEMIF_RING_M2S, i);
+		memif_init_one_ring(pmd, ring,
+			(uint32_t)(i + pmd->run.num_s2m_rings) * ring_size);
+	}
+}
+
+/*
+ * Bind a queue to ring number idx of the given type and create its eventfd.
+ * Returns the eventfd (negative on failure, with errno set by eventfd()).
+ */
+static int
+memif_queue_attach_ring(struct pmd_internals *pmd, struct memif_queue *mq,
+			memif_ring_type_t type, int idx)
+{
+	mq->ring = memif_get_ring(pmd, type, idx);
+	mq->log2_ring_size = pmd->run.log2_ring_size;
+	/* queues located only in region 0 */
+	mq->region = 0;
+	mq->offset = (uint8_t *)mq->ring - (uint8_t *)pmd->regions[0].addr;
+	mq->last_tail = 0;
+	mq->last_head = 0;
+	mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
+
+	return mq->intr_handle.fd;
+}
+
+/*
+ * Bind the tx queues to the S2M rings and the rx queues to the M2S rings,
+ * and create one eventfd per queue. A failed eventfd is only logged.
+ */
+static void
+memif_init_queues(struct pmd_internals *pmd)
+{
+	int idx;
+
+	for (idx = 0; idx < pmd->run.num_s2m_rings; idx++) {
+		if (memif_queue_attach_ring(pmd, &pmd->tx_queues[idx],
+					    MEMIF_RING_S2M, idx) < 0)
+			MIF_LOG(WARNING,"%s: Failed to create eventfd for tx queue %d: %s.",
+				rte_vdev_device_name(pmd->vdev), idx, strerror(errno));
+	}
+
+	for (idx = 0; idx < pmd->run.num_m2s_rings; idx++) {
+		if (memif_queue_attach_ring(pmd, &pmd->rx_queues[idx],
+					    MEMIF_RING_M2S, idx) < 0)
+			MIF_LOG(WARNING, "%s: Failed to create eventfd for rx queue %d: %s.",
+				rte_vdev_device_name(pmd->vdev), idx, strerror(errno));
+	}
+}
+
+/*
+ * Allocate the shared memory regions (one buffer region), then initialize
+ * the rings and the queues. Returns 0 on success or the negative value
+ * reported by memif_alloc_regions().
+ */
+int
+memif_init_regions_and_queues(struct pmd_internals *pmd)
+{
+	int rc = memif_alloc_regions(pmd, /* num of buffer regions */ 1);
+
+	if (rc >= 0) {
+		memif_init_rings(pmd);
+		memif_init_queues(pmd);
+		rc = 0;
+	}
+
+	return rc;
+}
+
+/*
+ * Point a queue at its ring inside the mapped region, verify the ring
+ * cookie and reset all ring positions. When mask_interrupts is set the ring
+ * flags are set to MEMIF_RING_FLAG_MASK_INT.
+ * Returns 0 on success, -1 on a cookie mismatch.
+ */
+static int
+memif_connect_queue(struct pmd_internals *pmd, struct memif_queue *mq,
+		    int mask_interrupts)
+{
+	mq->ring = (memif_ring_t *)((uint8_t *)pmd->regions[mq->region].addr +
+				    mq->offset);
+	if (mq->ring->cookie != MEMIF_COOKIE) {
+		MIF_LOG(ERR, "%s: Wrong cookie", rte_vdev_device_name(pmd->vdev));
+		return -1;
+	}
+	mq->ring->head = mq->ring->tail = mq->last_head =
+		mq->last_tail = 0;
+	/* polling mode by default */
+	if (mask_interrupts)
+		mq->ring->flags = MEMIF_RING_FLAG_MASK_INT;
+
+	return 0;
+}
+
+/*
+ * Finish connecting the device.
+ *
+ * Maps every region that is not mapped yet, binds all queues to their rings
+ * (S2M rings are tx queues on a slave and rx queues on a master, M2S rings
+ * the reverse), marks the device connected and sets the link up.
+ *
+ * Returns 0 on success and -1 if the ethdev cannot be found, a region has no
+ * fd or cannot be mapped, or a ring cookie does not match.
+ */
+int
+memif_connect(struct pmd_internals *pmd)
+{
+	struct rte_eth_dev *eth_dev = rte_eth_dev_allocated(
+		rte_vdev_device_name(pmd->vdev));
+	struct memif_region *mr;
+	struct memif_queue *mq;
+	int i;
+
+	/* the link status is written through eth_dev at the end */
+	if (eth_dev == NULL) {
+		MIF_LOG(ERR, "%s: eth dev not allocated",
+			rte_vdev_device_name(pmd->vdev));
+		return -1;
+	}
+
+	for (i = 0; i < pmd->regions_num; i++) {
+		mr = &pmd->regions[i];
+		if (mr->addr != NULL)
+			continue;
+		if (mr->fd < 0)
+			return -1;
+		mr->addr = mmap(NULL, mr->region_size,
+			PROT_READ | PROT_WRITE, MAP_SHARED,
+			mr->fd, 0);
+		/* mmap() reports failure with MAP_FAILED, never with NULL */
+		if (mr->addr == MAP_FAILED) {
+			mr->addr = NULL;
+			return -1;
+		}
+	}
+
+	for (i = 0; i < pmd->run.num_s2m_rings; i++) {
+		mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
+			&pmd->tx_queues[i] : &pmd->rx_queues[i];
+		if (memif_connect_queue(pmd, mq,
+				pmd->role == MEMIF_ROLE_MASTER) < 0)
+			return -1;
+	}
+	for (i = 0; i < pmd->run.num_m2s_rings; i++) {
+		mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
+			&pmd->rx_queues[i] : &pmd->tx_queues[i];
+		if (memif_connect_queue(pmd, mq,
+				pmd->role == MEMIF_ROLE_SLAVE) < 0)
+			return -1;
+	}
+
+	pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
+	pmd->flags |= ETH_MEMIF_FLAG_CONNECTED;
+	eth_dev->data->dev_link.link_status = ETH_LINK_UP;
+	MIF_LOG(INFO, "%s: Connected.", rte_vdev_device_name(pmd->vdev));
+	return 0;
+}
+
+/*
+ * Start the device: a slave sends a connection request, a master enables
+ * itself for incoming requests. Returns the result of the role specific
+ * connect function, or -1 for an unknown role.
+ */
+static int
+memif_dev_start(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+
+	if (pmd->role == MEMIF_ROLE_SLAVE)
+		return memif_connect_slave(dev);
+
+	if (pmd->role == MEMIF_ROLE_MASTER)
+		return memif_connect_master(dev);
+
+	MIF_LOG(ERR, "%s: Unknown role: %d.",
+		rte_vdev_device_name(pmd->vdev), pmd->role);
+	return -1;
+}
+
+/* Device configuration hook: nothing to configure, always returns 0. */
+static int
+memif_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;
+}
+
+/*
+ * Set up tx queue qid.
+ *
+ * The driver keeps all tx queues in one array (pmd->tx_queues) that is
+ * grown to qid + 1 elements. Element qid is initialized and published in
+ * dev->data->tx_queues[qid]. Because growing the array may move it, the
+ * pointers published for lower queue ids are refreshed as well.
+ *
+ * Returns 0 on success, -ENOMEM if the array cannot be resized.
+ *
+ * NOTE(review): queues are expected to be set up in ascending qid order; a
+ * lower qid set up after a higher one shrinks the array - confirm whether
+ * that order is guaranteed by the callers.
+ */
+static int
+memif_tx_queue_setup(struct rte_eth_dev *dev,
+		     uint16_t qid,
+		     uint16_t nb_tx_desc __rte_unused,
+		     unsigned int socket_id __rte_unused,
+		     const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_queue *queues;
+	struct memif_queue *mq;
+	uint16_t i;
+
+	queues = rte_realloc(pmd->tx_queues,
+			     sizeof(struct memif_queue) * (qid + 1), 0);
+	if (queues == NULL) {
+		MIF_LOG(ERR, "%s: Failed to alloc tx queue %u.",
+			rte_vdev_device_name(pmd->vdev), qid);
+		return -ENOMEM;
+	}
+
+	pmd->tx_queues = queues;
+
+	/* the array may have moved: re-publish the queues set up earlier */
+	for (i = 0; i < qid; i++) {
+		if (dev->data->tx_queues[i] != NULL)
+			dev->data->tx_queues[i] = &queues[i];
+	}
+
+	/* work on the element of this queue, not on the start of the array */
+	mq = &queues[qid];
+	mq->type = (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_S2M : MEMIF_RING_M2S;
+	mq->n_pkts = 0;
+	mq->n_bytes = 0;
+	mq->n_err = 0;
+	mq->intr_handle.fd = -1;
+	mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
+	mq->pmd = pmd;
+	dev->data->tx_queues[qid] = mq;
+
+	return 0;
+}
+
+/*
+ * Set up rx queue qid, receiving into mbufs from mb_pool.
+ *
+ * The driver keeps all rx queues in one array (pmd->rx_queues) that is
+ * grown to qid + 1 elements. Element qid is initialized and published in
+ * dev->data->rx_queues[qid]. Because growing the array may move it, the
+ * pointers published for lower queue ids are refreshed as well.
+ *
+ * Returns 0 on success, -ENOMEM if the array cannot be resized.
+ *
+ * NOTE(review): queues are expected to be set up in ascending qid order; a
+ * lower qid set up after a higher one shrinks the array - confirm whether
+ * that order is guaranteed by the callers.
+ */
+static int
+memif_rx_queue_setup(struct rte_eth_dev *dev,
+		     uint16_t qid,
+		     uint16_t nb_rx_desc __rte_unused,
+		     unsigned int socket_id __rte_unused,
+		     const struct rte_eth_rxconf *rx_conf __rte_unused,
+		     struct rte_mempool *mb_pool)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_queue *queues;
+	struct memif_queue *mq;
+	uint16_t i;
+
+	queues = rte_realloc(pmd->rx_queues,
+			     sizeof(struct memif_queue) * (qid + 1), 0);
+	if (queues == NULL) {
+		MIF_LOG(ERR, "%s: Failed to alloc rx queue %u.",
+			rte_vdev_device_name(pmd->vdev), qid);
+		return -ENOMEM;
+	}
+
+	pmd->rx_queues = queues;
+
+	/* the array may have moved: re-publish the queues set up earlier */
+	for (i = 0; i < qid; i++) {
+		if (dev->data->rx_queues[i] != NULL)
+			dev->data->rx_queues[i] = &queues[i];
+	}
+
+	/* work on the element of this queue, not on the start of the array */
+	mq = &queues[qid];
+	mq->type = (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_M2S : MEMIF_RING_S2M;
+	mq->n_pkts = 0;
+	mq->n_bytes = 0;
+	mq->n_err = 0;
+	mq->intr_handle.fd = -1;
+	mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
+	mq->mempool = mb_pool;
+	mq->in_port = dev->data->port_id;
+	mq->pmd = pmd;
+	dev->data->rx_queues[qid] = mq;
+
+	return 0;
+}
+
+/* Link update hook: does nothing and always returns 0. */
+static int
+memif_link_update(struct rte_eth_dev *dev __rte_unused,
+		  int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
+/*
+ * Collect device statistics from the per-queue counters.
+ *
+ * Totals and per-queue values are filled for at most
+ * RTE_ETHDEV_QUEUE_STAT_CNTRS rx and tx queues. A slave receives on M2S
+ * rings and transmits on S2M rings, a master the reverse, so the number of
+ * rx/tx queues is taken from the matching ring count. Always returns 0.
+ */
+static int
+memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_queue *mq;
+	int i;
+
+	stats->ipackets = 0;
+	stats->ibytes = 0;
+	stats->opackets = 0;
+	stats->obytes = 0;
+	stats->oerrors = 0;
+
+	/* rx queues: M2S rings on a slave, S2M rings on a master */
+	uint8_t tmp = (pmd->role == MEMIF_ROLE_SLAVE) ?	pmd->run.num_m2s_rings :
+							pmd->run.num_s2m_rings;
+	uint8_t nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
+		     RTE_ETHDEV_QUEUE_STAT_CNTRS;
+
+	/* RX stats */
+	for (i = 0; i < nq; i++) {
+		mq = &pmd->rx_queues[i];
+		stats->q_ipackets[i] = mq->n_pkts;
+		stats->q_ibytes[i] = mq->n_bytes;
+		stats->ipackets += mq->n_pkts;
+		stats->ibytes += mq->n_bytes;
+	}
+
+	/* tx queues: S2M rings on a slave, M2S rings on a master */
+	tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_s2m_rings :
+						pmd->run.num_m2s_rings;
+	nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
+	     RTE_ETHDEV_QUEUE_STAT_CNTRS;
+
+	/* TX stats */
+	for (i = 0; i < nq; i++) {
+		mq = &pmd->tx_queues[i];
+		stats->q_opackets[i] = mq->n_pkts;
+		stats->q_obytes[i] = mq->n_bytes;
+		stats->q_errors[i] = mq->n_err;
+		stats->opackets += mq->n_pkts;
+		stats->obytes += mq->n_bytes;
+		stats->oerrors += mq->n_err;
+	}
+	return 0;
+}
+
+/*
+ * Zero the packet, byte and error counters of every queue that is bound to
+ * an S2M or M2S ring.
+ */
+static void
+memif_stats_reset(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	const int is_slave = (pmd->role == MEMIF_ROLE_SLAVE);
+	/* S2M rings are tx queues on a slave and rx queues on a master */
+	struct memif_queue *s2m_queues = is_slave ? pmd->tx_queues :
+						    pmd->rx_queues;
+	struct memif_queue *m2s_queues = is_slave ? pmd->rx_queues :
+						    pmd->tx_queues;
+	int idx;
+
+	for (idx = 0; idx < pmd->run.num_s2m_rings; idx++) {
+		s2m_queues[idx].n_pkts = 0;
+		s2m_queues[idx].n_bytes = 0;
+		s2m_queues[idx].n_err = 0;
+	}
+
+	for (idx = 0; idx < pmd->run.num_m2s_rings; idx++) {
+		m2s_queues[idx].n_pkts = 0;
+		m2s_queues[idx].n_bytes = 0;
+		m2s_queues[idx].n_err = 0;
+	}
+}
+
+/*
+ * Rx interrupt enable hook. Interrupt mode is not implemented: a warning is
+ * logged and -1 is returned.
+ *
+ * TODO: unmask MEMIF interrupts on the ring
+ * (pmd->rx_queues[qid].ring->flags &= ~MEMIF_RING_FLAG_MASK_INT) and tell
+ * dpdk to use interrupt mode
+ * (return rte_intr_enable(&pmd->rx_queues[qid].intr_handle)).
+ */
+static int
+memif_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	MIF_LOG(WARNING, "%s: Interrupt mode not supported.",
+		rte_vdev_device_name(internals->vdev));
+
+	return -1;
+}
+
+/*
+ * Rx interrupt disable hook. Nothing is done yet; always returns 0.
+ *
+ * TODO: mask MEMIF interrupts on the ring
+ * (pmd->rx_queues[qid].ring->flags |= MEMIF_RING_FLAG_MASK_INT) and tell
+ * dpdk to use polling mode
+ * (return rte_intr_disable(&pmd->rx_queues[qid].intr_handle)).
+ */
+static int
+memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
+{
+	/* the device is not needed until the TODO above is implemented */
+	(void)dev;
+
+	return 0;
+}
+
+/* ethdev operations implemented by the memif driver */
+static const struct eth_dev_ops ops = {
+	/* device level */
+	.dev_configure = memif_dev_configure,
+	.dev_start = memif_dev_start,
+	.dev_infos_get = memif_dev_info,
+	.link_update = memif_link_update,
+	/* queues */
+	.rx_queue_setup = memif_rx_queue_setup,
+	.tx_queue_setup = memif_tx_queue_setup,
+	.rx_queue_intr_enable = memif_rx_queue_intr_enable,
+	.rx_queue_intr_disable = memif_rx_queue_intr_disable,
+	/* statistics */
+	.stats_get = memif_stats_get,
+	.stats_reset = memif_stats_reset,
+};
+
+/*
+ * Allocate an ethdev for the vdev and fill its private data from the parsed
+ * device arguments.
+ *
+ * A zero log2_ring_size, nrxq, ntxq or buffer_size selects the matching
+ * ETH_MEMIF_DEFAULT_* value. A NULL eth_addr selects
+ * ETH_MEMIF_DEFAULT_ETH_ADDR and a NULL secret leaves the secret empty.
+ * Secrets longer than the secret buffer are truncated so that the stored
+ * string is always NUL-terminated.
+ *
+ * Returns 0 on success and a negative value on failure. Nothing stays
+ * allocated on failure.
+ */
+static int
+memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
+	     memif_interface_id_t id, uint32_t flags,
+	     const char *socket_filename,
+	     memif_log2_ring_size_t log2_ring_size, uint8_t nrxq,
+	     uint8_t ntxq, uint16_t buffer_size, const char *secret,
+	     const char *eth_addr)
+{
+	int ret = 0;
+	struct rte_eth_dev *eth_dev;
+	struct rte_eth_dev_data *data;
+	struct pmd_internals *pmd;
+	struct ether_addr mac;
+	const unsigned int numa_node = vdev->device.numa_node;
+	const char *name = rte_vdev_device_name(vdev);
+
+	if (flags & ETH_MEMIF_FLAG_ZERO_COPY) {
+		MIF_LOG(ERR, "Zero-copy not supported.");
+		return -1;
+	}
+
+	/* FIXME: generate mac? */
+	if (eth_addr == NULL)
+		eth_addr = ETH_MEMIF_DEFAULT_ETH_ADDR;
+
+	/*
+	 * Parse the MAC before allocating anything so a malformed address is
+	 * rejected instead of leaving part of the address unset.
+	 */
+	if (sscanf(eth_addr, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+		   &mac.addr_bytes[0], &mac.addr_bytes[1],
+		   &mac.addr_bytes[2], &mac.addr_bytes[3],
+		   &mac.addr_bytes[4], &mac.addr_bytes[5]) != 6) {
+		MIF_LOG(ERR, "%s: Invalid MAC address: %s.", name, eth_addr);
+		return -EINVAL;
+	}
+
+	eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
+	if (eth_dev == NULL) {
+		MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
+		return -1;
+	}
+
+	pmd = eth_dev->data->dev_private;
+	memset(pmd, 0, sizeof(*pmd));
+
+	pmd->if_index = id;
+	pmd->vdev = vdev;
+	pmd->id = id;
+	pmd->flags = flags;
+	/* stays disabled until the device is configured */
+	pmd->flags |= ETH_MEMIF_FLAG_DISABLED;
+	pmd->role = role;
+	ret = memif_socket_init(eth_dev, socket_filename);
+	if (ret < 0) {
+		/* do not leak the port allocated above */
+		rte_eth_dev_release_port(eth_dev);
+		return ret;
+	}
+
+	/* bounded copy: always leaves pmd->secret NUL-terminated */
+	memset(pmd->secret, 0, sizeof(pmd->secret));
+	if (secret != NULL)
+		snprintf(pmd->secret, sizeof(pmd->secret), "%s", secret);
+
+	pmd->cfg.log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE;
+	if (log2_ring_size != 0)
+		pmd->cfg.log2_ring_size = log2_ring_size;
+	pmd->cfg.num_s2m_rings = ETH_MEMIF_DEFAULT_NRXQ;
+	pmd->cfg.num_m2s_rings = ETH_MEMIF_DEFAULT_NTXQ;
+
+	/* a slave receives on M2S rings, a master on S2M rings */
+	if (nrxq != 0) {
+		if (role == MEMIF_ROLE_SLAVE)
+			pmd->cfg.num_m2s_rings = nrxq;
+		else
+			pmd->cfg.num_s2m_rings = nrxq;
+	}
+	/* a slave transmits on S2M rings, a master on M2S rings */
+	if (ntxq != 0) {
+		if (role == MEMIF_ROLE_SLAVE)
+			pmd->cfg.num_s2m_rings = ntxq;
+		else
+			pmd->cfg.num_m2s_rings = ntxq;
+	}
+
+	pmd->cfg.buffer_size = ETH_MEMIF_DEFAULT_BUFFER_SIZE;
+	if (buffer_size != 0)
+		pmd->cfg.buffer_size = buffer_size;
+
+	pmd->eth_addr = mac;
+
+	data = eth_dev->data;
+	data->dev_private = pmd;
+	data->numa_node = numa_node;
+	/* points into dev_private, not a separate allocation */
+	data->mac_addrs = &pmd->eth_addr;
+
+	eth_dev->dev_ops = &ops;
+	eth_dev->device = &vdev->device;
+	eth_dev->rx_pkt_burst = eth_memif_rx;
+	eth_dev->tx_pkt_burst = eth_memif_tx;
+
+	rte_eth_dev_probing_finish(eth_dev);
+
+	return ret;
+}
+
+/*
+ * kvargs handler for the role argument.
+ *
+ * Accepts exactly "master" or "slave" and stores the matching role in
+ * *extra_args (an enum memif_role_t). Any other value is rejected, including
+ * strings that merely contain one of the two words.
+ *
+ * Returns 0 on success, -EINVAL on an unknown role.
+ */
+static int
+memif_set_role(const char *key __rte_unused, const char *value,
+		void *extra_args)
+{
+	enum memif_role_t *role = (enum memif_role_t *)extra_args;
+
+	if (strcmp(value, "master") == 0) {
+		*role = MEMIF_ROLE_MASTER;
+	} else if (strcmp(value, "slave") == 0) {
+		*role = MEMIF_ROLE_SLAVE;
+	} else {
+		MIF_LOG(ERR, "Unknown role: %s.", value);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * kvargs handler for the zero-copy argument.
+ *
+ * "yes" sets and "no" clears ETH_MEMIF_FLAG_ZERO_COPY in the uint32_t flags
+ * word passed through extra_args. The match is exact, so values that only
+ * contain "yes" or "no" as a substring are rejected.
+ *
+ * Returns 0 on success, -EINVAL if the value is neither "yes" nor "no".
+ */
+static int
+memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args)
+{
+	uint32_t *flags = (uint32_t *)extra_args;
+
+	if (strcmp(value, "yes") == 0) {
+		*flags |= ETH_MEMIF_FLAG_ZERO_COPY;
+	} else if (strcmp(value, "no") == 0) {
+		*flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
+	} else {
+		MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * kvargs handler for the interface id argument. The value is converted as a
+ * base-10 number and stored in the memif_interface_id_t that extra_args
+ * points to. Always returns 0.
+ */
+static int
+memif_set_id(const char *key __rte_unused, const char *value, void *extra_args)
+{
+	memif_interface_id_t *id_out = extra_args;
+
+	/* no validation on purpose: a failed parse yields 0, a valid id */
+	*id_out = strtoul(value, NULL, 10);
+
+	return 0;
+}
+
+/*
+ * kvargs handler for the buffer size argument. Accepts base-10 values in the
+ * range 1..0xFFFF and stores them in the uint16_t that extra_args points to.
+ * Returns 0 on success, -EINVAL when the value is out of range.
+ */
+static int
+memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args)
+{
+	uint16_t *size_out = (uint16_t *)extra_args;
+	unsigned long int parsed = strtoul(value, NULL, 10);
+
+	if (parsed >= 1 && parsed <= 0xFFFF) {
+		*size_out = parsed;
+		return 0;
+	}
+
+	MIF_LOG(ERR, "Invalid buffer size: %s.", value);
+	return -EINVAL;
+}
+
+/*
+ * kvargs handler for the ring size argument (log2 of the ring size). Accepts
+ * base-10 values in the range 1..ETH_MEMIF_MAX_LOG2_RING_SIZE and stores them
+ * in the memif_log2_ring_size_t that extra_args points to.
+ * Returns 0 on success, -EINVAL when the value is out of range.
+ */
+static int
+memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args)
+{
+	memif_log2_ring_size_t *size_out = (memif_log2_ring_size_t *)extra_args;
+	unsigned long int parsed = strtoul(value, NULL, 10);
+
+	if (parsed >= 1 && parsed <= ETH_MEMIF_MAX_LOG2_RING_SIZE) {
+		*size_out = parsed;
+		return 0;
+	}
+
+	MIF_LOG(ERR, "Invalid ring size: %s (max %u).",
+		value, ETH_MEMIF_MAX_LOG2_RING_SIZE);
+	return -EINVAL;
+}
+
+/*
+ * kvargs handler for the queue count arguments (RX and TX).
+ *
+ * Accepts base-10 values in the range 1..0xFF and stores them in the uint8_t
+ * that extra_args points to. The destination must be a uint8_t: the probe
+ * function passes the addresses of its uint8_t nrxq/ntxq variables, so a
+ * wider store would write past them.
+ *
+ * Returns 0 on success, -EINVAL when the value is out of range.
+ */
+static int
+memif_set_nq(const char *key __rte_unused, const char *value, void *extra_args)
+{
+	unsigned long int tmp;
+	uint8_t *nq = (uint8_t *)extra_args;
+
+	tmp = strtoul(value, NULL, 10);
+	if ((tmp == 0) || (tmp > 0xFF)) {
+		MIF_LOG(ERR, "Invalid number of queues: %s.", value);
+		return -EINVAL;
+	}
+	*nq = tmp;
+	return 0;
+}
+
+/*
+ * Check that the directory part of a socket filename exists and that we have
+ * permission to read and write it.
+ *
+ * The directory is everything before the last '/'. A file directly under the
+ * root ("/name") is checked against "/". A filename without any '/' is
+ * rejected. Relative directories are resolved against the current working
+ * directory.
+ *
+ * Returns 0 if the directory is usable, -EINVAL if it is missing or not
+ * accessible, -1 if the temporary buffer cannot be allocated.
+ */
+static inline int
+memif_check_socket_filename(const char *filename)
+{
+	const char *slash;
+	char *dir;
+	size_t dir_len;
+	int ret = 0;
+
+	slash = strrchr(filename, '/');
+	if (slash == NULL) {
+		MIF_LOG(ERR, "Invalid socket filename (no directory): %s.",
+			filename);
+		return -EINVAL;
+	}
+
+	/* keep the slash itself when it is the root directory */
+	dir_len = (slash == filename) ? 1 : (size_t)(slash - filename);
+
+	/* zeroed allocation with one spare byte keeps dir NUL-terminated */
+	dir = rte_zmalloc("memif_tmp", dir_len + 1, 0);
+	if (dir == NULL) {
+		MIF_LOG(ERR, "Failed to allocate memory.");
+		return -1;
+	}
+	memcpy(dir, filename, dir_len);
+
+	/* AT_FDCWD makes relative paths resolve against the cwd */
+	if (faccessat(AT_FDCWD, dir, F_OK | R_OK | W_OK, AT_EACCESS) < 0) {
+		MIF_LOG(ERR, "Invalid directory: %s.", dir);
+		ret = -EINVAL;
+	}
+
+	rte_free(dir);
+
+	return ret;
+}
+
+/*
+ * vdev probe callback: parse the device arguments, falling back to built-in
+ * defaults for anything not given, then create the memif interface.
+ *
+ * Each argument is only honoured when it appears exactly once in the
+ * argument list. Returns the result of memif_create(), or the negative value
+ * returned by the first argument handler that fails.
+ */
+static int
+rte_pmd_memif_probe(struct rte_vdev_device *vdev)
+{
+	int ret = 0;
+	unsigned int i;
+	struct rte_kvargs *kvlist;
+	const struct rte_kvargs_pair *pair;
+
+	const char *name = rte_vdev_device_name(vdev);
+
+	enum memif_role_t role;
+	memif_interface_id_t id;
+
+	uint16_t buffer_size;
+	memif_log2_ring_size_t log2_ring_size;
+	uint8_t nrxq, ntxq;
+	const char *socket_filename;
+	const char *eth_addr;
+	uint32_t flags;
+	const char *secret;
+
+	MIF_LOG(INFO, "Initialize MEMIF: %s.", name);
+
+	/* a NULL kvlist is tolerated: the defaults below are used as they are */
+	kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments);
+
+	/* set default values */
+	role = MEMIF_ROLE_SLAVE;
+	flags = 0;
+	id = 0;
+	buffer_size = 2048;
+	log2_ring_size = 10;
+	nrxq = 1;
+	ntxq = 1;
+	socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME;
+	secret = NULL;
+	eth_addr = NULL;
+
+	/* parse parameters */
+	if (kvlist != NULL) {
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_ROLE_ARG) == 1) {
+			ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG,
+				&memif_set_role, &role);
+			if (ret < 0)
+				goto exit;
+		}
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_ID_ARG) == 1) {
+			ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG,
+				&memif_set_id, &id);
+			if (ret < 0)
+				goto exit;
+		}
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_BUFFER_SIZE_ARG) == 1) {
+			ret = rte_kvargs_process(kvlist, ETH_MEMIF_BUFFER_SIZE_ARG,
+						 &memif_set_bs, &buffer_size);
+			if (ret < 0)
+				goto exit;
+		}
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_RING_SIZE_ARG) == 1) {
+			ret = rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG,
+						 &memif_set_rs, &log2_ring_size);
+			if (ret < 0)
+				goto exit;
+		}
+		/*
+		 * NOTE(review): nrxq and ntxq are uint8_t here; the handler
+		 * must store through a pointer of the same width - confirm.
+		 */
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_NRXQ_ARG) == 1) {
+			ret = rte_kvargs_process(kvlist, ETH_MEMIF_NRXQ_ARG,
+						 &memif_set_nq, &nrxq);
+			if (ret < 0)
+				goto exit;
+		}
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_NTXQ_ARG) == 1) {
+			ret = rte_kvargs_process(kvlist, ETH_MEMIF_NTXQ_ARG,
+						 &memif_set_nq, &ntxq);
+			if (ret < 0)
+				goto exit;
+		}
+		/*
+		 * String arguments are not copied: socket_filename, eth_addr
+		 * and secret point at the values stored inside kvlist.
+		 */
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_SOCKET_ARG) == 1) {
+			for (i = 0; i < kvlist->count; i++) {
+				pair = &kvlist->pairs[i];
+				if (strcmp(pair->key, ETH_MEMIF_SOCKET_ARG) == 0) {
+					socket_filename = pair->value;
+					ret = memif_check_socket_filename(socket_filename);
+					if (ret < 0)
+						goto exit;
+				}
+			}
+		}
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_MAC_ARG) == 1) {
+			for (i = 0; i < kvlist->count; i++) {
+				pair = &kvlist->pairs[i];
+				if (strcmp(pair->key, ETH_MEMIF_MAC_ARG) == 0) {
+					eth_addr = pair->value;
+				}
+			}
+		}
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_ZC_ARG) == 1) {
+			ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG,
+				&memif_set_zc, &flags);
+			if (ret < 0)
+				goto exit;
+		}
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_SECRET_ARG) == 1) {
+			for (i = 0; i < kvlist->count; i++) {
+				pair = &kvlist->pairs[i];
+				if (strcmp(pair->key, ETH_MEMIF_SECRET_ARG) == 0) {
+					secret = pair->value;
+				}
+			}
+		}
+	}
+
+	/* create interface */
+	ret = memif_create(vdev, role, id, flags, socket_filename, log2_ring_size,
+			   nrxq, ntxq, buffer_size, secret, eth_addr);
+
+exit:
+	/*
+	 * NOTE(review): kvlist owns the strings handed to memif_create();
+	 * confirm that memif_socket_init() copies socket_filename rather than
+	 * keeping the pointer past this free.
+	 */
+	if (kvlist != NULL)
+		rte_kvargs_free(kvlist);
+	return ret;
+}
+
+/*
+ * vdev remove callback: notify the peer, tear down the connection, detach the
+ * device from its socket and release the ethdev port.
+ *
+ * Returns 0, also when no ethdev exists for the vdev name.
+ */
+static int
+rte_pmd_memif_remove(struct rte_vdev_device *vdev)
+{
+	struct rte_eth_dev *eth_dev;
+	struct pmd_internals *pmd;
+
+	eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
+	if (eth_dev == NULL)
+		return 0;
+
+	pmd = eth_dev->data->dev_private;
+
+	/* pmd->cc stays NULL until a control channel is attached */
+	if (pmd->cc != NULL)
+		memif_msg_enq_disconnect(pmd->cc, "Device removed", 0);
+	memif_disconnect(eth_dev);
+
+	memif_socket_remove_device(pmd);
+
+	pmd->vdev = NULL;
+
+	/*
+	 * rte_eth_dev_release_port() frees both data->mac_addrs and
+	 * data->dev_private. mac_addrs points into dev_private, so it must
+	 * not be freed on its own, and dev_private must not be freed here as
+	 * well or it is freed twice.
+	 */
+	eth_dev->data->mac_addrs = NULL;
+
+	rte_eth_dev_release_port(eth_dev);
+
+	return 0;
+}
+
+/* vdev driver entry points; registered as net_memif below */
+static struct rte_vdev_driver pmd_memif_drv = {
+	.probe = rte_pmd_memif_probe,
+	.remove = rte_pmd_memif_remove,
+};
+
+RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv);
+RTE_PMD_REGISTER_ALIAS(net_memif, eth_memif);
+/*
+ * Accepted device arguments. Adjacent string literals are concatenated, so
+ * every entry but the last ends with a space to keep the entries apart.
+ */
+RTE_PMD_REGISTER_PARAM_STRING(net_memif,
+	ETH_MEMIF_ID_ARG "=<int> "
+	ETH_MEMIF_ROLE_ARG "=<string> "
+	ETH_MEMIF_BUFFER_SIZE_ARG "=<int> "
+	ETH_MEMIF_RING_SIZE_ARG "=<int> "
+	ETH_MEMIF_NRXQ_ARG "=<int> "
+	ETH_MEMIF_NTXQ_ARG "=<int> "
+	ETH_MEMIF_SOCKET_ARG "=<string> "
+	ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx "
+	ETH_MEMIF_ZC_ARG "=<string> "
+	ETH_MEMIF_SECRET_ARG "=<string>");
+
+/*
+ * Constructor: register the "pmd.net.memif" log type and, if registration
+ * succeeded, set its level to NOTICE.
+ */
+RTE_INIT(memif_init_log)
+{
+	memif_logtype = rte_log_register("pmd.net.memif");
+	if (memif_logtype < 0)
+		return;
+
+	rte_log_set_level(memif_logtype, RTE_LOG_NOTICE);
+}
diff --git a/drivers/net/memif/rte_eth_memif.h b/drivers/net/memif/rte_eth_memif.h
new file mode 100644
index 000000000..14a8d5f07
--- /dev/null
+++ b/drivers/net/memif/rte_eth_memif.h
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
+ */
+
+#ifndef _RTE_ETH_MEMIF_H_
+#define _RTE_ETH_MEMIF_H_
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif /* GNU_SOURCE */
+
+#include <stdio.h>
+#include <sys/queue.h>
+
+#include <rte_ethdev_driver.h>
+#include <rte_ether.h>
+#include <rte_timer.h>
+#include <rte_interrupts.h>
+
+#include <memif.h>
+
+/*
+ * Default MAC address (generate mac?).
+ * The first octet must have its least significant bit clear, otherwise the
+ * address is a group (multicast) address and not valid for a device. 0x02
+ * marks it as locally administered unicast.
+ */
+#define ETH_MEMIF_DEFAULT_ETH_ADDR		"02:ab:23:cd:45:ef"
+
+#define ETH_MEMIF_DEFAULT_SOCKET_FILENAME	"/tmp/memif.sock"
+#define ETH_MEMIF_DEFAULT_RING_SIZE		10
+#define ETH_MEMIF_DEFAULT_NRXQ			1
+#define ETH_MEMIF_DEFAULT_NTXQ			1
+#define ETH_MEMIF_DEFAULT_BUFFER_SIZE		2048
+
+#define ETH_MEMIF_MAX_NUM_Q_PAIRS		256
+#define ETH_MEMIF_MAX_LOG2_RING_SIZE		14
+#define ETH_MEMIF_MAX_REGION_IDX		255
+
+int memif_logtype;
+
+#define memif_min(a,b) (((a) < (b)) ? (a) : (b))
+
+#define MIF_LOG(level, fmt, args...) \
+do {							\
+	rte_log(RTE_LOG_ ## level, memif_logtype,	\
+		"%s(): " fmt "\n", __func__, ##args);	\
+} while (0)
+
+enum memif_role_t {
+	MEMIF_ROLE_MASTER = 0,
+	MEMIF_ROLE_SLAVE = 1,
+};
+
+/* Shared memory region. */
+struct memif_region {
+	void *addr;
+	memif_region_size_t region_size;
+	int fd;
+	uint32_t buffer_offset;
+};
+
+struct memif_queue {
+	struct rte_mempool *mempool;
+	uint16_t in_port;
+
+	struct pmd_internals *pmd;
+
+	struct rte_intr_handle intr_handle;
+
+	/* ring info */
+	memif_ring_type_t type;
+	memif_ring_t *ring;
+	memif_log2_ring_size_t log2_ring_size;
+
+	memif_region_index_t region;
+	memif_region_offset_t offset;
+
+	uint16_t last_head;
+	uint16_t last_tail;
+	uint32_t *buffers;
+
+	/* rx/tx info */
+	uint64_t n_pkts;
+	uint64_t n_bytes;
+	uint64_t n_err;
+};
+
+struct pmd_internals {
+	int if_index;
+	memif_interface_id_t id;
+	enum memif_role_t role;
+	uint32_t flags;
+#define ETH_MEMIF_FLAG_CONNECTING	(1 << 0)
+#define ETH_MEMIF_FLAG_CONNECTED	(1 << 1)
+#define ETH_MEMIF_FLAG_ZERO_COPY	(1 << 2)
+/* device has not been configured and can not accept connection requests */
+#define ETH_MEMIF_FLAG_DISABLED		(1 << 3)
+
+	struct ether_addr eth_addr;
+	char *socket_filename;
+	char secret[24];
+
+	struct memif_control_channel *cc;
+
+	struct memif_region *regions;
+	uint8_t regions_num;
+
+	struct memif_queue *rx_queues;
+	struct memif_queue *tx_queues;
+
+	/* remote info */
+	char remote_name[64];
+	char remote_if_name[64];
+
+	/* Configured parameters (max values) */
+	struct {
+		memif_log2_ring_size_t log2_ring_size;
+		uint8_t num_s2m_rings;
+		uint8_t num_m2s_rings;
+		uint16_t buffer_size;
+	} cfg;
+
+	/* Parameters used in active connection */
+	struct {
+		memif_log2_ring_size_t log2_ring_size;
+		uint8_t num_s2m_rings;
+		uint8_t num_m2s_rings;
+		uint16_t buffer_size;
+	} run;
+
+	char local_disc_string[96];
+	char remote_disc_string[96];
+
+	/* vdev handle */
+	struct rte_vdev_device *vdev;
+};
+
+void memif_free_regions(struct pmd_internals *pmd);
+
+/*
+ * Finalize connection establishment process. Map shared memory file
+ * (master role), initialize ring queue, set link status up.
+ */
+int memif_connect(struct pmd_internals *pmd);
+
+/*
+ * Create shared memory file and initialize ring queue.
+ * Only called by slave when establishing connection
+ */
+int memif_init_regions_and_queues(struct pmd_internals *pmd);
+
+const char * memif_version(void);
+
+#ifndef MFD_HUGETLB
+#ifndef __NR_memfd_create
+
+#if defined __x86_64__
+#define __NR_memfd_create 319
+#elif defined __arm__
+#define __NR_memfd_create 385
+#elif defined __aarch64__
+#define __NR_memfd_create 279
+#else
+#error "__NR_memfd_create unknown for this architecture"
+#endif
+
+#endif /* __NR_memfd_create */
+
+/*
+ * Local memfd_create() wrapper that issues the system call by number.
+ * Only compiled when MFD_HUGETLB is not defined.
+ */
+static inline int
+memfd_create(const char *name, unsigned int flags)
+{
+	return syscall(__NR_memfd_create, name, flags);
+}
+#endif /* MFD_HUGETLB */
+
+
+#ifndef F_LINUX_SPECIFIC_BASE
+#define F_LINUX_SPECIFIC_BASE 1024
+#endif
+
+#ifndef MFD_ALLOW_SEALING
+#define MFD_ALLOW_SEALING       0x0002U
+#endif
+
+#ifndef F_ADD_SEALS
+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
+
+#define F_SEAL_SEAL     0x0001	/* prevent further seals from being set */
+#define F_SEAL_SHRINK   0x0002	/* prevent file from shrinking */
+#define F_SEAL_GROW     0x0004	/* prevent file from growing */
+#define F_SEAL_WRITE    0x0008	/* prevent writes */
+#endif
+
+#endif /* RTE_ETH_MEMIF_H */
diff --git a/drivers/net/memif/rte_pmd_memif_version.map b/drivers/net/memif/rte_pmd_memif_version.map
new file mode 100644
index 000000000..aee560afa
--- /dev/null
+++ b/drivers/net/memif/rte_pmd_memif_version.map
@@ -0,0 +1,4 @@
+DPDK_2.0 {
+
+        local: *;
+};
diff --git a/drivers/net/meson.build b/drivers/net/meson.build
index 980eec233..b0becbf31 100644
--- a/drivers/net/meson.build
+++ b/drivers/net/meson.build
@@ -21,6 +21,7 @@ drivers = ['af_packet',
 	'ixgbe',
 	'kni',
 	'liquidio',
+	'memif',
 	'mlx4',
 	'mlx5',
 	'mvneta',
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 5699d979d..f236c5ebc 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -168,6 +168,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI)        += -lrte_pmd_kni
 endif
 _LDLIBS-$(CONFIG_RTE_LIBRTE_LIO_PMD)        += -lrte_pmd_lio
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF)      += -lrte_pmd_memif
 ifeq ($(CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4 -ldl
 else
-- 
2.17.1

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [dpdk-dev] [RFC v2] /net: memory interface (memif)
  2018-12-10  9:14 [dpdk-dev] [PATCH v3] eal_interrupts: add option for pending callback unregister Jakub Grajciar
  2018-12-10  9:42 ` [dpdk-dev] [RFC] /net: memory interface (memif) Jakub Grajciar
@ 2018-12-10 10:06 ` Jakub Grajciar
  2018-12-10 10:42   ` Burakov, Anatoly
                     ` (3 more replies)
  1 sibling, 4 replies; 13+ messages in thread
From: Jakub Grajciar @ 2018-12-10 10:06 UTC (permalink / raw)
  To: dev; +Cc: Jakub Grajciar

Signed-off-by: Jakub Grajciar <jgrajcia@cisco.com>
---
 config/common_base                          |    5 +
 config/common_linuxapp                      |    1 +
 drivers/net/Makefile                        |    1 +
 drivers/net/memif/Makefile                  |   29 +
 drivers/net/memif/memif.h                   |  156 +++
 drivers/net/memif/memif_socket.c            | 1085 +++++++++++++++++
 drivers/net/memif/memif_socket.h            |   57 +
 drivers/net/memif/meson.build               |    8 +
 drivers/net/memif/rte_eth_memif.c           | 1172 +++++++++++++++++++
 drivers/net/memif/rte_eth_memif.h           |  189 +++
 drivers/net/memif/rte_pmd_memif_version.map |    4 +
 drivers/net/meson.build                     |    1 +
 mk/rte.app.mk                               |    1 +
 13 files changed, 2709 insertions(+)
 create mode 100644 drivers/net/memif/Makefile
 create mode 100644 drivers/net/memif/memif.h
 create mode 100644 drivers/net/memif/memif_socket.c
 create mode 100644 drivers/net/memif/memif_socket.h
 create mode 100644 drivers/net/memif/meson.build
 create mode 100644 drivers/net/memif/rte_eth_memif.c
 create mode 100644 drivers/net/memif/rte_eth_memif.h
 create mode 100644 drivers/net/memif/rte_pmd_memif_version.map

diff --git a/config/common_base b/config/common_base
index d12ae98bc..b8ed10ae5 100644
--- a/config/common_base
+++ b/config/common_base
@@ -403,6 +403,11 @@ CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_TX_FREE=n
 #
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=n
 
+#
+# Compile Memory Interface PMD driver (Linux only)
+#
+CONFIG_RTE_LIBRTE_PMD_MEMIF=n
+
 #
 # Compile link bonding PMD library
 #
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 6c1c8d0f4..42cbde8f5 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -18,6 +18,7 @@ CONFIG_RTE_LIBRTE_VHOST_POSTCOPY=n
 CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_IFC_PMD=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
+CONFIG_RTE_LIBRTE_PMD_MEMIF=y
 CONFIG_RTE_LIBRTE_PMD_SOFTNIC=y
 CONFIG_RTE_LIBRTE_PMD_TAP=y
 CONFIG_RTE_LIBRTE_AVP_PMD=y
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index c0386feb9..0feab5241 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -32,6 +32,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
 DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
 DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
 DIRS-$(CONFIG_RTE_LIBRTE_LIO_PMD) += liquidio
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += memif
 DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
 DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
 DIRS-$(CONFIG_RTE_LIBRTE_MVNETA_PMD) += mvneta
diff --git a/drivers/net/memif/Makefile b/drivers/net/memif/Makefile
new file mode 100644
index 000000000..a82448423
--- /dev/null
+++ b/drivers/net/memif/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018 Cisco Systems, Inc.  All rights reserved.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_memif.a
+
+EXPORT_MAP := rte_pmd_memif_version.map
+
+LIBABIVER := 1
+
+CFLAGS += -O3
+CFLAGS += -I$(SRCDIR)
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -Wno-pointer-arith
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_vdev
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += rte_eth_memif.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += memif_socket.c
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/memif/memif.h b/drivers/net/memif/memif.h
new file mode 100644
index 000000000..6a23dbad7
--- /dev/null
+++ b/drivers/net/memif/memif.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
+ */
+
+#ifndef _MEMIF_H_
+#define _MEMIF_H_
+
+#ifndef MEMIF_CACHELINE_SIZE
+#define MEMIF_CACHELINE_SIZE 64
+#endif
+
+#define MEMIF_COOKIE		0x3E31F20
+#define MEMIF_VERSION_MAJOR	2
+#define MEMIF_VERSION_MINOR	0
+#define MEMIF_VERSION		((MEMIF_VERSION_MAJOR << 8) | MEMIF_VERSION_MINOR)
+
+/*
+ *  Type definitions
+ */
+
+typedef enum memif_msg_type {
+	MEMIF_MSG_TYPE_NONE = 0,
+	MEMIF_MSG_TYPE_ACK = 1,
+	MEMIF_MSG_TYPE_HELLO = 2,
+	MEMIF_MSG_TYPE_INIT = 3,
+	MEMIF_MSG_TYPE_ADD_REGION = 4,
+	MEMIF_MSG_TYPE_ADD_RING = 5,
+	MEMIF_MSG_TYPE_CONNECT = 6,
+	MEMIF_MSG_TYPE_CONNECTED = 7,
+	MEMIF_MSG_TYPE_DISCONNECT = 8,
+} memif_msg_type_t;
+
+typedef enum {
+	MEMIF_RING_S2M = 0,
+	MEMIF_RING_M2S = 1
+} memif_ring_type_t;
+
+typedef enum {
+	MEMIF_INTERFACE_MODE_ETHERNET = 0,
+	MEMIF_INTERFACE_MODE_IP = 1,
+	MEMIF_INTERFACE_MODE_PUNT_INJECT = 2,
+} memif_interface_mode_t;
+
+typedef uint16_t memif_region_index_t;
+typedef uint32_t memif_region_offset_t;
+typedef uint64_t memif_region_size_t;
+typedef uint16_t memif_ring_index_t;
+typedef uint32_t memif_interface_id_t;
+typedef uint16_t memif_version_t;
+typedef uint8_t memif_log2_ring_size_t;
+
+/*
+ *  Socket messages
+ */
+
+typedef struct __attribute__ ((packed)) {
+	uint8_t name[32];
+	memif_version_t min_version;
+	memif_version_t max_version;
+	memif_region_index_t max_region;
+	memif_ring_index_t max_m2s_ring;
+	memif_ring_index_t max_s2m_ring;
+	memif_log2_ring_size_t max_log2_ring_size;
+} memif_msg_hello_t;
+
+typedef struct __attribute__ ((packed)) {
+	memif_version_t version;
+	memif_interface_id_t id;
+	memif_interface_mode_t mode:8;
+	uint8_t secret[24];
+	uint8_t name[32];
+} memif_msg_init_t;
+
+typedef struct __attribute__ ((packed)) {
+	memif_region_index_t index;
+	memif_region_size_t size;
+} memif_msg_add_region_t;
+
+typedef struct __attribute__ ((packed)) {
+	uint16_t flags;
+#define MEMIF_MSG_ADD_RING_FLAG_S2M	(1 << 0)
+	memif_ring_index_t index;
+	memif_region_index_t region;
+	memif_region_offset_t offset;
+	memif_log2_ring_size_t log2_ring_size;
+	uint16_t private_hdr_size;	/* used for private metadata */
+} memif_msg_add_ring_t;
+
+typedef struct __attribute__ ((packed)) {
+	uint8_t if_name[32];
+} memif_msg_connect_t;
+
+typedef struct __attribute__ ((packed)) {
+	uint8_t if_name[32];
+} memif_msg_connected_t;
+
+typedef struct __attribute__ ((packed)) {
+	uint32_t code;
+	uint8_t string[96];
+} memif_msg_disconnect_t;
+
+typedef struct __attribute__ ((packed, aligned(128))) {
+	memif_msg_type_t type:16;
+	union {
+		memif_msg_hello_t hello;
+		memif_msg_init_t init;
+		memif_msg_add_region_t add_region;
+		memif_msg_add_ring_t add_ring;
+		memif_msg_connect_t connect;
+		memif_msg_connected_t connected;
+		memif_msg_disconnect_t disconnect;
+	};
+} memif_msg_t;
+
+_Static_assert(sizeof(memif_msg_t) == 128, "Size of memif_msg_t must be 128");
+
+/*
+ *  Ring and Descriptor Layout
+ */
+
+typedef struct __attribute__ ((packed)) {
+	uint16_t flags;
+#define MEMIF_DESC_FLAG_NEXT (1 << 0)
+	memif_region_index_t region;
+	uint32_t length;
+	memif_region_offset_t offset;
+	uint32_t metadata;
+} memif_desc_t;
+
+/* the descriptor layout is shared with the peer, so its size is fixed */
+_Static_assert(sizeof(memif_desc_t) == 16,
+	       "Size of memif_desc_t must be 16 bytes");
+
+#define MEMIF_CACHELINE_ALIGN_MARK(mark) \
+  uint8_t mark[0] __attribute__((aligned(MEMIF_CACHELINE_SIZE)))
+
+typedef struct {
+	MEMIF_CACHELINE_ALIGN_MARK(cacheline0);
+	uint32_t cookie;
+	uint16_t flags;
+#define MEMIF_RING_FLAG_MASK_INT 1
+	volatile uint16_t head;
+	 MEMIF_CACHELINE_ALIGN_MARK(cacheline1);
+	volatile uint16_t tail;
+	 MEMIF_CACHELINE_ALIGN_MARK(cacheline2);
+	memif_desc_t desc[0];
+} memif_ring_t;
+
+#endif				/* _MEMIF_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/drivers/net/memif/memif_socket.c b/drivers/net/memif/memif_socket.c
new file mode 100644
index 000000000..afd4ac888
--- /dev/null
+++ b/drivers/net/memif/memif_socket.c
@@ -0,0 +1,1085 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
+ */
+
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+
+#include <rte_version.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ethdev_driver.h>
+#include <rte_ethdev_vdev.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+#include <rte_bus_vdev.h>
+#include <rte_hash.h>
+#include <rte_jhash.h>
+
+#include <rte_eth_memif.h>
+#include <memif_socket.h>
+
+static void memif_intr_handler(void *arg);
+
+/*
+ * Send one control message over a socket.
+ *
+ * fd  - socket to send on
+ * msg - message to send; always sizeof(memif_msg_t) bytes
+ * afd - file descriptor to pass along as SCM_RIGHTS ancillary data, or a
+ *       negative value to send no descriptor
+ *
+ * Returns the result of sendmsg(): the number of bytes sent, or -1 with errno
+ * set.
+ */
+static inline ssize_t memif_msg_send(int fd, memif_msg_t * msg, int afd)
+{
+	struct msghdr mh = { 0 };
+	struct iovec iov[1];
+	char ctl[CMSG_SPACE(sizeof(int))];
+
+	iov[0].iov_base = (void *)msg;
+	iov[0].iov_len = sizeof(memif_msg_t);
+	mh.msg_iov = iov;
+	mh.msg_iovlen = 1;
+
+	/* 0 is a valid descriptor; only negative values mean "none" */
+	if (afd >= 0) {
+		struct cmsghdr *cmsg;
+		memset(&ctl, 0, sizeof(ctl));
+		mh.msg_control = ctl;
+		mh.msg_controllen = sizeof(ctl);
+		cmsg = CMSG_FIRSTHDR(&mh);
+		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		rte_memcpy(CMSG_DATA(cmsg), &afd, sizeof(int));
+	}
+
+	return sendmsg(fd, &mh, 0);
+}
+
+/*
+ * Send the first message queued on a control channel, then drop it from the
+ * queue whether or not the send succeeded.
+ *
+ * Returns 0 if the queue was empty or the whole message was sent, -1 if
+ * sendmsg did not send exactly sizeof(memif_msg_t) bytes.
+ */
+static inline int memif_msg_send_from_queue(struct memif_control_channel *cc)
+{
+	struct memif_msg_queue_elt *head = TAILQ_FIRST(&cc->msg_queue);
+	ssize_t sent;
+	int rc = 0;
+
+	if (head == NULL)
+		return 0;
+
+	sent = memif_msg_send(cc->intr_handle.fd, &head->msg, head->fd);
+	if (sent == sizeof(memif_msg_t)) {
+		MIF_LOG(DEBUG, "%s: Sent msg type %u.",
+			(cc->pmd != NULL) ?
+			rte_vdev_device_name(cc->pmd->vdev) : "memif_driver",
+			head->msg.type);
+	} else {
+		MIF_LOG(ERR, "sendmsg fail: %s.", strerror(errno));
+		rc = -1;
+	}
+
+	TAILQ_REMOVE(&cc->msg_queue, head, next);
+	rte_free(head);
+
+	return rc;
+}
+
+/*
+ * Allocate a zeroed message element, mark it as carrying no file descriptor
+ * (fd = -1) and append it to the channel's message queue.
+ *
+ * Returns the new element for the caller to fill in, or NULL if the
+ * allocation failed.
+ */
+static inline struct memif_msg_queue_elt *
+memif_msg_enq(struct memif_control_channel *cc)
+{
+	struct memif_msg_queue_elt *elt;
+
+	elt = rte_zmalloc("memif_msg", sizeof(*elt), 0);
+	if (elt == NULL) {
+		MIF_LOG(ERR, "Failed to allocate control message.");
+		return NULL;
+	}
+
+	elt->fd = -1;
+	TAILQ_INSERT_TAIL(&cc->msg_queue, elt, next);
+
+	return elt;
+}
+
+/*
+ * Queue a DISCONNECT message on a control channel.
+ *
+ * cc       - control channel; a NULL channel is ignored
+ * reason   - optional human readable reason. It is copied into the message
+ *            and, when a device is bound to the channel, into the device's
+ *            local_disc_string. Both copies are truncated to the size of
+ *            their destination and always NUL-terminated.
+ * err_code - code carried in the message
+ */
+void
+memif_msg_enq_disconnect(struct memif_control_channel *cc, const char *reason,
+			 int err_code)
+{
+	struct memif_msg_queue_elt *e;
+	memif_msg_disconnect_t *d;
+
+	/* nothing to notify without a control channel */
+	if (cc == NULL)
+		return;
+
+	e = memif_msg_enq(cc);
+	if (e == NULL) {
+		MIF_LOG(WARNING, "%s: Failed to enqueue disconnect message.",
+			(cc->pmd != NULL) ?
+			rte_vdev_device_name(cc->pmd->vdev) : "memif_driver");
+		return;
+	}
+
+	d = &e->msg.disconnect;
+
+	e->msg.type = MEMIF_MSG_TYPE_DISCONNECT;
+	d->code = err_code;
+
+	if (reason != NULL) {
+		/* bound by the destination, not by the length of reason */
+		snprintf((char *)d->string, sizeof(d->string), "%s", reason);
+		if (cc->pmd != NULL) {
+			snprintf(cc->pmd->local_disc_string,
+				 sizeof(cc->pmd->local_disc_string),
+				 "%s", reason);
+		}
+	}
+}
+
+/*
+ * Queue a HELLO message advertising the limits of this implementation: the
+ * supported protocol version, the highest ring and region indexes, the
+ * largest ring size, and the DPDK version string as the name.
+ *
+ * Returns 0 on success, -1 if the message could not be allocated.
+ */
+static int memif_msg_enq_hello(struct memif_control_channel *cc)
+{
+	struct memif_msg_queue_elt *e = memif_msg_enq(cc);
+	if (e == NULL)
+		return -1;
+
+	memif_msg_hello_t *h = &e->msg.hello;
+
+	e->msg.type = MEMIF_MSG_TYPE_HELLO;
+	h->min_version = MEMIF_VERSION;
+	h->max_version = MEMIF_VERSION;
+	/*
+	 * These fields carry the highest ring index, not a count: the
+	 * receiving side adds 1 to get the number of rings.
+	 */
+	h->max_s2m_ring = ETH_MEMIF_MAX_NUM_Q_PAIRS - 1;
+	h->max_m2s_ring = ETH_MEMIF_MAX_NUM_Q_PAIRS - 1;
+	h->max_region = ETH_MEMIF_MAX_REGION_IDX;
+	h->max_log2_ring_size = ETH_MEMIF_MAX_LOG2_RING_SIZE;
+
+	/* truncate to the name field instead of trusting the source length */
+	snprintf((char *)h->name, sizeof(h->name), "%s", rte_version());
+
+	return 0;
+}
+
+/*
+ * Handle a HELLO message: check that the peer supports our protocol version
+ * and derive the parameters of the active connection as the minimum of the
+ * peer's limits and our configured values.
+ *
+ * Returns 0 on success. On a version mismatch a DISCONNECT message is queued
+ * and -1 is returned.
+ */
+static int memif_msg_receive_hello(struct pmd_internals *pmd, memif_msg_t * msg)
+{
+	memif_msg_hello_t *h = &msg->hello;
+
+	if (h->min_version > MEMIF_VERSION || h->max_version < MEMIF_VERSION) {
+		memif_msg_enq_disconnect(pmd->cc, "Incompatible memif version",
+					 0);
+		return -1;
+	}
+
+	/* Set parameters for active connection */
+	/* the peer sends its highest ring index, hence the + 1 */
+	pmd->run.num_s2m_rings = memif_min(h->max_s2m_ring + 1,
+					   pmd->cfg.num_s2m_rings);
+	pmd->run.num_m2s_rings = memif_min(h->max_m2s_ring + 1,
+					   pmd->cfg.num_m2s_rings);
+	pmd->run.log2_ring_size = memif_min(h->max_log2_ring_size,
+					    pmd->cfg.log2_ring_size);
+	pmd->run.buffer_size = pmd->cfg.buffer_size;
+
+	/*
+	 * The name comes from the peer and may fill the whole field without a
+	 * terminating NUL, so never read past the field.
+	 */
+	snprintf(pmd->remote_name, sizeof(pmd->remote_name), "%.*s",
+		 (int)sizeof(h->name), (const char *)h->name);
+
+	MIF_LOG(DEBUG, "%s: Connecting to %s.",
+		rte_vdev_device_name(pmd->vdev), pmd->remote_name);
+
+	return 0;
+}
+
+/*
+ * Handle an INIT message: find the enabled device with the requested id on
+ * the channel's socket, bind the control channel to it and validate the
+ * request (interface mode and, when configured, the secret).
+ *
+ * Returns 0 and marks the device as connecting on success. On any failure a
+ * DISCONNECT message with the reason is queued and -1 is returned.
+ */
+static int
+memif_msg_receive_init(struct memif_control_channel *cc, memif_msg_t * msg)
+{
+	memif_msg_init_t *i = &msg->init;
+	struct memif_socket_pmd_list_elt *elt;
+	struct pmd_internals *pmd;
+
+	if (i->version != MEMIF_VERSION) {
+		memif_msg_enq_disconnect(cc, "Incompatible memif version", 0);
+		return -1;
+	}
+
+	if (cc->socket == NULL) {
+		memif_msg_enq_disconnect(cc, "Device error", 0);
+		return -1;
+	}
+
+	/* Find device with requested ID */
+	TAILQ_FOREACH(elt, &cc->socket->pmd_queue, next) {
+		pmd = elt->pmd;
+		if (((pmd->flags & ETH_MEMIF_FLAG_DISABLED) != 0)
+		    || (pmd->id != i->id))
+			continue;
+
+		/*
+		 * Refuse before touching the device: it may already own a
+		 * control channel, which must not be replaced by this one.
+		 */
+		if (pmd->flags & (ETH_MEMIF_FLAG_CONNECTING |
+				  ETH_MEMIF_FLAG_CONNECTED)) {
+			memif_msg_enq_disconnect(cc, "Already connected", 0);
+			return -1;
+		}
+
+		/* assign control channel to device */
+		cc->pmd = pmd;
+		pmd->cc = cc;
+
+		if (i->mode != MEMIF_INTERFACE_MODE_ETHERNET) {
+			memif_msg_enq_disconnect(pmd->cc,
+						 "Only ethernet mode supported",
+						 0);
+			return -1;
+		}
+
+		/* the peer's name field may lack a terminating NUL */
+		snprintf(pmd->remote_name, sizeof(pmd->remote_name), "%.*s",
+			 (int)sizeof(i->name), (const char *)i->name);
+
+		if (*pmd->secret != '\0') {
+			if (*i->secret == '\0') {
+				memif_msg_enq_disconnect(pmd->cc,
+							 "Secret required", 0);
+				return -1;
+			}
+			/* both secrets are fixed-size fields: bound the compare */
+			if (strncmp(pmd->secret, (const char *)i->secret,
+				    sizeof(pmd->secret)) != 0) {
+				memif_msg_enq_disconnect(pmd->cc,
+							 "Incorrect secret", 0);
+				return -1;
+			}
+		}
+
+		pmd->flags |= ETH_MEMIF_FLAG_CONNECTING;
+		return 0;
+	}
+
+	/* ID not found on this socket */
+	MIF_LOG(DEBUG, "ID %u not found.", i->id);
+	memif_msg_enq_disconnect(cc, "ID not found", 0);
+	return -1;
+}
+
+/*
+ * Handle an ADD_REGION message: record the file descriptor and size of a
+ * shared memory region at the index given by the peer. The region is not
+ * mapped here; its address is left NULL.
+ *
+ * Returns 0 on success. If the descriptor is missing, the index is above
+ * ETH_MEMIF_MAX_REGION_IDX or the region array cannot be resized, a
+ * DISCONNECT message is queued and -1 is returned.
+ */
+static int
+memif_msg_receive_add_region(struct pmd_internals *pmd, memif_msg_t * msg,
+			     int fd)
+{
+	memif_msg_add_region_t *ar = &msg->add_region;
+
+	if (fd < 0) {
+		memif_msg_enq_disconnect(pmd->cc, "Missing region fd", 0);
+		return -1;
+	}
+
+	struct memif_region *mr;
+
+	if (ar->index > ETH_MEMIF_MAX_REGION_IDX) {
+		memif_msg_enq_disconnect(pmd->cc, "Invalid region index", 0);
+		return -1;
+	}
+
+	/*
+	 * NOTE(review): the array is resized to exactly index + 1 entries, so
+	 * an index lower than one received earlier looks like it would shrink
+	 * the array - confirm the peer always sends ascending indexes.
+	 */
+	mr = rte_realloc(pmd->regions, sizeof(struct memif_region) *
+			 (ar->index + 1), 0);
+	if (mr == NULL) {
+		memif_msg_enq_disconnect(pmd->cc, "Device error", 0);
+		return -1;
+	}
+
+	pmd->regions = mr;
+	pmd->regions[ar->index].fd = fd;
+	pmd->regions[ar->index].region_size = ar->size;
+	pmd->regions[ar->index].addr = NULL;
+	/*
+	 * NOTE(review): counts messages rather than tracking the highest
+	 * index, and regions_num is a uint8_t - confirm it cannot wrap or
+	 * drift from the array size.
+	 */
+	pmd->regions_num++;
+
+	return 0;
+}
+
+/*
+ * Handle an ADD_RING message: validate the ring index against the configured
+ * number of rings for its direction, count the ring as active and store its
+ * interrupt descriptor, size, region and offset in the matching queue.
+ * S2M rings map to RX queues, all other rings to TX queues.
+ *
+ * Returns 0 on success. If the descriptor is missing or the index is out of
+ * range, a DISCONNECT message is queued and -1 is returned.
+ */
+static int
+memif_msg_receive_add_ring(struct pmd_internals *pmd, memif_msg_t * msg, int fd)
+{
+	memif_msg_add_ring_t *ar = &msg->add_ring;
+	struct memif_queue *queue;
+
+	if (fd < 0) {
+		memif_msg_enq_disconnect(pmd->cc, "Missing interrupt fd", 0);
+		return -1;
+	}
+
+	/* check if we have enough queues, then pick the queue to fill */
+	if ((ar->flags & MEMIF_MSG_ADD_RING_FLAG_S2M) != 0) {
+		if (ar->index >= pmd->cfg.num_s2m_rings) {
+			memif_msg_enq_disconnect(pmd->cc, "Invalid ring index",
+						 0);
+			return -1;
+		}
+		pmd->run.num_s2m_rings++;
+		queue = &pmd->rx_queues[ar->index];
+	} else {
+		if (ar->index >= pmd->cfg.num_m2s_rings) {
+			memif_msg_enq_disconnect(pmd->cc, "Invalid ring index",
+						 0);
+			return -1;
+		}
+		pmd->run.num_m2s_rings++;
+		queue = &pmd->tx_queues[ar->index];
+	}
+
+	queue->intr_handle.fd = fd;
+	queue->log2_ring_size = ar->log2_ring_size;
+	queue->region = ar->region;
+	queue->offset = ar->offset;
+
+	return 0;
+}
+
+/*
+ * Handle a received CONNECT control message.
+ *
+ * Calls memif_connect() and, on success, records the peer interface name
+ * carried in the message and logs it.
+ *
+ * The name is copied with a bound derived from both buffers and is always
+ * NUL-terminated; bounding the copy by strlen() of the received field
+ * neither limits the write nor terminates the result.
+ * NOTE(review): assumes pmd->remote_if_name and c->if_name are fixed-size
+ * arrays (sizeof is taken on both) - confirm against the headers.
+ *
+ * Returns 0 on success or the negative value returned by memif_connect().
+ */
+static int
+memif_msg_receive_connect(struct pmd_internals *pmd, memif_msg_t * msg)
+{
+	memif_msg_connect_t *c = &msg->connect;
+	size_t len = sizeof(pmd->remote_if_name) - 1;
+	int ret;
+
+	ret = memif_connect(pmd);
+	if (ret < 0)
+		return ret;
+
+	/* never read past the received field */
+	if (len > sizeof(c->if_name))
+		len = sizeof(c->if_name);
+	memset(pmd->remote_if_name, 0, sizeof(pmd->remote_if_name));
+	strncpy(pmd->remote_if_name, (char *)c->if_name, len);
+	MIF_LOG(INFO, "%s: Remote interface %s connected.",
+		rte_vdev_device_name(pmd->vdev), pmd->remote_if_name);
+
+	return 0;
+}
+
+/*
+ * Handle a received CONNECTED control message.
+ *
+ * Calls memif_connect() and, on success, records the peer interface name
+ * carried in the message and logs it.
+ *
+ * The name is copied with a bound derived from both buffers and is always
+ * NUL-terminated; bounding the copy by strlen() of the received field
+ * neither limits the write nor terminates the result.
+ * NOTE(review): assumes pmd->remote_if_name and c->if_name are fixed-size
+ * arrays (sizeof is taken on both) - confirm against the headers.
+ *
+ * Returns 0 on success or the negative value returned by memif_connect().
+ */
+static int
+memif_msg_receive_connected(struct pmd_internals *pmd, memif_msg_t * msg)
+{
+	memif_msg_connected_t *c = &msg->connected;
+	size_t len = sizeof(pmd->remote_if_name) - 1;
+	int ret;
+
+	ret = memif_connect(pmd);
+	if (ret < 0)
+		return ret;
+
+	/* never read past the received field */
+	if (len > sizeof(c->if_name))
+		len = sizeof(c->if_name);
+	memset(pmd->remote_if_name, 0, sizeof(pmd->remote_if_name));
+	strncpy(pmd->remote_if_name, (char *)c->if_name, len);
+	MIF_LOG(INFO, "%s: Remote interface %s connected.",
+		rte_vdev_device_name(pmd->vdev), pmd->remote_if_name);
+
+	return 0;
+}
+
+/*
+ * Handle a received DISCONNECT control message.
+ *
+ * Stores the reason string sent by the peer in pmd->remote_disc_string,
+ * logs it, clears the local disconnect string and disconnects the device.
+ *
+ * The reason is copied into the zeroed destination with a bound of
+ * sizeof(destination) - 1, so the stored string is always NUL-terminated
+ * no matter what the peer sent.
+ * NOTE(review): the read bound assumes d->string is a fixed-size array -
+ * confirm against the message definition.
+ *
+ * Returns 0 on success, -1 if the ethdev for this vdev cannot be found.
+ */
+static int
+memif_msg_receive_disconnect(struct pmd_internals *pmd, memif_msg_t * msg)
+{
+	memif_msg_disconnect_t *d = &msg->disconnect;
+	size_t len = sizeof(pmd->remote_disc_string) - 1;
+	struct rte_eth_dev *dev;
+
+	/* never read past the received field */
+	if (len > sizeof(d->string))
+		len = sizeof(d->string);
+	memset(pmd->remote_disc_string, 0, sizeof(pmd->remote_disc_string));
+	strncpy(pmd->remote_disc_string, (char *)d->string, len);
+
+	MIF_LOG(INFO, "%s: Disconnect received: %s",
+		rte_vdev_device_name(pmd->vdev), pmd->remote_disc_string);
+
+	memset(pmd->local_disc_string, 0, 96);
+
+	/* memif_disconnect() dereferences the device unconditionally */
+	dev = rte_eth_dev_allocated(rte_vdev_device_name(pmd->vdev));
+	if (dev == NULL) {
+		MIF_LOG(WARNING, "%s: eth dev not allocated",
+			rte_vdev_device_name(pmd->vdev));
+		return -1;
+	}
+	memif_disconnect(dev);
+	return 0;
+}
+
+/*
+ * Queue an ACK control message on the device's control channel.
+ * Returns 0 on success, -1 if no queue element could be obtained.
+ */
+static int memif_msg_enq_ack(struct pmd_internals *pmd)
+{
+	struct memif_msg_queue_elt *ack;
+
+	ack = memif_msg_enq(pmd->cc);
+	if (ack != NULL) {
+		ack->msg.type = MEMIF_MSG_TYPE_ACK;
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Queue an INIT control message on the device's control channel.
+ *
+ * The message carries the protocol version, the device id, the ethernet
+ * interface mode, the DPDK version string as application name and, when
+ * one is configured, the shared secret.
+ *
+ * Both strings are copied with a bound taken from the destination field
+ * and explicitly NUL-terminated, so a long version string cannot write
+ * past the name field.
+ * NOTE(review): assumes i->name is a fixed-size array like i->secret -
+ * confirm against the message definition.
+ *
+ * Returns 0 on success, -1 if no queue element could be obtained.
+ */
+static int memif_msg_enq_init(struct pmd_internals *pmd)
+{
+	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
+	if (e == NULL)
+		return -1;
+
+	memif_msg_init_t *i = &e->msg.init;
+
+	e->msg.type = MEMIF_MSG_TYPE_INIT;
+	i->version = MEMIF_VERSION;
+	i->id = pmd->id;
+	i->mode = MEMIF_INTERFACE_MODE_ETHERNET;
+
+	strncpy((char *)i->name, rte_version(), sizeof(i->name) - 1);
+	i->name[sizeof(i->name) - 1] = '\0';
+
+	if (pmd->secret) {
+		strncpy((char *)i->secret, pmd->secret, sizeof(i->secret) - 1);
+		i->secret[sizeof(i->secret) - 1] = '\0';
+	}
+
+	return 0;
+}
+
+/*
+ * Queue an ADD_REGION control message for region 'idx'.
+ *
+ * The message body carries the region index and size; the region's file
+ * descriptor is stored in the queue element next to the message.
+ *
+ * Returns 0 on success, -1 if no queue element could be obtained.
+ */
+static int memif_msg_enq_add_region(struct pmd_internals *pmd, uint8_t idx)
+{
+	struct memif_msg_queue_elt *elt;
+	memif_msg_add_region_t *body;
+	struct memif_region *region;
+
+	elt = memif_msg_enq(pmd->cc);
+	if (elt == NULL)
+		return -1;
+
+	region = &pmd->regions[idx];
+	body = &elt->msg.add_region;
+
+	elt->fd = region->fd;
+	elt->msg.type = MEMIF_MSG_TYPE_ADD_REGION;
+	body->size = region->region_size;
+	body->index = idx;
+
+	return 0;
+}
+
+/*
+ * Queue an ADD_RING control message describing ring 'idx' of the given
+ * direction.
+ *
+ * An S2M ring is described from the tx queue with that index, any other
+ * type from the rx queue. The queue's interrupt fd is stored in the queue
+ * element; index, region, offset and ring size go into the message body.
+ *
+ * Returns 0 on success, -1 if no queue element could be obtained.
+ */
+static int
+memif_msg_enq_add_ring(struct pmd_internals *pmd, uint8_t idx,
+		       memif_ring_type_t type)
+{
+	const int s2m = (type == MEMIF_RING_S2M);
+	struct memif_msg_queue_elt *elt;
+	memif_msg_add_ring_t *body;
+	struct memif_queue *queue;
+
+	elt = memif_msg_enq(pmd->cc);
+	if (elt == NULL)
+		return -1;
+
+	if (s2m)
+		queue = &pmd->tx_queues[idx];
+	else
+		queue = &pmd->rx_queues[idx];
+
+	body = &elt->msg.add_ring;
+
+	elt->msg.type = MEMIF_MSG_TYPE_ADD_RING;
+	elt->fd = queue->intr_handle.fd;
+	body->index = idx;
+	body->offset = queue->offset;
+	body->region = queue->region;
+	body->log2_ring_size = queue->log2_ring_size;
+	body->flags = s2m ? MEMIF_MSG_ADD_RING_FLAG_S2M : 0;
+	body->private_hdr_size = 0;
+
+	return 0;
+}
+
+/*
+ * Queue a CONNECT control message carrying this device's vdev name.
+ *
+ * The name is copied with a bound taken from the message field and is
+ * explicitly NUL-terminated; a device name longer than the field is
+ * truncated instead of overflowing the message.
+ * NOTE(review): assumes c->if_name is a fixed-size array - confirm against
+ * the message definition.
+ *
+ * Returns 0 on success, -1 if no queue element could be obtained.
+ */
+static int memif_msg_enq_connect(struct pmd_internals *pmd)
+{
+	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
+	if (e == NULL)
+		return -1;
+
+	memif_msg_connect_t *c = &e->msg.connect;
+	const char *name = rte_vdev_device_name(pmd->vdev);
+
+	e->msg.type = MEMIF_MSG_TYPE_CONNECT;
+	strncpy((char *)c->if_name, name, sizeof(c->if_name) - 1);
+	c->if_name[sizeof(c->if_name) - 1] = '\0';
+
+	return 0;
+}
+
+/*
+ * Queue a CONNECTED control message carrying this device's vdev name.
+ *
+ * The name is copied with a bound taken from the message field and is
+ * explicitly NUL-terminated; a device name longer than the field is
+ * truncated instead of overflowing the message.
+ * NOTE(review): assumes c->if_name is a fixed-size array - confirm against
+ * the message definition.
+ *
+ * Returns 0 on success, -1 if no queue element could be obtained.
+ */
+static int memif_msg_enq_connected(struct pmd_internals *pmd)
+{
+	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
+	if (e == NULL)
+		return -1;
+
+	memif_msg_connected_t *c = &e->msg.connected;
+
+	const char *name = rte_vdev_device_name(pmd->vdev);
+
+	e->msg.type = MEMIF_MSG_TYPE_CONNECTED;
+	strncpy((char *)c->if_name, name, sizeof(c->if_name) - 1);
+	c->if_name[sizeof(c->if_name) - 1] = '\0';
+
+	return 0;
+}
+
+/*
+ * Release a control channel once its interrupt callback is gone.
+ *
+ * Closes the channel fd, frees every message still queued on the channel
+ * and finally frees the channel itself. 'arg' is the control channel.
+ */
+static void
+memif_intr_unregister_handler(struct rte_intr_handle *intr_handle, void *arg)
+{
+	struct memif_control_channel *channel = arg;
+	struct memif_msg_queue_elt *pending;
+
+	/* close control channel fd */
+	close(intr_handle->fd);
+
+	/* clear message queue */
+	for (pending = TAILQ_FIRST(&channel->msg_queue); pending != NULL;
+	     pending = TAILQ_FIRST(&channel->msg_queue)) {
+		TAILQ_REMOVE(&channel->msg_queue, pending, next);
+		free(pending);
+	}
+
+	/* free control channel */
+	rte_free(channel);
+}
+
+/*
+ * Disconnect a memif device.
+ *
+ * If a control channel exists: drops every queued control message except
+ * DISCONNECT ones, tries to send what is left, and unregisters the channel
+ * interrupt callback. When the callback is currently running
+ * (rte_intr_callback_unregister() returns -EAGAIN) the unregistration is
+ * deferred and memif_intr_unregister_handler() releases the channel later;
+ * otherwise the channel is released immediately. In both cases pmd->cc is
+ * cleared so that nothing keeps using a channel that is, or is about to
+ * be, freed.
+ *
+ * Afterwards all queue interrupt fds are disabled and closed, ring pointers
+ * are cleared, regions are freed, the link is marked down and the
+ * CONNECTING/CONNECTED flags are cleared.
+ */
+void memif_disconnect(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_msg_queue_elt *elt, *next;
+	int i;
+	int ret;
+
+	if (pmd->cc != NULL) {
+		/*
+		 * Clear control message queue (except disconnect message if
+		 * any). Walk with a saved successor: a DISCONNECT element is
+		 * left in place, so always restarting from the head would
+		 * never terminate.
+		 */
+		for (elt = TAILQ_FIRST(&pmd->cc->msg_queue); elt != NULL;
+		     elt = next) {
+			next = TAILQ_NEXT(elt, next);
+			if (elt->msg.type != MEMIF_MSG_TYPE_DISCONNECT) {
+				TAILQ_REMOVE(&pmd->cc->msg_queue, elt, next);
+				free(elt);
+			}
+		}
+		/* send disconnect message (if there is any in queue) */
+		memif_msg_send_from_queue(pmd->cc);
+
+		/* at this point, there should be no more messages in queue */
+		if (TAILQ_FIRST(&pmd->cc->msg_queue) != NULL) {
+			MIF_LOG(WARNING,
+				"%s: Unexpected message(s) in message queue.",
+				rte_vdev_device_name(pmd->vdev));
+		}
+
+		if (pmd->cc->intr_handle.fd > 0) {
+			ret =
+			    rte_intr_callback_unregister(&pmd->cc->intr_handle,
+							 memif_intr_handler,
+							 pmd->cc);
+			/*
+			 * If callback is active (disconnecting based on
+			 * received control message).
+			 */
+			if (ret == -EAGAIN) {
+				ret =
+				    rte_intr_callback_unregister_pending(&pmd->
+									 cc->
+									 intr_handle,
+									 memif_intr_handler,
+									 pmd->
+									 cc,
+									 memif_intr_unregister_handler);
+			} else if (ret > 0) {
+				/* closes the fd, drops leftovers, frees cc */
+				memif_intr_unregister_handler(&pmd->cc->
+							      intr_handle,
+							      pmd->cc);
+			}
+			if (ret > 0)
+				/* freed now, or owned by the pending handler */
+				pmd->cc = NULL;
+			else
+				MIF_LOG(WARNING,
+					"%s: Failed to unregister control channel callback.",
+					rte_vdev_device_name(pmd->vdev));
+		}
+	}
+
+	/* unconfig interrupts */
+	struct memif_queue *mq;
+	for (i = 0; i < pmd->cfg.num_s2m_rings; i++) {
+		mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
+		    &pmd->tx_queues[i] : &pmd->rx_queues[i];
+		if (mq->intr_handle.fd > 0) {
+			rte_intr_disable(&mq->intr_handle);
+			close(mq->intr_handle.fd);
+			mq->intr_handle.fd = -1;
+		}
+		mq->ring = NULL;
+	}
+	for (i = 0; i < pmd->cfg.num_m2s_rings; i++) {
+		mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
+		    &pmd->rx_queues[i] : &pmd->tx_queues[i];
+		if (mq->intr_handle.fd > 0) {
+			rte_intr_disable(&mq->intr_handle);
+			close(mq->intr_handle.fd);
+			mq->intr_handle.fd = -1;
+		}
+		mq->ring = NULL;
+	}
+
+	memif_free_regions(pmd);
+
+	dev->data->dev_link.link_status = ETH_LINK_DOWN;
+	pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
+	pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTED;
+	MIF_LOG(DEBUG, "%s: Disconnected.", rte_vdev_device_name(pmd->vdev));
+}
+
+/*
+ * Receive one control message from the channel socket and dispatch it.
+ *
+ * The message must be exactly sizeof(memif_msg_t) bytes. Ancillary data is
+ * scanned for credentials and for a passed file descriptor; the descriptor
+ * is forwarded to the ADD_REGION and ADD_RING handlers. A channel that has
+ * no device assigned yet only accepts an INIT message.
+ *
+ * Each message type runs its receive handler and then enqueues the
+ * responses for that type. Returns 0 on success, or the negative value of
+ * the first step that failed (-1 for malformed or unexpected messages,
+ * after a disconnect message has been enqueued).
+ */
+static int memif_msg_receive(struct memif_control_channel *cc)
+{
+	char ctl[CMSG_SPACE(sizeof(int)) +
+		 CMSG_SPACE(sizeof(struct ucred))] = { 0 };
+	struct msghdr mh = { 0 };
+	struct iovec iov[1];
+	memif_msg_t msg = { 0 };
+	struct ucred *cr __rte_unused = NULL;
+	struct cmsghdr *hdr;
+	ssize_t received;
+	int passed_fd = -1;
+	int ret = 0;
+	int idx;
+
+	iov[0].iov_base = (void *)&msg;
+	iov[0].iov_len = sizeof(memif_msg_t);
+	mh.msg_iov = iov;
+	mh.msg_iovlen = 1;
+	mh.msg_control = ctl;
+	mh.msg_controllen = sizeof(ctl);
+
+	received = recvmsg(cc->intr_handle.fd, &mh, 0);
+	if (received != sizeof(memif_msg_t)) {
+		MIF_LOG(DEBUG, "Invalid message size.");
+		memif_msg_enq_disconnect(cc, "Invalid message size", 0);
+		return -1;
+	}
+	MIF_LOG(DEBUG, "Received msg type: %u.", msg.type);
+
+	/* pick up credentials and a passed fd from the ancillary data */
+	for (hdr = CMSG_FIRSTHDR(&mh); hdr != NULL;
+	     hdr = CMSG_NXTHDR(&mh, hdr)) {
+		if (hdr->cmsg_level != SOL_SOCKET)
+			continue;
+		if (hdr->cmsg_type == SCM_CREDENTIALS)
+			cr = (struct ucred *)CMSG_DATA(hdr);
+		else if (hdr->cmsg_type == SCM_RIGHTS)
+			passed_fd = *(int *)CMSG_DATA(hdr);
+	}
+
+	if ((cc->pmd == NULL) && msg.type != MEMIF_MSG_TYPE_INIT) {
+		MIF_LOG(DEBUG, "Unexpected message.");
+		memif_msg_enq_disconnect(cc, "Unexpected message", 0);
+		return -1;
+	}
+
+	switch (msg.type) {
+	case MEMIF_MSG_TYPE_ACK:
+		break;
+	case MEMIF_MSG_TYPE_HELLO:
+		ret = memif_msg_receive_hello(cc->pmd, &msg);
+		if (ret < 0)
+			return ret;
+		ret = memif_init_regions_and_queues(cc->pmd);
+		if (ret < 0)
+			return ret;
+		ret = memif_msg_enq_init(cc->pmd);
+		if (ret < 0)
+			return ret;
+		for (idx = 0; idx < cc->pmd->regions_num; idx++) {
+			ret = memif_msg_enq_add_region(cc->pmd, idx);
+			if (ret < 0)
+				return ret;
+		}
+		for (idx = 0; idx < cc->pmd->run.num_s2m_rings; idx++) {
+			ret = memif_msg_enq_add_ring(cc->pmd, idx,
+						     MEMIF_RING_S2M);
+			if (ret < 0)
+				return ret;
+		}
+		for (idx = 0; idx < cc->pmd->run.num_m2s_rings; idx++) {
+			ret = memif_msg_enq_add_ring(cc->pmd, idx,
+						     MEMIF_RING_M2S);
+			if (ret < 0)
+				return ret;
+		}
+		ret = memif_msg_enq_connect(cc->pmd);
+		break;
+	case MEMIF_MSG_TYPE_INIT:
+		ret = memif_msg_receive_init(cc, &msg);
+		if (ret < 0)
+			return ret;
+		ret = memif_msg_enq_ack(cc->pmd);
+		break;
+	case MEMIF_MSG_TYPE_ADD_REGION:
+		ret = memif_msg_receive_add_region(cc->pmd, &msg, passed_fd);
+		if (ret < 0)
+			return ret;
+		ret = memif_msg_enq_ack(cc->pmd);
+		break;
+	case MEMIF_MSG_TYPE_ADD_RING:
+		ret = memif_msg_receive_add_ring(cc->pmd, &msg, passed_fd);
+		if (ret < 0)
+			return ret;
+		ret = memif_msg_enq_ack(cc->pmd);
+		break;
+	case MEMIF_MSG_TYPE_CONNECT:
+		ret = memif_msg_receive_connect(cc->pmd, &msg);
+		if (ret < 0)
+			return ret;
+		ret = memif_msg_enq_connected(cc->pmd);
+		break;
+	case MEMIF_MSG_TYPE_CONNECTED:
+		ret = memif_msg_receive_connected(cc->pmd, &msg);
+		break;
+	case MEMIF_MSG_TYPE_DISCONNECT:
+		ret = memif_msg_receive_disconnect(cc->pmd, &msg);
+		break;
+	default:
+		memif_msg_enq_disconnect(cc, "Unknown message type", 0);
+		return -1;
+	}
+
+	return ret;
+}
+
+/*
+ * Interrupt callback of a control channel; 'arg' is the channel.
+ *
+ * Receives and handles one control message. If the channel still has no
+ * device assigned afterwards, the callback schedules its own removal and
+ * memif_intr_unregister_handler() cleans the channel up. Otherwise the
+ * queued responses are sent; if receiving or sending failed, the device is
+ * disconnected.
+ */
+static void memif_intr_handler(void *arg)
+{
+	struct memif_control_channel *cc = arg;
+	struct rte_eth_dev *dev;
+	int rc;
+
+	rc = memif_msg_receive(cc);
+
+	/* if driver failed to assign device */
+	if (cc->pmd == NULL) {
+		rc = rte_intr_callback_unregister_pending(&cc->intr_handle,
+							  memif_intr_handler,
+							  cc,
+							  memif_intr_unregister_handler);
+		if (rc < 0)
+			MIF_LOG(WARNING,
+				"Failed to unregister control channel callback.");
+		return;
+	}
+
+	/* send responses only when the message was handled successfully */
+	if (rc >= 0) {
+		rc = memif_msg_send_from_queue(cc);
+		if (rc >= 0)
+			return;
+	}
+
+	/* receiving or sending failed: disconnect the device */
+	dev = rte_eth_dev_allocated(rte_vdev_device_name(cc->pmd->vdev));
+	if (dev != NULL) {
+		memif_disconnect(dev);
+		return;
+	}
+	MIF_LOG(WARNING, "%s: eth dev not allocated",
+		rte_vdev_device_name(cc->pmd->vdev));
+}
+
+/*
+ * Interrupt callback of a listener socket; 'arg' is the memif socket.
+ *
+ * Accepts one incoming connection, allocates a control channel for it,
+ * registers memif_intr_handler() on the new fd and sends a HELLO message.
+ *
+ * Error handling depends on how far the setup got: before the callback is
+ * registered the fd and the channel are simply released; afterwards the
+ * callback is unregistered first, because freeing the channel while the
+ * interrupt source still references it would leave a dangling callback
+ * argument. The channel is then released through
+ * memif_intr_unregister_handler(), which also drops queued messages.
+ */
+static void memif_listener_handler(void *arg)
+{
+	struct memif_socket *socket = arg;
+	struct memif_control_channel *cc;
+	struct sockaddr_un client;
+	socklen_t addr_len = sizeof(client);
+	int sockfd;
+	int ret;
+
+	sockfd = accept(socket->intr_handle.fd, (struct sockaddr *)&client,
+			&addr_len);
+	if (sockfd < 0) {
+		MIF_LOG(ERR,
+			"Failed to accept connection request on socket fd %d",
+			socket->intr_handle.fd);
+		return;
+	}
+
+	MIF_LOG(DEBUG, "%s: Connection request accepted.", socket->filename);
+
+	cc = rte_zmalloc("memif-cc", sizeof(struct memif_control_channel), 0);
+	if (cc == NULL) {
+		MIF_LOG(ERR, "Failed to allocate control channel.");
+		close(sockfd);
+		return;
+	}
+
+	cc->intr_handle.fd = sockfd;
+	cc->intr_handle.type = RTE_INTR_HANDLE_EXT;
+	cc->socket = socket;
+	cc->pmd = NULL;
+	TAILQ_INIT(&cc->msg_queue);
+
+	ret =
+	    rte_intr_callback_register(&cc->intr_handle, memif_intr_handler,
+				       cc);
+	if (ret < 0) {
+		MIF_LOG(ERR, "Failed to register control channel callback.");
+		close(sockfd);
+		rte_free(cc);
+		return;
+	}
+
+	ret = memif_msg_enq_hello(cc);
+	if (ret < 0) {
+		MIF_LOG(ERR, "Failed to enqueue hello message.");
+		goto unregister;
+	}
+	ret = memif_msg_send_from_queue(cc);
+	if (ret < 0)
+		goto unregister;
+
+	return;
+
+ unregister:
+	ret = rte_intr_callback_unregister(&cc->intr_handle,
+					   memif_intr_handler, cc);
+	if (ret < 0) {
+		/*
+		 * The interrupt source still points at cc, so it must stay
+		 * allocated; memif_intr_handler() removes a channel without
+		 * a device on its next invocation.
+		 */
+		MIF_LOG(WARNING,
+			"Failed to unregister control channel callback.");
+		return;
+	}
+	/* closes the fd, drops queued messages and frees cc */
+	memif_intr_unregister_handler(&cc->intr_handle, cc);
+}
+
+/*
+ * Allocate a memif socket object for the path in 'key'.
+ *
+ * 'key' must point to a 256 byte buffer; it is copied verbatim into the
+ * socket's filename. For a listener (listener != 0) an AF_UNIX
+ * SOCK_SEQPACKET socket is created with SO_PASSCRED enabled, bound to the
+ * path, put into listening state and registered with
+ * memif_listener_handler() as interrupt callback.
+ *
+ * Returns the new socket, or NULL on failure. On failure everything
+ * acquired so far is released: the socket fd is closed, a socket file
+ * created by bind() is removed and the socket object is freed.
+ */
+static inline struct memif_socket *memif_socket_create(struct pmd_internals
+						       *pmd, char *key,
+						       uint8_t listener)
+{
+	struct memif_socket *sock;
+	struct sockaddr_un un;
+	int sockfd = -1;
+	int bound = 0;
+	int ret;
+	int on = 1;
+
+	sock = rte_zmalloc("memif-socket", sizeof(struct memif_socket), 0);
+	if (sock == NULL) {
+		MIF_LOG(ERR, "Failed to allocate memory for memif socket");
+		return NULL;
+	}
+
+	sock->listener = listener;
+	rte_memcpy(sock->filename, key, 256);
+	TAILQ_INIT(&sock->pmd_queue);
+
+	if (listener != 0) {
+		sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+		if (sockfd < 0)
+			goto error;
+
+		/* zero first so that sun_path is always NUL-terminated */
+		memset(&un, 0, sizeof(un));
+		un.sun_family = AF_UNIX;
+		strncpy((char *)un.sun_path, (char *)sock->filename,
+			sizeof(un.sun_path) - 1);
+
+		ret = setsockopt(sockfd, SOL_SOCKET, SO_PASSCRED, &on,
+				 sizeof(on));
+		if (ret < 0)
+			goto error;
+		ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
+		if (ret < 0)
+			goto error;
+		bound = 1;
+		ret = listen(sockfd, 1);
+		if (ret < 0)
+			goto error;
+
+		MIF_LOG(DEBUG, "%s: Memif listener socket %s created.",
+			rte_vdev_device_name(pmd->vdev), sock->filename);
+
+		sock->intr_handle.fd = sockfd;
+		sock->intr_handle.type = RTE_INTR_HANDLE_EXT;
+		ret = rte_intr_callback_register(&sock->intr_handle,
+						 memif_listener_handler, sock);
+		if (ret < 0) {
+			MIF_LOG(ERR, "%s: Failed to register interrupt "
+				"callback for listener socket",
+				rte_vdev_device_name(pmd->vdev));
+			goto cleanup;
+		}
+	}
+
+	return sock;
+
+ error:
+	/* log before cleanup so that errno still belongs to the failed call */
+	MIF_LOG(ERR, "%s: Failed to setup socket %s: %s",
+		rte_vdev_device_name(pmd->vdev), key, strerror(errno));
+ cleanup:
+	if (sockfd >= 0)
+		close(sockfd);
+	if (bound)
+		/* do not leave a stale socket file behind */
+		remove(un.sun_path);
+	rte_free(sock);
+	return NULL;
+}
+
+/*
+ * Create the hash table that maps socket filenames to memif sockets.
+ * Keys are 256 bytes long, the table holds up to 256 entries and uses
+ * rte_jhash with an initial value of 0. Returns the result of
+ * rte_hash_create().
+ */
+static inline struct rte_hash *memif_create_socket_hash(void)
+{
+	struct rte_hash_parameters hash_params = {
+		.name = MEMIF_SOCKET_HASH_NAME,
+		.entries = 256,
+		.key_len = 256,
+		.hash_func = rte_jhash,
+		.hash_func_init_val = 0,
+	};
+
+	return rte_hash_create(&hash_params);
+}
+
+/*
+ * Attach a device to the memif socket identified by 'socket_filename'.
+ *
+ * Sockets are kept in a hash table keyed by a zero-padded 256 byte copy of
+ * the filename; the table and the socket are created on first use (a
+ * listener socket unless the device role is slave). The device is then
+ * appended to the socket's device list.
+ *
+ * Fails when the filename does not fit into the key, when the socket's
+ * listener state does not match the device role, or when a device with
+ * the same id is already attached to the socket.
+ *
+ * Returns 0 on success, a negative value on failure.
+ */
+int memif_socket_init(struct rte_eth_dev *dev, const char *socket_filename)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_socket *socket = NULL;
+	struct memif_socket_pmd_list_elt *elt;
+	size_t len;
+	int ret;
+	char key[256];
+
+	/* the key must hold the filename including its terminating NUL */
+	len = strlen(socket_filename);
+	if (len >= sizeof(key)) {
+		MIF_LOG(ERR, "Socket filename too long.");
+		return -1;
+	}
+
+	struct rte_hash *hash = rte_hash_find_existing(MEMIF_SOCKET_HASH_NAME);
+	if (hash == NULL) {
+		hash = memif_create_socket_hash();
+		if (hash == NULL) {
+			MIF_LOG(ERR, "Failed to create memif socket hash.");
+			return -1;
+		}
+	}
+
+	memset(key, 0, sizeof(key));
+	rte_memcpy(key, socket_filename, len);
+	ret = rte_hash_lookup_data(hash, key, (void **)&socket);
+	if (ret < 0) {
+		socket = memif_socket_create(pmd, key,
+					     (pmd->role ==
+					      MEMIF_ROLE_SLAVE) ? 0 : 1);
+		if (socket == NULL) {
+			return -1;
+		}
+		ret = rte_hash_add_key_data(hash, key, socket);
+		if (ret < 0) {
+			MIF_LOG(ERR, "Failed to add socket to socket hash.");
+			/* undo memif_socket_create(): nothing references it */
+			if (socket->listener != 0) {
+				rte_intr_callback_unregister(&socket->
+							     intr_handle,
+							     memif_listener_handler,
+							     socket);
+				close(socket->intr_handle.fd);
+				remove(socket->filename);
+			}
+			rte_free(socket);
+			return ret;
+		}
+	}
+	pmd->socket_filename = socket->filename;
+
+	if ((socket->listener != 0) && (pmd->role == MEMIF_ROLE_SLAVE)) {
+		MIF_LOG(ERR, "Socket is a listener.");
+		return -1;
+	} else if ((socket->listener == 0) && (pmd->role == MEMIF_ROLE_MASTER)) {
+		MIF_LOG(ERR, "Socket is not a listener.");
+		return -1;
+	}
+
+	TAILQ_FOREACH(elt, &socket->pmd_queue, next) {
+		if (elt->pmd->id == pmd->id) {
+			MIF_LOG(ERR, "Memif device with id %d already "
+				"exists on socket %s",
+				pmd->id, socket->filename);
+			return -1;
+		}
+	}
+
+	elt =
+	    rte_malloc("pmd-queue", sizeof(struct memif_socket_pmd_list_elt),
+		       0);
+	if (elt == NULL) {
+		MIF_LOG(ERR, "%s: Failed to add device to socket device list.",
+			rte_vdev_device_name(pmd->vdev));
+		return -1;
+	}
+	elt->pmd = pmd;
+	TAILQ_INSERT_TAIL(&socket->pmd_queue, elt, next);
+
+	return 0;
+}
+
+/*
+ * Detach a device from its memif socket.
+ *
+ * Removes the device from the socket's device list. When the list becomes
+ * empty the socket itself is torn down: it is removed from the hash table
+ * and, for a listener, the interrupt callback is unregistered, the
+ * listening fd is closed and the socket file is removed before the socket
+ * object is freed.
+ *
+ * List elements are released with rte_free() because memif_socket_init()
+ * allocates them with rte_malloc(). Does nothing if the device has no
+ * socket filename, or if the hash table or the socket cannot be found.
+ */
+void memif_socket_remove_device(struct pmd_internals *pmd)
+{
+	struct memif_socket *socket = NULL;
+	struct memif_socket_pmd_list_elt *elt, *next;
+	struct rte_hash *hash;
+
+	if (pmd->socket_filename == NULL)
+		return;
+
+	hash = rte_hash_find_existing(MEMIF_SOCKET_HASH_NAME);
+	if (hash == NULL) {
+		return;
+	}
+
+	if (rte_hash_lookup_data(hash, pmd->socket_filename, (void **)&socket) <
+	    0)
+		return;
+
+	for (elt = TAILQ_FIRST(&socket->pmd_queue); elt != NULL; elt = next) {
+		next = TAILQ_NEXT(elt, next);
+		if (elt->pmd == pmd) {
+			TAILQ_REMOVE(&socket->pmd_queue, elt, next);
+			rte_free(elt);
+			pmd->socket_filename = NULL;
+		}
+	}
+
+	/* remove socket, if this was the last device using it */
+	if (TAILQ_EMPTY(&socket->pmd_queue)) {
+		rte_hash_del_key(hash, socket->filename);
+		if (socket->listener) {
+			/*
+			 * Stop listening before the socket object goes away,
+			 * otherwise the callback keeps a dangling argument
+			 * and the fd is never closed.
+			 */
+			rte_intr_callback_unregister(&socket->intr_handle,
+						     memif_listener_handler,
+						     socket);
+			close(socket->intr_handle.fd);
+			/* remove listener socket file,
+			 * so we can create new one later.
+			 */
+			remove(socket->filename);
+		}
+		/* the filename lives inside the socket being freed */
+		pmd->socket_filename = NULL;
+		rte_free(socket);
+	}
+}
+
+/*
+ * Enable a master device so that it may accept connection requests.
+ *
+ * Requires rx queues, tx queues and a socket filename to be set up.
+ * Clears both disconnect strings and the DISABLED flag.
+ * Returns 0 on success, -1 if the device is not configured.
+ */
+int memif_connect_master(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	int configured;
+
+	configured = pmd->rx_queues != NULL && pmd->tx_queues != NULL &&
+	    pmd->socket_filename != NULL;
+	if (!configured) {
+		MIF_LOG(ERR, "%s: Device not configured!",
+			rte_vdev_device_name(pmd->vdev));
+		return -1;
+	}
+
+	memset(pmd->local_disc_string, 0, 96);
+	memset(pmd->remote_disc_string, 0, 96);
+	pmd->flags &= ~ETH_MEMIF_FLAG_DISABLED;
+
+	return 0;
+}
+
+/*
+ * Start a connection attempt from a slave device.
+ *
+ * Requires rx queues, tx queues and a socket filename to be set up.
+ * Clears both disconnect strings and the DISABLED flag, connects an
+ * AF_UNIX SOCK_SEQPACKET socket to the configured path, allocates the
+ * control channel and registers memif_intr_handler() on the new fd.
+ *
+ * The socket address is zeroed before use so that sun_path is always
+ * NUL-terminated, also for a path that fills the field.
+ *
+ * Returns 0 on success, -1 on failure; on failure the socket is closed and
+ * the control channel, if already allocated, is freed.
+ */
+int memif_connect_slave(struct rte_eth_dev *dev)
+{
+	int sockfd;
+	int ret;
+	struct sockaddr_un sun;
+	struct pmd_internals *pmd = dev->data->dev_private;
+
+	if ((pmd->rx_queues == NULL) || (pmd->tx_queues == NULL) ||
+	    (pmd->socket_filename == NULL)) {
+		MIF_LOG(ERR, "%s: Device not configured!",
+			rte_vdev_device_name(pmd->vdev));
+		return -1;
+	}
+
+	memset(pmd->local_disc_string, 0, 96);
+	memset(pmd->remote_disc_string, 0, 96);
+	pmd->flags &= ~ETH_MEMIF_FLAG_DISABLED;
+
+	sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+	if (sockfd < 0) {
+		MIF_LOG(ERR, "%s: Failed to open socket.",
+			rte_vdev_device_name(pmd->vdev));
+		return -1;
+	}
+
+	memset(&sun, 0, sizeof(sun));
+	sun.sun_family = AF_UNIX;
+
+	strncpy(sun.sun_path, pmd->socket_filename, sizeof(sun.sun_path) - 1);
+
+	ret = connect(sockfd, (struct sockaddr *)&sun,
+		      sizeof(struct sockaddr_un));
+	if (ret < 0) {
+		MIF_LOG(ERR, "%s: Failed to connect socket: %s.",
+			rte_vdev_device_name(pmd->vdev), pmd->socket_filename);
+		goto error;
+	}
+
+	MIF_LOG(DEBUG, "%s: Memif socket: %s connected.",
+		rte_vdev_device_name(pmd->vdev), pmd->socket_filename);
+
+	pmd->cc = rte_zmalloc("memif-cc",
+			      sizeof(struct memif_control_channel), 0);
+	if (pmd->cc == NULL) {
+		MIF_LOG(ERR, "%s: Failed to allocate control channel.",
+			rte_vdev_device_name(pmd->vdev));
+		goto error;
+	}
+
+	pmd->cc->intr_handle.fd = sockfd;
+	pmd->cc->intr_handle.type = RTE_INTR_HANDLE_EXT;
+	pmd->cc->socket = NULL;
+	pmd->cc->pmd = pmd;
+	TAILQ_INIT(&pmd->cc->msg_queue);
+
+	ret = rte_intr_callback_register(&pmd->cc->intr_handle,
+					 memif_intr_handler, pmd->cc);
+	if (ret < 0) {
+		MIF_LOG(ERR, "%s: Failed to register interrupt callback "
+			"for controll fd", rte_vdev_device_name(pmd->vdev));
+		goto error;
+	}
+
+	return 0;
+
+ error:
+	/* sockfd is a valid descriptor here, including descriptor 0 */
+	close(sockfd);
+	if (pmd->cc != NULL) {
+		rte_free(pmd->cc);
+		pmd->cc = NULL;
+	}
+	return -1;
+}
diff --git a/drivers/net/memif/memif_socket.h b/drivers/net/memif/memif_socket.h
new file mode 100644
index 000000000..f9136dbe5
--- /dev/null
+++ b/drivers/net/memif/memif_socket.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
+ */
+
+#ifndef _MEMIF_SOCKET_H_
+#define _MEMIF_SOCKET_H_
+
+#include <sys/queue.h>
+
+/*
+ * Remove device from socket device list. If no device is left on the socket,
+ * remove the socket as well.
+ */
+void memif_socket_remove_device(struct pmd_internals *pmd);
+
+/*
+ * Enqueue a disconnect message with the given reason string on a control
+ * channel. NOTE(review): the meaning of err_code is not visible from the
+ * callers, which all pass 0 - confirm against the implementation.
+ */
+void memif_msg_enq_disconnect(struct memif_control_channel *cc,
+			      const char *reason, int err_code);
+
+/*
+ * Attach the device to the socket identified by socket_filename, creating
+ * the socket (and the socket hash) on first use.
+ * Returns 0 on success, a negative value on failure.
+ */
+int memif_socket_init(struct rte_eth_dev *dev, const char *socket_filename);
+
+/*
+ * Disconnect the device: tear down the control channel, close the queue
+ * interrupt fds, free the regions and mark the link down.
+ */
+void memif_disconnect(struct rte_eth_dev *dev);
+
+/* Allow master to receive connection requests. */
+int memif_connect_master(struct rte_eth_dev *dev);
+
+/* Send connection request. */
+int memif_connect_slave(struct rte_eth_dev *dev);
+
+/* Entry of a socket's device list. */
+struct memif_socket_pmd_list_elt {
+	TAILQ_ENTRY(memif_socket_pmd_list_elt) next;
+	struct pmd_internals *pmd;	/* device attached to the socket */
+};
+
+/* Name under which the socket hash table is created and looked up. */
+#define MEMIF_SOCKET_HASH_NAME			"memif-sh"
+/* A memif socket, shared by all devices in pmd_queue. */
+struct memif_socket {
+	struct rte_intr_handle intr_handle;	/* holds the listener fd */
+	uint8_t listener;	/* non-zero if this is a listener socket */
+	char filename[256];	/* socket path, also used as hash key */
+
+	 TAILQ_HEAD(, memif_socket_pmd_list_elt) pmd_queue;
+};
+
+/* Control message queue element. */
+struct memif_msg_queue_elt {
+	TAILQ_ENTRY(memif_msg_queue_elt) next;
+	memif_msg_t msg;	/* the control message */
+	int fd;			/* file descriptor that accompanies the message */
+};
+
+/* State of one control connection. */
+struct memif_control_channel {
+	struct rte_intr_handle intr_handle;	/* holds the connection fd */
+	 TAILQ_HEAD(, memif_msg_queue_elt) msg_queue;	/* queued messages */
+	struct memif_socket *socket;	/* listener that accepted it, or NULL */
+	struct pmd_internals *pmd;	/* owning device, NULL until assigned */
+};
+
+#endif				/* _MEMIF_SOCKET_H_ */
diff --git a/drivers/net/memif/meson.build b/drivers/net/memif/meson.build
new file mode 100644
index 000000000..ea18394fd
--- /dev/null
+++ b/drivers/net/memif/meson.build
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018 Cisco Systems, Inc.  All rights reserved.
+
+# Only build this driver when the host system is Linux.
+if host_machine.system() != 'linux'
+        build = false
+endif
+# Source files that make up the driver.
+sources = files('rte_eth_memif.c',
+		'memif_socket.c')
diff --git a/drivers/net/memif/rte_eth_memif.c b/drivers/net/memif/rte_eth_memif.c
new file mode 100644
index 000000000..7988010c4
--- /dev/null
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -0,0 +1,1172 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
+ */
+
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/if_ether.h>
+#include <errno.h>
+#include <sys/eventfd.h>
+
+#include <rte_version.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ethdev_driver.h>
+#include <rte_ethdev_vdev.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+#include <rte_bus_vdev.h>
+
+#include <rte_eth_memif.h>
+#include <memif_socket.h>
+
+/* Names of the device arguments (devargs keys) known to this driver. */
+#define ETH_MEMIF_ID_ARG		"id"
+#define ETH_MEMIF_ROLE_ARG		"role"
+#define ETH_MEMIF_BUFFER_SIZE_ARG	"bsize"
+#define ETH_MEMIF_RING_SIZE_ARG		"rsize"
+#define ETH_MEMIF_NRXQ_ARG		"nrxq"
+#define ETH_MEMIF_NTXQ_ARG		"ntxq"
+#define ETH_MEMIF_SOCKET_ARG		"socket"
+#define ETH_MEMIF_MAC_ARG		"mac"
+#define ETH_MEMIF_ZC_ARG		"zero-copy"
+#define ETH_MEMIF_SECRET_ARG		"secret"
+
+/* NULL-terminated list of all accepted device argument names. */
+static const char *valid_arguments[] = {
+	ETH_MEMIF_ID_ARG,
+	ETH_MEMIF_ROLE_ARG,
+	ETH_MEMIF_BUFFER_SIZE_ARG,
+	ETH_MEMIF_RING_SIZE_ARG,
+	ETH_MEMIF_NRXQ_ARG,
+	ETH_MEMIF_NTXQ_ARG,
+	ETH_MEMIF_SOCKET_ARG,
+	ETH_MEMIF_MAC_ARG,
+	ETH_MEMIF_ZC_ARG,
+	ETH_MEMIF_SECRET_ARG,
+	NULL
+};
+
+static struct rte_vdev_driver pmd_memif_drv;
+
+/*
+ * Return the version string "memif-<major>.<minor>", assembled at compile
+ * time from MEMIF_VERSION_MAJOR and MEMIF_VERSION_MINOR.
+ */
+const char *memif_version(void)
+{
+#define MEMIF_STRINGIFY_RAW(x)	#x
+#define MEMIF_STRINGIFY(x)	MEMIF_STRINGIFY_RAW(x)
+	const char *version = "memif-" MEMIF_STRINGIFY(MEMIF_VERSION_MAJOR)
+	    "." MEMIF_STRINGIFY(MEMIF_VERSION_MINOR);
+#undef MEMIF_STRINGIFY
+#undef MEMIF_STRINGIFY_RAW
+
+	return version;
+}
+
+/*
+ * Fill in the device info of a memif port.
+ *
+ * The queue limits follow the device role: a slave receives on M2S rings
+ * and transmits on S2M rings, any other role the other way around.
+ */
+static void
+memif_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+
+	if (pmd->role == MEMIF_ROLE_SLAVE) {
+		dev_info->max_rx_queues = pmd->cfg.num_m2s_rings;
+		dev_info->max_tx_queues = pmd->cfg.num_s2m_rings;
+	} else {
+		dev_info->max_rx_queues = pmd->cfg.num_s2m_rings;
+		dev_info->max_tx_queues = pmd->cfg.num_m2s_rings;
+	}
+
+	dev_info->if_index = pmd->if_index;
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_rx_pktlen = (uint32_t) ETH_FRAME_LEN;
+	dev_info->min_rx_bufsize = 0;
+}
+
+/*
+ * Return the address of ring 'ring_num' of the given type.
+ *
+ * Rings are laid out back to back at the start of region 0; each one takes
+ * a ring header plus (1 << log2_ring_size) descriptors. The ring position
+ * is ring_num + type * num_s2m_rings.
+ */
+static inline memif_ring_t *memif_get_ring(struct pmd_internals *pmd,
+					   memif_ring_type_t type,
+					   uint16_t ring_num)
+{
+	/* rings only in region 0 */
+	void *base = pmd->regions[0].addr;
+	int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) *
+	    (1 << pmd->run.log2_ring_size);
+
+	return (memif_ring_t *) ((char *)base +
+				 (ring_num + type * pmd->run.num_s2m_rings) *
+				 ring_size);
+}
+
+/*
+ * Return the address of the buffer a descriptor refers to: the start of
+ * the descriptor's region plus the descriptor's offset.
+ */
+static inline void *memif_get_buffer(struct pmd_internals *pmd,
+				     memif_desc_t * d)
+{
+	struct memif_region *region = &pmd->regions[d->region];
+
+	return region->addr + d->offset;
+}
+
+/*
+ * Receive burst function: copy up to nb_pkts packets from the queue's ring
+ * into newly allocated mbufs.
+ *
+ * queue   - struct memif_queue to receive from
+ * bufs    - output array for the received mbufs
+ * nb_pkts - capacity of bufs
+ *
+ * Returns the number of packets stored in bufs; 0 if the device is not
+ * connected or the queue has no ring.
+ */
+static uint16_t
+eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct memif_queue *mq = queue;
+	struct pmd_internals *pmd = mq->pmd;
+	if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
+		return 0;
+	memif_ring_t *ring = mq->ring;
+	if (unlikely(ring == NULL))
+		return 0;
+	uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
+	uint16_t n_rx_pkts = 0;
+	/* payload bytes per mbuf: data room without the headroom */
+	uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
+	    RTE_PKTMBUF_HEADROOM;
+	uint16_t src_len, src_off, dst_len, dst_off, cp_len;
+	memif_ring_type_t type = mq->type;
+	memif_desc_t *d0;
+	struct rte_mbuf *mbuf;
+	struct rte_mbuf *mbuf_head = NULL;
+
+	/* consume interrupt */
+	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
+		uint64_t b;
+		ssize_t size __rte_unused;
+		/* the result of the read is deliberately ignored */
+		size = read(mq->intr_handle.fd, &b, sizeof(b));
+	}
+
+	ring_size = 1 << mq->log2_ring_size;
+	mask = ring_size - 1;
+
+	/* an S2M ring is consumed from head, any other ring from tail */
+	cur_slot = (type == MEMIF_RING_S2M) ? mq->last_head : mq->last_tail;
+	last_slot = (type == MEMIF_RING_S2M) ? ring->head : ring->tail;
+	if (cur_slot == last_slot)
+		goto refill;
+	n_slots = last_slot - cur_slot;
+
+	while (n_slots && n_rx_pkts < nb_pkts) {
+		mbuf_head = rte_pktmbuf_alloc(mq->mempool);
+		if (unlikely(mbuf_head == NULL))
+			goto no_free_bufs;
+		mbuf = mbuf_head;
+		mbuf->port = mq->in_port;
+
+ next_slot:
+		s0 = cur_slot & mask;
+		d0 = &ring->desc[s0];
+
+		src_len = d0->length;
+		/*
+		 * NOTE(review): dst_off restarts at 0 for every descriptor,
+		 * including continuation descriptors reached through
+		 * MEMIF_DESC_FLAG_NEXT, so those appear to be written over
+		 * the beginning of the current mbuf - confirm.
+		 */
+		dst_off = 0;
+		src_off = 0;
+
+		/* copy the descriptor payload, chaining mbufs when one is full */
+		do {
+			dst_len = mbuf_size - dst_off;
+			if (dst_len == 0) {
+				dst_off = 0;
+				/*
+				 * NOTE(review): this is larger than the
+				 * mbuf_size used for the first segment -
+				 * confirm the headroom is meant to be added.
+				 */
+				dst_len = mbuf_size + RTE_PKTMBUF_HEADROOM;
+
+				mbuf = rte_pktmbuf_alloc(mq->mempool);
+				/*
+				 * NOTE(review): mbuf_head is not freed on
+				 * this path in the visible code - confirm.
+				 */
+				if (unlikely(mbuf == NULL))
+					goto no_free_bufs;
+				mbuf->port = mq->in_port;
+				rte_pktmbuf_chain(mbuf_head, mbuf);
+			}
+			cp_len = memif_min(dst_len, src_len);
+
+			/* grow data_len of the current segment, mirror it in pkt_len */
+			rte_pktmbuf_pkt_len(mbuf) =
+			    rte_pktmbuf_data_len(mbuf) += cp_len;
+
+			memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, dst_off),
+			       memif_get_buffer(pmd, d0) + src_off, cp_len);
+
+			mq->n_bytes += cp_len;
+			src_off += cp_len;
+			dst_off += cp_len;
+			src_len -= cp_len;
+		} while (src_len);
+
+		cur_slot++;
+		n_slots--;
+		/* the packet continues in the next descriptor */
+		if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
+			goto next_slot;
+		}
+
+		*bufs++ = mbuf_head;
+		n_rx_pkts++;
+
+	}
+
+ no_free_bufs:
+	/* record how far the ring was consumed */
+	if (type == MEMIF_RING_S2M) {
+		rte_mb();
+		ring->tail = mq->last_head = cur_slot;
+	} else {
+		mq->last_tail = cur_slot;
+	}
+
+ refill:
+	/* on an M2S ring, hand the consumed descriptors back with full length */
+	if (type == MEMIF_RING_M2S) {
+		uint16_t head = ring->head;
+		n_slots = ring_size - head + mq->last_tail;
+
+		while (n_slots--) {
+			s0 = head++ & mask;
+			d0 = &ring->desc[s0];
+			d0->length = pmd->run.buffer_size;
+		}
+		/* descriptor updates must be visible before the new head */
+		rte_mb();
+		ring->head = head;
+	}
+
+	mq->n_pkts += n_rx_pkts;
+	return n_rx_pkts;
+}
+
+/*
+ * Transmit burst function: copy up to nb_pkts packets from bufs into the
+ * buffers referenced by the queue's ring descriptors.
+ *
+ * queue   - struct memif_queue to transmit on
+ * bufs    - mbufs to send; each sent mbuf chain is freed
+ * nb_pkts - number of entries in bufs
+ *
+ * Returns the number of packets sent; 0 if the device is not connected or
+ * the queue has no ring. Packets that were not sent are added to the
+ * queue's error counter.
+ */
+static uint16_t
+eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct memif_queue *mq = queue;
+	struct pmd_internals *pmd = mq->pmd;
+	if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
+		return 0;
+	memif_ring_t *ring = mq->ring;
+	if (unlikely(ring == NULL))
+		return 0;
+	uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0;
+	uint16_t src_len, src_off, dst_len, dst_off, cp_len;
+	memif_ring_type_t type = mq->type;
+	memif_desc_t *d0;
+	struct rte_mbuf *mbuf;
+	struct rte_mbuf *mbuf_head;
+
+	ring_size = 1 << mq->log2_ring_size;
+	mask = ring_size - 1;
+
+	/* catch last_tail up with the ring's tail */
+	n_free = ring->tail - mq->last_tail;
+	mq->last_tail += n_free;
+	slot = (type == MEMIF_RING_S2M) ? ring->head : ring->tail;
+
+	/* number of descriptors available for writing */
+	if (type == MEMIF_RING_S2M)
+		n_free = ring_size - ring->head + mq->last_tail;
+	else
+		n_free = ring->head - ring->tail;
+
+	while (n_free && n_tx_pkts < nb_pkts) {
+		mbuf_head = *bufs++;
+		mbuf = mbuf_head;
+
+		/* remembered so that a partially written packet can be undone */
+		saved_slot = slot;
+		d0 = &ring->desc[slot & mask];
+		dst_off = 0;
+		dst_len =
+		    (type ==
+		     MEMIF_RING_S2M) ? pmd->run.buffer_size : d0->length;
+
+ next_in_chain:
+		src_off = 0;
+		src_len = rte_pktmbuf_data_len(mbuf);
+
+		while (src_len) {
+			if (dst_len == 0) {
+				/* buffer full: continue in the next descriptor */
+				if (n_free) {
+					slot++;
+					n_free--;
+					d0->flags |= MEMIF_DESC_FLAG_NEXT;
+					d0 = &ring->desc[slot & mask];
+					dst_off = 0;
+					dst_len = (type == MEMIF_RING_S2M) ?
+					    pmd->run.buffer_size : d0->length;
+					d0->flags = 0;
+				} else {
+					/* out of descriptors: drop this packet */
+					slot = saved_slot;
+					goto no_free_slots;
+				}
+			}
+			cp_len = memif_min(dst_len, src_len);
+
+			memcpy(memif_get_buffer(pmd, d0) + dst_off,
+			       rte_pktmbuf_mtod_offset(mbuf, void *, src_off),
+			       cp_len);
+
+			mq->n_bytes += cp_len;
+			src_off += cp_len;
+			dst_off += cp_len;
+			src_len -= cp_len;
+			dst_len -= cp_len;
+
+			d0->length = dst_off;
+		}
+
+		/*
+		 * NOTE(review): the test is made on the current segment, not
+		 * on the head; if the segment count is only maintained on
+		 * the head mbuf, chains longer than two segments may be cut
+		 * short - confirm.
+		 */
+		if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
+			mbuf = mbuf->next;
+			goto next_in_chain;
+		}
+
+		n_tx_pkts++;
+		slot++;
+		/*
+		 * NOTE(review): n_free is unsigned and may already be 0 here
+		 * when a chained packet used the last descriptors - confirm
+		 * it cannot wrap around.
+		 */
+		n_free--;
+		rte_pktmbuf_free(mbuf_head);
+	}
+
+ no_free_slots:
+	/* buffer contents must be visible before the ring index moves */
+	rte_mb();
+	if (type == MEMIF_RING_S2M)
+		ring->head = slot;
+	else
+		ring->tail = slot;
+
+	/* signal the peer unless it masked interrupts on this ring */
+	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
+		uint64_t a = 1;
+		ssize_t size = write(mq->intr_handle.fd, &a, sizeof(a));
+		if (unlikely(size < 0)) {
+			MIF_LOG(WARNING,
+				"%s: Failed to send interrupt on qid %ld: %s",
+				rte_vdev_device_name(pmd->vdev),
+				mq - pmd->tx_queues, strerror(errno));
+		}
+	}
+
+	mq->n_err += nb_pkts - n_tx_pkts;
+	mq->n_pkts += n_tx_pkts;
+	return n_tx_pkts;
+}
+
+/*
+ * Release all shared memory regions of a device.
+ *
+ * Every region is unmapped if it is mapped and its file descriptor is
+ * closed if it holds one; a region that was never mapped does not stop the
+ * walk, so later regions and the array itself are still released. The
+ * array pointer and the region count are reset afterwards, which makes a
+ * repeated call a no-op instead of a double free.
+ */
+void memif_free_regions(struct pmd_internals *pmd)
+{
+	int i;
+	struct memif_region *r;
+
+	if (pmd->regions != NULL) {
+		for (i = 0; i < pmd->regions_num; i++) {
+			r = pmd->regions + i;
+			if (r->addr != NULL) {
+				munmap(r->addr, r->region_size);
+				r->addr = NULL;
+			}
+			/* fd 0 is what a zeroed, unused slot contains */
+			if (r->fd > 0) {
+				close(r->fd);
+				r->fd = -1;
+			}
+		}
+		rte_free(pmd->regions);
+	}
+	pmd->regions = NULL;
+	pmd->regions_num = 0;
+}
+
+/*
+ * Allocate and map the shared memory regions of a device.
+ *
+ * Creates brn + 1 regions. Region 0 is sized for the ring headers and
+ * descriptors of all rings; every other region is sized for
+ * buffer_size * ring size * number of rings bytes of packet buffers. Each
+ * region is backed by a memfd that is sealed against shrinking, resized
+ * and mapped shared read/write.
+ *
+ * Returns 0 on success, -ENOMEM if the region array cannot be allocated
+ * and -1 if setting up a region fails. On a per-region failure the fd of
+ * that region is closed and its address is left NULL; regions set up
+ * before it stay in pmd->regions for the caller to release.
+ */
+static int memif_alloc_regions(struct pmd_internals *pmd, uint8_t brn)
+{
+	struct memif_region *r;
+	char shm_name[32];
+	int i;
+	int ret = 0;
+
+	r = rte_zmalloc("memif_region", sizeof(struct memif_region) * (brn + 1),
+			0);
+	if (r == NULL) {
+		MIF_LOG(ERR, "%s: Failed to allocate regions.",
+			rte_vdev_device_name(pmd->vdev));
+		return -ENOMEM;
+	}
+
+	pmd->regions = r;
+	pmd->regions_num = brn + 1;
+
+	/*
+	 * Create shm for every region. Region 0 is reserved for descriptors.
+	 * Other regions contain buffers.
+	 */
+	for (i = 0; i < (brn + 1); i++) {
+		r = &pmd->regions[i];
+
+		r->buffer_offset = (i == 0) ? (pmd->run.num_s2m_rings +
+					       pmd->run.num_m2s_rings) *
+		    (sizeof(memif_ring_t) +
+		     sizeof(memif_desc_t) * (1 << pmd->run.log2_ring_size)) : 0;
+		r->region_size = (i == 0) ? r->buffer_offset :
+		    (uint32_t) (pmd->run.buffer_size *
+				(1 << pmd->run.log2_ring_size) *
+				(pmd->run.num_s2m_rings +
+				 pmd->run.num_m2s_rings));
+
+		snprintf(shm_name, sizeof(shm_name), "memif region %d", i);
+
+		r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING);
+		if (r->fd < 0) {
+			MIF_LOG(ERR, "%s: Failed to create shm file: %s.",
+				rte_vdev_device_name(pmd->vdev),
+				strerror(errno));
+			return -1;
+		}
+
+		ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK);
+		if (ret < 0) {
+			MIF_LOG(ERR, "%s: Failed to add seals to shm file: %s.",
+				rte_vdev_device_name(pmd->vdev),
+				strerror(errno));
+			goto error;
+		}
+
+		ret = ftruncate(r->fd, r->region_size);
+		if (ret < 0) {
+			MIF_LOG(ERR, "%s: Failed to truncate shm file: %s.",
+				rte_vdev_device_name(pmd->vdev),
+				strerror(errno));
+			goto error;
+		}
+
+		r->addr = mmap(NULL, r->region_size, PROT_READ |
+			       PROT_WRITE, MAP_SHARED, r->fd, 0);
+		/* mmap() reports failure with MAP_FAILED, not with NULL */
+		if (r->addr == MAP_FAILED) {
+			MIF_LOG(ERR, "%s: Failed to mmap shm region: %s.",
+				rte_vdev_device_name(pmd->vdev),
+				strerror(errno));
+			goto error;
+		}
+	}
+
+	return 0;
+
+ error:
+	/* leave the failed region in a state that is safe to free later */
+	r->addr = NULL;
+	close(r->fd);
+	r->fd = -1;
+	return -1;
+}
+
+/**
+ * Initialize every S2M and M2S ring and point each descriptor at its own
+ * buffer in region 1. Buffers are laid out ring after ring, S2M rings first.
+ *
+ * @param pmd
+ *   device private data; regions must already be allocated and mapped
+ */
+static void memif_init_rings(struct pmd_internals *pmd)
+{
+	const uint32_t ring_size = 1U << pmd->run.log2_ring_size;
+	memif_ring_t *ring;
+	/*
+	 * 32-bit slot index: with several rings of up to
+	 * 2^ETH_MEMIF_MAX_LOG2_RING_SIZE descriptors the index does not fit
+	 * into 16 bits.
+	 */
+	uint32_t slot;
+	uint32_t j;
+	int i;
+
+	for (i = 0; i < pmd->run.num_s2m_rings; i++) {
+		ring = memif_get_ring(pmd, MEMIF_RING_S2M, i);
+		ring->head = ring->tail = 0;
+		ring->cookie = MEMIF_COOKIE;
+		ring->flags = 0;
+		for (j = 0; j < ring_size; j++) {
+			slot = i * ring_size + j;
+			ring->desc[j].region = 1;
+			ring->desc[j].offset = pmd->regions[1].buffer_offset +
+			    slot * pmd->run.buffer_size;
+			ring->desc[j].length = pmd->run.buffer_size;
+		}
+	}
+
+	for (i = 0; i < pmd->run.num_m2s_rings; i++) {
+		ring = memif_get_ring(pmd, MEMIF_RING_M2S, i);
+		ring->head = ring->tail = 0;
+		ring->cookie = MEMIF_COOKIE;
+		ring->flags = 0;
+		for (j = 0; j < ring_size; j++) {
+			/* M2S buffers follow the buffers of all S2M rings */
+			slot = (i + pmd->run.num_s2m_rings) * ring_size + j;
+			ring->desc[j].region = 1;
+			ring->desc[j].offset = pmd->regions[1].buffer_offset +
+			    slot * pmd->run.buffer_size;
+			ring->desc[j].length = pmd->run.buffer_size;
+		}
+	}
+}
+
+/*
+ * Bind the tx queues to the S2M rings and the rx queues to the M2S rings,
+ * record where each ring lives (region 0 and its offset from the start of
+ * that region), reset the cached head/tail and create one eventfd per queue.
+ * A failed eventfd() is only logged as a warning; the queue keeps the
+ * negative fd.
+ */
+static void memif_init_queues(struct pmd_internals *pmd)
+{
+	struct memif_queue *mq;
+	int i;
+
+	for (i = 0; i < pmd->run.num_s2m_rings; i++) {
+		mq = &pmd->tx_queues[i];
+		mq->ring = memif_get_ring(pmd, MEMIF_RING_S2M, i);
+		mq->log2_ring_size = pmd->run.log2_ring_size;
+		/* queues located only in region 0 */
+		mq->region = 0;
+		/* NOTE(review): arithmetic on void pointers is a GNU extension */
+		mq->offset = (void *)mq->ring - (void *)pmd->regions[0].addr;
+		mq->last_head = mq->last_tail = 0;
+		mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
+		if (mq->intr_handle.fd < 0) {
+			MIF_LOG(WARNING,
+				"%s: Failed to create eventfd for tx queue %d: %s.",
+				rte_vdev_device_name(pmd->vdev), i,
+				strerror(errno));
+		}
+	}
+
+	for (i = 0; i < pmd->run.num_m2s_rings; i++) {
+		mq = &pmd->rx_queues[i];
+		mq->ring = memif_get_ring(pmd, MEMIF_RING_M2S, i);
+		mq->log2_ring_size = pmd->run.log2_ring_size;
+		/* queues located only in region 0 */
+		mq->region = 0;
+		/* NOTE(review): arithmetic on void pointers is a GNU extension */
+		mq->offset = (void *)mq->ring - (void *)pmd->regions[0].addr;
+		mq->last_head = mq->last_tail = 0;
+		mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
+		if (mq->intr_handle.fd < 0) {
+			MIF_LOG(WARNING,
+				"%s: Failed to create eventfd for rx queue %d: %s.",
+				rte_vdev_device_name(pmd->vdev), i,
+				strerror(errno));
+		}
+	}
+}
+
+/*
+ * Allocate the shared memory (one buffer region in addition to region 0),
+ * then initialize the rings and the queues on top of it. Returns 0 on
+ * success or the negative value reported by the region allocation.
+ */
+int memif_init_regions_and_queues(struct pmd_internals *pmd)
+{
+	int rc = memif_alloc_regions(pmd, /* num of buffer regions */ 1);
+
+	if (rc >= 0) {
+		memif_init_rings(pmd);
+		memif_init_queues(pmd);
+		rc = 0;
+	}
+
+	return rc;
+}
+
+/**
+ * Finalize the connection: map the regions that are not mapped yet, attach
+ * every queue to its ring, verify the ring cookie, reset ring and queue
+ * indexes and mark the link as up.
+ *
+ * @param pmd
+ *   device private data
+ * @return
+ *   - On success, zero.
+ *   - On failure, -1.
+ */
+int memif_connect(struct pmd_internals *pmd)
+{
+	struct rte_eth_dev *eth_dev =
+	    rte_eth_dev_allocated(rte_vdev_device_name(pmd->vdev));
+	struct memif_region *mr;
+	struct memif_queue *mq;
+	int i;
+
+	/* the link status below is written through eth_dev */
+	if (eth_dev == NULL) {
+		MIF_LOG(ERR, "%s: Device not allocated.",
+			rte_vdev_device_name(pmd->vdev));
+		return -1;
+	}
+
+	for (i = 0; i < pmd->regions_num; i++) {
+		mr = &pmd->regions[i];
+		if (mr->addr != NULL)
+			continue;
+		if (mr->fd < 0)
+			return -1;
+		mr->addr = mmap(NULL, mr->region_size,
+				PROT_READ | PROT_WRITE,
+				MAP_SHARED, mr->fd, 0);
+		/* mmap() reports failure with MAP_FAILED, never with NULL */
+		if (mr->addr == MAP_FAILED) {
+			mr->addr = NULL;
+			return -1;
+		}
+	}
+
+	/* S2M rings are tx queues on the slave and rx queues on the master */
+	for (i = 0; i < pmd->run.num_s2m_rings; i++) {
+		mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
+		    &pmd->tx_queues[i] : &pmd->rx_queues[i];
+		mq->ring = (memif_ring_t *)
+		    ((uint8_t *)pmd->regions[mq->region].addr + mq->offset);
+		if (mq->ring->cookie != MEMIF_COOKIE) {
+			MIF_LOG(ERR, "%s: Wrong cookie",
+				rte_vdev_device_name(pmd->vdev));
+			return -1;
+		}
+		mq->ring->head = mq->ring->tail = mq->last_head =
+		    mq->last_tail = 0;
+		/* polling mode by default */
+		if (pmd->role == MEMIF_ROLE_MASTER)
+			mq->ring->flags = MEMIF_RING_FLAG_MASK_INT;
+	}
+	/* M2S rings are rx queues on the slave and tx queues on the master */
+	for (i = 0; i < pmd->run.num_m2s_rings; i++) {
+		mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
+		    &pmd->rx_queues[i] : &pmd->tx_queues[i];
+		mq->ring = (memif_ring_t *)
+		    ((uint8_t *)pmd->regions[mq->region].addr + mq->offset);
+		if (mq->ring->cookie != MEMIF_COOKIE) {
+			MIF_LOG(ERR, "%s: Wrong cookie",
+				rte_vdev_device_name(pmd->vdev));
+			return -1;
+		}
+		mq->ring->head = mq->ring->tail = mq->last_head =
+		    mq->last_tail = 0;
+		/* polling mode by default */
+		if (pmd->role == MEMIF_ROLE_SLAVE)
+			mq->ring->flags = MEMIF_RING_FLAG_MASK_INT;
+	}
+
+	pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
+	pmd->flags |= ETH_MEMIF_FLAG_CONNECTED;
+	eth_dev->data->dev_link.link_status = ETH_LINK_UP;
+	MIF_LOG(INFO, "%s: Connected.", rte_vdev_device_name(pmd->vdev));
+	return 0;
+}
+
+/*
+ * dev_start callback: start the connection procedure that matches the
+ * configured role. Returns the result of that procedure, or -1 when the
+ * role is unknown.
+ */
+static int memif_dev_start(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+
+	switch (pmd->role) {
+	case MEMIF_ROLE_SLAVE:
+		return memif_connect_slave(dev);
+	case MEMIF_ROLE_MASTER:
+		return memif_connect_master(dev);
+	default:
+		break;
+	}
+
+	MIF_LOG(ERR, "%s: Unknown role: %d.",
+		rte_vdev_device_name(pmd->vdev), pmd->role);
+	return -1;
+}
+
+/* dev_configure callback: nothing to configure, always reports success. */
+static int memif_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;
+}
+
+/*
+ * tx_queue_setup callback: grow the tx queue array so that it holds queue
+ * 'qid', initialize that queue and publish it to the ethdev layer.
+ * Returns 0 on success, -ENOMEM when the array can not be resized.
+ *
+ * NOTE(review): the array is resized to exactly qid + 1 entries, so queues
+ * are expected to be set up in ascending qid order.
+ */
+static int
+memif_tx_queue_setup(struct rte_eth_dev *dev,
+		     uint16_t qid,
+		     uint16_t nb_tx_desc __rte_unused,
+		     unsigned int socket_id __rte_unused,
+		     const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_queue *queues;
+	struct memif_queue *mq;
+	uint16_t i;
+
+	queues = rte_realloc(pmd->tx_queues,
+			     sizeof(struct memif_queue) * (qid + 1), 0);
+	if (queues == NULL) {
+		MIF_LOG(ERR, "%s: Failed to alloc tx queue %u.",
+			rte_vdev_device_name(pmd->vdev), qid);
+		return -ENOMEM;
+	}
+
+	pmd->tx_queues = queues;
+
+	/* the array may have moved: re-point the queues published earlier */
+	for (i = 0; i < qid; i++) {
+		if (dev->data->tx_queues[i] != NULL)
+			dev->data->tx_queues[i] = &queues[i];
+	}
+
+	/* initialize the queue being set up, not the first array element */
+	mq = &queues[qid];
+	mq->type =
+	    (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_S2M : MEMIF_RING_M2S;
+	mq->n_pkts = 0;
+	mq->n_bytes = 0;
+	mq->n_err = 0;
+	mq->intr_handle.fd = -1;
+	mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
+	mq->pmd = pmd;
+	dev->data->tx_queues[qid] = mq;
+
+	return 0;
+}
+
+/*
+ * rx_queue_setup callback: grow the rx queue array so that it holds queue
+ * 'qid', initialize that queue (including the mempool used for received
+ * packets) and publish it to the ethdev layer.
+ * Returns 0 on success, -ENOMEM when the array can not be resized.
+ *
+ * NOTE(review): the array is resized to exactly qid + 1 entries, so queues
+ * are expected to be set up in ascending qid order.
+ */
+static int
+memif_rx_queue_setup(struct rte_eth_dev *dev,
+		     uint16_t qid,
+		     uint16_t nb_rx_desc __rte_unused,
+		     unsigned int socket_id __rte_unused,
+		     const struct rte_eth_rxconf *rx_conf __rte_unused,
+		     struct rte_mempool *mb_pool)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_queue *queues;
+	struct memif_queue *mq;
+	uint16_t i;
+
+	queues = rte_realloc(pmd->rx_queues,
+			     sizeof(struct memif_queue) * (qid + 1), 0);
+	if (queues == NULL) {
+		MIF_LOG(ERR, "%s: Failed to alloc rx queue %u.",
+			rte_vdev_device_name(pmd->vdev), qid);
+		return -ENOMEM;
+	}
+
+	pmd->rx_queues = queues;
+
+	/* the array may have moved: re-point the queues published earlier */
+	for (i = 0; i < qid; i++) {
+		if (dev->data->rx_queues[i] != NULL)
+			dev->data->rx_queues[i] = &queues[i];
+	}
+
+	/* initialize the queue being set up, not the first array element */
+	mq = &queues[qid];
+	mq->type =
+	    (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_M2S : MEMIF_RING_S2M;
+	mq->n_pkts = 0;
+	mq->n_bytes = 0;
+	mq->n_err = 0;
+	mq->intr_handle.fd = -1;
+	mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
+	mq->mempool = mb_pool;
+	mq->in_port = dev->data->port_id;
+	mq->pmd = pmd;
+	dev->data->rx_queues[qid] = mq;
+
+	return 0;
+}
+
+/*
+ * link_update callback: does nothing and always returns 0. The link status
+ * itself is set to up in memif_connect().
+ */
+static int
+memif_link_update(struct rte_eth_dev *dev __rte_unused,
+		  int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
+/*
+ * stats_get callback: sum the per-queue software counters into 'stats'.
+ * Per-queue values are reported for at most RTE_ETHDEV_QUEUE_STAT_CNTRS
+ * queues in each direction. Always returns 0.
+ */
+static int memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct memif_queue *mq;
+	uint8_t tmp;
+	uint8_t nq;
+	int i;
+
+	stats->ipackets = 0;
+	stats->ibytes = 0;
+	stats->opackets = 0;
+	stats->obytes = 0;
+	stats->oerrors = 0;
+
+	/*
+	 * rx queues sit on the M2S rings of a slave and on the S2M rings of
+	 * a master (same mapping as in memif_connect()).
+	 */
+	tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_m2s_rings :
+	    pmd->run.num_s2m_rings;
+	nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
+	    RTE_ETHDEV_QUEUE_STAT_CNTRS;
+
+	/* RX stats */
+	for (i = 0; i < nq; i++) {
+		mq = &pmd->rx_queues[i];
+		stats->q_ipackets[i] = mq->n_pkts;
+		stats->q_ibytes[i] = mq->n_bytes;
+		stats->ipackets += mq->n_pkts;
+		stats->ibytes += mq->n_bytes;
+	}
+
+	/* tx queues use the rings of the opposite direction */
+	tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_s2m_rings :
+	    pmd->run.num_m2s_rings;
+	nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
+	    RTE_ETHDEV_QUEUE_STAT_CNTRS;
+
+	/* TX stats */
+	for (i = 0; i < nq; i++) {
+		mq = &pmd->tx_queues[i];
+		stats->q_opackets[i] = mq->n_pkts;
+		stats->q_obytes[i] = mq->n_bytes;
+		stats->q_errors[i] = mq->n_err;
+		stats->opackets += mq->n_pkts;
+		stats->obytes += mq->n_bytes;
+		stats->oerrors += mq->n_err;
+	}
+	return 0;
+}
+
+/*
+ * stats_reset callback: clear the packet, byte and error counters of every
+ * queue that is backed by a ring of the active connection.
+ */
+static void memif_stats_reset(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	const int is_slave = (pmd->role == MEMIF_ROLE_SLAVE);
+	/* S2M rings back the slave's tx queues and the master's rx queues */
+	const int n_tx = is_slave ? pmd->run.num_s2m_rings :
+	    pmd->run.num_m2s_rings;
+	const int n_rx = is_slave ? pmd->run.num_m2s_rings :
+	    pmd->run.num_s2m_rings;
+	struct memif_queue *q;
+	int idx;
+
+	for (idx = 0; idx < n_tx; idx++) {
+		q = &pmd->tx_queues[idx];
+		q->n_pkts = 0;
+		q->n_bytes = 0;
+		q->n_err = 0;
+	}
+	for (idx = 0; idx < n_rx; idx++) {
+		q = &pmd->rx_queues[idx];
+		q->n_pkts = 0;
+		q->n_bytes = 0;
+		q->n_err = 0;
+	}
+}
+
+/*
+ * rx_queue_intr_enable callback: interrupt mode is not implemented yet.
+ * Logs a warning and always returns -1; the intended implementation is kept
+ * below as commented-out code.
+ */
+static int
+memif_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+
+	MIF_LOG(WARNING, "%s: Interrupt mode not supported.",
+		rte_vdev_device_name(pmd->vdev));
+
+	/* Enable MEMIF interrupts. */
+	/* pmd->rx_queues[qid].ring->flags  &= ~MEMIF_RING_FLAG_MASK_INT; */
+
+	/*
+	 * TODO: Tell dpdk to use interrupt mode.
+	 *
+	 * return rte_intr_enable(&pmd->rx_queues[qid].intr_handle);
+	 */
+	return -1;
+}
+
+/*
+ * rx_queue_intr_disable callback: currently a no-op that always returns 0,
+ * because interrupt mode can not be enabled in the first place. The
+ * intended implementation is kept below as commented-out code.
+ */
+static int
+memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
+{
+	struct pmd_internals *pmd __rte_unused = dev->data->dev_private;
+
+	/* Disable MEMIF interrupts. */
+	/* pmd->rx_queues[qid].ring->flags |= MEMIF_RING_FLAG_MASK_INT; */
+
+	/*
+	 * TODO: Tell dpdk to use polling mode.
+	 *
+	 * return rte_intr_disable(&pmd->rx_queues[qid].intr_handle);
+	 */
+	return 0;
+}
+
+/* ethdev callbacks implemented by this driver; installed in memif_create() */
+static const struct eth_dev_ops ops = {
+	.dev_start = memif_dev_start,
+	.dev_infos_get = memif_dev_info,
+	.dev_configure = memif_dev_configure,
+	.tx_queue_setup = memif_tx_queue_setup,
+	.rx_queue_setup = memif_rx_queue_setup,
+	.rx_queue_intr_enable = memif_rx_queue_intr_enable,
+	.rx_queue_intr_disable = memif_rx_queue_intr_disable,
+	.link_update = memif_link_update,
+	.stats_get = memif_stats_get,
+	.stats_reset = memif_stats_reset,
+};
+
+/*
+ * Allocate the ethdev for 'vdev' and fill in the driver private data from
+ * the parsed device arguments. Arguments passed as 0 (ring size, queue
+ * counts, buffer size) or NULL (secret, MAC address) select the defaults.
+ *
+ * Returns 0 on success and a negative value on failure; on failure the
+ * ethdev port allocated here is released again.
+ */
+static int
+memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
+	     memif_interface_id_t id, uint32_t flags,
+	     const char *socket_filename,
+	     memif_log2_ring_size_t log2_ring_size, uint8_t nrxq,
+	     uint8_t ntxq, uint16_t buffer_size, const char *secret,
+	     const char *eth_addr)
+{
+	int ret = 0;
+	struct rte_eth_dev *eth_dev;
+	struct rte_eth_dev_data *data;
+	struct pmd_internals *pmd;
+	const unsigned int numa_node = vdev->device.numa_node;
+	const char *name = rte_vdev_device_name(vdev);
+
+	if (flags & ETH_MEMIF_FLAG_ZERO_COPY) {
+		MIF_LOG(ERR, "Zero-copy not supported.");
+		return -1;
+	}
+
+	eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
+	if (eth_dev == NULL) {
+		MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
+		return -1;
+	}
+
+	pmd = eth_dev->data->dev_private;
+	memset(pmd, 0, sizeof(*pmd));
+
+	pmd->if_index = id;
+	pmd->vdev = vdev;
+	pmd->id = id;
+	pmd->flags = flags;
+	pmd->flags |= ETH_MEMIF_FLAG_DISABLED;
+	pmd->role = role;
+
+	/* FIXME: generate mac? */
+	if (eth_addr == NULL)
+		eth_addr = ETH_MEMIF_DEFAULT_ETH_ADDR;
+
+	/*
+	 * Validate the MAC address before the socket is set up, so that a
+	 * malformed address only requires the port to be released.
+	 */
+	if (sscanf(eth_addr, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+		   &pmd->eth_addr.addr_bytes[0], &pmd->eth_addr.addr_bytes[1],
+		   &pmd->eth_addr.addr_bytes[2], &pmd->eth_addr.addr_bytes[3],
+		   &pmd->eth_addr.addr_bytes[4],
+		   &pmd->eth_addr.addr_bytes[5]) != 6) {
+		MIF_LOG(ERR, "%s: Invalid MAC address: %s.", name, eth_addr);
+		ret = -EINVAL;
+		goto error;
+	}
+
+	ret = memif_socket_init(eth_dev, socket_filename);
+	if (ret < 0)
+		goto error;
+
+	/*
+	 * NOTE(review): a secret of 24 or more characters fills the whole
+	 * field and is stored without a terminating NUL - confirm that all
+	 * users treat it as a fixed-size field.
+	 */
+	memset(pmd->secret, 0, sizeof(char) * 24);
+	if (secret != NULL)
+		strncpy(pmd->secret, secret,
+			(strlen(secret) >= 24) ? 24 : strlen(secret));
+
+	pmd->cfg.log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE;
+	if (log2_ring_size != 0)
+		pmd->cfg.log2_ring_size = log2_ring_size;
+	pmd->cfg.num_s2m_rings = ETH_MEMIF_DEFAULT_NRXQ;
+	pmd->cfg.num_m2s_rings = ETH_MEMIF_DEFAULT_NTXQ;
+
+	/* a slave receives on M2S rings and transmits on S2M rings */
+	if (nrxq != 0) {
+		if (role == MEMIF_ROLE_SLAVE)
+			pmd->cfg.num_m2s_rings = nrxq;
+		else
+			pmd->cfg.num_s2m_rings = nrxq;
+	}
+	if (ntxq != 0) {
+		if (role == MEMIF_ROLE_SLAVE)
+			pmd->cfg.num_s2m_rings = ntxq;
+		else
+			pmd->cfg.num_m2s_rings = ntxq;
+	}
+
+	pmd->cfg.buffer_size = ETH_MEMIF_DEFAULT_BUFFER_SIZE;
+	if (buffer_size != 0)
+		pmd->cfg.buffer_size = buffer_size;
+
+	data = eth_dev->data;
+	data->dev_private = pmd;
+	data->numa_node = numa_node;
+	/* points into dev_private: must never be freed on its own */
+	data->mac_addrs = &pmd->eth_addr;
+
+	eth_dev->dev_ops = &ops;
+	eth_dev->device = &vdev->device;
+	eth_dev->rx_pkt_burst = eth_memif_rx;
+	eth_dev->tx_pkt_burst = eth_memif_tx;
+
+	rte_eth_dev_probing_finish(eth_dev);
+
+	return ret;
+
+error:
+	/* do not leave a half-initialized port allocated under this name */
+	rte_eth_dev_release_port(eth_dev);
+	return ret;
+}
+
+/*
+ * kvargs handler for the role argument: stores MEMIF_ROLE_MASTER or
+ * MEMIF_ROLE_SLAVE in *extra_args depending on which word the value
+ * contains ("master" is checked first). Returns -EINVAL otherwise.
+ */
+static int
+memif_set_role(const char *key __rte_unused, const char *value,
+	       void *extra_args)
+{
+	enum memif_role_t *out = (enum memif_role_t *)extra_args;
+
+	if (strstr(value, "master") != NULL) {
+		*out = MEMIF_ROLE_MASTER;
+		return 0;
+	}
+	if (strstr(value, "slave") != NULL) {
+		*out = MEMIF_ROLE_SLAVE;
+		return 0;
+	}
+
+	MIF_LOG(ERR, "Unknown role: %s.", value);
+	return -EINVAL;
+}
+
+/*
+ * kvargs handler for the zero-copy argument: a value containing "yes" sets
+ * ETH_MEMIF_FLAG_ZERO_COPY in the flags word behind extra_args, a value
+ * containing "no" clears it. Returns -EINVAL for anything else.
+ */
+static int
+memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args)
+{
+	uint32_t *out = (uint32_t *) extra_args;
+
+	if (strstr(value, "yes") != NULL) {
+		*out |= ETH_MEMIF_FLAG_ZERO_COPY;
+		return 0;
+	}
+	if (strstr(value, "no") != NULL) {
+		*out &= ~ETH_MEMIF_FLAG_ZERO_COPY;
+		return 0;
+	}
+
+	MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value);
+	return -EINVAL;
+}
+
+/*
+ * kvargs handler for the interface id argument: converts the value with
+ * strtoul() (base 10) and stores it in *extra_args. Conversion errors are
+ * not detected; an unparsable value yields id 0. Always returns 0.
+ */
+static int
+memif_set_id(const char *key __rte_unused, const char *value, void *extra_args)
+{
+	memif_interface_id_t *id = (memif_interface_id_t *) extra_args;
+	/* even if parsing fails, 0 is a valid id */
+	*id = strtoul(value, NULL, 10);
+	return 0;
+}
+
+/*
+ * kvargs handler for the buffer size argument: accepts decimal values in
+ * the range 1..0xFFFF and stores them in the uint16_t behind extra_args.
+ * Returns -EINVAL for values outside of that range.
+ */
+static int
+memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args)
+{
+	uint16_t *out = (uint16_t *) extra_args;
+	unsigned long int parsed = strtoul(value, NULL, 10);
+
+	if (parsed >= 1 && parsed <= 0xFFFF) {
+		*out = parsed;
+		return 0;
+	}
+
+	MIF_LOG(ERR, "Invalid buffer size: %s.", value);
+	return -EINVAL;
+}
+
+/*
+ * kvargs handler for the ring size argument (log2 of the number of
+ * descriptors): accepts decimal values in the range
+ * 1..ETH_MEMIF_MAX_LOG2_RING_SIZE and stores them behind extra_args.
+ * Returns -EINVAL for values outside of that range.
+ */
+static int
+memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args)
+{
+	memif_log2_ring_size_t *out = (memif_log2_ring_size_t *) extra_args;
+	unsigned long int parsed = strtoul(value, NULL, 10);
+
+	if (parsed >= 1 && parsed <= ETH_MEMIF_MAX_LOG2_RING_SIZE) {
+		*out = parsed;
+		return 0;
+	}
+
+	MIF_LOG(ERR, "Invalid ring size: %s (max %u).",
+		value, ETH_MEMIF_MAX_LOG2_RING_SIZE);
+	return -EINVAL;
+}
+
+/*
+ * kvargs handler for the rx/tx queue count arguments: accepts decimal
+ * values in the range 1..0xFF and stores them in the uint8_t behind
+ * extra_args. Returns -EINVAL for values outside of that range.
+ */
+static int
+memif_set_nq(const char *key __rte_unused, const char *value, void *extra_args)
+{
+	unsigned long int tmp;
+	/*
+	 * The probe function passes the address of a uint8_t; storing
+	 * through a wider pointer would overwrite the adjacent variable.
+	 */
+	uint8_t *nq = (uint8_t *) extra_args;
+
+	tmp = strtoul(value, NULL, 10);
+	if ((tmp == 0) || (tmp > 0xFF)) {
+		MIF_LOG(ERR, "Invalid number of queues: %s.", value);
+		return -EINVAL;
+	}
+	*nq = tmp;
+	return 0;
+}
+
+/*
+ * Check that the directory part of 'filename' exists and that we have
+ * permission to read and write it. A filename without any '/' is rejected.
+ *
+ * Returns 0 when the directory is usable, -EINVAL when it is not and -1
+ * when the temporary buffer can not be allocated.
+ */
+static inline int memif_check_socket_filename(const char *filename)
+{
+	const char *slash;
+	char *dir;
+	size_t len;
+	int ret = 0;
+
+	slash = strrchr(filename, '/');
+	if (slash == NULL) {
+		/* there is no directory to print; show the filename instead */
+		MIF_LOG(ERR, "Invalid directory: %s.", filename);
+		return -EINVAL;
+	}
+
+	/* keep the '/' itself when the socket lives in the root directory */
+	len = (slash == filename) ? 1 : (size_t)(slash - filename);
+	dir = rte_zmalloc("memif_tmp", sizeof(char) * (len + 1), 0);
+	if (dir == NULL) {
+		MIF_LOG(ERR, "Failed to allocate memory.");
+		return -1;
+	}
+	memcpy(dir, filename, len);
+
+	/* AT_FDCWD resolves relative paths against the working directory */
+	if (faccessat(AT_FDCWD, dir, F_OK | R_OK | W_OK, AT_EACCESS) < 0) {
+		MIF_LOG(ERR, "Invalid directory: %s.", dir);
+		ret = -EINVAL;
+	}
+
+	rte_free(dir);
+
+	return ret;
+}
+
+/*
+ * vdev probe callback: parse the device arguments on top of the built-in
+ * defaults and create the interface with memif_create().
+ *
+ * Every argument is only taken into account when it appears exactly once.
+ * When rte_kvargs_parse() returns NULL the defaults are used unchanged.
+ * Returns the result of memif_create(), or the negative value reported by
+ * the first argument handler that fails.
+ */
+static int rte_pmd_memif_probe(struct rte_vdev_device *vdev)
+{
+	int ret = 0;
+	unsigned int i;
+	struct rte_kvargs *kvlist;
+	const struct rte_kvargs_pair *pair;
+
+	const char *name = rte_vdev_device_name(vdev);
+
+	enum memif_role_t role;
+	memif_interface_id_t id;
+
+	uint16_t buffer_size;
+	memif_log2_ring_size_t log2_ring_size;
+	uint8_t nrxq, ntxq;
+	const char *socket_filename;
+	const char *eth_addr;
+	uint32_t flags;
+	const char *secret;
+
+	MIF_LOG(INFO, "Initialize MEMIF: %s.", name);
+
+	kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments);
+
+	/* set default values */
+	role = MEMIF_ROLE_SLAVE;
+	flags = 0;
+	id = 0;
+	buffer_size = 2048;
+	log2_ring_size = 10;
+	nrxq = 1;
+	ntxq = 1;
+	socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME;
+	secret = NULL;
+	eth_addr = NULL;
+
+	/* parse parameters */
+	if (kvlist != NULL) {
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_ROLE_ARG) == 1) {
+			ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG,
+						 &memif_set_role, &role);
+			if (ret < 0)
+				goto exit;
+		}
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_ID_ARG) == 1) {
+			ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG,
+						 &memif_set_id, &id);
+			if (ret < 0)
+				goto exit;
+		}
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_BUFFER_SIZE_ARG) == 1) {
+			ret =
+			    rte_kvargs_process(kvlist,
+					       ETH_MEMIF_BUFFER_SIZE_ARG,
+					       &memif_set_bs, &buffer_size);
+			if (ret < 0)
+				goto exit;
+		}
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_RING_SIZE_ARG) == 1) {
+			ret =
+			    rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG,
+					       &memif_set_rs, &log2_ring_size);
+			if (ret < 0)
+				goto exit;
+		}
+		/* nrxq and ntxq are uint8_t; the handler must store 8 bits */
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_NRXQ_ARG) == 1) {
+			ret = rte_kvargs_process(kvlist, ETH_MEMIF_NRXQ_ARG,
+						 &memif_set_nq, &nrxq);
+			if (ret < 0)
+				goto exit;
+		}
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_NTXQ_ARG) == 1) {
+			ret = rte_kvargs_process(kvlist, ETH_MEMIF_NTXQ_ARG,
+						 &memif_set_nq, &ntxq);
+			if (ret < 0)
+				goto exit;
+		}
+		/*
+		 * String arguments are not copied: the pointers below refer
+		 * to storage owned by kvlist, which is freed at 'exit'.
+		 */
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_SOCKET_ARG) == 1) {
+			for (i = 0; i < kvlist->count; i++) {
+				pair = &kvlist->pairs[i];
+				if (strcmp(pair->key, ETH_MEMIF_SOCKET_ARG) ==
+				    0) {
+					socket_filename = pair->value;
+					ret =
+					    memif_check_socket_filename
+					    (socket_filename);
+					if (ret < 0)
+						goto exit;
+				}
+			}
+		}
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_MAC_ARG) == 1) {
+			for (i = 0; i < kvlist->count; i++) {
+				pair = &kvlist->pairs[i];
+				if (strcmp(pair->key, ETH_MEMIF_MAC_ARG) == 0) {
+					eth_addr = pair->value;
+				}
+			}
+		}
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_ZC_ARG) == 1) {
+			ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG,
+						 &memif_set_zc, &flags);
+			if (ret < 0)
+				goto exit;
+		}
+		if (rte_kvargs_count(kvlist, ETH_MEMIF_SECRET_ARG) == 1) {
+			for (i = 0; i < kvlist->count; i++) {
+				pair = &kvlist->pairs[i];
+				if (strcmp(pair->key, ETH_MEMIF_SECRET_ARG) ==
+				    0) {
+					secret = pair->value;
+				}
+			}
+		}
+	}
+
+	/* create interface */
+	ret =
+	    memif_create(vdev, role, id, flags, socket_filename, log2_ring_size,
+			 nrxq, ntxq, buffer_size, secret, eth_addr);
+
+ exit:
+	if (kvlist != NULL)
+		rte_kvargs_free(kvlist);
+	return ret;
+}
+
+/*
+ * vdev remove callback: tell the peer that we are going away, tear down the
+ * connection, detach the device from its socket and release the ethdev
+ * port. Returns 0, also when no port is allocated for 'vdev'.
+ */
+static int rte_pmd_memif_remove(struct rte_vdev_device *vdev)
+{
+	struct rte_eth_dev *eth_dev;
+	struct pmd_internals *pmd;
+
+	eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
+	if (eth_dev == NULL)
+		return 0;
+
+	pmd = eth_dev->data->dev_private;
+
+	memif_msg_enq_disconnect(pmd->cc, "Device removed", 0);
+	memif_disconnect(eth_dev);
+
+	memif_socket_remove_device(pmd);
+
+	pmd->vdev = NULL;
+
+	/*
+	 * rte_eth_dev_release_port() frees dev_private itself. mac_addrs
+	 * points into dev_private and must not be freed on its own, so it
+	 * is cleared instead of being handed to rte_free().
+	 */
+	eth_dev->data->mac_addrs = NULL;
+
+	rte_eth_dev_release_port(eth_dev);
+
+	return 0;
+}
+
+/* vdev driver entry points, registered under the name net_memif below */
+static struct rte_vdev_driver pmd_memif_drv = {
+	.probe = rte_pmd_memif_probe,
+	.remove = rte_pmd_memif_remove,
+};
+
+RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv);
+RTE_PMD_REGISTER_ALIAS(net_memif, eth_memif);
+/*
+ * The fragments are concatenated into a single string, so each one except
+ * the last ends with a space to keep the arguments apart.
+ */
+RTE_PMD_REGISTER_PARAM_STRING(net_memif,
+			      ETH_MEMIF_ID_ARG "=<int> "
+			      ETH_MEMIF_ROLE_ARG "=<string> "
+			      ETH_MEMIF_BUFFER_SIZE_ARG "=<int> "
+			      ETH_MEMIF_RING_SIZE_ARG "=<int> "
+			      ETH_MEMIF_NRXQ_ARG "=<int> "
+			      ETH_MEMIF_NTXQ_ARG "=<int> "
+			      ETH_MEMIF_SOCKET_ARG "=<string> "
+			      ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx "
+			      ETH_MEMIF_ZC_ARG "=<string> "
+			      ETH_MEMIF_SECRET_ARG "=<string>");
+
+/*
+ * Constructor: register the "pmd.net.memif" log type and, when the
+ * registration succeeds, set its level to NOTICE.
+ */
+RTE_INIT(memif_init_log)
+{
+	memif_logtype = rte_log_register("pmd.net.memif");
+	if (memif_logtype >= 0)
+		rte_log_set_level(memif_logtype, RTE_LOG_NOTICE);
+}
diff --git a/drivers/net/memif/rte_eth_memif.h b/drivers/net/memif/rte_eth_memif.h
new file mode 100644
index 000000000..bbd79e1a5
--- /dev/null
+++ b/drivers/net/memif/rte_eth_memif.h
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
+ */
+
+#ifndef _RTE_ETH_MEMIF_H_
+#define _RTE_ETH_MEMIF_H_
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif				/* GNU_SOURCE */
+
+#include <stdio.h>
+#include <sys/queue.h>
+
+#include <rte_ethdev_driver.h>
+#include <rte_ether.h>
+#include <rte_timer.h>
+#include <rte_interrupts.h>
+
+#include <memif.h>
+
+/* MAC address used when none is given (open question: generate one instead?) */
+#define ETH_MEMIF_DEFAULT_ETH_ADDR		"01:ab:23:cd:45:ef"
+
+/* defaults applied when the corresponding device argument is missing */
+#define ETH_MEMIF_DEFAULT_SOCKET_FILENAME	"/tmp/memif.sock"
+#define ETH_MEMIF_DEFAULT_RING_SIZE		10
+#define ETH_MEMIF_DEFAULT_NRXQ			1
+#define ETH_MEMIF_DEFAULT_NTXQ			1
+#define ETH_MEMIF_DEFAULT_BUFFER_SIZE		2048
+
+/* upper limits */
+#define ETH_MEMIF_MAX_NUM_Q_PAIRS		256
+#define ETH_MEMIF_MAX_LOG2_RING_SIZE		14
+#define ETH_MEMIF_MAX_REGION_IDX		255
+
+/*
+ * Log type id, assigned by the RTE_INIT constructor of the driver.
+ * NOTE(review): this is a definition, not an extern declaration, so every
+ * source file including this header gets its own tentative definition.
+ */
+int memif_logtype;
+
+/* smaller of two values; each argument may be evaluated twice */
+#define memif_min(a,b) (((a) < (b)) ? (a) : (b))
+
+/* log 'fmt' prefixed with the calling function's name, newline appended */
+#define MIF_LOG(level, fmt, args...) \
+do {							\
+	rte_log(RTE_LOG_ ## level, memif_logtype,	\
+		"%s(): " fmt "\n", __func__, ##args);	\
+} while (0)
+
+/*
+ * Role of the local end of the interface. Per the comments on
+ * memif_connect() and memif_init_regions_and_queues(), the slave creates
+ * the shared memory and the master maps it.
+ */
+enum memif_role_t {
+	MEMIF_ROLE_MASTER = 0,
+	MEMIF_ROLE_SLAVE = 1,
+};
+
+/* Shared memory region. */
+struct memif_region {
+	void *addr;			/* mapped address, NULL while unmapped */
+	memif_region_size_t region_size;	/* size of the mapping in bytes */
+	int fd;				/* file descriptor backing the region */
+	uint32_t buffer_offset;		/* added to buffer descriptor offsets */
+};
+
+/* Per-queue state; one instance per rx queue and per tx queue. */
+struct memif_queue {
+	struct rte_mempool *mempool;	/* set for rx queues at queue setup */
+	uint16_t in_port;		/* ethdev port id, set for rx queues */
+
+	struct pmd_internals *pmd;	/* owning device */
+
+	struct rte_intr_handle intr_handle;	/* fd is an eventfd, or -1 */
+
+	/* ring info */
+	memif_ring_type_t type;
+	memif_ring_t *ring;
+	memif_log2_ring_size_t log2_ring_size;
+
+	/* location of the ring: region index and byte offset within it */
+	memif_region_index_t region;
+	memif_region_offset_t offset;
+
+	/* locally cached ring positions, reset to 0 on connect */
+	uint16_t last_head;
+	uint16_t last_tail;
+	uint32_t *buffers;	/* NOTE(review): usage not visible here */
+
+	/* rx/tx info */
+	uint64_t n_pkts;
+	uint64_t n_bytes;
+	uint64_t n_err;
+};
+
+/* Driver private data of one memif device (ethdev dev_private). */
+struct pmd_internals {
+	int if_index;			/* set to the interface id on create */
+	memif_interface_id_t id;	/* interface id from the device args */
+	enum memif_role_t role;
+	uint32_t flags;			/* ETH_MEMIF_FLAG_* bits */
+#define ETH_MEMIF_FLAG_CONNECTING	(1 << 0)
+#define ETH_MEMIF_FLAG_CONNECTED	(1 << 1)
+#define ETH_MEMIF_FLAG_ZERO_COPY	(1 << 2)
+/* device has not been configured and can not accept connection requests */
+#define ETH_MEMIF_FLAG_DISABLED		(1 << 3)
+
+	struct ether_addr eth_addr;	/* data->mac_addrs points here */
+	char *socket_filename;
+	/* zero padded; not NUL terminated when all 24 bytes are used */
+	char secret[24];
+
+	/* NOTE(review): presumably the control channel to the peer */
+	struct memif_control_channel *cc;
+
+	struct memif_region *regions;	/* array of regions_num entries */
+	uint8_t regions_num;
+
+	struct memif_queue *rx_queues;	/* array, grown at rx queue setup */
+	struct memif_queue *tx_queues;	/* array, grown at tx queue setup */
+
+	/* remote info */
+	char remote_name[64];
+	char remote_if_name[64];
+
+	/* Configured parameters (max values) */
+	struct {
+		memif_log2_ring_size_t log2_ring_size;
+		uint8_t num_s2m_rings;
+		uint8_t num_m2s_rings;
+		uint16_t buffer_size;
+	} cfg;
+
+	/* Parameters used in active connection */
+	struct {
+		memif_log2_ring_size_t log2_ring_size;
+		uint8_t num_s2m_rings;
+		uint8_t num_m2s_rings;
+		uint16_t buffer_size;
+	} run;
+
+	/* reason strings of the last disconnect, local and remote side */
+	char local_disc_string[96];
+	char remote_disc_string[96];
+
+	/* vdev handle */
+	struct rte_vdev_device *vdev;
+};
+
+/*
+ * Unmap the shared memory regions of the device, close their file
+ * descriptors and free the region array.
+ */
+void memif_free_regions(struct pmd_internals *pmd);
+
+/*
+ * Finalize connection establishment process. Map shared memory file
+ * (master role), initialize ring queue, set link status up.
+ * Returns 0 on success, -1 on failure.
+ */
+int memif_connect(struct pmd_internals *pmd);
+
+/*
+ * Create shared memory file and initialize ring queue.
+ * Only called by slave when establishing connection.
+ * Returns 0 on success, a negative value on failure.
+ */
+int memif_init_regions_and_queues(struct pmd_internals *pmd);
+
+/* NOTE(review): presumably returns a printable version string - confirm */
+const char *memif_version(void);
+
+/*
+ * Fallback for memfd_create(): compiled only when MFD_HUGETLB is not
+ * defined. The syscall number is supplied for x86_64, arm and aarch64 when
+ * the system headers do not define it; other architectures fail the build.
+ * NOTE(review): MFD_HUGETLB is presumably used as a hint that the C library
+ * has no memfd_create() wrapper - confirm.
+ */
+#ifndef MFD_HUGETLB
+#ifndef __NR_memfd_create
+
+#if defined __x86_64__
+#define __NR_memfd_create 319
+#elif defined __arm__
+#define __NR_memfd_create 385
+#elif defined __aarch64__
+#define __NR_memfd_create 279
+#else
+#error "__NR_memfd_create unknown for this architecture"
+#endif
+
+#endif				/* __NR_memfd_create */
+
+/* thin wrapper around the raw system call; returns its result unchanged */
+static inline int memfd_create(const char *name, unsigned int flags)
+{
+	return syscall(__NR_memfd_create, name, flags);
+}
+#endif				/* MFD_HUGETLB */
+
+/*
+ * File sealing constants for system headers that do not provide them.
+ * Each group is defined only when the system headers lack it.
+ */
+#ifndef F_LINUX_SPECIFIC_BASE
+#define F_LINUX_SPECIFIC_BASE 1024
+#endif
+
+#ifndef MFD_ALLOW_SEALING
+#define MFD_ALLOW_SEALING       0x0002U
+#endif
+
+#ifndef F_ADD_SEALS
+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
+
+#define F_SEAL_SEAL     0x0001	/* prevent further seals from being set */
+#define F_SEAL_SHRINK   0x0002	/* prevent file from shrinking */
+#define F_SEAL_GROW     0x0004	/* prevent file from growing */
+#define F_SEAL_WRITE    0x0008	/* prevent writes */
+#endif
+
+#endif				/* _RTE_ETH_MEMIF_H_ */
diff --git a/drivers/net/memif/rte_pmd_memif_version.map b/drivers/net/memif/rte_pmd_memif_version.map
new file mode 100644
index 000000000..aee560afa
--- /dev/null
+++ b/drivers/net/memif/rte_pmd_memif_version.map
@@ -0,0 +1,4 @@
+DPDK_2.0 {
+
+        local: *;
+};
diff --git a/drivers/net/meson.build b/drivers/net/meson.build
index 980eec233..b0becbf31 100644
--- a/drivers/net/meson.build
+++ b/drivers/net/meson.build
@@ -21,6 +21,7 @@ drivers = ['af_packet',
 	'ixgbe',
 	'kni',
 	'liquidio',
+	'memif',
 	'mlx4',
 	'mlx5',
 	'mvneta',
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 5699d979d..f236c5ebc 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -168,6 +168,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI)        += -lrte_pmd_kni
 endif
 _LDLIBS-$(CONFIG_RTE_LIBRTE_LIO_PMD)        += -lrte_pmd_lio
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF)      += -lrte_pmd_memif
 ifeq ($(CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4 -ldl
 else
-- 
2.17.1

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [RFC v2] /net: memory interface (memif)
  2018-12-10 10:06 ` [dpdk-dev] [RFC v2] " Jakub Grajciar
@ 2018-12-10 10:42   ` Burakov, Anatoly
  2018-12-10 10:50     ` Richardson, Bruce
  2018-12-10 14:48   ` Wiles, Keith
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 13+ messages in thread
From: Burakov, Anatoly @ 2018-12-10 10:42 UTC (permalink / raw)
  To: Jakub Grajciar, dev

On 10-Dec-18 10:06 AM, Jakub Grajciar wrote:
> Signed-off-by: Jakub Grajciar <jgrajcia@cisco.com>
> ---

As a general comment, some description/cover letter would have been nice.

> +
> +	memif_msg_disconnect_t *d = &e->msg.disconnect;
> +
> +	e->msg.type = MEMIF_MSG_TYPE_DISCONNECT;
> +	d->code = err_code;
> +
> +	if (reason != NULL) {
> +		strncpy((char *)d->string, reason, strlen(reason));
> +		if (cc->pmd != NULL) {
> +			strncpy(cc->pmd->local_disc_string, reason,
> +				strlen(reason));
> +		}

I haven't looked at the entire thing, this is just something that caught 
my eye during quick skimming through code.

On the face of it, this looks dangerous - you're setting the destination 
buffer size from source buffer size. What if `d->string` is shorter than 
`reason`?


-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [RFC v2] /net: memory interface (memif)
  2018-12-10 10:42   ` Burakov, Anatoly
@ 2018-12-10 10:50     ` Richardson, Bruce
  2018-12-12 10:19       ` Burakov, Anatoly
  0 siblings, 1 reply; 13+ messages in thread
From: Richardson, Bruce @ 2018-12-10 10:50 UTC (permalink / raw)
  To: Burakov, Anatoly, Jakub Grajciar, dev



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Burakov, Anatoly
> Sent: Monday, December 10, 2018 10:43 AM
> To: Jakub Grajciar <jgrajcia@cisco.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC v2] /net: memory interface (memif)
> 
> On 10-Dec-18 10:06 AM, Jakub Grajciar wrote:
> > Signed-off-by: Jakub Grajciar <jgrajcia@cisco.com>
> > ---
> 
> As a general comment, some description/cover letter would have been nice.
> 
> > +
> > +	memif_msg_disconnect_t *d = &e->msg.disconnect;
> > +
> > +	e->msg.type = MEMIF_MSG_TYPE_DISCONNECT;
> > +	d->code = err_code;
> > +
> > +	if (reason != NULL) {
> > +		strncpy((char *)d->string, reason, strlen(reason));
> > +		if (cc->pmd != NULL) {
> > +			strncpy(cc->pmd->local_disc_string, reason,
> > +				strlen(reason));
> > +		}
> 
> I haven't looked at the entire thing, this is just something that caught
> my eye during quick skimming through code.
> 
> On the face of it, this looks dangerous - you're setting the destination
> buffer size from source buffer size. What if `d->string` is shorter than
> `reason`?
>

And strncpy is dangerous - use strlcpy instead.

/Bruce

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [RFC v2] /net: memory interface (memif)
  2018-12-10 10:06 ` [dpdk-dev] [RFC v2] " Jakub Grajciar
  2018-12-10 10:42   ` Burakov, Anatoly
@ 2018-12-10 14:48   ` Wiles, Keith
  2018-12-10 15:13     ` Wiles, Keith
  2018-12-10 16:20   ` Stephen Hemminger
  2018-12-11  7:39   ` Ananyev, Konstantin
  3 siblings, 1 reply; 13+ messages in thread
From: Wiles, Keith @ 2018-12-10 14:48 UTC (permalink / raw)
  To: Jakub Grajciar; +Cc: dev



> On Dec 10, 2018, at 4:06 AM, Jakub Grajciar <jgrajcia@cisco.com> wrote:

I do not like being the coding style police, but that is most of the comments here, and I will try to test this one later this week. Plus, I am sure I missed some style problems; if you have not read the coding style for DPDK, please have a read.

http://doc.dpdk.org/guides/contributing/coding_style.html

One comment: why did you include all of the code to handle memif instead of including libmemif.a from VPP? I worry that if libmemif is changed then we have a breakage. I do not mind the PMD being standalone and I do like not having the dependency.

As I did not dive into the code much it does look reasonable and I hope to give it a try later this week.
> 
> Signed-off-by: Jakub Grajciar <jgrajcia@cisco.com>
> ---
> config/common_base                          |    5 +
> config/common_linuxapp                      |    1 +
> drivers/net/Makefile                        |    1 +
> drivers/net/memif/Makefile                  |   29 +
> drivers/net/memif/memif.h                   |  156 +++
> drivers/net/memif/memif_socket.c            | 1085 +++++++++++++++++
> drivers/net/memif/memif_socket.h            |   57 +
> drivers/net/memif/meson.build               |    8 +
> drivers/net/memif/rte_eth_memif.c           | 1172 +++++++++++++++++++
> drivers/net/memif/rte_eth_memif.h           |  189 +++
> drivers/net/memif/rte_pmd_memif_version.map |    4 +
> drivers/net/meson.build                     |    1 +
> mk/rte.app.mk                               |    1 +
> 13 files changed, 2709 insertions(+)
> create mode 100644 drivers/net/memif/Makefile
> create mode 100644 drivers/net/memif/memif.h
> create mode 100644 drivers/net/memif/memif_socket.c
> create mode 100644 drivers/net/memif/memif_socket.h
> create mode 100644 drivers/net/memif/meson.build
> create mode 100644 drivers/net/memif/rte_eth_memif.c
> create mode 100644 drivers/net/memif/rte_eth_memif.h
> create mode 100644 drivers/net/memif/rte_pmd_memif_version.map
> 
> diff --git a/config/common_base b/config/common_base
> index d12ae98bc..b8ed10ae5 100644
> --- a/config/common_base
> +++ b/config/common_base
> @@ -403,6 +403,11 @@ CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_TX_FREE=n
> #
> CONFIG_RTE_LIBRTE_PMD_AF_PACKET=n
> 
> +#
> +# Compile Memory Interface PMD driver (Linux only)
> +#
> +CONFIG_RTE_LIBRTE_PMD_MEMIF=n
> +
> #
> # Compile link bonding PMD library
> #
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 6c1c8d0f4..42cbde8f5 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -18,6 +18,7 @@ CONFIG_RTE_LIBRTE_VHOST_POSTCOPY=n
> CONFIG_RTE_LIBRTE_PMD_VHOST=y
> CONFIG_RTE_LIBRTE_IFC_PMD=y
> CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
> +CONFIG_RTE_LIBRTE_PMD_MEMIF=y
> CONFIG_RTE_LIBRTE_PMD_SOFTNIC=y
> CONFIG_RTE_LIBRTE_PMD_TAP=y
> CONFIG_RTE_LIBRTE_AVP_PMD=y
> diff --git a/drivers/net/Makefile b/drivers/net/Makefile
> index c0386feb9..0feab5241 100644
> --- a/drivers/net/Makefile
> +++ b/drivers/net/Makefile
> @@ -32,6 +32,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
> DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
> DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
> DIRS-$(CONFIG_RTE_LIBRTE_LIO_PMD) += liquidio
> +DIRS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += memif
> DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
> DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
> DIRS-$(CONFIG_RTE_LIBRTE_MVNETA_PMD) += mvneta
> diff --git a/drivers/net/memif/Makefile b/drivers/net/memif/Makefile
> new file mode 100644
> index 000000000..a82448423
> --- /dev/null
> +++ b/drivers/net/memif/Makefile
> @@ -0,0 +1,29 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright 2018 Cisco Systems, Inc.  All rights reserved.
> +
> +include $(RTE_SDK)/mk/rte.vars.mk
> +
> +#
> +# library name
> +#
> +LIB = librte_pmd_memif.a
> +
> +EXPORT_MAP := rte_pmd_memif_version.map
> +
> +LIBABIVER := 1
> +
> +CFLAGS += -O3
> +CFLAGS += -I$(SRCDIR)
> +CFLAGS += $(WERROR_FLAGS)
> +CFLAGS += -Wno-pointer-arith
> +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
> +LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
> +LDLIBS += -lrte_bus_vdev
> +
> +#
> +# all source are stored in SRCS-y
> +#
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += rte_eth_memif.c
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += memif_socket.c
> +
> +include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/drivers/net/memif/memif.h b/drivers/net/memif/memif.h
> new file mode 100644
> index 000000000..6a23dbad7
> --- /dev/null
> +++ b/drivers/net/memif/memif.h
> @@ -0,0 +1,156 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
> + */
> +
> +#ifndef _MEMIF_H_
> +#define _MEMIF_H_
> +
> +#ifndef MEMIF_CACHELINE_SIZE
> +#define MEMIF_CACHELINE_SIZE 64
> +#endif
> +
> +#define MEMIF_COOKIE		0x3E31F20
> +#define MEMIF_VERSION_MAJOR	2
> +#define MEMIF_VERSION_MINOR	0
> +#define MEMIF_VERSION		((MEMIF_VERSION_MAJOR << 8) | MEMIF_VERSION_MINOR)
> +
> +/*
> + *  Type definitions
> + */
> +
> +typedef enum memif_msg_type {
> +	MEMIF_MSG_TYPE_NONE = 0,
> +	MEMIF_MSG_TYPE_ACK = 1,
> +	MEMIF_MSG_TYPE_HELLO = 2,
> +	MEMIF_MSG_TYPE_INIT = 3,
> +	MEMIF_MSG_TYPE_ADD_REGION = 4,
> +	MEMIF_MSG_TYPE_ADD_RING = 5,
> +	MEMIF_MSG_TYPE_CONNECT = 6,
> +	MEMIF_MSG_TYPE_CONNECTED = 7,
> +	MEMIF_MSG_TYPE_DISCONNECT = 8,
> +} memif_msg_type_t;
> +
> +typedef enum {
> +	MEMIF_RING_S2M = 0,
> +	MEMIF_RING_M2S = 1
> +} memif_ring_type_t;
> +
> +typedef enum {
> +	MEMIF_INTERFACE_MODE_ETHERNET = 0,
> +	MEMIF_INTERFACE_MODE_IP = 1,
> +	MEMIF_INTERFACE_MODE_PUNT_INJECT = 2,
> +} memif_interface_mode_t;
> +
> +typedef uint16_t memif_region_index_t;
> +typedef uint32_t memif_region_offset_t;
> +typedef uint64_t memif_region_size_t;
> +typedef uint16_t memif_ring_index_t;
> +typedef uint32_t memif_interface_id_t;
> +typedef uint16_t memif_version_t;
> +typedef uint8_t memif_log2_ring_size_t;
> +
> +/*
> + *  Socket messages
> + */
> +
> +typedef struct __attribute__ ((packed)) {
> +	uint8_t name[32];
> +	memif_version_t min_version;
> +	memif_version_t max_version;
> +	memif_region_index_t max_region;
> +	memif_ring_index_t max_m2s_ring;
> +	memif_ring_index_t max_s2m_ring;
> +	memif_log2_ring_size_t max_log2_ring_size;
> +} memif_msg_hello_t;

Normally we have comments after the members of a structure for documentation; not sure how everyone else wants it done.

	char foo;	/**< The description of foo */
> +
> +typedef struct __attribute__ ((packed)) {
> +	memif_version_t version;
> +	memif_interface_id_t id;
> +	memif_interface_mode_t mode:8;
> +	uint8_t secret[24];
> +	uint8_t name[32];
> +} memif_msg_init_t;
> +
> +typedef struct __attribute__ ((packed)) {
> +	memif_region_index_t index;
> +	memif_region_size_t size;
> +} memif_msg_add_region_t;
> +
> +typedef struct __attribute__ ((packed)) {
> +	uint16_t flags;
> +#define MEMIF_MSG_ADD_RING_FLAG_S2M	(1 << 0)
> +	memif_ring_index_t index;
> +	memif_region_index_t region;
> +	memif_region_offset_t offset;
> +	memif_log2_ring_size_t log2_ring_size;
> +	uint16_t private_hdr_size;	/* used for private metadata */
> +} memif_msg_add_ring_t;
> +
> +typedef struct __attribute__ ((packed)) {
> +	uint8_t if_name[32];
> +} memif_msg_connect_t;
> +
> +typedef struct __attribute__ ((packed)) {
> +	uint8_t if_name[32];
> +} memif_msg_connected_t;
> +
> +typedef struct __attribute__ ((packed)) {
> +	uint32_t code;
> +	uint8_t string[96];
> +} memif_msg_disconnect_t;
> +
> +typedef struct __attribute__ ((packed, aligned(128))) {
> +	memif_msg_type_t type:16;

We normally use RTE_STD_C11 before unions that have no variable name and whose internal member names are used directly.
> +	union {
> +		memif_msg_hello_t hello;
> +		memif_msg_init_t init;
> +		memif_msg_add_region_t add_region;
> +		memif_msg_add_ring_t add_ring;
> +		memif_msg_connect_t connect;
> +		memif_msg_connected_t connected;
> +		memif_msg_disconnect_t disconnect;
> +	};
> +} memif_msg_t;
> +
> +_Static_assert(sizeof(memif_msg_t) == 128, "Size of memif_msg_t must be 128");
> +
> +/*
> + *  Ring and Descriptor Layout
> + */
> +
> +typedef struct __attribute__ ((packed)) {
> +	uint16_t flags;
> +#define MEMIF_DESC_FLAG_NEXT (1 << 0)
> +	memif_region_index_t region;
> +	uint32_t length;
> +	memif_region_offset_t offset;
> +	uint32_t metadata;
> +} memif_desc_t;
> +
> +_Static_assert(sizeof(memif_desc_t) == 16,
> +	       "Size of memif_dsct_t must be 16 bytes");
> +
> +#define MEMIF_CACHELINE_ALIGN_MARK(mark) \
> +  uint8_t mark[0] __attribute__((aligned(MEMIF_CACHELINE_SIZE)))
> +
> +typedef struct {
> +	MEMIF_CACHELINE_ALIGN_MARK(cacheline0);
> +	uint32_t cookie;
> +	uint16_t flags;
> +#define MEMIF_RING_FLAG_MASK_INT 1
> +	volatile uint16_t head;
> +	 MEMIF_CACHELINE_ALIGN_MARK(cacheline1);
> +	volatile uint16_t tail;
> +	 MEMIF_CACHELINE_ALIGN_MARK(cacheline2);
> +	memif_desc_t desc[0];
> +} memif_ring_t;

> +
> +#endif				/* _MEMIF_H_ */
> +
> +/*
> + * fd.io coding-style-patch-verification: ON
> + *
> + * Local Variables:
> + * eval: (c-set-style "gnu")
> + * End:
> + */
> diff --git a/drivers/net/memif/memif_socket.c b/drivers/net/memif/memif_socket.c
> new file mode 100644
> index 000000000..afd4ac888
> --- /dev/null
> +++ b/drivers/net/memif/memif_socket.c
> @@ -0,0 +1,1085 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
> + */
> +
> +#include <stdlib.h>
> +#include <fcntl.h>
> +#include <unistd.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <sys/ioctl.h>
> +#include <errno.h>
> +
> +#include <rte_version.h>
> +#include <rte_mbuf.h>
> +#include <rte_ether.h>
> +#include <rte_ethdev_driver.h>
> +#include <rte_ethdev_vdev.h>
> +#include <rte_malloc.h>
> +#include <rte_kvargs.h>
> +#include <rte_bus_vdev.h>
> +#include <rte_hash.h>
> +#include <rte_jhash.h>
> +
> +#include <rte_eth_memif.h>
> +#include <memif_socket.h>
> +
> +static void memif_intr_handler(void *arg);
> +
> +static inline ssize_t memif_msg_send(int fd, memif_msg_t * msg, int afd)

The coding guide states that the function type should be on a line by itself (section 1.7.2):
http://doc.dpdk.org/guides/contributing/coding_style.html

> +{
> +	struct msghdr mh = { 0 };
> +	struct iovec iov[1];
> +	char ctl[CMSG_SPACE(sizeof(int))];
> +
> +	iov[0].iov_base = (void *)msg;
> +	iov[0].iov_len = sizeof(memif_msg_t);
> +	mh.msg_iov = iov;
> +	mh.msg_iovlen = 1;
> +
> +	if (afd > 0) {
> +		struct cmsghdr *cmsg;
> +		memset(&ctl, 0, sizeof(ctl));
> +		mh.msg_control = ctl;
> +		mh.msg_controllen = sizeof(ctl);
> +		cmsg = CMSG_FIRSTHDR(&mh);
> +		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
> +		cmsg->cmsg_level = SOL_SOCKET;
> +		cmsg->cmsg_type = SCM_RIGHTS;
> +		rte_memcpy(CMSG_DATA(cmsg), &afd, sizeof(int));
> +	}
> +
> +	return sendmsg(fd, &mh, 0);
> +}
> +
> +static inline int memif_msg_send_from_queue(struct memif_control_channel *cc)
> +{
> +	ssize_t size;
> +	int ret = 0;
> +	struct memif_msg_queue_elt *e;

Leave a blank line between the variable declarations and the first statement of the function; this also applies to declarations inside a block of code {}.
> +	e = TAILQ_FIRST(&cc->msg_queue);
> +	if (e == NULL)
> +		return 0;
> +
> +	size = memif_msg_send(cc->intr_handle.fd, &e->msg, e->fd);
> +	if (size != sizeof(memif_msg_t)) {
> +		MIF_LOG(ERR, "sendmsg fail: %s.", strerror(errno));
> +		ret = -1;
> +	} else {
> +		MIF_LOG(DEBUG, "%s: Sent msg type %u.",
> +			(cc->pmd !=
> +			 NULL) ? rte_vdev_device_name(cc->pmd->
> +						      vdev) : "memif_driver",
> +			e->msg.type);
> +	}
> +	TAILQ_REMOVE(&cc->msg_queue, e, next);
> +	rte_free(e);
> +
> +	return ret;
> +}
> +
> +static inline struct memif_msg_queue_elt *memif_msg_enq(struct
> +							memif_control_channel
> +							*cc)
> +{
> +	struct memif_msg_queue_elt *e = rte_zmalloc("memif_msg",
> +						    sizeof(struct
> +							   memif_msg_queue_elt),
> +						    0);

> +	if (e == NULL) {
> +		MIF_LOG(ERR, "Failed to allocate control message.");
> +		return NULL;
> +	}
> +
> +	e->fd = -1;
> +	TAILQ_INSERT_TAIL(&cc->msg_queue, e, next);
> +
> +	return e;
> +}
> +
> +void
> +memif_msg_enq_disconnect(struct memif_control_channel *cc, const char *reason,
> +			 int err_code)
> +{
> +	struct memif_msg_queue_elt *e = memif_msg_enq(cc);


> +	if (e == NULL) {
> +		MIF_LOG(WARNING, "%s: Failed to enqueue disconnect message.",
> +			(cc->pmd !=
> +			 NULL) ? rte_vdev_device_name(cc->pmd->
> +						      vdev) : "memif_driver");
> +		return;
> +	}
> +
> +	memif_msg_disconnect_t *d = &e->msg.disconnect;
> +
> +	e->msg.type = MEMIF_MSG_TYPE_DISCONNECT;
> +	d->code = err_code;
> +
> +	if (reason != NULL) {
> +		strncpy((char *)d->string, reason, strlen(reason));
> +		if (cc->pmd != NULL) {
> +			strncpy(cc->pmd->local_disc_string, reason,
> +				strlen(reason));
> +		}
> +	}
> +}
> +
> +static int memif_msg_enq_hello(struct memif_control_channel *cc)
> +{
> +	struct memif_msg_queue_elt *e = memif_msg_enq(cc);


> +	if (e == NULL)
> +		return -1;
> +
> +	memif_msg_hello_t *h = &e->msg.hello;
> +
> +	e->msg.type = MEMIF_MSG_TYPE_HELLO;
> +	h->min_version = MEMIF_VERSION;
> +	h->max_version = MEMIF_VERSION;
> +	h->max_s2m_ring = ETH_MEMIF_MAX_NUM_Q_PAIRS;
> +	h->max_m2s_ring = ETH_MEMIF_MAX_NUM_Q_PAIRS;
> +	h->max_region = ETH_MEMIF_MAX_REGION_IDX;
> +	h->max_log2_ring_size = ETH_MEMIF_MAX_LOG2_RING_SIZE;
> +
> +	strncpy((char *)h->name, rte_version(), strlen(rte_version()));
> +
> +	return 0;
> +}
> +
> +static int memif_msg_receive_hello(struct pmd_internals *pmd, memif_msg_t * msg)
> +{
> +	memif_msg_hello_t *h = &msg->hello;
> +
> +	if (h->min_version > MEMIF_VERSION || h->max_version < MEMIF_VERSION) {
> +		memif_msg_enq_disconnect(pmd->cc, "Incompatible memif version",
> +					 0);
> +		return -1;
> +	}
> +
> +	/* Set parameters for active connection */
> +	pmd->run.num_s2m_rings = memif_min(h->max_s2m_ring + 1,
> +					   pmd->cfg.num_s2m_rings);
> +	pmd->run.num_m2s_rings = memif_min(h->max_m2s_ring + 1,
> +					   pmd->cfg.num_m2s_rings);
> +	pmd->run.log2_ring_size = memif_min(h->max_log2_ring_size,
> +					    pmd->cfg.log2_ring_size);
> +	pmd->run.buffer_size = pmd->cfg.buffer_size;
> +
> +	strncpy(pmd->remote_name, (char *)h->name, strlen((char *)h->name));
> +
> +	MIF_LOG(DEBUG, "%s: Connecting to %s.",
> +		rte_vdev_device_name(pmd->vdev), pmd->remote_name);
> +
> +	return 0;
> +}
> +
> +static int
> +memif_msg_receive_init(struct memif_control_channel *cc, memif_msg_t * msg)
> +{
> +	memif_msg_init_t *i = &msg->init;
> +	struct memif_socket_pmd_list_elt *elt;
> +	struct pmd_internals *pmd;
> +
> +	if (i->version != MEMIF_VERSION) {
> +		memif_msg_enq_disconnect(cc, "Incompatible memif version", 0);
> +		return -1;
> +	}
> +
> +	if (cc->socket == NULL) {
> +		memif_msg_enq_disconnect(cc, "Device error", 0);
> +		return -1;
> +	}
> +
> +	/* Find device with requested ID */
> +	TAILQ_FOREACH(elt, &cc->socket->pmd_queue, next) {
> +		pmd = elt->pmd;
> +		if (((pmd->flags & ETH_MEMIF_FLAG_DISABLED) == 0)
> +		    && (pmd->id == i->id)) {
> +			/* assign control channel to device */
> +			cc->pmd = pmd;
> +			pmd->cc = cc;
> +
> +			if (i->mode != MEMIF_INTERFACE_MODE_ETHERNET) {
> +				memif_msg_enq_disconnect(pmd->cc,
> +							 "Only ethernet mode supported",
> +							 0);
> +				return -1;
> +			}
> +
> +			if (pmd->flags && (ETH_MEMIF_FLAG_CONNECTING |
> +					   ETH_MEMIF_FLAG_CONNECTED)) {
> +				memif_msg_enq_disconnect(pmd->cc,
> +							 "Already connected",
> +							 0);
> +				return -1;
> +			}
> +			strncpy(pmd->remote_name, (char *)i->name,
> +				strlen((char *)i->name));
> +
> +			if (*pmd->secret != '\0') {
> +				if (*i->secret == '\0') {
> +					memif_msg_enq_disconnect(pmd->cc,
> +								 "Secret required",
> +								 0);
> +					return -1;
> +				}
> +				if (strcmp(pmd->secret, (char *)i->secret) != 0) {
> +					memif_msg_enq_disconnect(pmd->cc,
> +								 "Incorrect secret",
> +								 0);
> +					return -1;
> +				}
> +			}
> +
> +			pmd->flags |= ETH_MEMIF_FLAG_CONNECTING;
> +			return 0;
> +		}
> +	}
> +
> +	/* ID not found on this socket */
> +	MIF_LOG(DEBUG, "ID %u not found.", i->id);
> +	memif_msg_enq_disconnect(cc, "ID not found", 0);
> +	return -1;
> +}
> +
> +static int
> +memif_msg_receive_add_region(struct pmd_internals *pmd, memif_msg_t * msg,
> +			     int fd)
> +{
> +	memif_msg_add_region_t *ar = &msg->add_region;
> +
> +	if (fd < 0) {
> +		memif_msg_enq_disconnect(pmd->cc, "Missing region fd", 0);
> +		return -1;
> +	}
> +
> +	struct memif_region *mr;
> +
> +	if (ar->index > ETH_MEMIF_MAX_REGION_IDX) {
> +		memif_msg_enq_disconnect(pmd->cc, "Invalid region index", 0);
> +		return -1;
> +	}
> +
> +	mr = rte_realloc(pmd->regions, sizeof(struct memif_region) *
> +			 (ar->index + 1), 0);
> +	if (mr == NULL) {
> +		memif_msg_enq_disconnect(pmd->cc, "Device error", 0);
> +		return -1;
> +	}
> +
> +	pmd->regions = mr;
> +	pmd->regions[ar->index].fd = fd;
> +	pmd->regions[ar->index].region_size = ar->size;
> +	pmd->regions[ar->index].addr = NULL;
> +	pmd->regions_num++;
> +
> +	return 0;
> +}
> +
> +static int
> +memif_msg_receive_add_ring(struct pmd_internals *pmd, memif_msg_t * msg, int fd)
> +{
> +	memif_msg_add_ring_t *ar = &msg->add_ring;
> +
> +	if (fd < 0) {
> +		memif_msg_enq_disconnect(pmd->cc, "Missing interrupt fd", 0);
> +		return -1;
> +	}
> +
> +	struct memif_queue *mq;
> +
> +	/* check if we have enough queues */
> +	if (ar->flags & MEMIF_MSG_ADD_RING_FLAG_S2M) {
> +		if (ar->index >= pmd->cfg.num_s2m_rings) {
> +			memif_msg_enq_disconnect(pmd->cc, "Invalid ring index",
> +						 0);
> +			return -1;
> +		}
> +		pmd->run.num_s2m_rings++;
> +	} else {
> +		if (ar->index >= pmd->cfg.num_m2s_rings) {
> +			memif_msg_enq_disconnect(pmd->cc, "Invalid ring index",
> +						 0);
> +			return -1;
> +		}
> +		pmd->run.num_m2s_rings++;
> +	}
> +
> +	mq = (ar->flags & MEMIF_MSG_ADD_RING_FLAG_S2M) ?
> +	    &pmd->rx_queues[ar->index] : &pmd->tx_queues[ar->index];
> +
> +	mq->intr_handle.fd = fd;
> +	mq->log2_ring_size = ar->log2_ring_size;
> +	mq->region = ar->region;
> +	mq->offset = ar->offset;
> +
> +	return 0;
> +}
> +
> +static int
> +memif_msg_receive_connect(struct pmd_internals *pmd, memif_msg_t * msg)
> +{
> +	memif_msg_connect_t *c = &msg->connect;
> +	int ret;
> +
> +	ret = memif_connect(pmd);
> +	if (ret < 0)
> +		return ret;
> +
> +	strncpy(pmd->remote_if_name, (char *)c->if_name,
> +		strlen((char *)c->if_name));
> +	MIF_LOG(INFO, "%s: Remote interface %s connected.",
> +		rte_vdev_device_name(pmd->vdev), pmd->remote_if_name);
> +
> +	return 0;
> +}
> +
> +static int
> +memif_msg_receive_connected(struct pmd_internals *pmd, memif_msg_t * msg)
> +{
> +	memif_msg_connected_t *c = &msg->connected;
> +	int ret;
> +
> +	ret = memif_connect(pmd);
> +	if (ret < 0)
> +		return ret;
> +
> +	strncpy(pmd->remote_if_name, (char *)c->if_name,
> +		strlen((char *)c->if_name));
> +	MIF_LOG(INFO, "%s: Remote interface %s connected.",
> +		rte_vdev_device_name(pmd->vdev), pmd->remote_if_name);
> +
> +	return 0;
> +}
> +
> +static int
> +memif_msg_receive_disconnect(struct pmd_internals *pmd, memif_msg_t * msg)
> +{
> +	memif_msg_disconnect_t *d = &msg->disconnect;
> +
> +	memset(pmd->remote_disc_string, 0, sizeof(pmd->remote_disc_string));
> +	strncpy(pmd->remote_disc_string, (char *)d->string,
> +		strlen((char *)d->string));
> +
> +	MIF_LOG(INFO, "%s: Disconnect received: %s",
> +		rte_vdev_device_name(pmd->vdev), pmd->remote_disc_string);
> +
> +	memset(pmd->local_disc_string, 0, 96);
> +	memif_disconnect(rte_eth_dev_allocated
> +			 (rte_vdev_device_name(pmd->vdev)));
> +	return 0;
> +}
> +
> +static int memif_msg_enq_ack(struct pmd_internals *pmd)
> +{
> +	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
> +	if (e == NULL)
> +		return -1;
> +
> +	e->msg.type = MEMIF_MSG_TYPE_ACK;
> +
> +	return 0;
> +}
> +
> +static int memif_msg_enq_init(struct pmd_internals *pmd)
> +{
> +	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
> +	if (e == NULL)
> +		return -1;
> +
> +	memif_msg_init_t *i = &e->msg.init;
> +
> +	e->msg.type = MEMIF_MSG_TYPE_INIT;
> +	i->version = MEMIF_VERSION;
> +	i->id = pmd->id;
> +	i->mode = MEMIF_INTERFACE_MODE_ETHERNET;
> +
> +	strncpy((char *)i->name, rte_version(), strlen(rte_version()));
> +
> +	if (pmd->secret) {
> +		strncpy((char *)i->secret, pmd->secret, sizeof(i->secret) - 1);
> +	}

DPDK does not put {} around single-line blocks of code for if, while, for, ...

> +
> +	return 0;
> +}
> +
> +static int memif_msg_enq_add_region(struct pmd_internals *pmd, uint8_t idx)
> +{
> +	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);

> +	if (e == NULL)
> +		return -1;
> +
> +	memif_msg_add_region_t *ar = &e->msg.add_region;
> +	struct memif_region *mr = &pmd->regions[idx];
> +
> +	e->msg.type = MEMIF_MSG_TYPE_ADD_REGION;
> +	e->fd = mr->fd;
> +	ar->index = idx;
> +	ar->size = mr->region_size;
> +
> +	return 0;
> +}
> +
> +static int
> +memif_msg_enq_add_ring(struct pmd_internals *pmd, uint8_t idx,
> +		       memif_ring_type_t type)
> +{
> +	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);

> +	if (e == NULL)
> +		return -1;
> +
> +	memif_msg_add_ring_t *ar = &e->msg.add_ring;
> +	struct memif_queue *mq;
> +
> +	mq = (type == MEMIF_RING_S2M) ? &pmd->tx_queues[idx] :
> +	    &pmd->rx_queues[idx];
> +
> +	e->msg.type = MEMIF_MSG_TYPE_ADD_RING;
> +	e->fd = mq->intr_handle.fd;
> +	ar->index = idx;
> +	ar->offset = mq->offset;
> +	ar->region = mq->region;
> +	ar->log2_ring_size = mq->log2_ring_size;
> +	ar->flags = (type == MEMIF_RING_S2M) ? MEMIF_MSG_ADD_RING_FLAG_S2M : 0;
> +	ar->private_hdr_size = 0;
> +
> +	return 0;
> +}
> +
> +static int memif_msg_enq_connect(struct pmd_internals *pmd)
> +{
> +	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);

> +	if (e == NULL)
> +		return -1;
> +
> +	memif_msg_connect_t *c = &e->msg.connect;
> +	const char *name = rte_vdev_device_name(pmd->vdev);
> +
> +	e->msg.type = MEMIF_MSG_TYPE_CONNECT;
> +	strncpy((char *)c->if_name, name, strlen(name));
> +
> +	return 0;
> +}
> +
> +static int memif_msg_enq_connected(struct pmd_internals *pmd)
> +{
> +	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);

> +	if (e == NULL)
> +		return -1;
> +
> +	memif_msg_connected_t *c = &e->msg.connected;
> +
> +	const char *name = rte_vdev_device_name(pmd->vdev);
> +
> +	e->msg.type = MEMIF_MSG_TYPE_CONNECTED;
> +	strncpy((char *)c->if_name, name, strlen(name));
> +
> +	return 0;
> +}
> +
> +static void
> +memif_intr_unregister_handler(struct rte_intr_handle *intr_handle, void *arg)
> +{
> +	struct memif_msg_queue_elt *elt;
> +	struct memif_control_channel *cc = arg;

> +	/* close control channel fd */
> +	close(intr_handle->fd);
> +	/* clear message queue */
> +	while ((elt = TAILQ_FIRST(&cc->msg_queue)) != NULL) {
> +		TAILQ_REMOVE(&cc->msg_queue, elt, next);
> +		free(elt);
> +	}
> +	/* free control channel */
> +	rte_free(cc);
> +}
> +
> +void memif_disconnect(struct rte_eth_dev *dev)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +	struct memif_msg_queue_elt *elt;
> +	int i;
> +	int ret;
> +
> +	if (pmd->cc != NULL) {
> +		/* Clear control message queue (except disconnect message if any). */
> +		while ((elt = TAILQ_FIRST(&pmd->cc->msg_queue)) != NULL) {
> +			if (elt->msg.type != MEMIF_MSG_TYPE_DISCONNECT) {
> +				TAILQ_REMOVE(&pmd->cc->msg_queue, elt, next);
> +				free(elt);
> +			}
> +		}
> +		/* send disconnect message (if there is any in queue) */
> +		memif_msg_send_from_queue(pmd->cc);
> +
> +		/* at this point, there should be no more messages in queue */
> +		if (TAILQ_FIRST(&pmd->cc->msg_queue) != NULL) {
> +			MIF_LOG(WARNING,
> +				"%s: Unexpected message(s) in message queue.",
> +				rte_vdev_device_name(pmd->vdev));
> +		}

Single-line block of code; the MIF_LOG() macro should be wrapped in do { stmt; } while (0).
> +
> +		if (pmd->cc->intr_handle.fd > 0) {
> +			ret =
> +			    rte_intr_callback_unregister(&pmd->cc->intr_handle,
> +							 memif_intr_handler,
> +							 pmd->cc);
> +			/*
> +			 * If callback is active (disconnecting based on
> +			 * received control message).
> +			 */
> +			if (ret == -EAGAIN) {
> +				ret =
> +				    rte_intr_callback_unregister_pending(&pmd->
> +									 cc->
> +									 intr_handle,
> +									 memif_intr_handler,
> +									 pmd->
> +									 cc,
> +									 memif_intr_unregister_handler);
> +			} else if (ret > 0) {
> +				close(pmd->cc->intr_handle.fd);
> +				rte_free(pmd->cc);
> +			}
> +			if (ret <= 0)
> +				MIF_LOG(WARNING,
> +					"%s: Failed to unregister control channel callback.",
> +					rte_vdev_device_name(pmd->vdev));
> +		}
> +	}
> +
> +	/* unconfig interrupts */
> +	struct memif_queue *mq;
> +	for (i = 0; i < pmd->cfg.num_s2m_rings; i++) {
> +		mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
> +		    &pmd->tx_queues[i] : &pmd->rx_queues[i];
> +		if (mq->intr_handle.fd > 0) {
> +			rte_intr_disable(&mq->intr_handle);
> +			close(mq->intr_handle.fd);
> +			mq->intr_handle.fd = -1;
> +		}
> +		mq->ring = NULL;
> +	}
> +	for (i = 0; i < pmd->cfg.num_m2s_rings; i++) {
> +		mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
> +		    &pmd->rx_queues[i] : &pmd->tx_queues[i];
> +		if (mq->intr_handle.fd > 0) {
> +			rte_intr_disable(&mq->intr_handle);
> +			close(mq->intr_handle.fd);
> +			mq->intr_handle.fd = -1;
> +		}
> +		mq->ring = NULL;
> +	}
> +
> +	memif_free_regions(pmd);
> +
> +	dev->data->dev_link.link_status = ETH_LINK_DOWN;
> +	pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
> +	pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTED;
> +	MIF_LOG(DEBUG, "%s: Disconnected.", rte_vdev_device_name(pmd->vdev));
> +}
> +
> +static int memif_msg_receive(struct memif_control_channel *cc)
> +{
> +	char ctl[CMSG_SPACE(sizeof(int)) +
> +		 CMSG_SPACE(sizeof(struct ucred))] = { 0 };
> +	struct msghdr mh = { 0 };
> +	struct iovec iov[1];
> +	memif_msg_t msg = { 0 };
> +	ssize_t size;
> +	int ret = 0;
> +	struct ucred *cr __rte_unused;
> +	cr = 0;
> +	struct cmsghdr *cmsg;
> +	int afd = -1;
> +	int i;
> +
> +	iov[0].iov_base = (void *)&msg;
> +	iov[0].iov_len = sizeof(memif_msg_t);
> +	mh.msg_iov = iov;
> +	mh.msg_iovlen = 1;
> +	mh.msg_control = ctl;
> +	mh.msg_controllen = sizeof(ctl);
> +
> +	size = recvmsg(cc->intr_handle.fd, &mh, 0);
> +	if (size != sizeof(memif_msg_t)) {
> +		MIF_LOG(DEBUG, "Invalid message size.");
> +		memif_msg_enq_disconnect(cc, "Invalid message size", 0);
> +		return -1;
> +	}
> +	MIF_LOG(DEBUG, "Received msg type: %u.", msg.type);
> +
> +	cmsg = CMSG_FIRSTHDR(&mh);
> +	while (cmsg) {
> +		if (cmsg->cmsg_level == SOL_SOCKET) {
> +			if (cmsg->cmsg_type == SCM_CREDENTIALS) {
> +				cr = (struct ucred *)CMSG_DATA(cmsg);
> +			} else if (cmsg->cmsg_type == SCM_RIGHTS) {
> +				afd = *(int *)CMSG_DATA(cmsg);
> +			}
> +		}
> +		cmsg = CMSG_NXTHDR(&mh, cmsg);
> +	}
> +
> +	if ((cc->pmd == NULL) && msg.type != MEMIF_MSG_TYPE_INIT) {
> +		MIF_LOG(DEBUG, "Unexpected message.");
> +		memif_msg_enq_disconnect(cc, "Unexpected message", 0);
> +		return -1;
> +	}
> +
> +	/* get device from hash data */
> +	switch (msg.type) {
> +	case MEMIF_MSG_TYPE_ACK:
> +		break;
> +	case MEMIF_MSG_TYPE_HELLO:
> +		ret = memif_msg_receive_hello(cc->pmd, &msg);
> +		if (ret < 0)
> +			goto exit;
> +		ret = memif_init_regions_and_queues(cc->pmd);
> +		if (ret < 0)
> +			goto exit;
> +		ret = memif_msg_enq_init(cc->pmd);
> +		if (ret < 0)
> +			goto exit;
> +		for (i = 0; i < cc->pmd->regions_num; i++) {
> +			ret = memif_msg_enq_add_region(cc->pmd, i);
> +			if (ret < 0)
> +				goto exit;
> +		}
> +		for (i = 0; i < cc->pmd->run.num_s2m_rings; i++) {
> +			ret = memif_msg_enq_add_ring(cc->pmd, i,
> +						     MEMIF_RING_S2M);
> +			if (ret < 0)
> +				goto exit;
> +		}
> +		for (i = 0; i < cc->pmd->run.num_m2s_rings; i++) {
> +			ret = memif_msg_enq_add_ring(cc->pmd, i,
> +						     MEMIF_RING_M2S);
> +			if (ret < 0)
> +				goto exit;
> +		}
> +		ret = memif_msg_enq_connect(cc->pmd);
> +		if (ret < 0)
> +			goto exit;
> +		break;
> +	case MEMIF_MSG_TYPE_INIT:
> +		ret = memif_msg_receive_init(cc, &msg);
> +		if (ret < 0)
> +			goto exit;
> +		ret = memif_msg_enq_ack(cc->pmd);
> +		if (ret < 0)
> +			goto exit;
> +		break;
> +	case MEMIF_MSG_TYPE_ADD_REGION:
> +		ret = memif_msg_receive_add_region(cc->pmd, &msg, afd);
> +		if (ret < 0)
> +			goto exit;
> +		ret = memif_msg_enq_ack(cc->pmd);
> +		if (ret < 0)
> +			goto exit;
> +		break;
> +	case MEMIF_MSG_TYPE_ADD_RING:
> +		ret = memif_msg_receive_add_ring(cc->pmd, &msg, afd);
> +		if (ret < 0)
> +			goto exit;
> +		ret = memif_msg_enq_ack(cc->pmd);
> +		if (ret < 0)
> +			goto exit;
> +		break;
> +	case MEMIF_MSG_TYPE_CONNECT:
> +		ret = memif_msg_receive_connect(cc->pmd, &msg);
> +		if (ret < 0)
> +			goto exit;
> +		ret = memif_msg_enq_connected(cc->pmd);
> +		if (ret < 0)
> +			goto exit;
> +		break;
> +	case MEMIF_MSG_TYPE_CONNECTED:
> +		ret = memif_msg_receive_connected(cc->pmd, &msg);
> +		break;
> +	case MEMIF_MSG_TYPE_DISCONNECT:
> +		ret = memif_msg_receive_disconnect(cc->pmd, &msg);
> +		if (ret < 0)
> +			goto exit;
> +		break;
> +	default:
> +		memif_msg_enq_disconnect(cc, "Unknown message type", 0);
> +		ret = -1;
> +		goto exit;
> +	}
> +
> + exit:
> +	return ret;
> +}
> +
> +static void memif_intr_handler(void *arg)
> +{
> +	struct memif_control_channel *cc = arg;
> +	struct rte_eth_dev *dev;
> +	int ret;
> +
> +	ret = memif_msg_receive(cc);
> +	/* if driver failed to assign device */
> +	if (cc->pmd == NULL) {
> +		ret = rte_intr_callback_unregister_pending(&cc->intr_handle,
> +							   memif_intr_handler,
> +							   cc,
> +							   memif_intr_unregister_handler);
> +		if (ret < 0)
> +			MIF_LOG(WARNING,
> +				"Failed to unregister control channel callback.");
> +		return;
> +	}
> +	/* if memif_msg_receive failed */
> +	if (ret < 0)
> +		goto disconnect;
> +
> +	ret = memif_msg_send_from_queue(cc);
> +	if (ret < 0)
> +		goto disconnect;
> +
> +	return;
> +
> + disconnect:
> +	dev = rte_eth_dev_allocated(rte_vdev_device_name(cc->pmd->vdev));
> +	if (dev == NULL) {
> +		MIF_LOG(WARNING, "%s: eth dev not allocated",
> +			rte_vdev_device_name(cc->pmd->vdev));
> +		return;
> +	}
> +	memif_disconnect(dev);
> +}
> +
> +static void memif_listener_handler(void *arg)
> +{
> +	struct memif_socket *socket = arg;
> +	int sockfd;
> +	int addr_len;
> +	struct sockaddr_un client;
> +	struct memif_control_channel *cc;
> +	int ret;
> +
> +	addr_len = sizeof(client);
> +	sockfd = accept(socket->intr_handle.fd, (struct sockaddr *)&client,
> +			(socklen_t *) & addr_len);
> +	if (sockfd < 0) {
> +		MIF_LOG(ERR,
> +			"Failed to accept connection request on socket fd %d",
> +			socket->intr_handle.fd);
> +		return;
> +	}
> +
> +	MIF_LOG(DEBUG, "%s: Connection request accepted.", socket->filename);
> +
> +	cc = rte_zmalloc("memif-cc", sizeof(struct memif_control_channel), 0);
> +	if (cc == NULL) {
> +		MIF_LOG(ERR, "Failed to allocate control channel.");
> +		goto error;
> +	}
> +
> +	cc->intr_handle.fd = sockfd;
> +	cc->intr_handle.type = RTE_INTR_HANDLE_EXT;
> +	cc->socket = socket;
> +	cc->pmd = NULL;
> +	TAILQ_INIT(&cc->msg_queue);
> +
> +	ret =
> +	    rte_intr_callback_register(&cc->intr_handle, memif_intr_handler,
> +				       cc);
> +	if (ret < 0) {
> +		MIF_LOG(ERR, "Failed to register control channel callback.");
> +		goto error;
> +	}
> +
> +	ret = memif_msg_enq_hello(cc);
> +	if (ret < 0) {
> +		MIF_LOG(ERR, "Failed to enqueue hello message.");
> +		goto error;
> +	}
> +	ret = memif_msg_send_from_queue(cc);
> +	if (ret < 0)
> +		goto error;
> +
> +	return;
> +
> + error:
> +	if (sockfd > 0) {
> +		close(sockfd);
> +		sockfd = -1;
> +	}
> +	if (cc != NULL) {
> +		rte_free(cc);
> +		cc = NULL;
> +	}
> +}
> +
> +static inline struct memif_socket *memif_socket_create(struct pmd_internals
> +						       *pmd, char *key,
> +						       uint8_t listener)
> +{
> +	struct memif_socket *sock;
> +	struct sockaddr_un un;
> +	int sockfd;
> +	int ret;
> +	int on = 1;
> +
> +	sock = rte_zmalloc("memif-socket", sizeof(struct memif_socket), 0);
> +	if (sock == NULL) {
> +		MIF_LOG(ERR, "Failed to allocate memory for memif socket");
> +		return NULL;
> +	}
> +
> +	sock->listener = listener;
> +	rte_memcpy(sock->filename, key, 256);
> +	TAILQ_INIT(&sock->pmd_queue);
> +
> +	if (listener != 0) {
> +		sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
> +		if (sockfd < 0)
> +			goto error;;
> +
> +		un.sun_family = AF_UNIX;
> +		strncpy((char *)un.sun_path, (char *)sock->filename,
> +			sizeof(un.sun_path) - 1);
> +
> +		ret = setsockopt(sockfd, SOL_SOCKET, SO_PASSCRED, &on,
> +				 sizeof(on));
> +		if (ret < 0) {
> +			goto error;
> +		}
> +		ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
> +		if (ret < 0) {
> +			goto error;
> +		}
> +		ret = listen(sockfd, 1);
> +		if (ret < 0) {
> +			goto error;
> +		}

Single-line blocks: the braces are not needed around a single statement.
> +
> +		MIF_LOG(DEBUG, "%s: Memif listener socket %s created.",
> +			rte_vdev_device_name(pmd->vdev), sock->filename);
> +
> +		sock->intr_handle.fd = sockfd;
> +		sock->intr_handle.type = RTE_INTR_HANDLE_EXT;
> +		ret = rte_intr_callback_register(&sock->intr_handle,
> +						 memif_listener_handler, sock);
> +		if (ret < 0) {
> +			MIF_LOG(ERR, "%s: Failed to register interrupt "
> +				"callback for listener socket",
> +				rte_vdev_device_name(pmd->vdev));
> +			return NULL;
> +		}
> +	}
> +
> +	return sock;
> +
> + error:
> +	MIF_LOG(ERR, "%s: Failed to setup socket %s: %s",
> +		rte_vdev_device_name(pmd->vdev), key, strerror(errno));
> +	if (sock != NULL)
> +		rte_free(sock);
> +	return NULL;
> +}
> +
> +static inline struct rte_hash *memif_create_socket_hash(void)
> +{
> +	struct rte_hash_parameters params = { 0 };
> +	params.name = MEMIF_SOCKET_HASH_NAME;
> +	params.entries = 256;
> +	params.key_len = 256;
> +	params.hash_func = rte_jhash;
> +	params.hash_func_init_val = 0;
> +	return rte_hash_create(&params);
> +}
> +
> +int memif_socket_init(struct rte_eth_dev *dev, const char *socket_filename)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +	struct memif_socket *socket = NULL;
> +	struct memif_socket_pmd_list_elt *elt;
> +	int ret;
> +	char key[256];
> +
> +	struct rte_hash *hash = rte_hash_find_existing(MEMIF_SOCKET_HASH_NAME);
> +	if (hash == NULL) {
> +		hash = memif_create_socket_hash();
> +		if (hash == NULL) {
> +			MIF_LOG(ERR, "Failed to create memif socket hash.");
> +			return -1;
> +		}
> +	}
> +
> +	memset(key, 0, 256);
> +	rte_memcpy(key, socket_filename, strlen(socket_filename));
> +	ret = rte_hash_lookup_data(hash, key, (void **)&socket);
> +	if (ret < 0) {
> +		socket = memif_socket_create(pmd, key,
> +					     (pmd->role ==
> +					      MEMIF_ROLE_SLAVE) ? 0 : 1);
> +		if (socket == NULL) {
> +			return -1;
> +		}
> +		ret = rte_hash_add_key_data(hash, key, socket);
> +		if (ret < 0) {
> +			MIF_LOG(ERR, "Failed to add socket to socket hash.");
> +			return ret;
> +		}
> +	}
> +	pmd->socket_filename = socket->filename;
> +
> +	if ((socket->listener != 0) && (pmd->role == MEMIF_ROLE_SLAVE)) {
> +		MIF_LOG(ERR, "Socket is a listener.");
> +		return -1;
> +	} else if ((socket->listener == 0) && (pmd->role == MEMIF_ROLE_MASTER)) {
> +		MIF_LOG(ERR, "Socket is not a listener.");
> +		return -1;
> +	}
> +
> +	TAILQ_FOREACH(elt, &socket->pmd_queue, next) {
> +		if (elt->pmd->id == pmd->id) {
> +			MIF_LOG(ERR, "Memif device with id %d already "
> +				"exists on socket %s",
> +				pmd->id, socket->filename);
> +			return -1;
> +		}
> +	}
> +
> +	elt =
> +	    rte_malloc("pmd-queue", sizeof(struct memif_socket_pmd_list_elt),
> +		       0);
> +	if (elt == NULL) {
> +		MIF_LOG(ERR, "%s: Failed to add device to socket device list.",
> +			rte_vdev_device_name(pmd->vdev));
> +		return -1;
> +	}
> +	elt->pmd = pmd;
> +	TAILQ_INSERT_TAIL(&socket->pmd_queue, elt, next);
> +
> +	return 0;
> +}
> +
> +void memif_socket_remove_device(struct pmd_internals *pmd)
> +{
> +	struct memif_socket *socket = NULL;
> +	struct memif_socket_pmd_list_elt *elt, *next;
> +
> +	struct rte_hash *hash = rte_hash_find_existing(MEMIF_SOCKET_HASH_NAME);
> +	if (hash == NULL) {
> +		return;
> +	}
> +
> +	if (rte_hash_lookup_data(hash, pmd->socket_filename, (void **)&socket) <
> +	    0)
> +		return;
> +
> +	for (elt = TAILQ_FIRST(&socket->pmd_queue); elt != NULL; elt = next) {
> +		next = TAILQ_NEXT(elt, next);
> +		if (elt->pmd == pmd) {
> +			TAILQ_REMOVE(&socket->pmd_queue, elt, next);
> +			free(elt);
> +			pmd->socket_filename = NULL;
> +		}
> +	}
> +
> +	/* remove socket, if this was the last device using it */
> +	if (TAILQ_EMPTY(&socket->pmd_queue)) {
> +		rte_hash_del_key(hash, socket->filename);
> +		if (socket->listener) {
> +			/* remove listener socket file,
> +			 * so we can create new one later.
> +			 */
> +			remove(socket->filename);
> +		}
> +		rte_free(socket);
> +	}
> +}
> +
> +int memif_connect_master(struct rte_eth_dev *dev)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +	if ((pmd->rx_queues == NULL) || (pmd->tx_queues == NULL) ||
> +	    (pmd->socket_filename == NULL)) {
> +		MIF_LOG(ERR, "%s: Device not configured!",
> +			rte_vdev_device_name(pmd->vdev));
> +		return -1;
> +	}
> +	memset(pmd->local_disc_string, 0, 96);
> +	memset(pmd->remote_disc_string, 0, 96);
> +	pmd->flags &= ~ETH_MEMIF_FLAG_DISABLED;
> +	return 0;
> +}
> +
> +int memif_connect_slave(struct rte_eth_dev *dev)
> +{
> +	int sockfd;
> +	int ret;
> +	struct sockaddr_un sun;
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +
> +	if ((pmd->rx_queues == NULL) || (pmd->tx_queues == NULL) ||
> +	    (pmd->socket_filename == NULL)) {
> +		MIF_LOG(ERR, "%s: Device not configured!",
> +			rte_vdev_device_name(pmd->vdev));
> +		return -1;
> +	}
> +
> +	memset(pmd->local_disc_string, 0, 96);
> +	memset(pmd->remote_disc_string, 0, 96);
> +	pmd->flags &= ~ETH_MEMIF_FLAG_DISABLED;
> +
> +	sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
> +	if (sockfd < 0) {
> +		MIF_LOG(ERR, "%s: Failed to open socket.",
> +			rte_vdev_device_name(pmd->vdev));
> +		return -1;
> +	}
> +
> +	sun.sun_family = AF_UNIX;
> +
> +	strncpy(sun.sun_path, pmd->socket_filename, sizeof(sun.sun_path) - 1);
> +
> +	ret = connect(sockfd, (struct sockaddr *)&sun,
> +		      sizeof(struct sockaddr_un));
> +	if (ret < 0) {
> +		MIF_LOG(ERR, "%s: Failed to connect socket: %s.",
> +			rte_vdev_device_name(pmd->vdev), pmd->socket_filename);
> +		goto error;
> +	}
> +
> +	MIF_LOG(DEBUG, "%s: Memif socket: %s connected.",
> +		rte_vdev_device_name(pmd->vdev), pmd->socket_filename);
> +
> +	pmd->cc = rte_zmalloc("memif-cc",
> +			      sizeof(struct memif_control_channel), 0);
> +	if (pmd->cc == NULL) {
> +		MIF_LOG(ERR, "%s: Failed to allocate control channel.",
> +			rte_vdev_device_name(pmd->vdev));
> +		goto error;
> +	}
> +
> +	pmd->cc->intr_handle.fd = sockfd;
> +	pmd->cc->intr_handle.type = RTE_INTR_HANDLE_EXT;
> +	pmd->cc->socket = NULL;
> +	pmd->cc->pmd = pmd;
> +	TAILQ_INIT(&pmd->cc->msg_queue);
> +
> +	ret = rte_intr_callback_register(&pmd->cc->intr_handle,
> +					 memif_intr_handler, pmd->cc);
> +	if (ret < 0) {
> +		MIF_LOG(ERR, "%s: Failed to register interrupt callback "
> +			"for controll fd", rte_vdev_device_name(pmd->vdev));
> +		goto error;
> +	}
> +
> +	return 0;
> +
> + error:
> +	if (sockfd > 0) {
> +		close(sockfd);
> +		sockfd = -1;
> +	}
> +	if (pmd->cc != NULL) {
> +		rte_free(pmd->cc);
> +		pmd->cc = NULL;
> +	}
> +	return -1;
> +}
> diff --git a/drivers/net/memif/memif_socket.h b/drivers/net/memif/memif_socket.h
> new file mode 100644
> index 000000000..f9136dbe5
> --- /dev/null
> +++ b/drivers/net/memif/memif_socket.h
> @@ -0,0 +1,57 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
> + */
> +
> +#ifndef _MEMIF_SOCKET_H_
> +#define _MEMIF_SOCKET_H_
> +
> +#include <sys/queue.h>
> +
> +/*
> + * Remove device from socket device list. If no device is left on the socket,
> + * remove the socket as well.
> + */
> +void memif_socket_remove_device(struct pmd_internals *pmd);
> +
> +void memif_msg_enq_disconnect(struct memif_control_channel *cc,
> +			      const char *reason, int err_code);
> +
> +int memif_socket_init(struct rte_eth_dev *dev, const char *socket_filename);
> +
> +void memif_disconnect(struct rte_eth_dev *dev);
> +
> +/* Allow master to receive connection requests. */
> +int memif_connect_master(struct rte_eth_dev *dev);
> +
> +/* Send connection request. */
> +int memif_connect_slave(struct rte_eth_dev *dev);

We document functions in the header file, before the declaration, using Doxygen formatting:

/**
 * One line description
 *
 * More description if needed (optional)
 *
 * @param foo1
 *    Description of argument
 * @param foo2
 *    Description of argument
 * @return (only if function returns something, but could be N/A)
 *    Description of return value(s)
 */
int
Function(int foo1, int foo2)
{
	return foo1 + foo2;
}

> +
> +struct memif_socket_pmd_list_elt {
> +	TAILQ_ENTRY(memif_socket_pmd_list_elt) next;
> +	struct pmd_internals *pmd;
> +};
> +
> +#define MEMIF_SOCKET_HASH_NAME			"memif-sh"
> +struct memif_socket {
> +	struct rte_intr_handle intr_handle;
> +	uint8_t listener;
> +	char filename[256];
> +
> +	 TAILQ_HEAD(, memif_socket_pmd_list_elt) pmd_queue;

Odd spacing, see below.
> +};
> +
> +/* Control mesage queue. */
> +struct memif_msg_queue_elt {
> +	TAILQ_ENTRY(memif_msg_queue_elt) next;
> +	memif_msg_t msg;
> +	int fd;
> +};
> +
> +struct memif_control_channel {
> +	struct rte_intr_handle intr_handle;
> +	 TAILQ_HEAD(, memif_msg_queue_elt) msg_queue;

Odd spacing above, maybe a space/tab problem. We indent with tabs (8 columns wide) first, then align with spaces.
> +	struct memif_socket *socket;
> +	struct pmd_internals *pmd;
> +};
> +
> +#endif				/* MEMIF_SOCKET_H */
> diff --git a/drivers/net/memif/meson.build b/drivers/net/memif/meson.build
> new file mode 100644
> index 000000000..ea18394fd
> --- /dev/null
> +++ b/drivers/net/memif/meson.build
> @@ -0,0 +1,8 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright 2018 Cisco Systems, Inc.  All rights reserved.
> +
> +if host_machine.system() != 'linux'
> +        build = false
> +endif
> +sources = files('rte_eth_memif.c',
> +		'memif_socket.c')
> diff --git a/drivers/net/memif/rte_eth_memif.c b/drivers/net/memif/rte_eth_memif.c
> new file mode 100644
> index 000000000..7988010c4
> --- /dev/null
> +++ b/drivers/net/memif/rte_eth_memif.c
> @@ -0,0 +1,1172 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
> + */
> +
> +#include <stdlib.h>
> +#include <fcntl.h>
> +#include <unistd.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <sys/ioctl.h>
> +#include <sys/mman.h>
> +#include <linux/if_ether.h>
> +#include <errno.h>
> +#include <sys/eventfd.h>
> +
> +#include <rte_version.h>
> +#include <rte_mbuf.h>
> +#include <rte_ether.h>
> +#include <rte_ethdev_driver.h>
> +#include <rte_ethdev_vdev.h>
> +#include <rte_malloc.h>
> +#include <rte_kvargs.h>
> +#include <rte_bus_vdev.h>
> +
> +#include <rte_eth_memif.h>
> +#include <memif_socket.h>
> +
> +#define ETH_MEMIF_ID_ARG		"id"
> +#define ETH_MEMIF_ROLE_ARG		"role"
> +#define ETH_MEMIF_BUFFER_SIZE_ARG	"bsize"
> +#define ETH_MEMIF_RING_SIZE_ARG		"rsize"
> +#define ETH_MEMIF_NRXQ_ARG		"nrxq"
> +#define ETH_MEMIF_NTXQ_ARG		"ntxq"
> +#define ETH_MEMIF_SOCKET_ARG		"socket"
> +#define ETH_MEMIF_MAC_ARG		"mac"
> +#define ETH_MEMIF_ZC_ARG		"zero-copy"
> +#define ETH_MEMIF_SECRET_ARG		"secret"
> +
> +static const char *valid_arguments[] = {
> +	ETH_MEMIF_ID_ARG,
> +	ETH_MEMIF_ROLE_ARG,
> +	ETH_MEMIF_BUFFER_SIZE_ARG,
> +	ETH_MEMIF_RING_SIZE_ARG,
> +	ETH_MEMIF_NRXQ_ARG,
> +	ETH_MEMIF_NTXQ_ARG,
> +	ETH_MEMIF_SOCKET_ARG,
> +	ETH_MEMIF_MAC_ARG,
> +	ETH_MEMIF_ZC_ARG,
> +	ETH_MEMIF_SECRET_ARG,
> +	NULL
> +};
> +
> +static struct rte_vdev_driver pmd_memif_drv;
> +
> +const char *memif_version(void)
> +{
> +#define STR_HELP(s)	#s
> +#define STR(s)		STR_HELP(s)
> +	return ("memif-" STR(MEMIF_VERSION_MAJOR) "." STR(MEMIF_VERSION_MINOR));
> +#undef STR
> +#undef STR_HELP
> +}
> +
> +static void
> +memif_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +
> +	dev_info->if_index = pmd->if_index;
> +	dev_info->max_mac_addrs = 1;
> +	dev_info->max_rx_pktlen = (uint32_t) ETH_FRAME_LEN;
> +	dev_info->max_rx_queues = (pmd->role == MEMIF_ROLE_SLAVE) ?
> +	    pmd->cfg.num_m2s_rings : pmd->cfg.num_s2m_rings;
> +	dev_info->max_tx_queues = (pmd->role == MEMIF_ROLE_SLAVE) ?
> +	    pmd->cfg.num_s2m_rings : pmd->cfg.num_m2s_rings;
> +	dev_info->min_rx_bufsize = 0;
> +}
> +
> +static inline memif_ring_t *memif_get_ring(struct pmd_internals *pmd,
> +					   memif_ring_type_t type,
> +					   uint16_t ring_num)
> +{
> +	/* rings only in region 0 */
> +	void *p = pmd->regions[0].addr;
> +	int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) *
> +	    (1 << pmd->run.log2_ring_size);
> +	p += (ring_num + type * pmd->run.num_s2m_rings) * ring_size;
> +
> +	return (memif_ring_t *) p;
> +}
> +
> +static inline void *memif_get_buffer(struct pmd_internals *pmd,
> +				     memif_desc_t * d)
> +{
> +	return (pmd->regions[d->region].addr + d->offset);
> +}
> +
> +static uint16_t
> +eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> +{
> +	struct memif_queue *mq = queue;
> +	struct pmd_internals *pmd = mq->pmd;
> +	if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
> +		return 0;
> +	memif_ring_t *ring = mq->ring;
> +	if (unlikely(ring == NULL))
> +		return 0;
> +	uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
> +	uint16_t n_rx_pkts = 0;
> +	uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
> +	    RTE_PKTMBUF_HEADROOM;
> +	uint16_t src_len, src_off, dst_len, dst_off, cp_len;
> +	memif_ring_type_t type = mq->type;
> +	memif_desc_t *d0;
> +	struct rte_mbuf *mbuf;
> +	struct rte_mbuf *mbuf_head = NULL;
> +
> +	/* consume interrupt */
> +	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
> +		uint64_t b;
> +		ssize_t size __rte_unused;
> +		size = read(mq->intr_handle.fd, &b, sizeof(b));
> +	}
> +
> +	ring_size = 1 << mq->log2_ring_size;
> +	mask = ring_size - 1;
> +
> +	cur_slot = (type == MEMIF_RING_S2M) ? mq->last_head : mq->last_tail;
> +	last_slot = (type == MEMIF_RING_S2M) ? ring->head : ring->tail;
> +	if (cur_slot == last_slot)
> +		goto refill;
> +	n_slots = last_slot - cur_slot;
> +
> +	while (n_slots && n_rx_pkts < nb_pkts) {
> +		mbuf_head = rte_pktmbuf_alloc(mq->mempool);
> +		if (unlikely(mbuf_head == NULL))
> +			goto no_free_bufs;
> +		mbuf = mbuf_head;
> +		mbuf->port = mq->in_port;
> +
> + next_slot:
> +		s0 = cur_slot & mask;
> +		d0 = &ring->desc[s0];
> +
> +		src_len = d0->length;
> +		dst_off = 0;
> +		src_off = 0;
> +
> +		do {
> +			dst_len = mbuf_size - dst_off;
> +			if (dst_len == 0) {
> +				dst_off = 0;
> +				dst_len = mbuf_size + RTE_PKTMBUF_HEADROOM;
> +
> +				mbuf = rte_pktmbuf_alloc(mq->mempool);
> +				if (unlikely(mbuf == NULL))
> +					goto no_free_bufs;
> +				mbuf->port = mq->in_port;
> +				rte_pktmbuf_chain(mbuf_head, mbuf);
> +			}
> +			cp_len = memif_min(dst_len, src_len);
> +
> +			rte_pktmbuf_pkt_len(mbuf) =
> +			    rte_pktmbuf_data_len(mbuf) += cp_len;
> +
> +			memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, dst_off),
> +			       memif_get_buffer(pmd, d0) + src_off, cp_len);
> +
> +			mq->n_bytes += cp_len;
> +			src_off += cp_len;
> +			dst_off += cp_len;
> +			src_len -= cp_len;
> +		} while (src_len);
> +
> +		cur_slot++;
> +		n_slots--;
> +		if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
> +			goto next_slot;
> +		}

Single-line block: the braces are not needed.
> +
> +		*bufs++ = mbuf_head;
> +		n_rx_pkts++;
> +
> +	}
> +
> + no_free_bufs:
> +	if (type == MEMIF_RING_S2M) {
> +		rte_mb();
> +		ring->tail = mq->last_head = cur_slot;
> +	} else {
> +		mq->last_tail = cur_slot;
> +	}
> +
 Single line block
> + refill:
> +	if (type == MEMIF_RING_M2S) {
> +		uint16_t head = ring->head;
> +		n_slots = ring_size - head + mq->last_tail;
> +
> +		while (n_slots--) {
> +			s0 = head++ & mask;
> +			d0 = &ring->desc[s0];
> +			d0->length = pmd->run.buffer_size;
> +		}
> +		rte_mb();
> +		ring->head = head;
> +	}
> +
> +	mq->n_pkts += n_rx_pkts;
> +	return n_rx_pkts;
> +}
> +
> +static uint16_t
> +eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> +{
> +	struct memif_queue *mq = queue;
> +	struct pmd_internals *pmd = mq->pmd;
> +	if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
> +		return 0;
> +	memif_ring_t *ring = mq->ring;
> +	if (unlikely(ring == NULL))
> +		return 0;
> +	uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0;
> +	uint16_t src_len, src_off, dst_len, dst_off, cp_len;
> +	memif_ring_type_t type = mq->type;
> +	memif_desc_t *d0;
> +	struct rte_mbuf *mbuf;
> +	struct rte_mbuf *mbuf_head;
> +
> +	ring_size = 1 << mq->log2_ring_size;
> +	mask = ring_size - 1;
> +
> +	n_free = ring->tail - mq->last_tail;
> +	mq->last_tail += n_free;
> +	slot = (type == MEMIF_RING_S2M) ? ring->head : ring->tail;
> +
> +	if (type == MEMIF_RING_S2M)
> +		n_free = ring_size - ring->head + mq->last_tail;
> +	else
> +		n_free = ring->head - ring->tail;
> +
> +	while (n_free && n_tx_pkts < nb_pkts) {
> +		mbuf_head = *bufs++;
> +		mbuf = mbuf_head;
> +
> +		saved_slot = slot;
> +		d0 = &ring->desc[slot & mask];
> +		dst_off = 0;
> +		dst_len =
> +		    (type ==
> +		     MEMIF_RING_S2M) ? pmd->run.buffer_size : d0->length;
> +
> + next_in_chain:
> +		src_off = 0;
> +		src_len = rte_pktmbuf_data_len(mbuf);
> +
> +		while (src_len) {
> +			if (dst_len == 0) {
> +				if (n_free) {
> +					slot++;
> +					n_free--;
> +					d0->flags |= MEMIF_DESC_FLAG_NEXT;
> +					d0 = &ring->desc[slot & mask];
> +					dst_off = 0;
> +					dst_len = (type == MEMIF_RING_S2M) ?
> +					    pmd->run.buffer_size : d0->length;
> +					d0->flags = 0;
> +				} else {
> +					slot = saved_slot;
> +					goto no_free_slots;
> +				}
> +			}
> +			cp_len = memif_min(dst_len, src_len);
> +
> +			memcpy(memif_get_buffer(pmd, d0) + dst_off,
> +			       rte_pktmbuf_mtod_offset(mbuf, void *, src_off),
> +			       cp_len);
> +
> +			mq->n_bytes += cp_len;
> +			src_off += cp_len;
> +			dst_off += cp_len;
> +			src_len -= cp_len;
> +			dst_len -= cp_len;
> +
> +			d0->length = dst_off;
> +		}
> +
> +		if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
> +			mbuf = mbuf->next;
> +			goto next_in_chain;
> +		}
> +
> +		n_tx_pkts++;
> +		slot++;
> +		n_free--;
> +		rte_pktmbuf_free(mbuf_head);
> +	}
> +
> + no_free_slots:
> +	rte_mb();
> +	if (type == MEMIF_RING_S2M)
> +		ring->head = slot;
> +	else
> +		ring->tail = slot;
> +
> +	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
> +		uint64_t a = 1;
> +		ssize_t size = write(mq->intr_handle.fd, &a, sizeof(a));
> +		if (unlikely(size < 0)) {
> +			MIF_LOG(WARNING,
> +				"%s: Failed to send interrupt on qid %ld: %s",
> +				rte_vdev_device_name(pmd->vdev),
> +				mq - pmd->tx_queues, strerror(errno));
> +		}
> +	}
> +
> +	mq->n_err += nb_pkts - n_tx_pkts;
> +	mq->n_pkts += n_tx_pkts;
> +	return n_tx_pkts;
> +}
> +
> +void memif_free_regions(struct pmd_internals *pmd)
> +{
> +	int i;
> +	struct memif_region *r;
> +
> +	for (i = 0; i < pmd->regions_num; i++) {
> +		r = pmd->regions + i;
> +		if (r == NULL)
> +			return;
> +		if (r->addr == NULL)
> +			return;
> +		munmap(r->addr, r->region_size);
> +		if (r->fd > 0) {
> +			close(r->fd);
> +			r->fd = -1;
> +		}
> +	}
> +	rte_free(pmd->regions);
> +}
> +
> +static int memif_alloc_regions(struct pmd_internals *pmd, uint8_t brn)
> +{
> +	struct memif_region *r;
> +	char shm_name[32];
> +	int i;
> +	int ret = 0;
> +
> +	r = rte_zmalloc("memif_region", sizeof(struct memif_region) * (brn + 1),
> +			0);
> +	if (r == NULL) {
> +		MIF_LOG(ERR, "%s: Failed to allocate regions.",
> +			rte_vdev_device_name(pmd->vdev));
> +		return -ENOMEM;
> +	}
> +
> +	pmd->regions = r;
> +	pmd->regions_num = brn + 1;
> +
> +	/*
> +	 * Create shm for every region. Region 0 is reserved for descriptors.
> +	 * Other regions contain buffers.
> +	 */
> +	for (i = 0; i < (brn + 1); i++) {
> +		r = &pmd->regions[i];
> +
> +		r->buffer_offset = (i == 0) ? (pmd->run.num_s2m_rings +
> +					       pmd->run.num_m2s_rings) *
> +		    (sizeof(memif_ring_t) +
> +		     sizeof(memif_desc_t) * (1 << pmd->run.log2_ring_size)) : 0;
> +		r->region_size = (i == 0) ? r->buffer_offset :
> +		    (uint32_t) (pmd->run.buffer_size *
> +				(1 << pmd->run.log2_ring_size) *
> +				(pmd->run.num_s2m_rings +
> +				 pmd->run.num_m2s_rings));
> +
> +		memset(shm_name, 0, sizeof(char) * 32);
> +		sprintf(shm_name, "memif region %d", i);
> +
> +		r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING);
> +		if (r->fd < 0) {
> +			MIF_LOG(ERR, "%s: Failed to create shm file: %s.",
> +				rte_vdev_device_name(pmd->vdev),
> +				strerror(errno));
> +			return -1;
> +		}
> +
> +		ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK);
> +		if (ret < 0) {
> +			MIF_LOG(ERR, "%s: Failed to add seals to shm file: %s.",
> +				rte_vdev_device_name(pmd->vdev),
> +				strerror(errno));
> +			return -1;
> +		}
> +
> +		ret = ftruncate(r->fd, r->region_size);
> +		if (ret < 0) {
> +			MIF_LOG(ERR, "%s: Failed to truncate shm file: %s.",
> +				rte_vdev_device_name(pmd->vdev),
> +				strerror(errno));
> +			return -1;
> +		}
> +
> +		r->addr = mmap(NULL, r->region_size, PROT_READ |
> +			       PROT_WRITE, MAP_SHARED, r->fd, 0);
> +		if (r->addr == NULL) {
> +			MIF_LOG(ERR, "%s: Failed to mmap shm region: %s.",
> +				rte_vdev_device_name(pmd->vdev),
> +				strerror(errno));
> +			return -1;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static void memif_init_rings(struct pmd_internals *pmd)
> +{
> +	memif_ring_t *ring;
> +	int i, j;
> +
> +	for (i = 0; i < pmd->run.num_s2m_rings; i++) {
> +		ring = memif_get_ring(pmd, MEMIF_RING_S2M, i);
> +		ring->head = ring->tail = 0;
> +		ring->cookie = MEMIF_COOKIE;
> +		ring->flags = 0;
> +		for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
> +			uint16_t slot = i * (1 << pmd->run.log2_ring_size) + j;
> +			ring->desc[j].region = 1;
> +			ring->desc[j].offset = pmd->regions[1].buffer_offset +
> +			    (uint32_t) (slot * pmd->run.buffer_size);
> +			ring->desc[j].length = pmd->run.buffer_size;
> +		}
> +	}
> +
> +	for (i = 0; i < pmd->run.num_m2s_rings; i++) {
> +		ring = memif_get_ring(pmd, MEMIF_RING_M2S, i);
> +		ring->head = ring->tail = 0;
> +		ring->cookie = MEMIF_COOKIE;
> +		ring->flags = 0;
> +		for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
> +			uint16_t slot = (i + pmd->run.num_s2m_rings) *
> +			    (1 << pmd->run.log2_ring_size) + j;
> +			ring->desc[j].region = 1;
> +			ring->desc[j].offset = pmd->regions[1].buffer_offset +
> +			    (uint32_t) (slot * pmd->run.buffer_size);
> +			ring->desc[j].length = pmd->run.buffer_size;
> +		}
> +	}
> +}
> +
> +static void memif_init_queues(struct pmd_internals *pmd)
> +{
> +	struct memif_queue *mq;
> +	int i;
> +
> +	for (i = 0; i < pmd->run.num_s2m_rings; i++) {
> +		mq = &pmd->tx_queues[i];
> +		mq->ring = memif_get_ring(pmd, MEMIF_RING_S2M, i);
> +		mq->log2_ring_size = pmd->run.log2_ring_size;
> +		/* queues located only in region 0 */
> +		mq->region = 0;
> +		mq->offset = (void *)mq->ring - (void *)pmd->regions[0].addr;
> +		mq->last_head = mq->last_tail = 0;
> +		mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
> +		if (mq->intr_handle.fd < 0) {
> +			MIF_LOG(WARNING,
> +				"%s: Failed to create eventfd for tx queue %d: %s.",
> +				rte_vdev_device_name(pmd->vdev), i,
> +				strerror(errno));
> +		}
> +	}
> +
> +	for (i = 0; i < pmd->run.num_m2s_rings; i++) {
> +		mq = &pmd->rx_queues[i];
> +		mq->ring = memif_get_ring(pmd, MEMIF_RING_M2S, i);
> +		mq->log2_ring_size = pmd->run.log2_ring_size;
> +		/* queues located only in region 0 */
> +		mq->region = 0;
> +		mq->offset = (void *)mq->ring - (void *)pmd->regions[0].addr;
> +		mq->last_head = mq->last_tail = 0;
> +		mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
> +		if (mq->intr_handle.fd < 0) {
> +			MIF_LOG(WARNING,
> +				"%s: Failed to create eventfd for rx queue %d: %s.",
> +				rte_vdev_device_name(pmd->vdev), i,
> +				strerror(errno));
> +		}
> +	}
> +}
> +
> +int memif_init_regions_and_queues(struct pmd_internals *pmd)
> +{
> +	int ret;
> +
> +	ret = memif_alloc_regions(pmd, /* num of buffer regions */ 1);
> +	if (ret < 0) {
> +		return ret;
> +	}

Single-line block: the braces are not needed.
> +
> +	memif_init_rings(pmd);
> +
> +	memif_init_queues(pmd);
> +
> +	return 0;
> +}
> +
> +int memif_connect(struct pmd_internals *pmd)
> +{
> +	struct rte_eth_dev *eth_dev =
> +	    rte_eth_dev_allocated(rte_vdev_device_name(pmd->vdev));
> +	struct memif_region *mr;
> +	struct memif_queue *mq;
> +	int i;
> +
> +	for (i = 0; i < pmd->regions_num; i++) {
> +		mr = pmd->regions + i;
> +		if (mr != NULL) {
> +			if (mr->addr == NULL) {
> +				if (mr->fd < 0)
> +					return -1;
> +				mr->addr = mmap(NULL, mr->region_size,
> +						PROT_READ | PROT_WRITE,
> +						MAP_SHARED, mr->fd, 0);
> +				if (mr->addr == NULL)
> +					return -1;
> +			}
> +		}
> +	}
> +
> +	for (i = 0; i < pmd->run.num_s2m_rings; i++) {
> +		mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
> +		    &pmd->tx_queues[i] : &pmd->rx_queues[i];
> +		mq->ring = pmd->regions[mq->region].addr + mq->offset;
> +		if (mq->ring->cookie != MEMIF_COOKIE) {
> +			MIF_LOG(ERR, "%s: Wrong cookie",
> +				rte_vdev_device_name(pmd->vdev));
> +			return -1;
> +		}
> +		mq->ring->head = mq->ring->tail = mq->last_head =
> +		    mq->last_tail = 0;
> +		/* polling mode by default */
> +		if (pmd->role == MEMIF_ROLE_MASTER) {
> +			mq->ring->flags = MEMIF_RING_FLAG_MASK_INT;
> +		}
> +	}
> +	for (i = 0; i < pmd->run.num_m2s_rings; i++) {
> +		mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
> +		    &pmd->rx_queues[i] : &pmd->tx_queues[i];
> +		mq->ring = pmd->regions[mq->region].addr + mq->offset;
> +		if (mq->ring->cookie != MEMIF_COOKIE) {
> +			MIF_LOG(ERR, "%s: Wrong cookie",
> +				rte_vdev_device_name(pmd->vdev));
> +			return -1;
> +		}
> +		mq->ring->head = mq->ring->tail = mq->last_head =
> +		    mq->last_tail = 0;
> +		/* polling mode by default */
> +		if (pmd->role == MEMIF_ROLE_SLAVE) {
> +			mq->ring->flags = MEMIF_RING_FLAG_MASK_INT;
> +		}
> +	}
> +
> +	pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
> +	pmd->flags |= ETH_MEMIF_FLAG_CONNECTED;
> +	eth_dev->data->dev_link.link_status = ETH_LINK_UP;
> +	MIF_LOG(INFO, "%s: Connected.", rte_vdev_device_name(pmd->vdev));
> +	return 0;
> +}
> +
> +static int memif_dev_start(struct rte_eth_dev *dev)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +	int ret = 0;
> +
> +	switch (pmd->role) {
> +	case MEMIF_ROLE_SLAVE:
> +		ret = memif_connect_slave(dev);
> +		break;
> +	case MEMIF_ROLE_MASTER:
> +		ret = memif_connect_master(dev);
> +		break;
> +	default:
> +		MIF_LOG(ERR, "%s: Unknown role: %d.",
> +			rte_vdev_device_name(pmd->vdev), pmd->role);
> +		ret = -1;
> +		break;
> +	}
> +
> +	return ret;
> +}
> +
> +static int memif_dev_configure(struct rte_eth_dev *dev __rte_unused)
> +{
> +	return 0;
> +}
> +
> +static int
> +memif_tx_queue_setup(struct rte_eth_dev *dev,
> +		     uint16_t qid,
> +		     uint16_t nb_tx_desc __rte_unused,
> +		     unsigned int socket_id __rte_unused,
> +		     const struct rte_eth_txconf *tx_conf __rte_unused)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +	struct memif_queue *mq;
> +
> +	mq = rte_realloc(pmd->tx_queues, sizeof(struct memif_queue) * (qid + 1),
> +			 0);
> +	if (mq == NULL) {
> +		MIF_LOG(ERR, "%s: Failed to alloc tx queue %u.",
> +			rte_vdev_device_name(pmd->vdev), qid);
> +		return -ENOMEM;
> +	}
> +
> +	pmd->tx_queues = mq;
> +
> +	mq->type =
> +	    (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_S2M : MEMIF_RING_M2S;
> +	mq->n_pkts = 0;
> +	mq->n_bytes = 0;
> +	mq->n_err = 0;
> +	mq->intr_handle.fd = -1;
> +	mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
> +	mq->pmd = pmd;
> +	dev->data->tx_queues[qid] = mq;
> +
> +	return 0;
> +}
> +
> +static int
> +memif_rx_queue_setup(struct rte_eth_dev *dev,
> +		     uint16_t qid,
> +		     uint16_t nb_rx_desc __rte_unused,
> +		     unsigned int socket_id __rte_unused,
> +		     const struct rte_eth_rxconf *rx_conf __rte_unused,
> +		     struct rte_mempool *mb_pool)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +	struct memif_queue *mq;
> +
> +	mq = rte_realloc(pmd->rx_queues, sizeof(struct memif_queue) * (qid + 1),
> +			 0);
> +	if (mq == NULL) {
> +		MIF_LOG(ERR, "%s: Failed to alloc rx queue %u.",
> +			rte_vdev_device_name(pmd->vdev), qid);
> +		return -ENOMEM;
> +	}
> +
> +	pmd->rx_queues = mq;
> +
> +	mq->type =
> +	    (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_M2S : MEMIF_RING_S2M;
> +	mq->n_pkts = 0;
> +	mq->n_bytes = 0;
> +	mq->n_err = 0;
> +	mq->intr_handle.fd = -1;
> +	mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
> +	mq->mempool = mb_pool;
> +	mq->in_port = dev->data->port_id;
> +	mq->pmd = pmd;
> +	dev->data->rx_queues[qid] = mq;
> +
> +	return 0;
> +}
> +
> +static int
> +memif_link_update(struct rte_eth_dev *dev __rte_unused,
> +		  int wait_to_complete __rte_unused)
> +{
> +	return 0;
> +}
> +
> +static int memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +	struct memif_queue *mq;
> +	int i;
> +
> +	stats->ipackets = 0;
> +	stats->ibytes = 0;
> +	stats->opackets = 0;
> +	stats->obytes = 0;
> +	stats->oerrors = 0;
> +
> +	uint8_t tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_s2m_rings :
> +	    pmd->run.num_m2s_rings;
> +	uint8_t nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
> +	    RTE_ETHDEV_QUEUE_STAT_CNTRS;
> +
> +	/* RX stats */
> +	for (i = 0; i < nq; i++) {
> +		mq = &pmd->rx_queues[i];
> +		stats->q_ipackets[i] = mq->n_pkts;
> +		stats->q_ibytes[i] = mq->n_bytes;
> +		stats->ipackets += mq->n_pkts;
> +		stats->ibytes += mq->n_bytes;
> +	}
> +
> +	tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_m2s_rings :
> +	    pmd->run.num_s2m_rings;
> +	nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
> +	    RTE_ETHDEV_QUEUE_STAT_CNTRS;
> +
> +	/* TX stats */
> +	for (i = 0; i < nq; i++) {
> +		mq = &pmd->tx_queues[i];
> +		stats->q_opackets[i] = mq->n_pkts;
> +		stats->q_obytes[i] = mq->n_bytes;
> +		stats->q_errors[i] = mq->n_err;
> +		stats->opackets += mq->n_pkts;
> +		stats->obytes += mq->n_bytes;
> +		stats->oerrors += mq->n_err;
> +	}
> +	return 0;
> +}
> +
> +static void memif_stats_reset(struct rte_eth_dev *dev)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +	int i;
> +	struct memif_queue *mq;
> +
> +	for (i = 0; i < pmd->run.num_s2m_rings; i++) {
> +		mq = (pmd->role == MEMIF_ROLE_SLAVE) ? &pmd->tx_queues[i] :
> +		    &pmd->rx_queues[i];
> +		mq->n_pkts = 0;
> +		mq->n_bytes = 0;
> +		mq->n_err = 0;
> +	}
> +	for (i = 0; i < pmd->run.num_m2s_rings; i++) {
> +		mq = (pmd->role == MEMIF_ROLE_SLAVE) ? &pmd->rx_queues[i] :
> +		    &pmd->tx_queues[i];
> +		mq->n_pkts = 0;
> +		mq->n_bytes = 0;
> +		mq->n_err = 0;
> +	}
> +}
> +
> +static int
> +memif_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +
> +	MIF_LOG(WARNING, "%s: Interrupt mode not supported.",
> +		rte_vdev_device_name(pmd->vdev));
> +
> +	/* Enable MEMIF interrupts. */
> +	/* pmd->rx_queues[qid].ring->flags  &= ~MEMIF_RING_FLAG_MASK_INT; */
> +
> +	/*
> +	 * TODO: Tell dpdk to use interrupt mode.
> +	 *
> +	 * return rte_intr_enable(&pmd->rx_queues[qid].intr_handle);
> +	 */
> +	return -1;
> +}
> +
> +static int
> +memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
> +{
> +	struct pmd_internals *pmd __rte_unused = dev->data->dev_private;
> +
> +	/* Disable MEMIF interrupts. */
> +	/* pmd->rx_queues[qid].ring->flags |= MEMIF_RING_FLAG_MASK_INT; */
> +
> +	/*
> +	 * TODO: Tell dpdk to use polling mode.
> +	 *
> +	 * return rte_intr_disable(&pmd->rx_queues[qid].intr_handle);
> +	 */
> +	return 0;
> +}
> +
> +static const struct eth_dev_ops ops = {
> +	.dev_start = memif_dev_start,
> +	.dev_infos_get = memif_dev_info,
> +	.dev_configure = memif_dev_configure,
> +	.tx_queue_setup = memif_tx_queue_setup,
> +	.rx_queue_setup = memif_rx_queue_setup,
> +	.rx_queue_intr_enable = memif_rx_queue_intr_enable,
> +	.rx_queue_intr_disable = memif_rx_queue_intr_disable,
> +	.link_update = memif_link_update,
> +	.stats_get = memif_stats_get,
> +	.stats_reset = memif_stats_reset,
> +};
> +
> +static int
> +memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
> +	     memif_interface_id_t id, uint32_t flags,
> +	     const char *socket_filename,
> +	     memif_log2_ring_size_t log2_ring_size, uint8_t nrxq,
> +	     uint8_t ntxq, uint16_t buffer_size, const char *secret,
> +	     const char *eth_addr)
> +{
> +	int ret = 0;
> +	struct rte_eth_dev *eth_dev;
> +	struct rte_eth_dev_data *data;
> +	struct pmd_internals *pmd;
> +	const unsigned int numa_node = vdev->device.numa_node;
> +	const char *name = rte_vdev_device_name(vdev);
> +
> +	if (flags & ETH_MEMIF_FLAG_ZERO_COPY) {
> +		MIF_LOG(ERR, "Zero-copy not supported.");
> +		return -1;
> +	}
> +
> +	eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
> +	if (eth_dev == NULL) {
> +		MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
> +		return -1;
> +	}
> +
> +	pmd = eth_dev->data->dev_private;
> +	memset(pmd, 0, sizeof(*pmd));
> +
> +	pmd->if_index = id;
> +	pmd->vdev = vdev;
> +	pmd->id = id;
> +	pmd->flags = flags;
> +	pmd->flags |= ETH_MEMIF_FLAG_DISABLED;
> +	pmd->role = role;
> +	ret = memif_socket_init(eth_dev, socket_filename);
> +	if (ret < 0)
> +		return ret;
> +
> +	memset(pmd->secret, 0, sizeof(char) * 24);
> +	if (secret != NULL)
> +		strncpy(pmd->secret, secret,
> +			(strlen(secret) >= 24) ? 24 : strlen(secret));
> +
> +	pmd->cfg.log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE;
> +	if (log2_ring_size != 0)
> +		pmd->cfg.log2_ring_size = log2_ring_size;
> +	pmd->cfg.num_s2m_rings = ETH_MEMIF_DEFAULT_NRXQ;
> +	pmd->cfg.num_m2s_rings = ETH_MEMIF_DEFAULT_NTXQ;
> +
> +	if (nrxq != 0) {
> +		if (role == MEMIF_ROLE_SLAVE)
> +			pmd->cfg.num_m2s_rings = nrxq;
> +		else
> +			pmd->cfg.num_s2m_rings = nrxq;
> +	}
> +	if (ntxq != 0) {
> +		if (role == MEMIF_ROLE_SLAVE)
> +			pmd->cfg.num_s2m_rings = ntxq;
> +		else
> +			pmd->cfg.num_m2s_rings = ntxq;
> +	}
> +
> +	pmd->cfg.buffer_size = ETH_MEMIF_DEFAULT_BUFFER_SIZE;
> +	if (buffer_size != 0)
> +		pmd->cfg.buffer_size = buffer_size;
> +
> +	/* FIXME: generate mac? */
> +	if (eth_addr == NULL)
> +		eth_addr = ETH_MEMIF_DEFAULT_ETH_ADDR;
> +
> +	sscanf(eth_addr, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
> +	       &pmd->eth_addr.addr_bytes[0], &pmd->eth_addr.addr_bytes[1],
> +	       &pmd->eth_addr.addr_bytes[2], &pmd->eth_addr.addr_bytes[3],
> +	       &pmd->eth_addr.addr_bytes[4], &pmd->eth_addr.addr_bytes[5]);
> +
> +	data = eth_dev->data;
> +	data->dev_private = pmd;
> +	data->numa_node = numa_node;
> +	data->mac_addrs = &pmd->eth_addr;
> +
> +	eth_dev->dev_ops = &ops;
> +	eth_dev->device = &vdev->device;
> +	eth_dev->rx_pkt_burst = eth_memif_rx;
> +	eth_dev->tx_pkt_burst = eth_memif_tx;
> +
> +	rte_eth_dev_probing_finish(eth_dev);
> +
> +	return ret;
> +}
> +
> +static int
> +memif_set_role(const char *key __rte_unused, const char *value,
> +	       void *extra_args)
> +{
> +	enum memif_role_t *role = (enum memif_role_t *)extra_args;
> +	if (strstr(value, "master") != NULL) {
> +		*role = MEMIF_ROLE_MASTER;
> +	} else if (strstr(value, "slave") != NULL) {
> +		*role = MEMIF_ROLE_SLAVE;

Single line blocks
> +	} else {
> +		MIF_LOG(ERR, "Unknown role: %s.", value);
> +		return -EINVAL;
> +	}
> +	return 0;
> +}
> +
> +static int
> +memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args)
> +{
> +	uint32_t *flags = (uint32_t *) extra_args;
> +
> +	if (strstr(value, "yes") != NULL) {
> +		*flags |= ETH_MEMIF_FLAG_ZERO_COPY;
> +	} else if (strstr(value, "no") != NULL) {
> +		*flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;

Single line blocks.
> +	} else {
> +		MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value);
> +		return -EINVAL;
> +	}
> +	return 0;
> +}
> +
> +static int
> +memif_set_id(const char *key __rte_unused, const char *value, void *extra_args)
> +{
> +	memif_interface_id_t *id = (memif_interface_id_t *) extra_args;
> +	/* even if parsing fails, 0 is a valid id */
> +	*id = strtoul(value, NULL, 10);
> +	return 0;
> +}
> +
> +static int
> +memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args)
> +{
> +	unsigned long int tmp;
> +	uint16_t *buffer_size = (uint16_t *) extra_args;
> +
> +	tmp = strtoul(value, NULL, 10);
> +	if ((tmp == 0) || (tmp > 0xFFFF)) {
> +		MIF_LOG(ERR, "Invalid buffer size: %s.", value);
> +		return -EINVAL;
> +	}
> +	*buffer_size = tmp;
> +	return 0;
> +}
> +
> +static int
> +memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args)
> +{
> +	unsigned long int tmp;
> +	memif_log2_ring_size_t *log2_ring_size =
> +	    (memif_log2_ring_size_t *) extra_args;
> +
> +	tmp = strtoul(value, NULL, 10);
> +	if ((tmp == 0) || (tmp > ETH_MEMIF_MAX_LOG2_RING_SIZE)) {
> +		MIF_LOG(ERR, "Invalid ring size: %s (max %u).",
> +			value, ETH_MEMIF_MAX_LOG2_RING_SIZE);
> +		return -EINVAL;
> +	}
> +	*log2_ring_size = tmp;
> +	return 0;
> +}
> +
> +static int
> +memif_set_nq(const char *key __rte_unused, const char *value, void *extra_args)
> +{
> +	unsigned long int tmp;
> +	uint16_t *nq = (uint16_t *) extra_args;
> +
> +	tmp = strtoul(value, NULL, 10);
> +	if ((tmp == 0) || (tmp > 0xFF)) {
> +		MIF_LOG(ERR, "Invalid number of queues: %s.", value);
> +		return -EINVAL;
> +	}
> +	*nq = tmp;
> +	return 0;
> +}
> +
> +/* check if directory exists and if we have permission to read/write */
> +static inline int memif_check_socket_filename(const char *filename)
> +{
> +	char *dir = NULL, *tmp;
> +	uint32_t idx;
> +	int ret = 0;
> +
> +	tmp = strrchr(filename, '/');
> +	if (tmp != NULL) {
> +		idx = tmp - filename;
> +		dir = rte_zmalloc("memif_tmp", sizeof(char) * (idx + 2), 0);
> +		if (dir == NULL) {
> +			MIF_LOG(ERR, "Failed to allocate memory.");
> +			return -1;
> +		}
> +		strncpy(dir, filename, idx);
> +	}
> +
> +	if ((dir == NULL) || (faccessat(-1, dir, F_OK | R_OK |
> +					W_OK, AT_EACCESS) < 0)) {
> +		MIF_LOG(ERR, "Invalid directory: %s.", dir);
> +		ret = -EINVAL;
> +	}
> +
> +	if (dir != NULL)
> +		rte_free(dir);
> +
> +	return ret;
> +}
> +
> +static int rte_pmd_memif_probe(struct rte_vdev_device *vdev)
> +{
> +	int ret = 0;
> +	unsigned int i;
> +	struct rte_kvargs *kvlist;
> +	const struct rte_kvargs_pair *pair;
> +
> +	const char *name = rte_vdev_device_name(vdev);
> +
> +	enum memif_role_t role;
> +	memif_interface_id_t id;
> +
> +	uint16_t buffer_size;
> +	memif_log2_ring_size_t log2_ring_size;
> +	uint8_t nrxq, ntxq;
> +	const char *socket_filename;
> +	const char *eth_addr;
> +	uint32_t flags;
> +	const char *secret;
> +
> +	MIF_LOG(INFO, "Initialize MEMIF: %s.", name);
> +
> +	kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments);
> +
> +	/* set default values */
> +	role = MEMIF_ROLE_SLAVE;
> +	flags = 0;
> +	id = 0;
> +	buffer_size = 2048;
> +	log2_ring_size = 10;
> +	nrxq = 1;
> +	ntxq = 1;
> +	socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME;
> +	secret = NULL;
> +	eth_addr = NULL;
> +
> +	/* parse parameters */
> +	if (kvlist != NULL) {
> +		if (rte_kvargs_count(kvlist, ETH_MEMIF_ROLE_ARG) == 1) {
> +			ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG,
> +						 &memif_set_role, &role);
> +			if (ret < 0)
> +				goto exit;
> +		}
> +		if (rte_kvargs_count(kvlist, ETH_MEMIF_ID_ARG) == 1) {
> +			ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG,
> +						 &memif_set_id, &id);
> +			if (ret < 0)
> +				goto exit;
> +		}
> +		if (rte_kvargs_count(kvlist, ETH_MEMIF_BUFFER_SIZE_ARG) == 1) {
> +			ret =
> +			    rte_kvargs_process(kvlist,
> +					       ETH_MEMIF_BUFFER_SIZE_ARG,
> +					       &memif_set_bs, &buffer_size);
> +			if (ret < 0)
> +				goto exit;
> +		}
> +		if (rte_kvargs_count(kvlist, ETH_MEMIF_RING_SIZE_ARG) == 1) {
> +			ret =
> +			    rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG,
> +					       &memif_set_rs, &log2_ring_size);
> +			if (ret < 0)
> +				goto exit;
> +		}
> +		if (rte_kvargs_count(kvlist, ETH_MEMIF_NRXQ_ARG) == 1) {
> +			ret = rte_kvargs_process(kvlist, ETH_MEMIF_NRXQ_ARG,
> +						 &memif_set_nq, &nrxq);
> +			if (ret < 0)
> +				goto exit;
> +		}
> +		if (rte_kvargs_count(kvlist, ETH_MEMIF_NTXQ_ARG) == 1) {
> +			ret = rte_kvargs_process(kvlist, ETH_MEMIF_NTXQ_ARG,
> +						 &memif_set_nq, &ntxq);
> +			if (ret < 0)
> +				goto exit;
> +		}
> +		if (rte_kvargs_count(kvlist, ETH_MEMIF_SOCKET_ARG) == 1) {
> +			for (i = 0; i < kvlist->count; i++) {
> +				pair = &kvlist->pairs[i];
> +				if (strcmp(pair->key, ETH_MEMIF_SOCKET_ARG) ==
> +				    0) {
> +					socket_filename = pair->value;
> +					ret =
> +					    memif_check_socket_filename
> +					    (socket_filename);
> +					if (ret < 0)
> +						goto exit;
> +				}
> +			}
> +		}
> +		if (rte_kvargs_count(kvlist, ETH_MEMIF_MAC_ARG) == 1) {
> +			for (i = 0; i < kvlist->count; i++) {
> +				pair = &kvlist->pairs[i];
> +				if (strcmp(pair->key, ETH_MEMIF_MAC_ARG) == 0) {
> +					eth_addr = pair->value;
> +				}
> +			}
> +		}
> +		if (rte_kvargs_count(kvlist, ETH_MEMIF_ZC_ARG) == 1) {
> +			ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG,
> +						 &memif_set_zc, &flags);
> +			if (ret < 0)
> +				goto exit;
> +		}
> +		if (rte_kvargs_count(kvlist, ETH_MEMIF_SECRET_ARG) == 1) {
> +			for (i = 0; i < kvlist->count; i++) {
> +				pair = &kvlist->pairs[i];
> +				if (strcmp(pair->key, ETH_MEMIF_SECRET_ARG) ==
> +				    0) {
> +					secret = pair->value;
> +				}

Single line blocks in this function.
> +			}
> +		}
> +	}
> +
> +	/* create interface */
> +	ret =
> +	    memif_create(vdev, role, id, flags, socket_filename, log2_ring_size,
> +			 nrxq, ntxq, buffer_size, secret, eth_addr);
> +
> + exit:
> +	if (kvlist != NULL)
> +		rte_kvargs_free(kvlist);
> +	return ret;
> +}
> +
> +static int rte_pmd_memif_remove(struct rte_vdev_device *vdev)
> +{
> +	struct rte_eth_dev *eth_dev;
> +
> +	eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
> +	if (eth_dev == NULL)
> +		return 0;
> +
> +	struct pmd_internals *pmd = eth_dev->data->dev_private;
> +
> +	memif_msg_enq_disconnect(pmd->cc, "Invalid message size", 0);
> +	memif_disconnect(eth_dev);
> +
> +	memif_socket_remove_device(pmd);
> +
> +	pmd->vdev = NULL;
> +
> +	rte_free(eth_dev->data->dev_private);
> +
> +	rte_eth_dev_release_port(eth_dev);
> +
> +	return 0;
> +}
> +
> +static struct rte_vdev_driver pmd_memif_drv = {
> +	.probe = rte_pmd_memif_probe,
> +	.remove = rte_pmd_memif_remove,
> +};
> +
> +RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv);
> +RTE_PMD_REGISTER_ALIAS(net_memif, eth_memif);
> +RTE_PMD_REGISTER_PARAM_STRING(net_memif,
> +			      ETH_MEMIF_ID_ARG "=<int>"
> +			      ETH_MEMIF_ROLE_ARG "=<string>"
> +			      ETH_MEMIF_BUFFER_SIZE_ARG "=<int>"
> +			      ETH_MEMIF_RING_SIZE_ARG "=<int>"
> +			      ETH_MEMIF_NRXQ_ARG "=<int>"
> +			      ETH_MEMIF_NTXQ_ARG "=<int>"
> +			      ETH_MEMIF_SOCKET_ARG "=<string>"
> +			      ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx"
> +			      ETH_MEMIF_ZC_ARG "=<string>"
> +			      ETH_MEMIF_SECRET_ARG "=<string>");
> +
> +RTE_INIT(memif_init_log)
> +{
> +	memif_logtype = rte_log_register("pmd.net.memif");
> +	if (memif_logtype >= 0)
> +		rte_log_set_level(memif_logtype, RTE_LOG_NOTICE);
> +}
> diff --git a/drivers/net/memif/rte_eth_memif.h b/drivers/net/memif/rte_eth_memif.h
> new file mode 100644
> index 000000000..bbd79e1a5
> --- /dev/null
> +++ b/drivers/net/memif/rte_eth_memif.h
> @@ -0,0 +1,189 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
> + */
> +
> +#ifndef _RTE_ETH_MEMIF_H_
> +#define _RTE_ETH_MEMIF_H_
> +
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif				/* GNU_SOURCE */
> +
> +#include <stdio.h>
> +#include <sys/queue.h>
> +
> +#include <rte_ethdev_driver.h>
> +#include <rte_ether.h>
> +#include <rte_timer.h>
> +#include <rte_interrupts.h>
> +
> +#include <memif.h>
> +
> +/* generate mac? */
> +#define ETH_MEMIF_DEFAULT_ETH_ADDR		"01:ab:23:cd:45:ef"
> +
> +#define ETH_MEMIF_DEFAULT_SOCKET_FILENAME	"/tmp/memif.sock"
> +#define ETH_MEMIF_DEFAULT_RING_SIZE		10
> +#define ETH_MEMIF_DEFAULT_NRXQ			1
> +#define ETH_MEMIF_DEFAULT_NTXQ			1
> +#define ETH_MEMIF_DEFAULT_BUFFER_SIZE		2048
> +
> +#define ETH_MEMIF_MAX_NUM_Q_PAIRS		256
> +#define ETH_MEMIF_MAX_LOG2_RING_SIZE		14
> +#define ETH_MEMIF_MAX_REGION_IDX		255
> +
> +int memif_logtype;
> +
> +#define memif_min(a,b) (((a) < (b)) ? (a) : (b))
> +
> +#define MIF_LOG(level, fmt, args...) \
> +do {							\
> +	rte_log(RTE_LOG_ ## level, memif_logtype,	\
> +		"%s(): " fmt "\n", __func__, ##args);	\
> +} while (0)
> +
> +enum memif_role_t {
> +	MEMIF_ROLE_MASTER = 0,
> +	MEMIF_ROLE_SLAVE = 1,
> +};
> +
> +/* Shared memory region. */
> +struct memif_region {
> +	void *addr;
> +	memif_region_size_t region_size;
> +	int fd;
> +	uint32_t buffer_offset;
> +};
> +
> +struct memif_queue {
> +	struct rte_mempool *mempool;
> +	uint16_t in_port;
> +
> +	struct pmd_internals *pmd;
> +
> +	struct rte_intr_handle intr_handle;
> +
> +	/* ring info */
> +	memif_ring_type_t type;
> +	memif_ring_t *ring;
> +	memif_log2_ring_size_t log2_ring_size;
> +
> +	memif_region_index_t region;
> +	memif_region_offset_t offset;
> +
> +	uint16_t last_head;
> +	uint16_t last_tail;
> +	uint32_t *buffers;
> +
> +	/* rx/tx info */
> +	uint64_t n_pkts;
> +	uint64_t n_bytes;
> +	uint64_t n_err;
> +};
> +
> +struct pmd_internals {
> +	int if_index;
> +	memif_interface_id_t id;
> +	enum memif_role_t role;
> +	uint32_t flags;
> +#define ETH_MEMIF_FLAG_CONNECTING	(1 << 0)
> +#define ETH_MEMIF_FLAG_CONNECTED	(1 << 1)
> +#define ETH_MEMIF_FLAG_ZERO_COPY	(1 << 2)
> +/* device has not been configured and can not accept connection requests */
> +#define ETH_MEMIF_FLAG_DISABLED		(1 << 3)
> +
> +	struct ether_addr eth_addr;
> +	char *socket_filename;
> +	char secret[24];
> +
> +	struct memif_control_channel *cc;
> +
> +	struct memif_region *regions;
> +	uint8_t regions_num;
> +
> +	struct memif_queue *rx_queues;
> +	struct memif_queue *tx_queues;
> +
> +	/* remote info */
> +	char remote_name[64];
> +	char remote_if_name[64];
> +
> +	/* Configured parameters (max values) */
> +	struct {
> +		memif_log2_ring_size_t log2_ring_size;
> +		uint8_t num_s2m_rings;
> +		uint8_t num_m2s_rings;
> +		uint16_t buffer_size;
> +	} cfg;
> +
> +	/* Parameters used in active connection */
> +	struct {
> +		memif_log2_ring_size_t log2_ring_size;
> +		uint8_t num_s2m_rings;
> +		uint8_t num_m2s_rings;
> +		uint16_t buffer_size;
> +	} run;
> +
> +	char local_disc_string[96];
> +	char remote_disc_string[96];
> +
> +	/* vdev handle */
> +	struct rte_vdev_device *vdev;
> +};
> +
> +void memif_free_regions(struct pmd_internals *pmd);
> +
> +/*
> + * Finalize connection establishment process. Map shared memory file
> + * (master role), initialize ring queue, set link status up.
> + */
> +int memif_connect(struct pmd_internals *pmd);
> +
> +/*
> + * Create shared memory file and initialize ring queue.
> + * Only called by slave when establishing connection
> + */
> +int memif_init_regions_and_queues(struct pmd_internals *pmd);
> +
> +const char *memif_version(void);
> +
> +#ifndef MFD_HUGETLB
> +#ifndef __NR_memfd_create
> +
> +#if defined __x86_64__
> +#define __NR_memfd_create 319
> +#elif defined __arm__
> +#define __NR_memfd_create 385
> +#elif defined __aarch64__
> +#define __NR_memfd_create 279
> +#else
> +#error "__NR_memfd_create unknown for this architecture"
> +#endif
> +
> +#endif				/* __NR_memfd_create */
> +
> +static inline int memfd_create(const char *name, unsigned int flags)
> +{
> +	return syscall(__NR_memfd_create, name, flags);
> +}
> +#endif				/* MFD_HUGETLB */
> +
> +#ifndef F_LINUX_SPECIFIC_BASE
> +#define F_LINUX_SPECIFIC_BASE 1024
> +#endif
> +
> +#ifndef MFD_ALLOW_SEALING
> +#define MFD_ALLOW_SEALING       0x0002U
> +#endif
> +
> +#ifndef F_ADD_SEALS
> +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
> +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
> +
> +#define F_SEAL_SEAL     0x0001	/* prevent further seals from being set */
> +#define F_SEAL_SHRINK   0x0002	/* prevent file from shrinking */
> +#define F_SEAL_GROW     0x0004	/* prevent file from growing */
> +#define F_SEAL_WRITE    0x0008	/* prevent writes */
> +#endif
> +
> +#endif				/* RTE_ETH_MEMIF_H */
> diff --git a/drivers/net/memif/rte_pmd_memif_version.map b/drivers/net/memif/rte_pmd_memif_version.map
> new file mode 100644
> index 000000000..aee560afa
> --- /dev/null
> +++ b/drivers/net/memif/rte_pmd_memif_version.map
> @@ -0,0 +1,4 @@
> +DPDK_2.0 {
> +
> +        local: *;
> +};

Should be DPDK_19.02 for the release number

> diff --git a/drivers/net/meson.build b/drivers/net/meson.build
> index 980eec233..b0becbf31 100644
> --- a/drivers/net/meson.build
> +++ b/drivers/net/meson.build
> @@ -21,6 +21,7 @@ drivers = ['af_packet',
> 	'ixgbe',
> 	'kni',
> 	'liquidio',
> +	'memif',
> 	'mlx4',
> 	'mlx5',
> 	'mvneta',
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index 5699d979d..f236c5ebc 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -168,6 +168,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
> _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI)        += -lrte_pmd_kni
> endif
> _LDLIBS-$(CONFIG_RTE_LIBRTE_LIO_PMD)        += -lrte_pmd_lio
> +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF)      += -lrte_pmd_memif
> ifeq ($(CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS),y)
> _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4 -ldl
> else
> -- 
> 2.17.1
> 

Regards,
Keith

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [RFC v2] /net: memory interface (memif)
  2018-12-10 14:48   ` Wiles, Keith
@ 2018-12-10 15:13     ` Wiles, Keith
  2018-12-17  4:26       ` Honnappa Nagarahalli
  0 siblings, 1 reply; 13+ messages in thread
From: Wiles, Keith @ 2018-12-10 15:13 UTC (permalink / raw)
  To: Jakub Grajciar; +Cc: dev



> On Dec 10, 2018, at 8:48 AM, Wiles, Keith <keith.wiles@intel.com> wrote:
> 
> 
> 
>> On Dec 10, 2018, at 4:06 AM, Jakub Grajciar <jgrajcia@cisco.com> wrote:
> 
> I do not like being the coding style police, but that is most of the comments here and I will try to test this one later this week. Plus I am sure I missed some style problems, if you have not read the coding style for DPDK please have a read.
> 
> http://doc.dpdk.org/guides/contributing/coding_style.html
> 
> One comment, why did you include all of the code to handle memif instead of including the libmemif.a from VPP. I worry if libmemif is changed then we have a breakage. I do not mind the PMD being standalone and I do like not having the dependence.
> 
> As I did not dive into the code much it does look reasonable and I hope to give it a try later this week.
>> 

A couple more items: do you plan on writing the documentation for the PMD and providing an example program?

Regards,
Keith

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [RFC v2] /net: memory interface (memif)
  2018-12-10 10:06 ` [dpdk-dev] [RFC v2] " Jakub Grajciar
  2018-12-10 10:42   ` Burakov, Anatoly
  2018-12-10 14:48   ` Wiles, Keith
@ 2018-12-10 16:20   ` Stephen Hemminger
  2018-12-11  7:39   ` Ananyev, Konstantin
  3 siblings, 0 replies; 13+ messages in thread
From: Stephen Hemminger @ 2018-12-10 16:20 UTC (permalink / raw)
  To: Jakub Grajciar; +Cc: dev

Things I saw so far, probably not everything

> +
> +_Static_assert(sizeof(memif_msg_t) == 128, "Size of memif_msg_t must be 128");
> +

DPDK uses RTE_BUILD_BUG_ON for checks like this.

> diff --git a/drivers/net/memif/Makefile b/drivers/net/memif/Makefile
> new file mode 100644
> index 000000000..a82448423
> --- /dev/null
> +++ b/drivers/net/memif/Makefile
> @@ -0,0 +1,29 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright 2018 Cisco Systems, Inc.  All rights reserved.
> +
> +include $(RTE_SDK)/mk/rte.vars.mk
> +
> +#
> +# library name
> +#
> +LIB = librte_pmd_memif.a
> +
> +EXPORT_MAP := rte_pmd_memif_version.map
> +
> +LIBABIVER := 1
> +
> +CFLAGS += -O3
> +CFLAGS += -I$(SRCDIR)
> +CFLAGS += $(WERROR_FLAGS)
> +CFLAGS += -Wno-pointer-arith
> +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
> +LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
> +LDLIBS += -lrte_bus_vdev
> +
> +#
> +# all source are stored in SRCS-y
> +#
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += rte_eth_memif.c
> +SR

What about meson build?

...

> +
> +/*
> + * fd.io coding-style-patch-verification: ON
> + *
> + * Local Variables:
> + * eval: (c-set-style "gnu")
> + * End:
> + */


This is DPDK not FD.io, this must be removed.


> +static inline ssize_t memif_msg_send(int fd, memif_msg_t * msg, int afd)
> +{

inline is not necessary with current-generation compilers.
It is only needed in header files.

> +
> +#define memif_min(a,b) (((a) < (b)) ? (a) : (b))

Use RTE_MIN() instead.

Also, what about documentation??

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [RFC v2] /net: memory interface (memif)
  2018-12-10 10:06 ` [dpdk-dev] [RFC v2] " Jakub Grajciar
                     ` (2 preceding siblings ...)
  2018-12-10 16:20   ` Stephen Hemminger
@ 2018-12-11  7:39   ` Ananyev, Konstantin
  3 siblings, 0 replies; 13+ messages in thread
From: Ananyev, Konstantin @ 2018-12-11  7:39 UTC (permalink / raw)
  To: Jakub Grajciar, dev

Hi,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Jakub Grajciar
> Sent: Monday, December 10, 2018 10:07 AM
> To: dev@dpdk.org
> Cc: Jakub Grajciar <jgrajcia@cisco.com>
> Subject: [dpdk-dev] [RFC v2] /net: memory interface (memif)

Could you please add some explanation -
what this driver is intended to be used for (which pseudo-device, etc.).
Konstantin

> 
> Signed-off-by: Jakub Grajciar <jgrajcia@cisco.com>
> ---
>  config/common_base                          |    5 +
>  config/common_linuxapp                      |    1 +
>  drivers/net/Makefile                        |    1 +
>  drivers/net/memif/Makefile                  |   29 +
>  drivers/net/memif/memif.h                   |  156 +++
>  drivers/net/memif/memif_socket.c            | 1085 +++++++++++++++++
>  drivers/net/memif/memif_socket.h            |   57 +
>  drivers/net/memif/meson.build               |    8 +
>  drivers/net/memif/rte_eth_memif.c           | 1172 +++++++++++++++++++
>  drivers/net/memif/rte_eth_memif.h           |  189 +++
>  drivers/net/memif/rte_pmd_memif_version.map |    4 +
>  drivers/net/meson.build                     |    1 +
>  mk/rte.app.mk                               |    1 +
>  13 files changed, 2709 insertions(+)
>  create mode 100644 drivers/net/memif/Makefile
>  create mode 100644 drivers/net/memif/memif.h
>  create mode 100644 drivers/net/memif/memif_socket.c
>  create mode 100644 drivers/net/memif/memif_socket.h
>  create mode 100644 drivers/net/memif/meson.build
>  create mode 100644 drivers/net/memif/rte_eth_memif.c
>  create mode 100644 drivers/net/memif/rte_eth_memif.h
>  create mode 100644 drivers/net/memif/rte_pmd_memif_version.map
> 
> diff --git a/config/common_base b/config/common_base
> index d12ae98bc..b8ed10ae5 100644
> --- a/config/common_base
> +++ b/config/common_base
> @@ -403,6 +403,11 @@ CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_TX_FREE=n
>  #
>  CONFIG_RTE_LIBRTE_PMD_AF_PACKET=n
> 
> +#
> +# Compile Memory Interface PMD driver (Linux only)
> +#
> +CONFIG_RTE_LIBRTE_PMD_MEMIF=n
> +
>  #
>  # Compile link bonding PMD library
>  #
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 6c1c8d0f4..42cbde8f5 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -18,6 +18,7 @@ CONFIG_RTE_LIBRTE_VHOST_POSTCOPY=n
>  CONFIG_RTE_LIBRTE_PMD_VHOST=y
>  CONFIG_RTE_LIBRTE_IFC_PMD=y
>  CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
> +CONFIG_RTE_LIBRTE_PMD_MEMIF=y
>  CONFIG_RTE_LIBRTE_PMD_SOFTNIC=y
>  CONFIG_RTE_LIBRTE_PMD_TAP=y
>  CONFIG_RTE_LIBRTE_AVP_PMD=y
> diff --git a/drivers/net/Makefile b/drivers/net/Makefile
> index c0386feb9..0feab5241 100644
> --- a/drivers/net/Makefile
> +++ b/drivers/net/Makefile
> @@ -32,6 +32,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
>  DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
>  DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
>  DIRS-$(CONFIG_RTE_LIBRTE_LIO_PMD) += liquidio
> +DIRS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += memif
>  DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
>  DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
>  DIRS-$(CONFIG_RTE_LIBRTE_MVNETA_PMD) += mvneta
> diff --git a/drivers/net/memif/Makefile b/drivers/net/memif/Makefile
> new file mode 100644
> index 000000000..a82448423
> --- /dev/null
> +++ b/drivers/net/memif/Makefile
> @@ -0,0 +1,29 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright 2018 Cisco Systems, Inc.  All rights reserved.
> +
> +include $(RTE_SDK)/mk/rte.vars.mk
> +
> +#
> +# library name
> +#
> +LIB = librte_pmd_memif.a
> +
> +EXPORT_MAP := rte_pmd_memif_version.map
> +
> +LIBABIVER := 1
> +
> +CFLAGS += -O3
> +CFLAGS += -I$(SRCDIR)
> +CFLAGS += $(WERROR_FLAGS)
> +CFLAGS += -Wno-pointer-arith
> +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
> +LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
> +LDLIBS += -lrte_bus_vdev
> +
> +#
> +# all source are stored in SRCS-y
> +#
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += rte_eth_memif.c
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += memif_socket.c
> +
> +include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/drivers/net/memif/memif.h b/drivers/net/memif/memif.h
> new file mode 100644
> index 000000000..6a23dbad7
> --- /dev/null
> +++ b/drivers/net/memif/memif.h
> @@ -0,0 +1,156 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
> + */
> +
> +#ifndef _MEMIF_H_
> +#define _MEMIF_H_
> +
> +#ifndef MEMIF_CACHELINE_SIZE
> +#define MEMIF_CACHELINE_SIZE 64
> +#endif
> +
> +#define MEMIF_COOKIE		0x3E31F20
> +#define MEMIF_VERSION_MAJOR	2
> +#define MEMIF_VERSION_MINOR	0
> +#define MEMIF_VERSION		((MEMIF_VERSION_MAJOR << 8) | MEMIF_VERSION_MINOR)
> +
> +/*
> + *  Type definitions
> + */
> +
> +typedef enum memif_msg_type {
> +	MEMIF_MSG_TYPE_NONE = 0,
> +	MEMIF_MSG_TYPE_ACK = 1,
> +	MEMIF_MSG_TYPE_HELLO = 2,
> +	MEMIF_MSG_TYPE_INIT = 3,
> +	MEMIF_MSG_TYPE_ADD_REGION = 4,
> +	MEMIF_MSG_TYPE_ADD_RING = 5,
> +	MEMIF_MSG_TYPE_CONNECT = 6,
> +	MEMIF_MSG_TYPE_CONNECTED = 7,
> +	MEMIF_MSG_TYPE_DISCONNECT = 8,
> +} memif_msg_type_t;
> +
> +typedef enum {
> +	MEMIF_RING_S2M = 0,
> +	MEMIF_RING_M2S = 1
> +} memif_ring_type_t;
> +
> +typedef enum {
> +	MEMIF_INTERFACE_MODE_ETHERNET = 0,
> +	MEMIF_INTERFACE_MODE_IP = 1,
> +	MEMIF_INTERFACE_MODE_PUNT_INJECT = 2,
> +} memif_interface_mode_t;
> +
> +typedef uint16_t memif_region_index_t;
> +typedef uint32_t memif_region_offset_t;
> +typedef uint64_t memif_region_size_t;
> +typedef uint16_t memif_ring_index_t;
> +typedef uint32_t memif_interface_id_t;
> +typedef uint16_t memif_version_t;
> +typedef uint8_t memif_log2_ring_size_t;
> +
> +/*
> + *  Socket messages
> + */
> +
> +typedef struct __attribute__ ((packed)) {
> +	uint8_t name[32];
> +	memif_version_t min_version;
> +	memif_version_t max_version;
> +	memif_region_index_t max_region;
> +	memif_ring_index_t max_m2s_ring;
> +	memif_ring_index_t max_s2m_ring;
> +	memif_log2_ring_size_t max_log2_ring_size;
> +} memif_msg_hello_t;
> +
> +typedef struct __attribute__ ((packed)) {
> +	memif_version_t version;
> +	memif_interface_id_t id;
> +	memif_interface_mode_t mode:8;
> +	uint8_t secret[24];
> +	uint8_t name[32];
> +} memif_msg_init_t;
> +
> +typedef struct __attribute__ ((packed)) {
> +	memif_region_index_t index;
> +	memif_region_size_t size;
> +} memif_msg_add_region_t;
> +
> +typedef struct __attribute__ ((packed)) {
> +	uint16_t flags;
> +#define MEMIF_MSG_ADD_RING_FLAG_S2M	(1 << 0)
> +	memif_ring_index_t index;
> +	memif_region_index_t region;
> +	memif_region_offset_t offset;
> +	memif_log2_ring_size_t log2_ring_size;
> +	uint16_t private_hdr_size;	/* used for private metadata */
> +} memif_msg_add_ring_t;
> +
> +typedef struct __attribute__ ((packed)) {
> +	uint8_t if_name[32];
> +} memif_msg_connect_t;
> +
> +typedef struct __attribute__ ((packed)) {
> +	uint8_t if_name[32];
> +} memif_msg_connected_t;
> +
> +typedef struct __attribute__ ((packed)) {
> +	uint32_t code;
> +	uint8_t string[96];
> +} memif_msg_disconnect_t;
> +
> +typedef struct __attribute__ ((packed, aligned(128))) {
> +	memif_msg_type_t type:16;
> +	union {
> +		memif_msg_hello_t hello;
> +		memif_msg_init_t init;
> +		memif_msg_add_region_t add_region;
> +		memif_msg_add_ring_t add_ring;
> +		memif_msg_connect_t connect;
> +		memif_msg_connected_t connected;
> +		memif_msg_disconnect_t disconnect;
> +	};
> +} memif_msg_t;
> +
> +_Static_assert(sizeof(memif_msg_t) == 128, "Size of memif_msg_t must be 128");
> +
> +/*
> + *  Ring and Descriptor Layout
> + */
> +
> +typedef struct __attribute__ ((packed)) {
> +	uint16_t flags;
> +#define MEMIF_DESC_FLAG_NEXT (1 << 0)
> +	memif_region_index_t region;
> +	uint32_t length;
> +	memif_region_offset_t offset;
> +	uint32_t metadata;
> +} memif_desc_t;
> +
> +_Static_assert(sizeof(memif_desc_t) == 16,
> +	       "Size of memif_dsct_t must be 16 bytes");
> +
> +#define MEMIF_CACHELINE_ALIGN_MARK(mark) \
> +  uint8_t mark[0] __attribute__((aligned(MEMIF_CACHELINE_SIZE)))
> +
> +typedef struct {
> +	MEMIF_CACHELINE_ALIGN_MARK(cacheline0);
> +	uint32_t cookie;
> +	uint16_t flags;
> +#define MEMIF_RING_FLAG_MASK_INT 1
> +	volatile uint16_t head;
> +	 MEMIF_CACHELINE_ALIGN_MARK(cacheline1);
> +	volatile uint16_t tail;
> +	 MEMIF_CACHELINE_ALIGN_MARK(cacheline2);
> +	memif_desc_t desc[0];
> +} memif_ring_t;
> +
> +#endif				/* _MEMIF_H_ */
> +
> +/*
> + * fd.io coding-style-patch-verification: ON
> + *
> + * Local Variables:
> + * eval: (c-set-style "gnu")
> + * End:
> + */
> diff --git a/drivers/net/memif/memif_socket.c b/drivers/net/memif/memif_socket.c
> new file mode 100644
> index 000000000..afd4ac888
> --- /dev/null
> +++ b/drivers/net/memif/memif_socket.c
> @@ -0,0 +1,1085 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
> + */
> +
> +#include <stdlib.h>
> +#include <fcntl.h>
> +#include <unistd.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <sys/ioctl.h>
> +#include <errno.h>
> +
> +#include <rte_version.h>
> +#include <rte_mbuf.h>
> +#include <rte_ether.h>
> +#include <rte_ethdev_driver.h>
> +#include <rte_ethdev_vdev.h>
> +#include <rte_malloc.h>
> +#include <rte_kvargs.h>
> +#include <rte_bus_vdev.h>
> +#include <rte_hash.h>
> +#include <rte_jhash.h>
> +
> +#include <rte_eth_memif.h>
> +#include <memif_socket.h>
> +
> +static void memif_intr_handler(void *arg);
> +
> +static inline ssize_t memif_msg_send(int fd, memif_msg_t * msg, int afd)
> +{
> +	struct msghdr mh = { 0 };
> +	struct iovec iov[1];
> +	char ctl[CMSG_SPACE(sizeof(int))];
> +
> +	iov[0].iov_base = (void *)msg;
> +	iov[0].iov_len = sizeof(memif_msg_t);
> +	mh.msg_iov = iov;
> +	mh.msg_iovlen = 1;
> +
> +	if (afd > 0) {
> +		struct cmsghdr *cmsg;
> +		memset(&ctl, 0, sizeof(ctl));
> +		mh.msg_control = ctl;
> +		mh.msg_controllen = sizeof(ctl);
> +		cmsg = CMSG_FIRSTHDR(&mh);
> +		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
> +		cmsg->cmsg_level = SOL_SOCKET;
> +		cmsg->cmsg_type = SCM_RIGHTS;
> +		rte_memcpy(CMSG_DATA(cmsg), &afd, sizeof(int));
> +	}
> +
> +	return sendmsg(fd, &mh, 0);
> +}
> +
> +/*
> + * Transmit the message at the head of the control channel queue, if any.
> + * The head element is unlinked and released whether or not the send worked.
> + * Returns 0 when the queue was empty or a whole memif_msg_t was sent,
> + * -1 otherwise.
> + */
> +static inline int memif_msg_send_from_queue(struct memif_control_channel *cc)
> +{
> +	struct memif_msg_queue_elt *head = TAILQ_FIRST(&cc->msg_queue);
> +	ssize_t sent;
> +	int status;
> +
> +	if (head == NULL)
> +		return 0;
> +
> +	sent = memif_msg_send(cc->intr_handle.fd, &head->msg, head->fd);
> +	if (sent == sizeof(memif_msg_t)) {
> +		MIF_LOG(DEBUG, "%s: Sent msg type %u.",
> +			(cc->pmd != NULL) ?
> +			rte_vdev_device_name(cc->pmd->vdev) : "memif_driver",
> +			head->msg.type);
> +		status = 0;
> +	} else {
> +		MIF_LOG(ERR, "sendmsg fail: %s.", strerror(errno));
> +		status = -1;
> +	}
> +
> +	TAILQ_REMOVE(&cc->msg_queue, head, next);
> +	rte_free(head);
> +
> +	return status;
> +}
> +
> +/*
> + * Allocate a zero-filled control message element, mark it as carrying no
> + * file descriptor (fd = -1) and append it to the tail of cc's message queue.
> + * Returns the new element, or NULL when the allocation failed.
> + */
> +static inline struct memif_msg_queue_elt *
> +memif_msg_enq(struct memif_control_channel *cc)
> +{
> +	struct memif_msg_queue_elt *elt;
> +
> +	elt = rte_zmalloc("memif_msg", sizeof(struct memif_msg_queue_elt), 0);
> +	if (elt != NULL) {
> +		elt->fd = -1;
> +		TAILQ_INSERT_TAIL(&cc->msg_queue, elt, next);
> +		return elt;
> +	}
> +
> +	MIF_LOG(ERR, "Failed to allocate control message.");
> +	return NULL;
> +}
> +
> +/*
> + * Queue a DISCONNECT message on control channel cc.
> + *
> + * reason (may be NULL) is copied into the message and, when a device is
> + * attached to the channel, into the device's local disconnect string.
> + * err_code is stored in the message code field.
> + * Only logs a warning when the message element cannot be allocated.
> + */
> +void
> +memif_msg_enq_disconnect(struct memif_control_channel *cc, const char *reason,
> +			 int err_code)
> +{
> +	struct memif_msg_queue_elt *e = memif_msg_enq(cc);
> +	if (e == NULL) {
> +		MIF_LOG(WARNING, "%s: Failed to enqueue disconnect message.",
> +			(cc->pmd !=
> +			 NULL) ? rte_vdev_device_name(cc->pmd->
> +						      vdev) : "memif_driver");
> +		return;
> +	}
> +
> +	memif_msg_disconnect_t *d = &e->msg.disconnect;
> +
> +	e->msg.type = MEMIF_MSG_TYPE_DISCONNECT;
> +	d->code = err_code;
> +
> +	if (reason != NULL) {
> +		/*
> +		 * Bound the copy by the destination, not by strlen(reason).
> +		 * The element is zero-filled, so the last byte stays '\0'.
> +		 * Assumes d->string is an array member - TODO confirm.
> +		 */
> +		strncpy((char *)d->string, reason, sizeof(d->string) - 1);
> +		if (cc->pmd != NULL) {
> +			/* 96 is the size this file uses to clear the buffer */
> +			strncpy(cc->pmd->local_disc_string, reason, 96 - 1);
> +			cc->pmd->local_disc_string[96 - 1] = '\0';
> +		}
> +	}
> +}
> +
> +/*
> + * Queue a HELLO message on cc advertising the supported protocol version and
> + * the driver limits (ring counts, region index, ring size), with the DPDK
> + * version string as the name.
> + * Returns 0 on success, -1 when the message element cannot be allocated.
> + */
> +static int memif_msg_enq_hello(struct memif_control_channel *cc)
> +{
> +	struct memif_msg_queue_elt *e = memif_msg_enq(cc);
> +	if (e == NULL)
> +		return -1;
> +
> +	memif_msg_hello_t *h = &e->msg.hello;
> +
> +	e->msg.type = MEMIF_MSG_TYPE_HELLO;
> +	h->min_version = MEMIF_VERSION;
> +	h->max_version = MEMIF_VERSION;
> +	h->max_s2m_ring = ETH_MEMIF_MAX_NUM_Q_PAIRS;
> +	h->max_m2s_ring = ETH_MEMIF_MAX_NUM_Q_PAIRS;
> +	h->max_region = ETH_MEMIF_MAX_REGION_IDX;
> +	h->max_log2_ring_size = ETH_MEMIF_MAX_LOG2_RING_SIZE;
> +
> +	/*
> +	 * Bound the copy by the destination field; the element is zero-filled
> +	 * so the name stays terminated.
> +	 * Assumes h->name is an array member - TODO confirm.
> +	 */
> +	strncpy((char *)h->name, rte_version(), sizeof(h->name) - 1);
> +
> +	return 0;
> +}
> +
> +/*
> + * Handle a received HELLO message: check that MEMIF_VERSION lies within the
> + * peer's advertised range, derive the run-time ring parameters from the
> + * peer limits and the local configuration, and remember the peer name.
> + * Returns 0 on success; on version mismatch queues a disconnect message and
> + * returns -1.
> + */
> +static int memif_msg_receive_hello(struct pmd_internals *pmd, memif_msg_t * msg)
> +{
> +	memif_msg_hello_t *h = &msg->hello;
> +	size_t len;
> +
> +	if (h->min_version > MEMIF_VERSION || h->max_version < MEMIF_VERSION) {
> +		memif_msg_enq_disconnect(pmd->cc, "Incompatible memif version",
> +					 0);
> +		return -1;
> +	}
> +
> +	/* Set parameters for active connection */
> +	pmd->run.num_s2m_rings = memif_min(h->max_s2m_ring + 1,
> +					   pmd->cfg.num_s2m_rings);
> +	pmd->run.num_m2s_rings = memif_min(h->max_m2s_ring + 1,
> +					   pmd->cfg.num_m2s_rings);
> +	pmd->run.log2_ring_size = memif_min(h->max_log2_ring_size,
> +					    pmd->cfg.log2_ring_size);
> +	pmd->run.buffer_size = pmd->cfg.buffer_size;
> +
> +	/*
> +	 * The name comes from the peer and may lack a terminator, so never
> +	 * read past the message field nor write past the destination.
> +	 * Assumes both are array members - TODO confirm.
> +	 */
> +	len = sizeof(pmd->remote_name) - 1;
> +	if (len > sizeof(h->name))
> +		len = sizeof(h->name);
> +	strncpy(pmd->remote_name, (char *)h->name, len);
> +	pmd->remote_name[len] = '\0';
> +
> +	MIF_LOG(DEBUG, "%s: Connecting to %s.",
> +		rte_vdev_device_name(pmd->vdev), pmd->remote_name);
> +
> +	return 0;
> +}
> +
> +/*
> + * Handle a received INIT message on a control channel accepted by a listener
> + * socket: match it to the first enabled device with the requested ID on that
> + * socket, bind the channel and the device to each other, and validate mode,
> + * connection state and secret.
> + * Returns 0 and marks the device as connecting on success; otherwise queues
> + * a disconnect message and returns -1.
> + */
> +static int
> +memif_msg_receive_init(struct memif_control_channel *cc, memif_msg_t * msg)
> +{
> +	memif_msg_init_t *i = &msg->init;
> +	struct memif_socket_pmd_list_elt *elt;
> +	struct pmd_internals *pmd = NULL;
> +	size_t len;
> +
> +	if (i->version != MEMIF_VERSION) {
> +		memif_msg_enq_disconnect(cc, "Incompatible memif version", 0);
> +		return -1;
> +	}
> +
> +	if (cc->socket == NULL) {
> +		memif_msg_enq_disconnect(cc, "Device error", 0);
> +		return -1;
> +	}
> +
> +	/* Find device with requested ID */
> +	TAILQ_FOREACH(elt, &cc->socket->pmd_queue, next) {
> +		if (((elt->pmd->flags & ETH_MEMIF_FLAG_DISABLED) == 0)
> +		    && (elt->pmd->id == i->id)) {
> +			pmd = elt->pmd;
> +			break;
> +		}
> +	}
> +
> +	if (pmd == NULL) {
> +		/* ID not found on this socket */
> +		MIF_LOG(DEBUG, "ID %u not found.", i->id);
> +		memif_msg_enq_disconnect(cc, "ID not found", 0);
> +		return -1;
> +	}
> +
> +	/*
> +	 * assign control channel to device
> +	 * NOTE(review): this replaces pmd->cc before the "Already connected"
> +	 * check below, so a second INIT for a connected device takes over
> +	 * its channel pointer - confirm that this is intended.
> +	 */
> +	cc->pmd = pmd;
> +	pmd->cc = cc;
> +
> +	if (i->mode != MEMIF_INTERFACE_MODE_ETHERNET) {
> +		memif_msg_enq_disconnect(pmd->cc,
> +					 "Only ethernet mode supported", 0);
> +		return -1;
> +	}
> +
> +	/* bit test: only the connecting/connected flags matter here */
> +	if (pmd->flags & (ETH_MEMIF_FLAG_CONNECTING |
> +			  ETH_MEMIF_FLAG_CONNECTED)) {
> +		memif_msg_enq_disconnect(pmd->cc, "Already connected", 0);
> +		return -1;
> +	}
> +
> +	/*
> +	 * The name comes from the peer and may lack a terminator, so never
> +	 * read past the message field nor write past the destination.
> +	 * Assumes both are array members - TODO confirm.
> +	 */
> +	len = sizeof(pmd->remote_name) - 1;
> +	if (len > sizeof(i->name))
> +		len = sizeof(i->name);
> +	strncpy(pmd->remote_name, (char *)i->name, len);
> +	pmd->remote_name[len] = '\0';
> +
> +	if (*pmd->secret != '\0') {
> +		if (*i->secret == '\0') {
> +			memif_msg_enq_disconnect(pmd->cc, "Secret required",
> +						 0);
> +			return -1;
> +		}
> +		/* bounded compare: the peer's secret may be unterminated */
> +		if (strncmp(pmd->secret, (char *)i->secret,
> +			    sizeof(i->secret)) != 0) {
> +			memif_msg_enq_disconnect(pmd->cc, "Incorrect secret",
> +						 0);
> +			return -1;
> +		}
> +	}
> +
> +	pmd->flags |= ETH_MEMIF_FLAG_CONNECTING;
> +	return 0;
> +}
> +
> +/*
> + * Handle a received ADD_REGION message: resize the device's region table to
> + * (index + 1) entries and record the passed file descriptor and region size
> + * at the requested index.
> + * Returns 0 on success; on a missing fd, an index above
> + * ETH_MEMIF_MAX_REGION_IDX or an allocation failure, queues a disconnect
> + * message and returns -1.
> + *
> + * NOTE(review): the table is always resized to index + 1, so an index lower
> + * than one already received shrinks it - confirm regions arrive in order.
> + */
> +static int
> +memif_msg_receive_add_region(struct pmd_internals *pmd, memif_msg_t * msg,
> +			     int fd)
> +{
> +	memif_msg_add_region_t *ar = &msg->add_region;
> +	struct memif_region *table;
> +	struct memif_region *slot;
> +
> +	if (fd < 0) {
> +		memif_msg_enq_disconnect(pmd->cc, "Missing region fd", 0);
> +		return -1;
> +	}
> +
> +	if (ar->index > ETH_MEMIF_MAX_REGION_IDX) {
> +		memif_msg_enq_disconnect(pmd->cc, "Invalid region index", 0);
> +		return -1;
> +	}
> +
> +	table = rte_realloc(pmd->regions,
> +			    sizeof(struct memif_region) * (ar->index + 1), 0);
> +	if (table == NULL) {
> +		memif_msg_enq_disconnect(pmd->cc, "Device error", 0);
> +		return -1;
> +	}
> +	pmd->regions = table;
> +
> +	slot = &pmd->regions[ar->index];
> +	slot->fd = fd;
> +	slot->region_size = ar->size;
> +	slot->addr = NULL;
> +	pmd->regions_num++;
> +
> +	return 0;
> +}
> +
> +/*
> + * Handle a received ADD_RING message: validate the ring index against the
> + * configured number of rings for its direction, bump the matching run-time
> + * ring counter and store the interrupt fd and ring layout in the queue
> + * (rx queue for S2M rings, tx queue otherwise).
> + * Returns 0 on success; on a missing fd or invalid index, queues a
> + * disconnect message and returns -1.
> + */
> +static int
> +memif_msg_receive_add_ring(struct pmd_internals *pmd, memif_msg_t * msg, int fd)
> +{
> +	memif_msg_add_ring_t *ar = &msg->add_ring;
> +	struct memif_queue *mq;
> +
> +	if (fd < 0) {
> +		memif_msg_enq_disconnect(pmd->cc, "Missing interrupt fd", 0);
> +		return -1;
> +	}
> +
> +	/* check if we have enough queues */
> +	if (ar->flags & MEMIF_MSG_ADD_RING_FLAG_S2M) {
> +		if (ar->index >= pmd->cfg.num_s2m_rings)
> +			goto bad_index;
> +		pmd->run.num_s2m_rings++;
> +		mq = &pmd->rx_queues[ar->index];
> +	} else {
> +		if (ar->index >= pmd->cfg.num_m2s_rings)
> +			goto bad_index;
> +		pmd->run.num_m2s_rings++;
> +		mq = &pmd->tx_queues[ar->index];
> +	}
> +
> +	mq->intr_handle.fd = fd;
> +	mq->log2_ring_size = ar->log2_ring_size;
> +	mq->region = ar->region;
> +	mq->offset = ar->offset;
> +
> +	return 0;
> +
> + bad_index:
> +	memif_msg_enq_disconnect(pmd->cc, "Invalid ring index", 0);
> +	return -1;
> +}
> +
> +/*
> + * Handle a received CONNECT message: bring the connection up through
> + * memif_connect() and remember the peer's interface name.
> + * Returns 0 on success or the negative result of memif_connect().
> + */
> +static int
> +memif_msg_receive_connect(struct pmd_internals *pmd, memif_msg_t * msg)
> +{
> +	memif_msg_connect_t *c = &msg->connect;
> +	size_t len;
> +	int ret;
> +
> +	ret = memif_connect(pmd);
> +	if (ret < 0)
> +		return ret;
> +
> +	/*
> +	 * The name comes from the peer and may lack a terminator, so never
> +	 * read past the message field nor write past the destination.
> +	 * Assumes both are array members - TODO confirm.
> +	 */
> +	len = sizeof(pmd->remote_if_name) - 1;
> +	if (len > sizeof(c->if_name))
> +		len = sizeof(c->if_name);
> +	strncpy(pmd->remote_if_name, (char *)c->if_name, len);
> +	pmd->remote_if_name[len] = '\0';
> +
> +	MIF_LOG(INFO, "%s: Remote interface %s connected.",
> +		rte_vdev_device_name(pmd->vdev), pmd->remote_if_name);
> +
> +	return 0;
> +}
> +
> +/*
> + * Handle a received CONNECTED message: bring the connection up through
> + * memif_connect() and remember the peer's interface name.
> + * Returns 0 on success or the negative result of memif_connect().
> + */
> +static int
> +memif_msg_receive_connected(struct pmd_internals *pmd, memif_msg_t * msg)
> +{
> +	memif_msg_connected_t *c = &msg->connected;
> +	size_t len;
> +	int ret;
> +
> +	ret = memif_connect(pmd);
> +	if (ret < 0)
> +		return ret;
> +
> +	/*
> +	 * The name comes from the peer and may lack a terminator, so never
> +	 * read past the message field nor write past the destination.
> +	 * Assumes both are array members - TODO confirm.
> +	 */
> +	len = sizeof(pmd->remote_if_name) - 1;
> +	if (len > sizeof(c->if_name))
> +		len = sizeof(c->if_name);
> +	strncpy(pmd->remote_if_name, (char *)c->if_name, len);
> +	pmd->remote_if_name[len] = '\0';
> +
> +	MIF_LOG(INFO, "%s: Remote interface %s connected.",
> +		rte_vdev_device_name(pmd->vdev), pmd->remote_if_name);
> +
> +	return 0;
> +}
> +
> +/*
> + * Handle a received DISCONNECT message: store the peer's reason string,
> + * clear the local one and disconnect the device.
> + * Returns 0 on success, -1 when no ethdev is allocated under the device's
> + * vdev name.
> + */
> +static int
> +memif_msg_receive_disconnect(struct pmd_internals *pmd, memif_msg_t * msg)
> +{
> +	memif_msg_disconnect_t *d = &msg->disconnect;
> +	struct rte_eth_dev *dev;
> +	size_t len;
> +
> +	/*
> +	 * The reason comes from the peer and may lack a terminator, so never
> +	 * read past the message field nor write past the destination; the
> +	 * memset keeps the result terminated.
> +	 * Assumes d->string is an array member - TODO confirm.
> +	 */
> +	len = sizeof(pmd->remote_disc_string) - 1;
> +	if (len > sizeof(d->string))
> +		len = sizeof(d->string);
> +	memset(pmd->remote_disc_string, 0, sizeof(pmd->remote_disc_string));
> +	strncpy(pmd->remote_disc_string, (char *)d->string, len);
> +
> +	MIF_LOG(INFO, "%s: Disconnect received: %s",
> +		rte_vdev_device_name(pmd->vdev), pmd->remote_disc_string);
> +
> +	memset(pmd->local_disc_string, 0, 96);
> +
> +	/* memif_disconnect() dereferences dev, so a failed lookup must stop */
> +	dev = rte_eth_dev_allocated(rte_vdev_device_name(pmd->vdev));
> +	if (dev == NULL) {
> +		MIF_LOG(WARNING, "%s: eth dev not allocated",
> +			rte_vdev_device_name(pmd->vdev));
> +		return -1;
> +	}
> +	memif_disconnect(dev);
> +	return 0;
> +}
> +
> +/*
> + * Queue an ACK message on the device's control channel.
> + * Returns 0 on success, -1 when the message element cannot be allocated.
> + */
> +static int memif_msg_enq_ack(struct pmd_internals *pmd)
> +{
> +	struct memif_msg_queue_elt *ack = memif_msg_enq(pmd->cc);
> +
> +	if (ack != NULL) {
> +		ack->msg.type = MEMIF_MSG_TYPE_ACK;
> +		return 0;
> +	}
> +
> +	return -1;
> +}
> +
> +/*
> + * Queue an INIT message on the device's control channel carrying the
> + * protocol version, device id, ethernet mode, the DPDK version string as
> + * name and the configured secret.
> + * Returns 0 on success, -1 when the message element cannot be allocated.
> + */
> +static int memif_msg_enq_init(struct pmd_internals *pmd)
> +{
> +	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
> +	if (e == NULL)
> +		return -1;
> +
> +	memif_msg_init_t *i = &e->msg.init;
> +
> +	e->msg.type = MEMIF_MSG_TYPE_INIT;
> +	i->version = MEMIF_VERSION;
> +	i->id = pmd->id;
> +	i->mode = MEMIF_INTERFACE_MODE_ETHERNET;
> +
> +	/*
> +	 * Bound the copy by the destination field, as done for the secret
> +	 * below; the element is zero-filled so the name stays terminated.
> +	 * Assumes i->name is an array member - TODO confirm.
> +	 */
> +	strncpy((char *)i->name, rte_version(), sizeof(i->name) - 1);
> +
> +	if (pmd->secret) {
> +		strncpy((char *)i->secret, pmd->secret, sizeof(i->secret) - 1);
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Queue an ADD_REGION message describing region idx of the device; the
> + * region's file descriptor is attached to the message element.
> + * Returns 0 on success, -1 when the message element cannot be allocated.
> + */
> +static int memif_msg_enq_add_region(struct pmd_internals *pmd, uint8_t idx)
> +{
> +	struct memif_msg_queue_elt *elt = memif_msg_enq(pmd->cc);
> +	const struct memif_region *region;
> +
> +	if (elt == NULL)
> +		return -1;
> +
> +	region = &pmd->regions[idx];
> +
> +	elt->fd = region->fd;
> +	elt->msg.type = MEMIF_MSG_TYPE_ADD_REGION;
> +	elt->msg.add_region.index = idx;
> +	elt->msg.add_region.size = region->region_size;
> +
> +	return 0;
> +}
> +
> +/*
> + * Queue an ADD_RING message for ring idx of the given type. The queue
> + * describing the ring is the tx queue for S2M rings and the rx queue
> + * otherwise; its interrupt fd is attached to the message element.
> + * Returns 0 on success, -1 when the message element cannot be allocated.
> + */
> +static int
> +memif_msg_enq_add_ring(struct pmd_internals *pmd, uint8_t idx,
> +		       memif_ring_type_t type)
> +{
> +	struct memif_msg_queue_elt *elt = memif_msg_enq(pmd->cc);
> +	memif_msg_add_ring_t *ring_msg;
> +	struct memif_queue *queue;
> +
> +	if (elt == NULL)
> +		return -1;
> +
> +	ring_msg = &elt->msg.add_ring;
> +
> +	if (type == MEMIF_RING_S2M) {
> +		queue = &pmd->tx_queues[idx];
> +		ring_msg->flags = MEMIF_MSG_ADD_RING_FLAG_S2M;
> +	} else {
> +		queue = &pmd->rx_queues[idx];
> +		ring_msg->flags = 0;
> +	}
> +
> +	elt->msg.type = MEMIF_MSG_TYPE_ADD_RING;
> +	elt->fd = queue->intr_handle.fd;
> +	ring_msg->index = idx;
> +	ring_msg->offset = queue->offset;
> +	ring_msg->region = queue->region;
> +	ring_msg->log2_ring_size = queue->log2_ring_size;
> +	ring_msg->private_hdr_size = 0;
> +
> +	return 0;
> +}
> +
> +/*
> + * Queue a CONNECT message carrying the device's vdev name as interface name.
> + * Returns 0 on success, -1 when the message element cannot be allocated.
> + */
> +static int memif_msg_enq_connect(struct pmd_internals *pmd)
> +{
> +	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
> +	if (e == NULL)
> +		return -1;
> +
> +	memif_msg_connect_t *c = &e->msg.connect;
> +	const char *name = rte_vdev_device_name(pmd->vdev);
> +
> +	e->msg.type = MEMIF_MSG_TYPE_CONNECT;
> +	/*
> +	 * Bound the copy by the destination field, not by the device name
> +	 * length; the element is zero-filled so the name stays terminated.
> +	 * Assumes c->if_name is an array member - TODO confirm.
> +	 */
> +	strncpy((char *)c->if_name, name, sizeof(c->if_name) - 1);
> +
> +	return 0;
> +}
> +
> +/*
> + * Queue a CONNECTED message carrying the device's vdev name as interface
> + * name.
> + * Returns 0 on success, -1 when the message element cannot be allocated.
> + */
> +static int memif_msg_enq_connected(struct pmd_internals *pmd)
> +{
> +	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
> +	if (e == NULL)
> +		return -1;
> +
> +	memif_msg_connected_t *c = &e->msg.connected;
> +
> +	const char *name = rte_vdev_device_name(pmd->vdev);
> +
> +	e->msg.type = MEMIF_MSG_TYPE_CONNECTED;
> +	/*
> +	 * Bound the copy by the destination field, not by the device name
> +	 * length; the element is zero-filled so the name stays terminated.
> +	 * Assumes c->if_name is an array member - TODO confirm.
> +	 */
> +	strncpy((char *)c->if_name, name, sizeof(c->if_name) - 1);
> +
> +	return 0;
> +}
> +
> +/*
> + * Cleanup hook passed to rte_intr_callback_unregister_pending(): closes the
> + * control channel fd, releases every message still queued on the channel
> + * and frees the channel itself. arg is the struct memif_control_channel.
> + */
> +static void
> +memif_intr_unregister_handler(struct rte_intr_handle *intr_handle, void *arg)
> +{
> +	struct memif_msg_queue_elt *elt;
> +	struct memif_control_channel *cc = arg;
> +	/* close control channel fd */
> +	close(intr_handle->fd);
> +	/* clear message queue */
> +	while ((elt = TAILQ_FIRST(&cc->msg_queue)) != NULL) {
> +		TAILQ_REMOVE(&cc->msg_queue, elt, next);
> +		/* elements come from rte_zmalloc(), so use rte_free() */
> +		rte_free(elt);
> +	}
> +	/* free control channel */
> +	rte_free(cc);
> +}
> +
> +/*
> + * Disconnect the memif device.
> + *
> + * Drops all queued control messages except DISCONNECT ones, sends the first
> + * remaining message, unregisters the control channel callback (deferred via
> + * rte_intr_callback_unregister_pending() when the callback is currently
> + * active), closes the queue interrupt fds, frees the regions and clears the
> + * link status and the connecting/connected flags.
> + */
> +void memif_disconnect(struct rte_eth_dev *dev)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +	struct memif_control_channel *cc = pmd->cc;
> +	struct memif_msg_queue_elt *elt, *next;
> +	struct memif_queue *mq;
> +	int i;
> +	int ret;
> +
> +	if (cc != NULL) {
> +		/*
> +		 * Clear control message queue (except disconnect message if
> +		 * any). Walk with a saved successor: looping on TAILQ_FIRST
> +		 * never terminates once a disconnect message is at the head.
> +		 */
> +		for (elt = TAILQ_FIRST(&cc->msg_queue); elt != NULL;
> +		     elt = next) {
> +			next = TAILQ_NEXT(elt, next);
> +			if (elt->msg.type != MEMIF_MSG_TYPE_DISCONNECT) {
> +				TAILQ_REMOVE(&cc->msg_queue, elt, next);
> +				/* allocated with rte_zmalloc() */
> +				rte_free(elt);
> +			}
> +		}
> +		/* send disconnect message (if there is any in queue) */
> +		memif_msg_send_from_queue(cc);
> +
> +		/* at this point, there should be no more messages in queue */
> +		if (TAILQ_FIRST(&cc->msg_queue) != NULL) {
> +			MIF_LOG(WARNING,
> +				"%s: Unexpected message(s) in message queue.",
> +				rte_vdev_device_name(pmd->vdev));
> +		}
> +
> +		if (cc->intr_handle.fd > 0) {
> +			ret = rte_intr_callback_unregister(&cc->intr_handle,
> +							   memif_intr_handler,
> +							   cc);
> +			/*
> +			 * If callback is active (disconnecting based on
> +			 * received control message).
> +			 */
> +			if (ret == -EAGAIN) {
> +				ret = rte_intr_callback_unregister_pending(
> +						&cc->intr_handle,
> +						memif_intr_handler, cc,
> +						memif_intr_unregister_handler);
> +			} else if (ret > 0) {
> +				close(cc->intr_handle.fd);
> +				/* release anything still queued */
> +				while ((elt = TAILQ_FIRST(&cc->msg_queue)) !=
> +				       NULL) {
> +					TAILQ_REMOVE(&cc->msg_queue, elt,
> +						     next);
> +					rte_free(elt);
> +				}
> +				rte_free(cc);
> +			}
> +			if (ret > 0) {
> +				/*
> +				 * The channel is freed, or will be by the
> +				 * pending cleanup hook: drop our reference so
> +				 * it cannot be used or freed again.
> +				 */
> +				pmd->cc = NULL;
> +			} else {
> +				MIF_LOG(WARNING,
> +					"%s: Failed to unregister control channel callback.",
> +					rte_vdev_device_name(pmd->vdev));
> +			}
> +		}
> +	}
> +
> +	/* unconfig interrupts */
> +	for (i = 0; i < pmd->cfg.num_s2m_rings; i++) {
> +		mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
> +		    &pmd->tx_queues[i] : &pmd->rx_queues[i];
> +		if (mq->intr_handle.fd > 0) {
> +			rte_intr_disable(&mq->intr_handle);
> +			close(mq->intr_handle.fd);
> +			mq->intr_handle.fd = -1;
> +		}
> +		mq->ring = NULL;
> +	}
> +	for (i = 0; i < pmd->cfg.num_m2s_rings; i++) {
> +		mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
> +		    &pmd->rx_queues[i] : &pmd->tx_queues[i];
> +		if (mq->intr_handle.fd > 0) {
> +			rte_intr_disable(&mq->intr_handle);
> +			close(mq->intr_handle.fd);
> +			mq->intr_handle.fd = -1;
> +		}
> +		mq->ring = NULL;
> +	}
> +
> +	memif_free_regions(pmd);
> +
> +	dev->data->dev_link.link_status = ETH_LINK_DOWN;
> +	pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
> +	pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTED;
> +	MIF_LOG(DEBUG, "%s: Disconnected.", rte_vdev_device_name(pmd->vdev));
> +}
> +
> +/*
> + * Receive one control message from the channel socket and act on it.
> + *
> + * Depending on the message type this updates the device state and queues
> + * the reply message(s) on cc; nothing is sent from here.
> + * Returns 0 on success. On failure returns a negative value, in most cases
> + * after queueing a disconnect message.
> + *
> + * A file descriptor received as SCM_RIGHTS is closed here unless an
> + * ADD_REGION or ADD_RING handler accepted it.
> + */
> +static int memif_msg_receive(struct memif_control_channel *cc)
> +{
> +	char ctl[CMSG_SPACE(sizeof(int)) +
> +		 CMSG_SPACE(sizeof(struct ucred))] = { 0 };
> +	struct msghdr mh = { 0 };
> +	struct iovec iov[1];
> +	memif_msg_t msg = { 0 };
> +	ssize_t size;
> +	int ret = 0;
> +	struct ucred *cr __rte_unused;
> +	cr = 0;
> +	struct cmsghdr *cmsg;
> +	int afd = -1;
> +	int i;
> +
> +	iov[0].iov_base = (void *)&msg;
> +	iov[0].iov_len = sizeof(memif_msg_t);
> +	mh.msg_iov = iov;
> +	mh.msg_iovlen = 1;
> +	mh.msg_control = ctl;
> +	mh.msg_controllen = sizeof(ctl);
> +
> +	size = recvmsg(cc->intr_handle.fd, &mh, 0);
> +	if (size != sizeof(memif_msg_t)) {
> +		MIF_LOG(DEBUG, "Invalid message size.");
> +		memif_msg_enq_disconnect(cc, "Invalid message size", 0);
> +		return -1;
> +	}
> +	MIF_LOG(DEBUG, "Received msg type: %u.", msg.type);
> +
> +	cmsg = CMSG_FIRSTHDR(&mh);
> +	while (cmsg) {
> +		if (cmsg->cmsg_level == SOL_SOCKET) {
> +			if (cmsg->cmsg_type == SCM_CREDENTIALS) {
> +				cr = (struct ucred *)CMSG_DATA(cmsg);
> +			} else if (cmsg->cmsg_type == SCM_RIGHTS) {
> +				/* copy out: CMSG_DATA may be unaligned */
> +				memcpy(&afd, CMSG_DATA(cmsg), sizeof(afd));
> +			}
> +		}
> +		cmsg = CMSG_NXTHDR(&mh, cmsg);
> +	}
> +
> +	if ((cc->pmd == NULL) && msg.type != MEMIF_MSG_TYPE_INIT) {
> +		MIF_LOG(DEBUG, "Unexpected message.");
> +		memif_msg_enq_disconnect(cc, "Unexpected message", 0);
> +		ret = -1;
> +		goto exit;
> +	}
> +
> +	/* dispatch on message type */
> +	switch (msg.type) {
> +	case MEMIF_MSG_TYPE_ACK:
> +		break;
> +	case MEMIF_MSG_TYPE_HELLO:
> +		ret = memif_msg_receive_hello(cc->pmd, &msg);
> +		if (ret < 0)
> +			goto exit;
> +		ret = memif_init_regions_and_queues(cc->pmd);
> +		if (ret < 0)
> +			goto exit;
> +		ret = memif_msg_enq_init(cc->pmd);
> +		if (ret < 0)
> +			goto exit;
> +		for (i = 0; i < cc->pmd->regions_num; i++) {
> +			ret = memif_msg_enq_add_region(cc->pmd, i);
> +			if (ret < 0)
> +				goto exit;
> +		}
> +		for (i = 0; i < cc->pmd->run.num_s2m_rings; i++) {
> +			ret = memif_msg_enq_add_ring(cc->pmd, i,
> +						     MEMIF_RING_S2M);
> +			if (ret < 0)
> +				goto exit;
> +		}
> +		for (i = 0; i < cc->pmd->run.num_m2s_rings; i++) {
> +			ret = memif_msg_enq_add_ring(cc->pmd, i,
> +						     MEMIF_RING_M2S);
> +			if (ret < 0)
> +				goto exit;
> +		}
> +		ret = memif_msg_enq_connect(cc->pmd);
> +		if (ret < 0)
> +			goto exit;
> +		break;
> +	case MEMIF_MSG_TYPE_INIT:
> +		ret = memif_msg_receive_init(cc, &msg);
> +		if (ret < 0)
> +			goto exit;
> +		ret = memif_msg_enq_ack(cc->pmd);
> +		if (ret < 0)
> +			goto exit;
> +		break;
> +	case MEMIF_MSG_TYPE_ADD_REGION:
> +		ret = memif_msg_receive_add_region(cc->pmd, &msg, afd);
> +		if (ret < 0)
> +			goto exit;
> +		/* fd is now stored in the region table */
> +		afd = -1;
> +		ret = memif_msg_enq_ack(cc->pmd);
> +		if (ret < 0)
> +			goto exit;
> +		break;
> +	case MEMIF_MSG_TYPE_ADD_RING:
> +		ret = memif_msg_receive_add_ring(cc->pmd, &msg, afd);
> +		if (ret < 0)
> +			goto exit;
> +		/* fd is now stored in the queue's interrupt handle */
> +		afd = -1;
> +		ret = memif_msg_enq_ack(cc->pmd);
> +		if (ret < 0)
> +			goto exit;
> +		break;
> +	case MEMIF_MSG_TYPE_CONNECT:
> +		ret = memif_msg_receive_connect(cc->pmd, &msg);
> +		if (ret < 0)
> +			goto exit;
> +		ret = memif_msg_enq_connected(cc->pmd);
> +		if (ret < 0)
> +			goto exit;
> +		break;
> +	case MEMIF_MSG_TYPE_CONNECTED:
> +		ret = memif_msg_receive_connected(cc->pmd, &msg);
> +		break;
> +	case MEMIF_MSG_TYPE_DISCONNECT:
> +		ret = memif_msg_receive_disconnect(cc->pmd, &msg);
> +		if (ret < 0)
> +			goto exit;
> +		break;
> +	default:
> +		memif_msg_enq_disconnect(cc, "Unknown message type", 0);
> +		ret = -1;
> +		goto exit;
> +	}
> +
> + exit:
> +	/* a descriptor nobody took ownership of would otherwise leak */
> +	if (afd >= 0)
> +		close(afd);
> +	return ret;
> +}
> +
> +/*
> + * Interrupt callback of a control channel (arg is the channel).
> + *
> + * Receives one message and sends the next queued reply. When no device is
> + * attached to the channel after the receive, the callback is marked for
> + * unregistration. When receiving or sending fails, the attached device is
> + * disconnected.
> + */
> +static void memif_intr_handler(void *arg)
> +{
> +	struct memif_control_channel *cc = arg;
> +	struct rte_eth_dev *dev;
> +	int ret = memif_msg_receive(cc);
> +
> +	/* if driver failed to assign device */
> +	if (cc->pmd == NULL) {
> +		ret = rte_intr_callback_unregister_pending(&cc->intr_handle,
> +							   memif_intr_handler,
> +							   cc,
> +							   memif_intr_unregister_handler);
> +		if (ret < 0)
> +			MIF_LOG(WARNING,
> +				"Failed to unregister control channel callback.");
> +		return;
> +	}
> +
> +	/* done unless the receive or the following send failed */
> +	if (ret >= 0 && memif_msg_send_from_queue(cc) >= 0)
> +		return;
> +
> +	dev = rte_eth_dev_allocated(rte_vdev_device_name(cc->pmd->vdev));
> +	if (dev != NULL) {
> +		memif_disconnect(dev);
> +		return;
> +	}
> +
> +	MIF_LOG(WARNING, "%s: eth dev not allocated",
> +		rte_vdev_device_name(cc->pmd->vdev));
> +}
> +
> +/*
> + * Interrupt callback of a listener socket (arg is the struct memif_socket).
> + *
> + * Accepts one connection, creates a control channel for it with no device
> + * attached yet, registers memif_intr_handler on the new fd and sends the
> + * HELLO message. On any failure the callback registration (if done), the
> + * accepted fd and the channel are released.
> + */
> +static void memif_listener_handler(void *arg)
> +{
> +	struct memif_socket *socket = arg;
> +	struct memif_control_channel *cc = NULL;
> +	struct sockaddr_un client;
> +	socklen_t addr_len = sizeof(client);
> +	int registered = 0;
> +	int sockfd;
> +	int ret;
> +
> +	sockfd = accept(socket->intr_handle.fd, (struct sockaddr *)&client,
> +			&addr_len);
> +	if (sockfd < 0) {
> +		MIF_LOG(ERR,
> +			"Failed to accept connection request on socket fd %d",
> +			socket->intr_handle.fd);
> +		return;
> +	}
> +
> +	MIF_LOG(DEBUG, "%s: Connection request accepted.", socket->filename);
> +
> +	cc = rte_zmalloc("memif-cc", sizeof(struct memif_control_channel), 0);
> +	if (cc == NULL) {
> +		MIF_LOG(ERR, "Failed to allocate control channel.");
> +		goto error;
> +	}
> +
> +	cc->intr_handle.fd = sockfd;
> +	cc->intr_handle.type = RTE_INTR_HANDLE_EXT;
> +	cc->socket = socket;
> +	cc->pmd = NULL;
> +	TAILQ_INIT(&cc->msg_queue);
> +
> +	ret = rte_intr_callback_register(&cc->intr_handle, memif_intr_handler,
> +					 cc);
> +	if (ret < 0) {
> +		MIF_LOG(ERR, "Failed to register control channel callback.");
> +		goto error;
> +	}
> +	registered = 1;
> +
> +	ret = memif_msg_enq_hello(cc);
> +	if (ret < 0) {
> +		MIF_LOG(ERR, "Failed to enqueue hello message.");
> +		goto error;
> +	}
> +	ret = memif_msg_send_from_queue(cc);
> +	if (ret < 0)
> +		goto error;
> +
> +	return;
> +
> + error:
> +	/*
> +	 * The callback must not outlive the channel it points to: drop the
> +	 * registration before closing the fd and freeing cc.
> +	 */
> +	if (registered)
> +		rte_intr_callback_unregister(&cc->intr_handle,
> +					     memif_intr_handler, cc);
> +	close(sockfd);
> +	if (cc != NULL)
> +		rte_free(cc);
> +}
> +
> +/*
> + * Allocate a memif socket descriptor for the 256-byte filename key.
> + *
> + * For a listener (listener != 0) a SOCK_SEQPACKET unix socket is created,
> + * bound to the filename, put into listening state and
> + * memif_listener_handler is registered on it.
> + * Returns the new descriptor, or NULL on failure; on failure the descriptor
> + * and any opened socket fd are released.
> + */
> +static inline struct memif_socket *memif_socket_create(struct pmd_internals
> +						       *pmd, char *key,
> +						       uint8_t listener)
> +{
> +	struct memif_socket *sock;
> +	struct sockaddr_un un;
> +	int sockfd = -1;
> +	int ret;
> +	int on = 1;
> +
> +	sock = rte_zmalloc("memif-socket", sizeof(struct memif_socket), 0);
> +	if (sock == NULL) {
> +		MIF_LOG(ERR, "Failed to allocate memory for memif socket");
> +		return NULL;
> +	}
> +
> +	sock->listener = listener;
> +	rte_memcpy(sock->filename, key, 256);
> +	TAILQ_INIT(&sock->pmd_queue);
> +
> +	if (listener == 0)
> +		return sock;
> +
> +	sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
> +	if (sockfd < 0)
> +		goto error;
> +
> +	/* zero the address so sun_path is always terminated */
> +	memset(&un, 0, sizeof(un));
> +	un.sun_family = AF_UNIX;
> +	strncpy((char *)un.sun_path, (char *)sock->filename,
> +		sizeof(un.sun_path) - 1);
> +
> +	ret = setsockopt(sockfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
> +	if (ret < 0)
> +		goto error;
> +	ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
> +	if (ret < 0)
> +		goto error;
> +	ret = listen(sockfd, 1);
> +	if (ret < 0)
> +		goto error;
> +
> +	MIF_LOG(DEBUG, "%s: Memif listener socket %s created.",
> +		rte_vdev_device_name(pmd->vdev), sock->filename);
> +
> +	sock->intr_handle.fd = sockfd;
> +	sock->intr_handle.type = RTE_INTR_HANDLE_EXT;
> +	ret = rte_intr_callback_register(&sock->intr_handle,
> +					 memif_listener_handler, sock);
> +	if (ret < 0) {
> +		MIF_LOG(ERR, "%s: Failed to register interrupt "
> +			"callback for listener socket",
> +			rte_vdev_device_name(pmd->vdev));
> +		/* nothing refers to the socket yet: release it */
> +		close(sockfd);
> +		rte_free(sock);
> +		return NULL;
> +	}
> +
> +	return sock;
> +
> + error:
> +	/* log first: close() below may overwrite errno */
> +	MIF_LOG(ERR, "%s: Failed to setup socket %s: %s",
> +		rte_vdev_device_name(pmd->vdev), key, strerror(errno));
> +	if (sockfd >= 0)
> +		close(sockfd);
> +	rte_free(sock);
> +	return NULL;
> +}
> +
> +/*
> + * Create the hash table named MEMIF_SOCKET_HASH_NAME that maps 256-byte
> + * socket filename keys to socket descriptors (256 entries, jhash).
> + * Returns the result of rte_hash_create().
> + */
> +static inline struct rte_hash *memif_create_socket_hash(void)
> +{
> +	struct rte_hash_parameters hash_params = {
> +		.name = MEMIF_SOCKET_HASH_NAME,
> +		.entries = 256,
> +		.key_len = 256,
> +		.hash_func = rte_jhash,
> +		.hash_func_init_val = 0,
> +	};
> +
> +	return rte_hash_create(&hash_params);
> +}
> +
> +/*
> + * Attach the device to the memif socket identified by socket_filename.
> + *
> + * The socket is looked up in the socket hash (created on first use) and is
> + * created and added to the hash when missing; a master device creates a
> + * listener socket. The device is then appended to the socket's device list.
> + * Returns 0 on success and a negative value on failure: filename too long,
> + * allocation/hash errors, role not matching the socket type, or a device
> + * with the same id already present on the socket.
> + *
> + * NOTE(review): a socket created here is not released when a later step
> + * fails - confirm whether callers clean up.
> + */
> +int memif_socket_init(struct rte_eth_dev *dev, const char *socket_filename)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +	struct memif_socket *socket = NULL;
> +	struct memif_socket_pmd_list_elt *elt;
> +	int ret;
> +	char key[256];
> +	size_t len = strlen(socket_filename);
> +
> +	/* the key is a fixed 256-byte, zero-padded buffer */
> +	if (len >= sizeof(key)) {
> +		MIF_LOG(ERR, "Socket filename too long.");
> +		return -1;
> +	}
> +
> +	struct rte_hash *hash = rte_hash_find_existing(MEMIF_SOCKET_HASH_NAME);
> +	if (hash == NULL) {
> +		hash = memif_create_socket_hash();
> +		if (hash == NULL) {
> +			MIF_LOG(ERR, "Failed to create memif socket hash.");
> +			return -1;
> +		}
> +	}
> +
> +	memset(key, 0, sizeof(key));
> +	rte_memcpy(key, socket_filename, len);
> +	ret = rte_hash_lookup_data(hash, key, (void **)&socket);
> +	if (ret < 0) {
> +		socket = memif_socket_create(pmd, key,
> +					     (pmd->role ==
> +					      MEMIF_ROLE_SLAVE) ? 0 : 1);
> +		if (socket == NULL) {
> +			return -1;
> +		}
> +		ret = rte_hash_add_key_data(hash, key, socket);
> +		if (ret < 0) {
> +			MIF_LOG(ERR, "Failed to add socket to socket hash.");
> +			return ret;
> +		}
> +	}
> +	pmd->socket_filename = socket->filename;
> +
> +	if ((socket->listener != 0) && (pmd->role == MEMIF_ROLE_SLAVE)) {
> +		MIF_LOG(ERR, "Socket is a listener.");
> +		return -1;
> +	} else if ((socket->listener == 0) && (pmd->role == MEMIF_ROLE_MASTER)) {
> +		MIF_LOG(ERR, "Socket is not a listener.");
> +		return -1;
> +	}
> +
> +	TAILQ_FOREACH(elt, &socket->pmd_queue, next) {
> +		if (elt->pmd->id == pmd->id) {
> +			MIF_LOG(ERR, "Memif device with id %d already "
> +				"exists on socket %s",
> +				pmd->id, socket->filename);
> +			return -1;
> +		}
> +	}
> +
> +	elt =
> +	    rte_malloc("pmd-queue", sizeof(struct memif_socket_pmd_list_elt),
> +		       0);
> +	if (elt == NULL) {
> +		MIF_LOG(ERR, "%s: Failed to add device to socket device list.",
> +			rte_vdev_device_name(pmd->vdev));
> +		return -1;
> +	}
> +	elt->pmd = pmd;
> +	TAILQ_INSERT_TAIL(&socket->pmd_queue, elt, next);
> +
> +	return 0;
> +}
> +
> +/*
> + * Remove the device from the device list of the socket it is attached to.
> + * When the list becomes empty the socket is deleted from the socket hash,
> + * the socket file is removed for listener sockets and the descriptor is
> + * freed. Does nothing when the device has no socket filename, the hash does
> + * not exist or the socket is not found.
> + *
> + * NOTE(review): a listener's fd is not closed and memif_listener_handler is
> + * not unregistered here before the socket is freed - confirm this is done
> + * elsewhere.
> + */
> +void memif_socket_remove_device(struct pmd_internals *pmd)
> +{
> +	struct memif_socket *socket = NULL;
> +	struct memif_socket_pmd_list_elt *elt, *next;
> +
> +	/* not attached to any socket: there is no key to look up */
> +	if (pmd->socket_filename == NULL)
> +		return;
> +
> +	struct rte_hash *hash = rte_hash_find_existing(MEMIF_SOCKET_HASH_NAME);
> +	if (hash == NULL) {
> +		return;
> +	}
> +
> +	if (rte_hash_lookup_data(hash, pmd->socket_filename, (void **)&socket) <
> +	    0)
> +		return;
> +
> +	for (elt = TAILQ_FIRST(&socket->pmd_queue); elt != NULL; elt = next) {
> +		next = TAILQ_NEXT(elt, next);
> +		if (elt->pmd == pmd) {
> +			TAILQ_REMOVE(&socket->pmd_queue, elt, next);
> +			/* allocated with rte_malloc() in memif_socket_init() */
> +			rte_free(elt);
> +			pmd->socket_filename = NULL;
> +		}
> +	}
> +
> +	/* remove socket, if this was the last device using it */
> +	if (TAILQ_EMPTY(&socket->pmd_queue)) {
> +		rte_hash_del_key(hash, socket->filename);
> +		if (socket->listener) {
> +			/* remove listener socket file,
> +			 * so we can create new one later.
> +			 */
> +			remove(socket->filename);
> +		}
> +		rte_free(socket);
> +	}
> +}
> +
> +/*
> + * Enable a master device: clear both disconnect strings and the DISABLED
> + * flag.
> + * Returns 0 on success, -1 when the queues or the socket filename are not
> + * set up.
> + */
> +int memif_connect_master(struct rte_eth_dev *dev)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +
> +	if ((pmd->rx_queues != NULL) && (pmd->tx_queues != NULL) &&
> +	    (pmd->socket_filename != NULL)) {
> +		memset(pmd->local_disc_string, 0, 96);
> +		memset(pmd->remote_disc_string, 0, 96);
> +		pmd->flags &= ~ETH_MEMIF_FLAG_DISABLED;
> +		return 0;
> +	}
> +
> +	MIF_LOG(ERR, "%s: Device not configured!",
> +		rte_vdev_device_name(pmd->vdev));
> +	return -1;
> +}
> +
> +/*
> + * Connect a slave device to its memif socket: clear both disconnect strings
> + * and the DISABLED flag, open a SOCK_SEQPACKET unix socket, connect it to
> + * the device's socket filename, create the control channel and register
> + * memif_intr_handler on it.
> + * Returns 0 on success, -1 on failure; on failure the socket fd and the
> + * newly allocated control channel are released.
> + */
> +int memif_connect_slave(struct rte_eth_dev *dev)
> +{
> +	int sockfd;
> +	int ret;
> +	struct sockaddr_un addr;
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +	struct memif_control_channel *cc = NULL;
> +
> +	if ((pmd->rx_queues == NULL) || (pmd->tx_queues == NULL) ||
> +	    (pmd->socket_filename == NULL)) {
> +		MIF_LOG(ERR, "%s: Device not configured!",
> +			rte_vdev_device_name(pmd->vdev));
> +		return -1;
> +	}
> +
> +	memset(pmd->local_disc_string, 0, 96);
> +	memset(pmd->remote_disc_string, 0, 96);
> +	pmd->flags &= ~ETH_MEMIF_FLAG_DISABLED;
> +
> +	sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
> +	if (sockfd < 0) {
> +		MIF_LOG(ERR, "%s: Failed to open socket.",
> +			rte_vdev_device_name(pmd->vdev));
> +		return -1;
> +	}
> +
> +	/* zero the address so sun_path is always terminated */
> +	memset(&addr, 0, sizeof(addr));
> +	addr.sun_family = AF_UNIX;
> +	strncpy(addr.sun_path, pmd->socket_filename,
> +		sizeof(addr.sun_path) - 1);
> +
> +	ret = connect(sockfd, (struct sockaddr *)&addr,
> +		      sizeof(struct sockaddr_un));
> +	if (ret < 0) {
> +		MIF_LOG(ERR, "%s: Failed to connect socket: %s.",
> +			rte_vdev_device_name(pmd->vdev), pmd->socket_filename);
> +		goto error;
> +	}
> +
> +	MIF_LOG(DEBUG, "%s: Memif socket: %s connected.",
> +		rte_vdev_device_name(pmd->vdev), pmd->socket_filename);
> +
> +	cc = rte_zmalloc("memif-cc", sizeof(struct memif_control_channel), 0);
> +	if (cc == NULL) {
> +		MIF_LOG(ERR, "%s: Failed to allocate control channel.",
> +			rte_vdev_device_name(pmd->vdev));
> +		pmd->cc = NULL;
> +		goto error;
> +	}
> +
> +	cc->intr_handle.fd = sockfd;
> +	cc->intr_handle.type = RTE_INTR_HANDLE_EXT;
> +	cc->socket = NULL;
> +	cc->pmd = pmd;
> +	TAILQ_INIT(&cc->msg_queue);
> +	pmd->cc = cc;
> +
> +	ret = rte_intr_callback_register(&cc->intr_handle,
> +					 memif_intr_handler, cc);
> +	if (ret < 0) {
> +		MIF_LOG(ERR, "%s: Failed to register interrupt callback "
> +			"for controll fd", rte_vdev_device_name(pmd->vdev));
> +		pmd->cc = NULL;
> +		goto error;
> +	}
> +
> +	return 0;
> +
> + error:
> +	/*
> +	 * Only release what this call created: cc is still NULL when the
> +	 * failure happened before the channel was allocated, so a channel
> +	 * already referenced by pmd->cc is left alone.
> +	 */
> +	close(sockfd);
> +	rte_free(cc);
> +	return -1;
> +}
> diff --git a/drivers/net/memif/memif_socket.h b/drivers/net/memif/memif_socket.h
> new file mode 100644
> index 000000000..f9136dbe5
> --- /dev/null
> +++ b/drivers/net/memif/memif_socket.h
> @@ -0,0 +1,57 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
> + */
> +
> +#ifndef _MEMIF_SOCKET_H_
> +#define _MEMIF_SOCKET_H_
> +
> +#include <sys/queue.h>
> +
> +/*
> + * Remove device from socket device list. If no device is left on the socket,
> + * remove the socket as well.
> + */
> +void memif_socket_remove_device(struct pmd_internals *pmd);
> +
> +void memif_msg_enq_disconnect(struct memif_control_channel *cc,
> +			      const char *reason, int err_code);
> +
> +int memif_socket_init(struct rte_eth_dev *dev, const char *socket_filename);
> +
> +void memif_disconnect(struct rte_eth_dev *dev);
> +
> +/* Allow master to receive connection requests. */
> +int memif_connect_master(struct rte_eth_dev *dev);
> +
> +/* Send connection request. */
> +int memif_connect_slave(struct rte_eth_dev *dev);
> +
> +/* Entry of a socket's device list; one per device attached to the socket. */
> +struct memif_socket_pmd_list_elt {
> +	TAILQ_ENTRY(memif_socket_pmd_list_elt) next;	/* list linkage */
> +	struct pmd_internals *pmd;	/* device attached to the socket */
> +};
> +
> +#define MEMIF_SOCKET_HASH_NAME			"memif-sh"
> +/* Memif socket descriptor, stored in the socket hash keyed by filename. */
> +struct memif_socket {
> +	struct rte_intr_handle intr_handle;	/* listener fd and its type */
> +	uint8_t listener;	/* non-zero when this is a listener socket */
> +	char filename[256];	/* socket filename, also the hash key */
> +
> +	 TAILQ_HEAD(, memif_socket_pmd_list_elt) pmd_queue;	/* devices using this socket */
> +};
> +
> +/* Element of a control channel's message queue. */
> +struct memif_msg_queue_elt {
> +	TAILQ_ENTRY(memif_msg_queue_elt) next;	/* queue linkage */
> +	memif_msg_t msg;	/* control message to send */
> +	int fd;	/* fd to pass along with the message, -1 when unset */
> +};
> +
> +/* Control channel: one connected control socket and its outgoing messages. */
> +struct memif_control_channel {
> +	struct rte_intr_handle intr_handle;	/* control socket fd and its type */
> +	 TAILQ_HEAD(, memif_msg_queue_elt) msg_queue;	/* messages waiting to be sent */
> +	struct memif_socket *socket;	/* listener that accepted the channel, NULL on the connecting side */
> +	struct pmd_internals *pmd;	/* attached device, NULL until one is assigned */
> +};
> +
> +#endif				/* _MEMIF_SOCKET_H_ */
> diff --git a/drivers/net/memif/meson.build b/drivers/net/memif/meson.build
> new file mode 100644
> index 000000000..ea18394fd
> --- /dev/null
> +++ b/drivers/net/memif/meson.build
> @@ -0,0 +1,8 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright 2018 Cisco Systems, Inc.  All rights reserved.
> +
> +if host_machine.system() != 'linux'
> +        build = false
> +endif
> +sources = files('rte_eth_memif.c',
> +		'memif_socket.c')
> diff --git a/drivers/net/memif/rte_eth_memif.c b/drivers/net/memif/rte_eth_memif.c
> new file mode 100644
> index 000000000..7988010c4
> --- /dev/null
> +++ b/drivers/net/memif/rte_eth_memif.c
> @@ -0,0 +1,1172 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
> + */
> +
> +#include <stdlib.h>
> +#include <fcntl.h>
> +#include <unistd.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <sys/ioctl.h>
> +#include <sys/mman.h>
> +#include <linux/if_ether.h>
> +#include <errno.h>
> +#include <sys/eventfd.h>
> +
> +#include <rte_version.h>
> +#include <rte_mbuf.h>
> +#include <rte_ether.h>
> +#include <rte_ethdev_driver.h>
> +#include <rte_ethdev_vdev.h>
> +#include <rte_malloc.h>
> +#include <rte_kvargs.h>
> +#include <rte_bus_vdev.h>
> +
> +#include <rte_eth_memif.h>
> +#include <memif_socket.h>
> +
> +#define ETH_MEMIF_ID_ARG		"id"
> +#define ETH_MEMIF_ROLE_ARG		"role"
> +#define ETH_MEMIF_BUFFER_SIZE_ARG	"bsize"
> +#define ETH_MEMIF_RING_SIZE_ARG		"rsize"
> +#define ETH_MEMIF_NRXQ_ARG		"nrxq"
> +#define ETH_MEMIF_NTXQ_ARG		"ntxq"
> +#define ETH_MEMIF_SOCKET_ARG		"socket"
> +#define ETH_MEMIF_MAC_ARG		"mac"
> +#define ETH_MEMIF_ZC_ARG		"zero-copy"
> +#define ETH_MEMIF_SECRET_ARG		"secret"
> +
> +static const char *valid_arguments[] = {
> +	ETH_MEMIF_ID_ARG,
> +	ETH_MEMIF_ROLE_ARG,
> +	ETH_MEMIF_BUFFER_SIZE_ARG,
> +	ETH_MEMIF_RING_SIZE_ARG,
> +	ETH_MEMIF_NRXQ_ARG,
> +	ETH_MEMIF_NTXQ_ARG,
> +	ETH_MEMIF_SOCKET_ARG,
> +	ETH_MEMIF_MAC_ARG,
> +	ETH_MEMIF_ZC_ARG,
> +	ETH_MEMIF_SECRET_ARG,
> +	NULL
> +};
> +
> +static struct rte_vdev_driver pmd_memif_drv;
> +
> +/*
> + * Return the memif protocol version as a string literal of the form
> + * "memif-<major>.<minor>", built from MEMIF_VERSION_MAJOR and
> + * MEMIF_VERSION_MINOR at compile time.
> + */
> +const char *memif_version(void)
> +{
> +/* two-level expansion so the version macros are expanded before # */
> +#define MEMIF_STRINGIFY_RAW(x)	#x
> +#define MEMIF_STRINGIFY(x)	MEMIF_STRINGIFY_RAW(x)
> +	const char *version = "memif-" MEMIF_STRINGIFY(MEMIF_VERSION_MAJOR)
> +	    "." MEMIF_STRINGIFY(MEMIF_VERSION_MINOR);
> +#undef MEMIF_STRINGIFY
> +#undef MEMIF_STRINGIFY_RAW
> +
> +	return version;
> +}
> +
> +/*
> + * Fill in the device info reported to ethdev.
> + *
> + * A slave receives on master-to-slave (M2S) rings and transmits on
> + * slave-to-master (S2M) rings; a master does the opposite.
> + */
> +static void
> +memif_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +
> +	dev_info->if_index = pmd->if_index;
> +	dev_info->max_mac_addrs = 1;
> +	dev_info->max_rx_pktlen = (uint32_t) ETH_FRAME_LEN;
> +	dev_info->min_rx_bufsize = 0;
> +
> +	if (pmd->role == MEMIF_ROLE_SLAVE) {
> +		dev_info->max_rx_queues = pmd->cfg.num_m2s_rings;
> +		dev_info->max_tx_queues = pmd->cfg.num_s2m_rings;
> +	} else {
> +		dev_info->max_rx_queues = pmd->cfg.num_s2m_rings;
> +		dev_info->max_tx_queues = pmd->cfg.num_m2s_rings;
> +	}
> +}
> +
> +/*
> + * Locate a ring inside region 0.
> + *
> + * Rings are laid out back to back: all S2M rings first, then the M2S rings.
> + * Each ring occupies a memif_ring_t header followed by
> + * (1 << log2_ring_size) descriptors.
> + */
> +static inline memif_ring_t *memif_get_ring(struct pmd_internals *pmd,
> +					   memif_ring_type_t type,
> +					   uint16_t ring_num)
> +{
> +	/* bytes taken by one ring: header plus its descriptor array */
> +	int ring_bytes = sizeof(memif_ring_t) + sizeof(memif_desc_t) *
> +	    (1 << pmd->run.log2_ring_size);
> +	/* rings only in region 0 */
> +	void *base = pmd->regions[0].addr;
> +
> +	return (memif_ring_t *) (base +
> +	    (ring_num + type * pmd->run.num_s2m_rings) * ring_bytes);
> +}
> +
> +/*
> + * Translate a descriptor into the address of its packet buffer: the base
> + * address of the region the descriptor names plus the descriptor offset.
> + */
> +static inline void *memif_get_buffer(struct pmd_internals *pmd,
> +				     memif_desc_t * d)
> +{
> +	struct memif_region *region = &pmd->regions[d->region];
> +
> +	return region->addr + d->offset;
> +}
> +
> +/*
> + * RX burst: copy packets from the queue's memif ring into newly allocated
> + * mbufs.
> + *
> + * queue   - struct memif_queue for this RX queue
> + * bufs    - output array receiving up to nb_pkts mbuf chain heads
> + * nb_pkts - capacity of bufs
> + *
> + * Returns the number of packets stored in bufs. Returns 0 when the device is
> + * not connected or the queue has no ring attached.
> + */
> +static uint16_t
> +eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> +{
> +	struct memif_queue *mq = queue;
> +	struct pmd_internals *pmd = mq->pmd;
> +	if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
> +		return 0;
> +	memif_ring_t *ring = mq->ring;
> +	if (unlikely(ring == NULL))
> +		return 0;
> +	uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
> +	uint16_t n_rx_pkts = 0;
> +	/* usable payload bytes per mbuf: data room minus the headroom */
> +	uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
> +	    RTE_PKTMBUF_HEADROOM;
> +	uint16_t src_len, src_off, dst_len, dst_off, cp_len;
> +	memif_ring_type_t type = mq->type;
> +	memif_desc_t *d0;
> +	struct rte_mbuf *mbuf;
> +	struct rte_mbuf *mbuf_head = NULL;
> +
> +	/* consume interrupt */
> +	/* the eventfd is drained only when the ring does not mask interrupts;
> +	 * the result of read() is deliberately ignored */
> +	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
> +		uint64_t b;
> +		ssize_t size __rte_unused;
> +		size = read(mq->intr_handle.fd, &b, sizeof(b));
> +	}
> +
> +	ring_size = 1 << mq->log2_ring_size;
> +	mask = ring_size - 1;
> +
> +	/* S2M rings are consumed from last_head up to ring->head,
> +	 * M2S rings from last_tail up to ring->tail */
> +	cur_slot = (type == MEMIF_RING_S2M) ? mq->last_head : mq->last_tail;
> +	last_slot = (type == MEMIF_RING_S2M) ? ring->head : ring->tail;
> +	if (cur_slot == last_slot)
> +		goto refill;
> +	n_slots = last_slot - cur_slot;
> +
> +	while (n_slots && n_rx_pkts < nb_pkts) {
> +		mbuf_head = rte_pktmbuf_alloc(mq->mempool);
> +		if (unlikely(mbuf_head == NULL))
> +			goto no_free_bufs;
> +		mbuf = mbuf_head;
> +		mbuf->port = mq->in_port;
> +
> + next_slot:
> +		s0 = cur_slot & mask;
> +		d0 = &ring->desc[s0];
> +
> +		src_len = d0->length;
> +		/* NOTE(review): dst_off is reset for every descriptor, including
> +		 * follow-up descriptors of a MEMIF_DESC_FLAG_NEXT chain, so a
> +		 * chained descriptor appears to be copied over the start of the
> +		 * current mbuf instead of being appended - confirm. */
> +		dst_off = 0;
> +		src_off = 0;
> +
> +		do {
> +			dst_len = mbuf_size - dst_off;
> +			if (dst_len == 0) {
> +				/* current mbuf is full: chain a new segment */
> +				dst_off = 0;
> +				/* NOTE(review): this is larger than mbuf_size
> +				 * computed above (data room minus headroom);
> +				 * looks like it can overrun the segment -
> +				 * confirm. */
> +				dst_len = mbuf_size + RTE_PKTMBUF_HEADROOM;
> +
> +				mbuf = rte_pktmbuf_alloc(mq->mempool);
> +				/* NOTE(review): on this failure path mbuf_head
> +				 * is neither freed nor handed to the caller -
> +				 * looks like an mbuf leak; confirm. */
> +				if (unlikely(mbuf == NULL))
> +					goto no_free_bufs;
> +				mbuf->port = mq->in_port;
> +				rte_pktmbuf_chain(mbuf_head, mbuf);
> +			}
> +			cp_len = memif_min(dst_len, src_len);
> +
> +			/* grows data_len of the current segment and stores the
> +			 * same value into that segment's pkt_len */
> +			rte_pktmbuf_pkt_len(mbuf) =
> +			    rte_pktmbuf_data_len(mbuf) += cp_len;
> +
> +			memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, dst_off),
> +			       memif_get_buffer(pmd, d0) + src_off, cp_len);
> +
> +			mq->n_bytes += cp_len;
> +			src_off += cp_len;
> +			dst_off += cp_len;
> +			src_len -= cp_len;
> +		} while (src_len);
> +
> +		cur_slot++;
> +		n_slots--;
> +		/* packet continues in the next descriptor */
> +		if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
> +			goto next_slot;
> +		}
> +
> +		*bufs++ = mbuf_head;
> +		n_rx_pkts++;
> +
> +	}
> +
> + no_free_bufs:
> +	/* record how far the ring was consumed; for S2M the new tail is also
> +	 * published to the ring after a full memory barrier */
> +	if (type == MEMIF_RING_S2M) {
> +		rte_mb();
> +		ring->tail = mq->last_head = cur_slot;
> +	} else {
> +		mq->last_tail = cur_slot;
> +	}
> +
> + refill:
> +	/* M2S only: reset the length of every consumed descriptor to the
> +	 * negotiated buffer size and publish them again by advancing head */
> +	if (type == MEMIF_RING_M2S) {
> +		uint16_t head = ring->head;
> +		n_slots = ring_size - head + mq->last_tail;
> +
> +		while (n_slots--) {
> +			s0 = head++ & mask;
> +			d0 = &ring->desc[s0];
> +			d0->length = pmd->run.buffer_size;
> +		}
> +		rte_mb();
> +		ring->head = head;
> +	}
> +
> +	mq->n_pkts += n_rx_pkts;
> +	return n_rx_pkts;
> +}
> +
> +/*
> + * TX burst: copy packets from mbufs into the buffers of the queue's memif
> + * ring.
> + *
> + * queue   - struct memif_queue for this TX queue
> + * bufs    - array of nb_pkts mbuf chain heads to send
> + * nb_pkts - number of entries in bufs
> + *
> + * Returns the number of packets copied to the ring; those mbufs are freed
> + * here. Packets that did not fit are left untouched and added to the
> + * queue's error counter. Returns 0 when the device is not connected or the
> + * queue has no ring attached.
> + */
> +static uint16_t
> +eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> +{
> +	struct memif_queue *mq = queue;
> +	struct pmd_internals *pmd = mq->pmd;
> +	if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
> +		return 0;
> +	memif_ring_t *ring = mq->ring;
> +	if (unlikely(ring == NULL))
> +		return 0;
> +	uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0;
> +	uint16_t src_len, src_off, dst_len, dst_off, cp_len;
> +	memif_ring_type_t type = mq->type;
> +	memif_desc_t *d0;
> +	struct rte_mbuf *mbuf;
> +	struct rte_mbuf *mbuf_head;
> +
> +	ring_size = 1 << mq->log2_ring_size;
> +	mask = ring_size - 1;
> +
> +	/* catch last_tail up with the ring's current tail */
> +	n_free = ring->tail - mq->last_tail;
> +	mq->last_tail += n_free;
> +	/* S2M rings are produced at head, M2S rings at tail */
> +	slot = (type == MEMIF_RING_S2M) ? ring->head : ring->tail;
> +
> +	if (type == MEMIF_RING_S2M)
> +		n_free = ring_size - ring->head + mq->last_tail;
> +	else
> +		n_free = ring->head - ring->tail;
> +
> +	while (n_free && n_tx_pkts < nb_pkts) {
> +		mbuf_head = *bufs++;
> +		mbuf = mbuf_head;
> +
> +		/* remembered so a partially written packet can be rolled back */
> +		saved_slot = slot;
> +		d0 = &ring->desc[slot & mask];
> +		dst_off = 0;
> +		/* S2M buffers always have the negotiated size; for M2S the
> +		 * capacity is taken from the descriptor's length field */
> +		dst_len =
> +		    (type ==
> +		     MEMIF_RING_S2M) ? pmd->run.buffer_size : d0->length;
> +
> + next_in_chain:
> +		src_off = 0;
> +		src_len = rte_pktmbuf_data_len(mbuf);
> +
> +		while (src_len) {
> +			if (dst_len == 0) {
> +				/* NOTE(review): n_free still counts the slot
> +				 * currently being filled (it is decremented
> +				 * only after the packet completes), so this
> +				 * test appears to allow one slot more than is
> +				 * free, and the final n_free-- can then wrap
> +				 * below zero - confirm. */
> +				if (n_free) {
> +					slot++;
> +					n_free--;
> +					d0->flags |= MEMIF_DESC_FLAG_NEXT;
> +					d0 = &ring->desc[slot & mask];
> +					dst_off = 0;
> +					dst_len = (type == MEMIF_RING_S2M) ?
> +					    pmd->run.buffer_size : d0->length;
> +					d0->flags = 0;
> +				} else {
> +					/* out of slots mid-packet: drop back to
> +					 * the packet's first slot and stop */
> +					slot = saved_slot;
> +					goto no_free_slots;
> +				}
> +			}
> +			cp_len = memif_min(dst_len, src_len);
> +
> +			memcpy(memif_get_buffer(pmd, d0) + dst_off,
> +			       rte_pktmbuf_mtod_offset(mbuf, void *, src_off),
> +			       cp_len);
> +
> +			mq->n_bytes += cp_len;
> +			src_off += cp_len;
> +			dst_off += cp_len;
> +			src_len -= cp_len;
> +			dst_len -= cp_len;
> +
> +			d0->length = dst_off;
> +		}
> +
> +		/* more segments in this mbuf chain: keep filling */
> +		if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
> +			mbuf = mbuf->next;
> +			goto next_in_chain;
> +		}
> +
> +		n_tx_pkts++;
> +		slot++;
> +		n_free--;
> +		rte_pktmbuf_free(mbuf_head);
> +	}
> +
> + no_free_slots:
> +	/* full barrier before the new head/tail is made visible in the ring */
> +	rte_mb();
> +	if (type == MEMIF_RING_S2M)
> +		ring->head = slot;
> +	else
> +		ring->tail = slot;
> +
> +	/* signal the queue's eventfd unless the ring masks interrupts */
> +	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
> +		uint64_t a = 1;
> +		ssize_t size = write(mq->intr_handle.fd, &a, sizeof(a));
> +		if (unlikely(size < 0)) {
> +			MIF_LOG(WARNING,
> +				"%s: Failed to send interrupt on qid %ld: %s",
> +				rte_vdev_device_name(pmd->vdev),
> +				mq - pmd->tx_queues, strerror(errno));
> +		}
> +	}
> +
> +	/* everything the caller offered but that was not sent is an error */
> +	mq->n_err += nb_pkts - n_tx_pkts;
> +	mq->n_pkts += n_tx_pkts;
> +	return n_tx_pkts;
> +}
> +
> +/*
> + * Unmap and close every shared memory region and release the region array.
> + *
> + * Each region is handled on its own: a region that has a file descriptor
> + * but was never mapped (addr == NULL) still gets its descriptor closed, and
> + * does not stop the remaining regions or the array itself from being
> + * released. pmd->regions and pmd->regions_num are reset afterwards so the
> + * function is safe to call again.
> + */
> +void memif_free_regions(struct pmd_internals *pmd)
> +{
> +	int i;
> +	struct memif_region *r;
> +
> +	if (pmd->regions == NULL) {
> +		pmd->regions_num = 0;
> +		return;
> +	}
> +
> +	for (i = 0; i < pmd->regions_num; i++) {
> +		r = &pmd->regions[i];
> +		if (r->addr != NULL && r->addr != MAP_FAILED)
> +			munmap(r->addr, r->region_size);
> +		r->addr = NULL;
> +		/*
> +		 * The array comes from rte_zmalloc(), so fd 0 is treated as
> +		 * "not set", as in the original code.
> +		 */
> +		if (r->fd > 0) {
> +			close(r->fd);
> +			r->fd = -1;
> +		}
> +	}
> +	rte_free(pmd->regions);
> +	pmd->regions = NULL;
> +	pmd->regions_num = 0;
> +}
> +
> +/*
> + * Allocate and map the shared memory regions.
> + *
> + * brn is the number of buffer regions; brn + 1 regions are created because
> + * region 0 is reserved for the rings (descriptors).
> + *
> + * Returns 0 on success, -ENOMEM if the region array cannot be allocated and
> + * -1 on any other failure. On failure everything created so far is released
> + * and pmd->regions is reset to NULL.
> + */
> +static int memif_alloc_regions(struct pmd_internals *pmd, uint8_t brn)
> +{
> +	struct memif_region *r;
> +	char shm_name[32];
> +	uint64_t size;
> +	int i;
> +	int ret = 0;
> +
> +	r = rte_zmalloc("memif_region", sizeof(struct memif_region) * (brn + 1),
> +			0);
> +	if (r == NULL) {
> +		MIF_LOG(ERR, "%s: Failed to allocate regions.",
> +			rte_vdev_device_name(pmd->vdev));
> +		return -ENOMEM;
> +	}
> +
> +	pmd->regions = r;
> +	pmd->regions_num = brn + 1;
> +
> +	/*
> +	 * Create shm for every region. Region 0 is reserved for descriptors.
> +	 * Other regions contain buffers.
> +	 */
> +	for (i = 0; i < (brn + 1); i++) {
> +		r = &pmd->regions[i];
> +		/* mark as "nothing to release" before anything can fail */
> +		r->fd = -1;
> +		r->addr = NULL;
> +
> +		r->buffer_offset = (i == 0) ? (pmd->run.num_s2m_rings +
> +					       pmd->run.num_m2s_rings) *
> +		    (sizeof(memif_ring_t) +
> +		     sizeof(memif_desc_t) * (1 << pmd->run.log2_ring_size)) : 0;
> +
> +		/*
> +		 * Compute the size in 64 bits: buffer_size * ring size *
> +		 * ring count can overflow int. Descriptor offsets are 32-bit,
> +		 * so a larger region could not be addressed anyway.
> +		 */
> +		if (i == 0)
> +			size = r->buffer_offset;
> +		else
> +			size = (uint64_t) pmd->run.buffer_size *
> +			    (1ULL << pmd->run.log2_ring_size) *
> +			    (pmd->run.num_s2m_rings +
> +			     pmd->run.num_m2s_rings);
> +		if (size > UINT32_MAX) {
> +			MIF_LOG(ERR, "%s: Region %d is too large.",
> +				rte_vdev_device_name(pmd->vdev), i);
> +			goto error;
> +		}
> +		r->region_size = (uint32_t) size;
> +
> +		snprintf(shm_name, sizeof(shm_name), "memif region %d", i);
> +
> +		r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING);
> +		if (r->fd < 0) {
> +			MIF_LOG(ERR, "%s: Failed to create shm file: %s.",
> +				rte_vdev_device_name(pmd->vdev),
> +				strerror(errno));
> +			goto error;
> +		}
> +
> +		ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK);
> +		if (ret < 0) {
> +			MIF_LOG(ERR, "%s: Failed to add seals to shm file: %s.",
> +				rte_vdev_device_name(pmd->vdev),
> +				strerror(errno));
> +			goto error;
> +		}
> +
> +		ret = ftruncate(r->fd, r->region_size);
> +		if (ret < 0) {
> +			MIF_LOG(ERR, "%s: Failed to truncate shm file: %s.",
> +				rte_vdev_device_name(pmd->vdev),
> +				strerror(errno));
> +			goto error;
> +		}
> +
> +		r->addr = mmap(NULL, r->region_size, PROT_READ |
> +			       PROT_WRITE, MAP_SHARED, r->fd, 0);
> +		/* mmap() reports failure with MAP_FAILED, not NULL */
> +		if (r->addr == MAP_FAILED) {
> +			r->addr = NULL;
> +			MIF_LOG(ERR, "%s: Failed to mmap shm region: %s.",
> +				rte_vdev_device_name(pmd->vdev),
> +				strerror(errno));
> +			goto error;
> +		}
> +	}
> +
> +	return 0;
> +
> + error:
> +	/* undo regions 0..i; only these entries have been initialised */
> +	for (; i >= 0; i--) {
> +		r = &pmd->regions[i];
> +		if (r->addr != NULL) {
> +			munmap(r->addr, r->region_size);
> +			r->addr = NULL;
> +		}
> +		if (r->fd >= 0) {
> +			close(r->fd);
> +			r->fd = -1;
> +		}
> +	}
> +	rte_free(pmd->regions);
> +	pmd->regions = NULL;
> +	pmd->regions_num = 0;
> +	return -1;
> +}
> +
> +/*
> + * Reset one ring and point its descriptors at consecutive buffers of
> + * region 1, starting with buffer number first_slot.
> + *
> + * The buffer number is kept in 32 bits: with several rings of up to
> + * 2^14 descriptors it does not fit in uint16_t, and truncating it would
> + * make rings share buffers.
> + */
> +static void
> +memif_init_ring_descs(struct pmd_internals *pmd, memif_ring_t *ring,
> +		      uint32_t first_slot)
> +{
> +	const uint32_t ring_size = 1U << pmd->run.log2_ring_size;
> +	uint32_t j;
> +
> +	ring->head = ring->tail = 0;
> +	ring->cookie = MEMIF_COOKIE;
> +	ring->flags = 0;
> +	for (j = 0; j < ring_size; j++) {
> +		const uint32_t slot = first_slot + j;
> +
> +		ring->desc[j].region = 1;
> +		ring->desc[j].offset = pmd->regions[1].buffer_offset +
> +		    slot * (uint32_t) pmd->run.buffer_size;
> +		ring->desc[j].length = pmd->run.buffer_size;
> +	}
> +}
> +
> +/*
> + * Initialise every ring in region 0. Buffers in region 1 are handed out in
> + * ring order: all S2M rings first, then the M2S rings.
> + */
> +static void memif_init_rings(struct pmd_internals *pmd)
> +{
> +	const uint32_t ring_size = 1U << pmd->run.log2_ring_size;
> +	memif_ring_t *ring;
> +	int i;
> +
> +	for (i = 0; i < pmd->run.num_s2m_rings; i++) {
> +		ring = memif_get_ring(pmd, MEMIF_RING_S2M, i);
> +		memif_init_ring_descs(pmd, ring, (uint32_t) i * ring_size);
> +	}
> +
> +	for (i = 0; i < pmd->run.num_m2s_rings; i++) {
> +		ring = memif_get_ring(pmd, MEMIF_RING_M2S, i);
> +		memif_init_ring_descs(pmd, ring,
> +		    (uint32_t) (i + pmd->run.num_s2m_rings) * ring_size);
> +	}
> +}
> +
> +/*
> + * Attach one queue to ring number idx of the given type, reset its
> + * head/tail bookkeeping and create its eventfd. Returns the eventfd
> + * (negative on failure, with errno left as set by eventfd()).
> + */
> +static int
> +memif_queue_bind(struct pmd_internals *pmd, struct memif_queue *mq,
> +		 memif_ring_type_t type, int idx)
> +{
> +	mq->ring = memif_get_ring(pmd, type, idx);
> +	mq->log2_ring_size = pmd->run.log2_ring_size;
> +	/* queues located only in region 0 */
> +	mq->region = 0;
> +	mq->offset = (void *)mq->ring - (void *)pmd->regions[0].addr;
> +	mq->last_head = mq->last_tail = 0;
> +	mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
> +
> +	return mq->intr_handle.fd;
> +}
> +
> +/*
> + * Bind the TX queues to the S2M rings and the RX queues to the M2S rings.
> + * A failure to create an eventfd is only logged.
> + */
> +static void memif_init_queues(struct pmd_internals *pmd)
> +{
> +	int i;
> +
> +	for (i = 0; i < pmd->run.num_s2m_rings; i++) {
> +		if (memif_queue_bind(pmd, &pmd->tx_queues[i],
> +				     MEMIF_RING_S2M, i) >= 0)
> +			continue;
> +		MIF_LOG(WARNING,
> +			"%s: Failed to create eventfd for tx queue %d: %s.",
> +			rte_vdev_device_name(pmd->vdev), i,
> +			strerror(errno));
> +	}
> +
> +	for (i = 0; i < pmd->run.num_m2s_rings; i++) {
> +		if (memif_queue_bind(pmd, &pmd->rx_queues[i],
> +				     MEMIF_RING_M2S, i) >= 0)
> +			continue;
> +		MIF_LOG(WARNING,
> +			"%s: Failed to create eventfd for rx queue %d: %s.",
> +			rte_vdev_device_name(pmd->vdev), i,
> +			strerror(errno));
> +	}
> +}
> +
> +/*
> + * Allocate the shared memory (region 0 plus one buffer region), then
> + * initialise the rings and bind the queues to them.
> + *
> + * Returns 0 on success or the negative value reported by
> + * memif_alloc_regions().
> + */
> +int memif_init_regions_and_queues(struct pmd_internals *pmd)
> +{
> +	/* num of buffer regions */
> +	const uint8_t buffer_regions = 1;
> +	int rc = memif_alloc_regions(pmd, buffer_regions);
> +
> +	if (rc >= 0) {
> +		memif_init_rings(pmd);
> +		memif_init_queues(pmd);
> +		rc = 0;
> +	}
> +
> +	return rc;
> +}
> +
> +/*
> + * Finish connecting: map any region that is not mapped yet, attach every
> + * queue to its ring, verify the ring cookies and mark the link up.
> + *
> + * S2M rings belong to the TX queues of a slave and the RX queues of a
> + * master; M2S rings the other way round. Each RX side sets
> + * MEMIF_RING_FLAG_MASK_INT on its rings (polling mode by default).
> + *
> + * Returns 0 on success, -1 if the ethdev cannot be found, a region cannot
> + * be mapped or a ring carries the wrong cookie.
> + */
> +int memif_connect(struct pmd_internals *pmd)
> +{
> +	struct rte_eth_dev *eth_dev =
> +	    rte_eth_dev_allocated(rte_vdev_device_name(pmd->vdev));
> +	struct memif_region *mr;
> +	struct memif_queue *mq;
> +	int i;
> +
> +	/* rte_eth_dev_allocated() returns NULL for an unknown name */
> +	if (eth_dev == NULL) {
> +		MIF_LOG(ERR, "%s: Device not found.",
> +			rte_vdev_device_name(pmd->vdev));
> +		return -1;
> +	}
> +
> +	for (i = 0; i < pmd->regions_num; i++) {
> +		mr = pmd->regions + i;
> +		if (mr->addr != NULL)
> +			continue;
> +		if (mr->fd < 0)
> +			return -1;
> +		mr->addr = mmap(NULL, mr->region_size,
> +				PROT_READ | PROT_WRITE,
> +				MAP_SHARED, mr->fd, 0);
> +		/* mmap() reports failure with MAP_FAILED, not NULL */
> +		if (mr->addr == MAP_FAILED) {
> +			mr->addr = NULL;
> +			return -1;
> +		}
> +	}
> +
> +	for (i = 0; i < pmd->run.num_s2m_rings; i++) {
> +		mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
> +		    &pmd->tx_queues[i] : &pmd->rx_queues[i];
> +		mq->ring = pmd->regions[mq->region].addr + mq->offset;
> +		if (mq->ring->cookie != MEMIF_COOKIE) {
> +			MIF_LOG(ERR, "%s: Wrong cookie",
> +				rte_vdev_device_name(pmd->vdev));
> +			return -1;
> +		}
> +		mq->ring->head = mq->ring->tail = mq->last_head =
> +		    mq->last_tail = 0;
> +		/* polling mode by default */
> +		if (pmd->role == MEMIF_ROLE_MASTER) {
> +			mq->ring->flags = MEMIF_RING_FLAG_MASK_INT;
> +		}
> +	}
> +	for (i = 0; i < pmd->run.num_m2s_rings; i++) {
> +		mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
> +		    &pmd->rx_queues[i] : &pmd->tx_queues[i];
> +		mq->ring = pmd->regions[mq->region].addr + mq->offset;
> +		if (mq->ring->cookie != MEMIF_COOKIE) {
> +			MIF_LOG(ERR, "%s: Wrong cookie",
> +				rte_vdev_device_name(pmd->vdev));
> +			return -1;
> +		}
> +		mq->ring->head = mq->ring->tail = mq->last_head =
> +		    mq->last_tail = 0;
> +		/* polling mode by default */
> +		if (pmd->role == MEMIF_ROLE_SLAVE) {
> +			mq->ring->flags = MEMIF_RING_FLAG_MASK_INT;
> +		}
> +	}
> +
> +	pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
> +	pmd->flags |= ETH_MEMIF_FLAG_CONNECTED;
> +	eth_dev->data->dev_link.link_status = ETH_LINK_UP;
> +	MIF_LOG(INFO, "%s: Connected.", rte_vdev_device_name(pmd->vdev));
> +	return 0;
> +}
> +
> +/*
> + * Start the device by beginning the connection procedure that matches the
> + * configured role. Returns the result of the role-specific connect call,
> + * or -1 for an unknown role.
> + */
> +static int memif_dev_start(struct rte_eth_dev *dev)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +
> +	if (pmd->role == MEMIF_ROLE_SLAVE)
> +		return memif_connect_slave(dev);
> +
> +	if (pmd->role == MEMIF_ROLE_MASTER)
> +		return memif_connect_master(dev);
> +
> +	MIF_LOG(ERR, "%s: Unknown role: %d.",
> +		rte_vdev_device_name(pmd->vdev), pmd->role);
> +	return -1;
> +}
> +
> +/* dev_configure callback: performs no work and always reports success. */
> +static int
> +memif_dev_configure(struct rte_eth_dev *dev __rte_unused)
> +{
> +	return 0;
> +}
> +
> +/*
> + * Set up TX queue qid.
> + *
> + * All TX queues live in one array (pmd->tx_queues). The array is sized for
> + * every configured TX queue so that setting up a lower queue id later never
> + * shrinks it. Because rte_realloc() may move the array, the pointers that
> + * were already published in dev->data->tx_queues[] are refreshed, and the
> + * entry that is initialised and published is element qid of the array.
> + *
> + * Returns 0 on success, -ENOMEM if the queue array cannot be (re)allocated.
> + */
> +static int
> +memif_tx_queue_setup(struct rte_eth_dev *dev,
> +		     uint16_t qid,
> +		     uint16_t nb_tx_desc __rte_unused,
> +		     unsigned int socket_id __rte_unused,
> +		     const struct rte_eth_txconf *tx_conf __rte_unused)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +	const int first_alloc = (pmd->tx_queues == NULL);
> +	struct memif_queue *queues;
> +	struct memif_queue *mq;
> +	uint32_t nb_q = dev->data->nb_tx_queues;
> +	uint32_t i;
> +
> +	if (nb_q < (uint32_t) qid + 1)
> +		nb_q = (uint32_t) qid + 1;
> +
> +	queues = rte_realloc(pmd->tx_queues, sizeof(*queues) * nb_q, 0);
> +	if (queues == NULL) {
> +		MIF_LOG(ERR, "%s: Failed to alloc tx queue %u.",
> +			rte_vdev_device_name(pmd->vdev), qid);
> +		return -ENOMEM;
> +	}
> +	/* rte_realloc() does not zero memory */
> +	if (first_alloc)
> +		memset(queues, 0, sizeof(*queues) * nb_q);
> +
> +	pmd->tx_queues = queues;
> +
> +	/* the array may have moved: re-point queues that were set up before */
> +	for (i = 0; i < dev->data->nb_tx_queues; i++) {
> +		if (i != qid && dev->data->tx_queues[i] != NULL)
> +			dev->data->tx_queues[i] = &queues[i];
> +	}
> +
> +	mq = &queues[qid];
> +	mq->type =
> +	    (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_S2M : MEMIF_RING_M2S;
> +	mq->n_pkts = 0;
> +	mq->n_bytes = 0;
> +	mq->n_err = 0;
> +	mq->intr_handle.fd = -1;
> +	mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
> +	mq->pmd = pmd;
> +	dev->data->tx_queues[qid] = mq;
> +
> +	return 0;
> +}
> +
> +/*
> + * Set up RX queue qid, which will allocate its mbufs from mb_pool.
> + *
> + * All RX queues live in one array (pmd->rx_queues). The array is sized for
> + * every configured RX queue so that setting up a lower queue id later never
> + * shrinks it. Because rte_realloc() may move the array, the pointers that
> + * were already published in dev->data->rx_queues[] are refreshed, and the
> + * entry that is initialised and published is element qid of the array.
> + *
> + * Returns 0 on success, -ENOMEM if the queue array cannot be (re)allocated.
> + */
> +static int
> +memif_rx_queue_setup(struct rte_eth_dev *dev,
> +		     uint16_t qid,
> +		     uint16_t nb_rx_desc __rte_unused,
> +		     unsigned int socket_id __rte_unused,
> +		     const struct rte_eth_rxconf *rx_conf __rte_unused,
> +		     struct rte_mempool *mb_pool)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +	const int first_alloc = (pmd->rx_queues == NULL);
> +	struct memif_queue *queues;
> +	struct memif_queue *mq;
> +	uint32_t nb_q = dev->data->nb_rx_queues;
> +	uint32_t i;
> +
> +	if (nb_q < (uint32_t) qid + 1)
> +		nb_q = (uint32_t) qid + 1;
> +
> +	queues = rte_realloc(pmd->rx_queues, sizeof(*queues) * nb_q, 0);
> +	if (queues == NULL) {
> +		MIF_LOG(ERR, "%s: Failed to alloc rx queue %u.",
> +			rte_vdev_device_name(pmd->vdev), qid);
> +		return -ENOMEM;
> +	}
> +	/* rte_realloc() does not zero memory */
> +	if (first_alloc)
> +		memset(queues, 0, sizeof(*queues) * nb_q);
> +
> +	pmd->rx_queues = queues;
> +
> +	/* the array may have moved: re-point queues that were set up before */
> +	for (i = 0; i < dev->data->nb_rx_queues; i++) {
> +		if (i != qid && dev->data->rx_queues[i] != NULL)
> +			dev->data->rx_queues[i] = &queues[i];
> +	}
> +
> +	mq = &queues[qid];
> +	mq->type =
> +	    (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_M2S : MEMIF_RING_S2M;
> +	mq->n_pkts = 0;
> +	mq->n_bytes = 0;
> +	mq->n_err = 0;
> +	mq->intr_handle.fd = -1;
> +	mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
> +	mq->mempool = mb_pool;
> +	mq->in_port = dev->data->port_id;
> +	mq->pmd = pmd;
> +	dev->data->rx_queues[qid] = mq;
> +
> +	return 0;
> +}
> +
> +/*
> + * link_update callback: does nothing and always returns 0. The link status
> + * itself is written by memif_connect().
> + */
> +static int
> +memif_link_update(struct rte_eth_dev *dev __rte_unused,
> +		  int wait_to_complete __rte_unused)
> +{
> +	return 0;
> +}
> +
> +/*
> + * Collect packet/byte/error counters from all queues.
> + *
> + * A slave receives on M2S rings and transmits on S2M rings; a master does
> + * the opposite (same mapping as memif_dev_info() and the queue setup
> + * functions). Per-queue counters are reported for at most
> + * RTE_ETHDEV_QUEUE_STAT_CNTRS queues, and the totals cover the same queues.
> + *
> + * Always returns 0.
> + */
> +static int memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +	const int is_slave = (pmd->role == MEMIF_ROLE_SLAVE);
> +	struct memif_queue *mq;
> +	uint8_t nb_rx, nb_tx;
> +	int i;
> +
> +	stats->ipackets = 0;
> +	stats->ibytes = 0;
> +	stats->opackets = 0;
> +	stats->obytes = 0;
> +	stats->oerrors = 0;
> +
> +	nb_rx = is_slave ? pmd->run.num_m2s_rings : pmd->run.num_s2m_rings;
> +	if (nb_rx > RTE_ETHDEV_QUEUE_STAT_CNTRS)
> +		nb_rx = RTE_ETHDEV_QUEUE_STAT_CNTRS;
> +
> +	nb_tx = is_slave ? pmd->run.num_s2m_rings : pmd->run.num_m2s_rings;
> +	if (nb_tx > RTE_ETHDEV_QUEUE_STAT_CNTRS)
> +		nb_tx = RTE_ETHDEV_QUEUE_STAT_CNTRS;
> +
> +	/* RX stats */
> +	for (i = 0; i < nb_rx; i++) {
> +		mq = &pmd->rx_queues[i];
> +		stats->q_ipackets[i] = mq->n_pkts;
> +		stats->q_ibytes[i] = mq->n_bytes;
> +		stats->ipackets += mq->n_pkts;
> +		stats->ibytes += mq->n_bytes;
> +	}
> +
> +	/* TX stats */
> +	for (i = 0; i < nb_tx; i++) {
> +		mq = &pmd->tx_queues[i];
> +		stats->q_opackets[i] = mq->n_pkts;
> +		stats->q_obytes[i] = mq->n_bytes;
> +		stats->q_errors[i] = mq->n_err;
> +		stats->opackets += mq->n_pkts;
> +		stats->obytes += mq->n_bytes;
> +		stats->oerrors += mq->n_err;
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Zero the packet, byte and error counters of every queue. The S2M rings
> + * are served by the TX queues of a slave and by the RX queues of a master;
> + * the M2S rings by the other queue array.
> + */
> +static void memif_stats_reset(struct rte_eth_dev *dev)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +	const int is_slave = (pmd->role == MEMIF_ROLE_SLAVE);
> +	struct memif_queue *q;
> +	int idx;
> +
> +	for (idx = 0; idx < pmd->run.num_s2m_rings; idx++) {
> +		q = is_slave ? &pmd->tx_queues[idx] : &pmd->rx_queues[idx];
> +		q->n_pkts = 0;
> +		q->n_bytes = 0;
> +		q->n_err = 0;
> +	}
> +
> +	for (idx = 0; idx < pmd->run.num_m2s_rings; idx++) {
> +		q = is_slave ? &pmd->rx_queues[idx] : &pmd->tx_queues[idx];
> +		q->n_pkts = 0;
> +		q->n_bytes = 0;
> +		q->n_err = 0;
> +	}
> +}
> +
> +/*
> + * rx_queue_intr_enable callback. Interrupt mode is not implemented: the
> + * call logs a warning and fails with -1.
> + *
> + * TODO: clear MEMIF_RING_FLAG_MASK_INT on the queue's ring and tell dpdk to
> + * use interrupt mode via rte_intr_enable() on the queue's intr_handle.
> + */
> +static int
> +memif_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
> +{
> +	struct pmd_internals *pmd = dev->data->dev_private;
> +	const char *dev_name = rte_vdev_device_name(pmd->vdev);
> +
> +	MIF_LOG(WARNING, "%s: Interrupt mode not supported.", dev_name);
> +
> +	return -1;
> +}
> +
> +/*
> + * rx_queue_intr_disable callback. Currently performs no work and returns 0.
> + *
> + * TODO: set MEMIF_RING_FLAG_MASK_INT on the queue's ring and tell dpdk to
> + * use polling mode via rte_intr_disable() on the queue's intr_handle.
> + */
> +static int
> +memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
> +{
> +	/* kept for the TODO above; not used yet */
> +	struct pmd_internals *pmd __rte_unused = dev->data->dev_private;
> +
> +	return 0;
> +}
> +
> +/* ethdev callbacks implemented by the memif PMD. */
> +static const struct eth_dev_ops ops = {
> +	/* device lifecycle and information */
> +	.dev_configure = memif_dev_configure,
> +	.dev_start = memif_dev_start,
> +	.dev_infos_get = memif_dev_info,
> +	.link_update = memif_link_update,
> +	/* queues */
> +	.rx_queue_setup = memif_rx_queue_setup,
> +	.tx_queue_setup = memif_tx_queue_setup,
> +	.rx_queue_intr_enable = memif_rx_queue_intr_enable,
> +	.rx_queue_intr_disable = memif_rx_queue_intr_disable,
> +	/* statistics */
> +	.stats_get = memif_stats_get,
> +	.stats_reset = memif_stats_reset,
> +};
> +
> +/*
> + * Create and register one memif ethdev.
> + *
> + * vdev            - virtual device being probed
> + * role            - master or slave
> + * id              - memif interface id (also reported as if_index)
> + * flags           - ETH_MEMIF_FLAG_* bits; zero-copy is rejected
> + * socket_filename - control socket path, passed to memif_socket_init()
> + * log2_ring_size  - 0 selects ETH_MEMIF_DEFAULT_RING_SIZE
> + * nrxq, ntxq      - 0 keeps the default number of rings
> + * buffer_size     - 0 selects ETH_MEMIF_DEFAULT_BUFFER_SIZE
> + * secret          - optional secret; truncated to fit pmd->secret and
> + *                   always NUL-terminated
> + * eth_addr        - optional MAC "xx:xx:xx:xx:xx:xx"; NULL selects
> + *                   ETH_MEMIF_DEFAULT_ETH_ADDR
> + *
> + * Returns 0 on success, -EINVAL for a malformed MAC address, -1 if
> + * zero-copy is requested or the ethdev cannot be allocated, or the negative
> + * value returned by memif_socket_init().
> + */
> +static int
> +memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
> +	     memif_interface_id_t id, uint32_t flags,
> +	     const char *socket_filename,
> +	     memif_log2_ring_size_t log2_ring_size, uint8_t nrxq,
> +	     uint8_t ntxq, uint16_t buffer_size, const char *secret,
> +	     const char *eth_addr)
> +{
> +	int ret = 0;
> +	struct rte_eth_dev *eth_dev;
> +	struct rte_eth_dev_data *data;
> +	struct pmd_internals *pmd;
> +	struct ether_addr mac;
> +	const unsigned int numa_node = vdev->device.numa_node;
> +	const char *name = rte_vdev_device_name(vdev);
> +
> +	if (flags & ETH_MEMIF_FLAG_ZERO_COPY) {
> +		MIF_LOG(ERR, "Zero-copy not supported.");
> +		return -1;
> +	}
> +
> +	/*
> +	 * Validate the MAC address before anything is allocated, so a
> +	 * malformed value needs no cleanup.
> +	 */
> +	/* FIXME: generate mac? */
> +	if (eth_addr == NULL)
> +		eth_addr = ETH_MEMIF_DEFAULT_ETH_ADDR;
> +
> +	if (sscanf(eth_addr, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
> +		   &mac.addr_bytes[0], &mac.addr_bytes[1],
> +		   &mac.addr_bytes[2], &mac.addr_bytes[3],
> +		   &mac.addr_bytes[4], &mac.addr_bytes[5]) != 6) {
> +		MIF_LOG(ERR, "%s: Invalid MAC address: %s.", name, eth_addr);
> +		return -EINVAL;
> +	}
> +
> +	eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
> +	if (eth_dev == NULL) {
> +		MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
> +		return -1;
> +	}
> +
> +	pmd = eth_dev->data->dev_private;
> +	memset(pmd, 0, sizeof(*pmd));
> +
> +	pmd->if_index = id;
> +	pmd->vdev = vdev;
> +	pmd->id = id;
> +	pmd->flags = flags;
> +	pmd->flags |= ETH_MEMIF_FLAG_DISABLED;
> +	pmd->role = role;
> +	pmd->eth_addr = mac;
> +	/*
> +	 * NOTE(review): on failure the ethdev allocated above is not
> +	 * released here; whether that is safe depends on what
> +	 * memif_socket_init() has already registered - confirm.
> +	 */
> +	ret = memif_socket_init(eth_dev, socket_filename);
> +	if (ret < 0)
> +		return ret;
> +
> +	/*
> +	 * pmd was zeroed above; snprintf() truncates an over-long secret
> +	 * and always leaves it NUL-terminated.
> +	 */
> +	if (secret != NULL)
> +		snprintf(pmd->secret, sizeof(pmd->secret), "%s", secret);
> +
> +	pmd->cfg.log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE;
> +	if (log2_ring_size != 0)
> +		pmd->cfg.log2_ring_size = log2_ring_size;
> +	pmd->cfg.num_s2m_rings = ETH_MEMIF_DEFAULT_NRXQ;
> +	pmd->cfg.num_m2s_rings = ETH_MEMIF_DEFAULT_NTXQ;
> +
> +	/* a slave receives on M2S rings, a master on S2M rings */
> +	if (nrxq != 0) {
> +		if (role == MEMIF_ROLE_SLAVE)
> +			pmd->cfg.num_m2s_rings = nrxq;
> +		else
> +			pmd->cfg.num_s2m_rings = nrxq;
> +	}
> +	if (ntxq != 0) {
> +		if (role == MEMIF_ROLE_SLAVE)
> +			pmd->cfg.num_s2m_rings = ntxq;
> +		else
> +			pmd->cfg.num_m2s_rings = ntxq;
> +	}
> +
> +	pmd->cfg.buffer_size = ETH_MEMIF_DEFAULT_BUFFER_SIZE;
> +	if (buffer_size != 0)
> +		pmd->cfg.buffer_size = buffer_size;
> +
> +	data = eth_dev->data;
> +	data->dev_private = pmd;
> +	data->numa_node = numa_node;
> +	/* points into pmd, not at a separately allocated array */
> +	data->mac_addrs = &pmd->eth_addr;
> +
> +	eth_dev->dev_ops = &ops;
> +	eth_dev->device = &vdev->device;
> +	eth_dev->rx_pkt_burst = eth_memif_rx;
> +	eth_dev->tx_pkt_burst = eth_memif_tx;
> +
> +	rte_eth_dev_probing_finish(eth_dev);
> +
> +	return ret;
> +}
> +
> +/*
> + * kvargs handler for the "role" argument. Stores MEMIF_ROLE_MASTER or
> + * MEMIF_ROLE_SLAVE in *extra_args depending on which word the value
> + * contains ("master" is checked first). Returns 0, or -EINVAL if the value
> + * contains neither.
> + */
> +static int
> +memif_set_role(const char *key __rte_unused, const char *value,
> +	       void *extra_args)
> +{
> +	enum memif_role_t *out = (enum memif_role_t *)extra_args;
> +
> +	if (strstr(value, "master") != NULL) {
> +		*out = MEMIF_ROLE_MASTER;
> +		return 0;
> +	}
> +
> +	if (strstr(value, "slave") != NULL) {
> +		*out = MEMIF_ROLE_SLAVE;
> +		return 0;
> +	}
> +
> +	MIF_LOG(ERR, "Unknown role: %s.", value);
> +	return -EINVAL;
> +}
> +
> +/*
> + * kvargs handler for the "zero-copy" argument. A value containing "yes"
> + * sets ETH_MEMIF_FLAG_ZERO_COPY in *extra_args, otherwise a value containing
> + * "no" clears it. Returns 0, or -EINVAL if the value contains neither.
> + */
> +static int
> +memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args)
> +{
> +	uint32_t *out = (uint32_t *) extra_args;
> +
> +	if (strstr(value, "yes") != NULL) {
> +		*out |= ETH_MEMIF_FLAG_ZERO_COPY;
> +		return 0;
> +	}
> +
> +	if (strstr(value, "no") != NULL) {
> +		*out &= ~ETH_MEMIF_FLAG_ZERO_COPY;
> +		return 0;
> +	}
> +
> +	MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value);
> +	return -EINVAL;
> +}
> +
> +/*
> + * kvargs handler for the "id" argument. Parses the value as a decimal
> + * number into *extra_args and always returns 0: even if parsing fails,
> + * 0 is a valid id.
> + */
> +static int
> +memif_set_id(const char *key __rte_unused, const char *value, void *extra_args)
> +{
> +	memif_interface_id_t *out = (memif_interface_id_t *) extra_args;
> +	const unsigned long parsed = strtoul(value, NULL, 10);
> +
> +	*out = parsed;
> +	return 0;
> +}
> +
> +/*
> + * kvargs handler for the "bsize" argument. Accepts a decimal buffer size in
> + * the range 1..0xFFFF and stores it in the uint16_t that extra_args points
> + * to. Returns 0, or -EINVAL (leaving the target untouched) otherwise.
> + */
> +static int
> +memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args)
> +{
> +	const unsigned long int parsed = strtoul(value, NULL, 10);
> +
> +	if (parsed >= 1 && parsed <= 0xFFFF) {
> +		*(uint16_t *) extra_args = parsed;
> +		return 0;
> +	}
> +
> +	MIF_LOG(ERR, "Invalid buffer size: %s.", value);
> +	return -EINVAL;
> +}
> +
> +/*
> + * kvargs handler for the "rsize" argument. The value is the log2 of the
> + * ring size; it must be in the range 1..ETH_MEMIF_MAX_LOG2_RING_SIZE.
> + * Returns 0, or -EINVAL (leaving the target untouched) otherwise.
> + */
> +static int
> +memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args)
> +{
> +	const unsigned long int parsed = strtoul(value, NULL, 10);
> +
> +	if (parsed >= 1 && parsed <= ETH_MEMIF_MAX_LOG2_RING_SIZE) {
> +		*(memif_log2_ring_size_t *) extra_args = parsed;
> +		return 0;
> +	}
> +
> +	MIF_LOG(ERR, "Invalid ring size: %s (max %u).",
> +		value, ETH_MEMIF_MAX_LOG2_RING_SIZE);
> +	return -EINVAL;
> +}
> +
> +/*
> + * kvargs handler for the "nrxq" and "ntxq" arguments. Accepts a decimal
> + * queue count in the range 1..0xFF.
> + *
> + * extra_args must point to a uint8_t: rte_pmd_memif_probe() passes the
> + * addresses of its uint8_t nrxq/ntxq variables, so the value is stored
> + * through a uint8_t pointer (a wider store would write past the variable).
> + *
> + * Returns 0, or -EINVAL (leaving the target untouched) otherwise.
> + */
> +static int
> +memif_set_nq(const char *key __rte_unused, const char *value, void *extra_args)
> +{
> +	unsigned long int tmp;
> +	uint8_t *nq = (uint8_t *) extra_args;
> +
> +	tmp = strtoul(value, NULL, 10);
> +	if ((tmp == 0) || (tmp > 0xFF)) {
> +		MIF_LOG(ERR, "Invalid number of queues: %s.", value);
> +		return -EINVAL;
> +	}
> +	*nq = (uint8_t) tmp;
> +	return 0;
> +}
> +
> +/*
> + * Check that the directory part of a socket filename exists and that the
> + * effective user may read and write it.
> + *
> + * The directory is everything before the last '/'. A filename without any
> + * '/' is checked against the current directory ("."), and a filename
> + * directly under the root against "/". Relative paths are resolved against
> + * the current working directory (AT_FDCWD).
> + *
> + * Returns 0 if the directory is usable, -EINVAL if it is not and -1 if the
> + * temporary buffer cannot be allocated.
> + */
> +static inline int memif_check_socket_filename(const char *filename)
> +{
> +	const char *slash = strrchr(filename, '/');
> +	const char *src;
> +	char *dir;
> +	size_t len;
> +	int ret = 0;
> +
> +	if (slash == NULL) {
> +		src = ".";
> +		len = 1;
> +	} else if (slash == filename) {
> +		src = "/";
> +		len = 1;
> +	} else {
> +		src = filename;
> +		len = slash - filename;
> +	}
> +
> +	/* zeroed and one byte longer than the copy: always NUL-terminated */
> +	dir = rte_zmalloc("memif_tmp", len + 1, 0);
> +	if (dir == NULL) {
> +		MIF_LOG(ERR, "Failed to allocate memory.");
> +		return -1;
> +	}
> +	memcpy(dir, src, len);
> +
> +	if (faccessat(AT_FDCWD, dir, F_OK | R_OK | W_OK, AT_EACCESS) < 0) {
> +		MIF_LOG(ERR, "Invalid directory: %s.", dir);
> +		ret = -EINVAL;
> +	}
> +
> +	rte_free(dir);
> +
> +	return ret;
> +}
> +
> +/*
> + * vdev probe callback: parse the device arguments and create the memif
> + * interface.
> + *
> + * Every argument is optional and is honoured only when it appears exactly
> + * once. Arguments are handled in a fixed order (role, id, bsize, rsize,
> + * nrxq, ntxq, socket, mac, zero-copy, secret); the first failure stops the
> + * probe and its error code is returned. Otherwise the result of
> + * memif_create() is returned.
> + */
> +static int rte_pmd_memif_probe(struct rte_vdev_device *vdev)
> +{
> +	const char *name = rte_vdev_device_name(vdev);
> +	const struct rte_kvargs_pair *pair;
> +	struct rte_kvargs *kvlist;
> +	unsigned int i;
> +	int ret = 0;
> +
> +	/* default values */
> +	enum memif_role_t role = MEMIF_ROLE_SLAVE;
> +	memif_interface_id_t id = 0;
> +	uint32_t flags = 0;
> +	uint16_t buffer_size = 2048;
> +	memif_log2_ring_size_t log2_ring_size = 10;
> +	uint8_t nrxq = 1;
> +	uint8_t ntxq = 1;
> +	const char *socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME;
> +	const char *secret = NULL;
> +	const char *eth_addr = NULL;
> +
> +	/* arguments converted by a handler, in processing order */
> +	const struct {
> +		const char *key;
> +		int (*handler)(const char *, const char *, void *);
> +		void *dst;
> +	} converted[] = {
> +		{ ETH_MEMIF_ROLE_ARG, &memif_set_role, &role },
> +		{ ETH_MEMIF_ID_ARG, &memif_set_id, &id },
> +		{ ETH_MEMIF_BUFFER_SIZE_ARG, &memif_set_bs, &buffer_size },
> +		{ ETH_MEMIF_RING_SIZE_ARG, &memif_set_rs, &log2_ring_size },
> +		{ ETH_MEMIF_NRXQ_ARG, &memif_set_nq, &nrxq },
> +		{ ETH_MEMIF_NTXQ_ARG, &memif_set_nq, &ntxq },
> +	};
> +
> +	MIF_LOG(INFO, "Initialize MEMIF: %s.", name);
> +
> +	kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments);
> +
> +	/* parse parameters */
> +	if (kvlist != NULL) {
> +		for (i = 0; i < sizeof(converted) / sizeof(converted[0]); i++) {
> +			if (rte_kvargs_count(kvlist, converted[i].key) != 1)
> +				continue;
> +			ret = rte_kvargs_process(kvlist, converted[i].key,
> +						 converted[i].handler,
> +						 converted[i].dst);
> +			if (ret < 0)
> +				goto exit;
> +		}
> +
> +		/* socket path: taken as is, but its directory must be usable */
> +		if (rte_kvargs_count(kvlist, ETH_MEMIF_SOCKET_ARG) == 1) {
> +			for (i = 0; i < kvlist->count; i++) {
> +				pair = &kvlist->pairs[i];
> +				if (strcmp(pair->key, ETH_MEMIF_SOCKET_ARG) != 0)
> +					continue;
> +				socket_filename = pair->value;
> +				ret = memif_check_socket_filename(socket_filename);
> +				if (ret < 0)
> +					goto exit;
> +			}
> +		}
> +
> +		/* MAC address string: parsed later by memif_create() */
> +		if (rte_kvargs_count(kvlist, ETH_MEMIF_MAC_ARG) == 1) {
> +			for (i = 0; i < kvlist->count; i++) {
> +				pair = &kvlist->pairs[i];
> +				if (strcmp(pair->key, ETH_MEMIF_MAC_ARG) != 0)
> +					continue;
> +				eth_addr = pair->value;
> +			}
> +		}
> +
> +		if (rte_kvargs_count(kvlist, ETH_MEMIF_ZC_ARG) == 1) {
> +			ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG,
> +						 &memif_set_zc, &flags);
> +			if (ret < 0)
> +				goto exit;
> +		}
> +
> +		if (rte_kvargs_count(kvlist, ETH_MEMIF_SECRET_ARG) == 1) {
> +			for (i = 0; i < kvlist->count; i++) {
> +				pair = &kvlist->pairs[i];
> +				if (strcmp(pair->key, ETH_MEMIF_SECRET_ARG) != 0)
> +					continue;
> +				secret = pair->value;
> +			}
> +		}
> +	}
> +
> +	/* create interface */
> +	ret = memif_create(vdev, role, id, flags, socket_filename,
> +			   log2_ring_size, nrxq, ntxq, buffer_size, secret,
> +			   eth_addr);
> +
> + exit:
> +	if (kvlist != NULL)
> +		rte_kvargs_free(kvlist);
> +	return ret;
> +}
> +
> +/*
> + * vdev remove callback: disconnect from the peer, detach the device from
> + * its control socket and release the ethdev.
> + *
> + * Returns 0, also when no ethdev exists for this vdev.
> + */
> +static int rte_pmd_memif_remove(struct rte_vdev_device *vdev)
> +{
> +	struct rte_eth_dev *eth_dev;
> +	struct pmd_internals *pmd;
> +
> +	eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
> +	if (eth_dev == NULL)
> +		return 0;
> +
> +	pmd = eth_dev->data->dev_private;
> +
> +	/* tell the peer why the connection goes away */
> +	memif_msg_enq_disconnect(pmd->cc, "Device removed", 0);
> +	memif_disconnect(eth_dev);
> +
> +	memif_socket_remove_device(pmd);
> +
> +	pmd->vdev = NULL;
> +
> +	/*
> +	 * NOTE(review): pmd->rx_queues / pmd->tx_queues are allocated in the
> +	 * queue setup functions and are not released here - confirm whether
> +	 * memif_disconnect() frees them.
> +	 */
> +
> +	/*
> +	 * mac_addrs points into pmd (set in memif_create()). Clear both
> +	 * pointers so rte_eth_dev_release_port() never sees memory that
> +	 * has already been freed.
> +	 */
> +	eth_dev->data->mac_addrs = NULL;
> +	rte_free(pmd);
> +	eth_dev->data->dev_private = NULL;
> +
> +	rte_eth_dev_release_port(eth_dev);
> +
> +	return 0;
> +}
> +
> +/* Virtual device driver entry points of the memif PMD. */
> +static struct rte_vdev_driver pmd_memif_drv = {
> +	.remove = rte_pmd_memif_remove,
> +	.probe = rte_pmd_memif_probe,
> +};
> +
> +/* Register the driver as "net_memif", with "eth_memif" as an alias. */
> +RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv);
> +RTE_PMD_REGISTER_ALIAS(net_memif, eth_memif);
> +/*
> + * Supported device arguments. Adjacent string literals are concatenated, so
> + * every entry but the last ends with a space to keep them separated.
> + */
> +RTE_PMD_REGISTER_PARAM_STRING(net_memif,
> +			      ETH_MEMIF_ID_ARG "=<int> "
> +			      ETH_MEMIF_ROLE_ARG "=<string> "
> +			      ETH_MEMIF_BUFFER_SIZE_ARG "=<int> "
> +			      ETH_MEMIF_RING_SIZE_ARG "=<int> "
> +			      ETH_MEMIF_NRXQ_ARG "=<int> "
> +			      ETH_MEMIF_NTXQ_ARG "=<int> "
> +			      ETH_MEMIF_SOCKET_ARG "=<string> "
> +			      ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx "
> +			      ETH_MEMIF_ZC_ARG "=<string> "
> +			      ETH_MEMIF_SECRET_ARG "=<string>");
> +
> +/*
> + * Constructor: register the "pmd.net.memif" log type and, if registration
> + * succeeded, set its level to NOTICE.
> + */
> +RTE_INIT(memif_init_log)
> +{
> +	memif_logtype = rte_log_register("pmd.net.memif");
> +	if (memif_logtype < 0)
> +		return;
> +
> +	rte_log_set_level(memif_logtype, RTE_LOG_NOTICE);
> +}
> diff --git a/drivers/net/memif/rte_eth_memif.h b/drivers/net/memif/rte_eth_memif.h
> new file mode 100644
> index 000000000..bbd79e1a5
> --- /dev/null
> +++ b/drivers/net/memif/rte_eth_memif.h
> @@ -0,0 +1,189 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2018 Cisco Systems, Inc.  All rights reserved.
> + */
> +
> +#ifndef _RTE_ETH_MEMIF_H_
> +#define _RTE_ETH_MEMIF_H_
> +
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif				/* GNU_SOURCE */
> +
> +#include <stdio.h>
> +#include <sys/queue.h>
> +
> +#include <rte_ethdev_driver.h>
> +#include <rte_ether.h>
> +#include <rte_timer.h>
> +#include <rte_interrupts.h>
> +
> +#include <memif.h>
> +
> +/*
> + * Default MAC address, used when no "mac" argument is given.
> + * The first octet is 0x02: locally administered, unicast. (An odd first
> + * octet such as 0x01 would mark the address as multicast, which is not
> + * valid for an interface address.)
> + * TODO: generate mac?
> + */
> +#define ETH_MEMIF_DEFAULT_ETH_ADDR		"02:ab:23:cd:45:ef"
> +
> +#define ETH_MEMIF_DEFAULT_SOCKET_FILENAME	"/tmp/memif.sock"
> +#define ETH_MEMIF_DEFAULT_RING_SIZE		10
> +#define ETH_MEMIF_DEFAULT_NRXQ			1
> +#define ETH_MEMIF_DEFAULT_NTXQ			1
> +#define ETH_MEMIF_DEFAULT_BUFFER_SIZE		2048
> +
> +#define ETH_MEMIF_MAX_NUM_Q_PAIRS		256
> +#define ETH_MEMIF_MAX_LOG2_RING_SIZE		14
> +#define ETH_MEMIF_MAX_REGION_IDX		255
> +
> +int memif_logtype;
> +
> +#define memif_min(a,b) (((a) < (b)) ? (a) : (b))
> +
> +#define MIF_LOG(level, fmt, args...) \
> +do {							\
> +	rte_log(RTE_LOG_ ## level, memif_logtype,	\
> +		"%s(): " fmt "\n", __func__, ##args);	\
> +} while (0)
> +
> +enum memif_role_t {
> +	MEMIF_ROLE_MASTER = 0,
> +	MEMIF_ROLE_SLAVE = 1,
> +};
> +
> +/* Shared memory region. */
> +struct memif_region {
> +	void *addr;
> +	memif_region_size_t region_size;
> +	int fd;
> +	uint32_t buffer_offset;
> +};
> +
> +struct memif_queue {
> +	struct rte_mempool *mempool;
> +	uint16_t in_port;
> +
> +	struct pmd_internals *pmd;
> +
> +	struct rte_intr_handle intr_handle;
> +
> +	/* ring info */
> +	memif_ring_type_t type;
> +	memif_ring_t *ring;
> +	memif_log2_ring_size_t log2_ring_size;
> +
> +	memif_region_index_t region;
> +	memif_region_offset_t offset;
> +
> +	uint16_t last_head;
> +	uint16_t last_tail;
> +	uint32_t *buffers;
> +
> +	/* rx/tx info */
> +	uint64_t n_pkts;
> +	uint64_t n_bytes;
> +	uint64_t n_err;
> +};
> +
> +struct pmd_internals {
> +	int if_index;
> +	memif_interface_id_t id;
> +	enum memif_role_t role;
> +	uint32_t flags;
> +#define ETH_MEMIF_FLAG_CONNECTING	(1 << 0)
> +#define ETH_MEMIF_FLAG_CONNECTED	(1 << 1)
> +#define ETH_MEMIF_FLAG_ZERO_COPY	(1 << 2)
> +/* device has not been configured and can not accept connection requests */
> +#define ETH_MEMIF_FLAG_DISABLED		(1 << 3)
> +
> +	struct ether_addr eth_addr;
> +	char *socket_filename;
> +	char secret[24];
> +
> +	struct memif_control_channel *cc;
> +
> +	struct memif_region *regions;
> +	uint8_t regions_num;
> +
> +	struct memif_queue *rx_queues;
> +	struct memif_queue *tx_queues;
> +
> +	/* remote info */
> +	char remote_name[64];
> +	char remote_if_name[64];
> +
> +	/* Configured parameters (max values) */
> +	struct {
> +		memif_log2_ring_size_t log2_ring_size;
> +		uint8_t num_s2m_rings;
> +		uint8_t num_m2s_rings;
> +		uint16_t buffer_size;
> +	} cfg;
> +
> +	/* Parameters used in active connection */
> +	struct {
> +		memif_log2_ring_size_t log2_ring_size;
> +		uint8_t num_s2m_rings;
> +		uint8_t num_m2s_rings;
> +		uint16_t buffer_size;
> +	} run;
> +
> +	char local_disc_string[96];
> +	char remote_disc_string[96];
> +
> +	/* vdev handle */
> +	struct rte_vdev_device *vdev;
> +};
> +
> +void memif_free_regions(struct pmd_internals *pmd);
> +
> +/*
> + * Finalize connection establishment process. Map shared memory file
> + * (master role), initialize ring queue, set link status up.
> + */
> +int memif_connect(struct pmd_internals *pmd);
> +
> +/*
> + * Create shared memory file and initialize ring queue.
> + * Only called by slave when establishing connection
> + */
> +int memif_init_regions_and_queues(struct pmd_internals *pmd);
> +
> +const char *memif_version(void);
> +
> +#ifndef MFD_HUGETLB
> +#ifndef __NR_memfd_create
> +
> +#if defined __x86_64__
> +#define __NR_memfd_create 319
> +#elif defined __arm__
> +#define __NR_memfd_create 385
> +#elif defined __aarch64__
> +#define __NR_memfd_create 279
> +#else
> +#error "__NR_memfd_create unknown for this architecture"
> +#endif
> +
> +#endif				/* __NR_memfd_create */
> +
> +static inline int memfd_create(const char *name, unsigned int flags)
> +{
> +	return syscall(__NR_memfd_create, name, flags);
> +}
> +#endif				/* MFD_HUGETLB */
> +
> +#ifndef F_LINUX_SPECIFIC_BASE
> +#define F_LINUX_SPECIFIC_BASE 1024
> +#endif
> +
> +#ifndef MFD_ALLOW_SEALING
> +#define MFD_ALLOW_SEALING       0x0002U
> +#endif
> +
> +#ifndef F_ADD_SEALS
> +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
> +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
> +
> +#define F_SEAL_SEAL     0x0001	/* prevent further seals from being set */
> +#define F_SEAL_SHRINK   0x0002	/* prevent file from shrinking */
> +#define F_SEAL_GROW     0x0004	/* prevent file from growing */
> +#define F_SEAL_WRITE    0x0008	/* prevent writes */
> +#endif
> +
> +#endif				/* RTE_ETH_MEMIF_H */
> diff --git a/drivers/net/memif/rte_pmd_memif_version.map b/drivers/net/memif/rte_pmd_memif_version.map
> new file mode 100644
> index 000000000..aee560afa
> --- /dev/null
> +++ b/drivers/net/memif/rte_pmd_memif_version.map
> @@ -0,0 +1,4 @@
> +DPDK_2.0 {
> +
> +        local: *;
> +};
> diff --git a/drivers/net/meson.build b/drivers/net/meson.build
> index 980eec233..b0becbf31 100644
> --- a/drivers/net/meson.build
> +++ b/drivers/net/meson.build
> @@ -21,6 +21,7 @@ drivers = ['af_packet',
>  	'ixgbe',
>  	'kni',
>  	'liquidio',
> +	'memif',
>  	'mlx4',
>  	'mlx5',
>  	'mvneta',
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index 5699d979d..f236c5ebc 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -168,6 +168,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI)        += -lrte_pmd_kni
>  endif
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_LIO_PMD)        += -lrte_pmd_lio
> +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF)      += -lrte_pmd_memif
>  ifeq ($(CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS),y)
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4 -ldl
>  else
> --
> 2.17.1

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [RFC v2] /net: memory interface (memif)
  2018-12-10 10:50     ` Richardson, Bruce
@ 2018-12-12 10:19       ` Burakov, Anatoly
  2018-12-12 10:30         ` Bruce Richardson
  0 siblings, 1 reply; 13+ messages in thread
From: Burakov, Anatoly @ 2018-12-12 10:19 UTC (permalink / raw)
  To: Richardson, Bruce, Jakub Grajciar, dev

On 10-Dec-18 10:50 AM, Richardson, Bruce wrote:
> 
> 
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Burakov, Anatoly
>> Sent: Monday, December 10, 2018 10:43 AM
>> To: Jakub Grajciar <jgrajcia@cisco.com>; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [RFC v2] /net: memory interface (memif)
>>
>> On 10-Dec-18 10:06 AM, Jakub Grajciar wrote:
>>> Signed-off-by: Jakub Grajciar <jgrajcia@cisco.com>
>>> ---
>>
>> As a general comment, some description/cover letter would have been nice.
>>
>>> +
>>> +	memif_msg_disconnect_t *d = &e->msg.disconnect;
>>> +
>>> +	e->msg.type = MEMIF_MSG_TYPE_DISCONNECT;
>>> +	d->code = err_code;
>>> +
>>> +	if (reason != NULL) {
>>> +		strncpy((char *)d->string, reason, strlen(reason));
>>> +		if (cc->pmd != NULL) {
>>> +			strncpy(cc->pmd->local_disc_string, reason,
>>> +				strlen(reason));
>>> +		}
>>
>> I haven't looked at the entire thing, this is just something that caught
>> my eye during quick skimming through code.
>>
>> On the face of it, this looks dangerous - you're setting the destination
>> buffer size from source buffer size. What if `d->string` is shorter than
>> `reason`?
>>
> 
> And strncpy is dangerous - use strlcpy instead.

Isn't strscpy the string copy function du jour now? :)

> 
> /Bruce
> 


-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [RFC v2] /net: memory interface (memif)
  2018-12-12 10:19       ` Burakov, Anatoly
@ 2018-12-12 10:30         ` Bruce Richardson
  0 siblings, 0 replies; 13+ messages in thread
From: Bruce Richardson @ 2018-12-12 10:30 UTC (permalink / raw)
  To: Burakov, Anatoly; +Cc: Jakub Grajciar, dev

On Wed, Dec 12, 2018 at 10:19:02AM +0000, Burakov, Anatoly wrote:
> On 10-Dec-18 10:50 AM, Richardson, Bruce wrote:
> > 
> > 
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Burakov, Anatoly
> > > Sent: Monday, December 10, 2018 10:43 AM
> > > To: Jakub Grajciar <jgrajcia@cisco.com>; dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [RFC v2] /net: memory interface (memif)
> > > 
> > > On 10-Dec-18 10:06 AM, Jakub Grajciar wrote:
> > > > Signed-off-by: Jakub Grajciar <jgrajcia@cisco.com>
> > > > ---
> > > 
> > > As a general comment, some description/cover letter would have been nice.
> > > 
> > > > +
> > > > +	memif_msg_disconnect_t *d = &e->msg.disconnect;
> > > > +
> > > > +	e->msg.type = MEMIF_MSG_TYPE_DISCONNECT;
> > > > +	d->code = err_code;
> > > > +
> > > > +	if (reason != NULL) {
> > > > +		strncpy((char *)d->string, reason, strlen(reason));
> > > > +		if (cc->pmd != NULL) {
> > > > +			strncpy(cc->pmd->local_disc_string, reason,
> > > > +				strlen(reason));
> > > > +		}
> > > 
> > > I haven't looked at the entire thing, this is just something that caught
> > > my eye during quick skimming through code.
> > > 
> > > On the face of it, this looks dangerous - you're setting the destination
> > > buffer size from source buffer size. What if `d->string` is shorter than
> > > `reason`?
> > > 
> > 
> > And strncpy is dangerous - use strlcpy instead.
> 
> Isn't strscpy the string copy function du jour now? :)
> 
Well, it's certainly the new one! It's only available as a DPDK-specific
function though - "rte_strscpy". Therefore, I think strlcpy is generally
preferred, but either will do.

/Bruce

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [RFC v2] /net: memory interface (memif)
  2018-12-10 15:13     ` Wiles, Keith
@ 2018-12-17  4:26       ` Honnappa Nagarahalli
  2018-12-17  4:54         ` Honnappa Nagarahalli
  0 siblings, 1 reply; 13+ messages in thread
From: Honnappa Nagarahalli @ 2018-12-17  4:26 UTC (permalink / raw)
  To: Wiles, Keith, Jakub Grajciar; +Cc: dev, nd

> >
> >> On Dec 10, 2018, at 4:06 AM, Jakub Grajciar <jgrajcia@cisco.com> wrote:
> >
> > I do not like being the coding style police, but that is most of the comments
> here and I will try to test this one later this week. Plus I am sure I missed some
> style problems, if you have not read the coding style for DPDK please have a
> read.
> >
> > http://doc.dpdk.org/guides/contributing/coding_style.html
> >
> > One comment, why did you include all of the code to handle memif instead
> of including the libmemif.a from VPP. I worry if libmemif is changed then we
> have a breakage. I do not mind the PMD being standalone and I do like not
> having the dependence.
Just for my understanding, do you mean to say we could include the libmemif.a as a binary in DPDK?

IMO, I would like to view DPDK as the device abstraction and VPP as the protocol stack built on top. From this perspective, it is good to have standalone memif in DPDK.

> >
> > As I did not dive into the code much it does look reasonable and I hope to
> give it a try later this week.
> >>
> 
> A couple more items, do you plan on writing the documentation for the PMD
> and provide an example program?
+1, would be good to have a cover letter.
I would like to run this on Arm platforms, mostly in the beginning of Jan.

> 
> Regards,
> Keith

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [RFC v2] /net: memory interface (memif)
  2018-12-17  4:26       ` Honnappa Nagarahalli
@ 2018-12-17  4:54         ` Honnappa Nagarahalli
  0 siblings, 0 replies; 13+ messages in thread
From: Honnappa Nagarahalli @ 2018-12-17  4:54 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Wiles, Keith, Jakub Grajciar; +Cc: dev, nd, nd

> > >> On Dec 10, 2018, at 4:06 AM, Jakub Grajciar <jgrajcia@cisco.com> wrote:
> > >
> > > I do not like being the coding style police, but that is most of the
> > > comments
> > here and I will try to test this one later this week. Plus I am sure I
> > missed some style problems, if you have not read the coding style for
> > DPDK please have a read.
> > >
> > > http://doc.dpdk.org/guides/contributing/coding_style.html
> > >
> > > One comment, why did you include all of the code to handle memif
> > > instead
> > of including the libmemif.a from VPP. I worry if libmemif is changed
> > then we have a breakage. I do not mind the PMD being standalone and I
> > do like not having the dependence.
> Just for my understanding, do you mean to say we could include the
> libmemif.a as a binary in DPDK?
> 
> IMO, I would like to view DPDK as the device abstraction and VPP as the
> protocol stack built on top. From this perspective, it is good to have
> standalone memif in DPDK.
> 
> > >
> > > As I did not dive into the code much it does look reasonable and I
> > > hope to
> > give it a try later this week.
> > >>
> >
> > A couple more items, do you plan on writing the documentation for the
> > PMD and provide an example program?
> +1, would be good to have a cover letter.
Please ignore this; I see that V3 already has some documentation.

> I would like to run this on Arm platforms, mostly in the beginning of Jan.
> 
> >
> > Regards,
> > Keith

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2018-12-17  4:54 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-10  9:14 [dpdk-dev] [PATCH v3] eal_interrupts: add option for pending callback unregister Jakub Grajciar
2018-12-10  9:42 ` [dpdk-dev] [RFC] /net: memory interface (memif) Jakub Grajciar
2018-12-10 10:06 ` [dpdk-dev] [RFC v2] " Jakub Grajciar
2018-12-10 10:42   ` Burakov, Anatoly
2018-12-10 10:50     ` Richardson, Bruce
2018-12-12 10:19       ` Burakov, Anatoly
2018-12-12 10:30         ` Bruce Richardson
2018-12-10 14:48   ` Wiles, Keith
2018-12-10 15:13     ` Wiles, Keith
2018-12-17  4:26       ` Honnappa Nagarahalli
2018-12-17  4:54         ` Honnappa Nagarahalli
2018-12-10 16:20   ` Stephen Hemminger
2018-12-11  7:39   ` Ananyev, Konstantin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).