DPDK patches and discussions
 help / color / mirror / Atom feed
From: Jan Blunck <jblunck@infradead.org>
To: dev@dpdk.org
Cc: jblunck@brocade.com, shemming@brocade.com,
	Stephen Hemminger <stephen@networkplumber.org>
Subject: [dpdk-dev] [PATCH v3 2/3] xen: Add netfront poll mode driver
Date: Tue, 22 Mar 2016 10:55:28 +0100	[thread overview]
Message-ID: <1458640529-9183-3-git-send-email-jblunck@infradead.org> (raw)
In-Reply-To: <1458640529-9183-1-git-send-email-jblunck@infradead.org>

This implements a poll mode driver that has the same functionality as
the Xen netfront driver in the Linux kernel.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Jan Blunck <jblunck@infradead.org>
---
 config/common_base                 |   6 +
 drivers/net/Makefile               |   1 +
 drivers/net/xen/Makefile           |  30 ++
 drivers/net/xen/uio.c              | 245 ++++++++++++
 drivers/net/xen/uio.h              |  54 +++
 drivers/net/xen/xen_adapter_info.h |  64 ++++
 drivers/net/xen/xen_dev.c          | 489 ++++++++++++++++++++++++
 drivers/net/xen/xen_dev.h          |  30 ++
 drivers/net/xen/xen_logs.h         |  19 +
 drivers/net/xen/xen_rxtx.c         | 757 +++++++++++++++++++++++++++++++++++++
 drivers/net/xen/xen_rxtx.h         | 131 +++++++
 11 files changed, 1826 insertions(+)
 create mode 100644 drivers/net/xen/Makefile
 create mode 100644 drivers/net/xen/uio.c
 create mode 100644 drivers/net/xen/uio.h
 create mode 100644 drivers/net/xen/xen_adapter_info.h
 create mode 100644 drivers/net/xen/xen_dev.c
 create mode 100644 drivers/net/xen/xen_dev.h
 create mode 100644 drivers/net/xen/xen_logs.h
 create mode 100644 drivers/net/xen/xen_rxtx.c
 create mode 100644 drivers/net/xen/xen_rxtx.h

diff --git a/config/common_base b/config/common_base
index dbd405b..36e4b59 100644
--- a/config/common_base
+++ b/config/common_base
@@ -306,6 +306,12 @@ CONFIG_RTE_LIBRTE_PMD_AF_PACKET=n
 CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
 
 #
+# Compile XEN UIO net-front PMD driver
+#
+CONFIG_RTE_LIBRTE_PMD_XEN=n
+CONFIG_RTE_LIBRTE_PMD_XEN_DEBUG_INIT=n
+
+#
 # Compile null PMD
 #
 CONFIG_RTE_LIBRTE_PMD_NULL=y
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 0c3393f..003e51b 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -51,5 +51,6 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += szedata2
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += xenvirt
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_XEN) += xen
 
 include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/drivers/net/xen/Makefile b/drivers/net/xen/Makefile
new file mode 100644
index 0000000..9e75157
--- /dev/null
+++ b/drivers/net/xen/Makefile
@@ -0,0 +1,30 @@
+#
+#   Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+#   All rights reserved.
+#
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_xen.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+VPATH += $(RTE_SDK)/drivers/net/xen
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_XEN) += uio.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_XEN) += xen_dev.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_XEN) += xen_rxtx.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XEN) += lib/librte_eal lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XEN) += lib/librte_mempool lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XEN) += lib/librte_net lib/librte_malloc
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/xen/uio.c b/drivers/net/xen/uio.c
new file mode 100644
index 0000000..54e10b9
--- /dev/null
+++ b/drivers/net/xen/uio.c
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/mman.h>
+#include <sys/types.h>
+
+#include <rte_log.h>
+
+#include "uio.h"
+
+#define SYSFS_DEVICES_PATH "/sys/devices"
+#define SYSFS_CLASS_UIO_PATH "/sys/class/uio"
+
+int
+sysfs_device_get_uio_num(const char *device)
+{
+	DIR *dir;
+	struct dirent *e;
+	char path[PATH_MAX];
+	unsigned long uio_num;
+
+	snprintf(path, sizeof(path), "%s/%s/uio", SYSFS_DEVICES_PATH, device);
+	dir = opendir(path);
+	if (!dir)
+		return -errno;
+
+	while ((e = readdir(dir)) != NULL) {
+		char *endptr;
+
+		if (strncmp(e->d_name, "uio", 3) != 0)
+			continue;
+
+		uio_num = strtoul(e->d_name + 3, &endptr, 10);
+		if (endptr == e->d_name || *endptr != '\0' ||
+		    uio_num == ULONG_MAX)
+			continue;
+
+		RTE_LOG(DEBUG, PMD, "%s uio_num = %lu\n", device, uio_num);
+		break;
+	}
+
+	closedir(dir);
+
+	if (!e)
+		return -ENODEV;
+
+	if (uio_num > 255)
+		return -EINVAL;
+
+	return (int)uio_num;
+}
+
+static int
+sysfs_get_buffer(const char *filename, char *buf, size_t bufsize)
+{
+	FILE *f;
+	char *ptr;
+
+	f = fopen(filename, "r");
+	if (!f) {
+		RTE_LOG(ERR, EAL, "cannot open sysfs file %s\n", filename);
+		return -1;
+	}
+
+	ptr = fgets(buf, bufsize, f);
+	fclose(f);
+	if (!ptr) {
+		RTE_LOG(ERR, EAL, "cannot read sysfs file %s\n", filename);
+		return -1;
+	}
+
+	/* Jump to the end (on success fgets adds a terminating null byte)
+	 * and eat the trailing newline.
+	 */
+	ptr += strlen(ptr) - 1;
+	if (*ptr == '\n')
+		*ptr = '\0';
+
+	return 0;
+}
+
+static int
+sysfs_get_value(const char *filename, uint64_t *val)
+{
+	char buf[BUFSIZ];
+	char *end = NULL;
+
+	if (sysfs_get_buffer(filename, buf, sizeof(buf)) < 0)
+		return -1;
+
+	*val = strtoull(buf, &end, 0);
+	if ((buf[0] == '\0') || !end || (*end != '\0')) {
+		RTE_LOG(ERR, EAL, "cannot parse sysfs value %s\n", filename);
+		return -1;
+	}
+
+	return 0;
+}
+
+int
+sysfs_uio_get_info(struct uio_resource *uio)
+{
+	char path[PATH_MAX];
+
+	snprintf(path, sizeof(path), SYSFS_CLASS_UIO_PATH "/uio%u/name",
+		 uio->idx);
+	if (sysfs_get_buffer(path, uio->name, ARRAY_SIZE(uio->name)))
+		return -ENODEV;
+
+	snprintf(path, sizeof(path), SYSFS_CLASS_UIO_PATH "/uio%u/version",
+		 uio->idx);
+	if (sysfs_get_buffer(path, uio->version, ARRAY_SIZE(uio->version)))
+		return -ENODEV;
+
+	return 0;
+}
+
+#define OFF_MAX ((uint64_t)(off_t)-1)
+static ssize_t
+__uio_get_mappings(const char *name, struct uio_map maps[], size_t nb_maps)
+{
+	size_t i;
+	char dirname[PATH_MAX];
+	char filename[PATH_MAX];
+	uint64_t offset, size;
+
+	for (i = 0; i < nb_maps; i++) {
+		snprintf(dirname, sizeof(dirname), "%s/maps/map%zu", name, i);
+
+		if (access(dirname, F_OK) != 0)
+			break;
+
+		snprintf(filename, sizeof(filename), "%s/offset", dirname);
+		if (sysfs_get_value(filename, &offset) < 0) {
+			RTE_LOG(ERR, EAL, "cannot parse offset of %s\n",
+				dirname);
+			return -1;
+		}
+
+		snprintf(filename, sizeof(filename), "%s/size", dirname);
+		if (sysfs_get_value(filename, &size) < 0) {
+			RTE_LOG(ERR, EAL, "cannot parse size of %s\n", dirname);
+			return -1;
+		}
+
+		snprintf(filename, sizeof(filename), "%s/addr", dirname);
+		if (sysfs_get_value(filename, &maps[i].phaddr) < 0) {
+			RTE_LOG(ERR, EAL, "cannot parse addr of %s\n", dirname);
+			return -1;
+		}
+
+		if ((offset > OFF_MAX) || (size > SIZE_MAX)) {
+			RTE_LOG(ERR, EAL,
+				"offset/size exceed system max value\n");
+			return -1;
+		}
+
+		maps[i].addr = NULL;
+		maps[i].offset = offset;
+		maps[i].size = size;
+	}
+
+	return i;
+}
+
+int
+sysfs_uio_get_mappings(struct uio_resource *uio)
+{
+	char path[PATH_MAX];
+
+	snprintf(path, sizeof(path), SYSFS_CLASS_UIO_PATH "/uio%u", uio->idx);
+	uio->nb_maps = __uio_get_mappings(path, uio->maps,
+					  ARRAY_SIZE(uio->maps));
+	return uio->nb_maps;
+}
+
+int
+uio_map_addresses(struct uio_resource *uio, unsigned int max_addresses)
+{
+	char path[PATH_MAX];
+	unsigned int j;
+	int fd;
+	const uint64_t pagesz = sysconf(_SC_PAGESIZE);
+
+	snprintf(path, sizeof(path), "/dev/uio%u", uio->idx);
+
+	fd = open(path, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "%s can't open file descriptor: %s\n",
+			path, strerror(errno));
+		return -errno;
+	}
+
+	for (j = 0; j < uio->nb_maps && j < max_addresses; j++) {
+		struct uio_map *map = &uio->maps[j];
+
+		map->addr = mmap(NULL, map->size, PROT_READ | PROT_WRITE,
+				    MAP_SHARED, fd, j * pagesz);
+		if (map->addr == MAP_FAILED) {
+			RTE_LOG(ERR, PMD,
+				"%s mmap map%u (%ld@0x%lx) failed (%s)\n",
+				path, j, map->size, map->offset,
+				strerror(errno));
+			close(fd);
+			return -errno;
+		}
+
+		RTE_LOG(DEBUG, PMD, "%s mmap map%u (%ld@0x%lx) to %p\n",
+			path, j, map->size, map->offset, map->addr);
+	}
+
+	close(fd);
+	return 0;
+}
+
+void
+uio_unmap_addresses(struct uio_resource *uio, unsigned int max_addresses)
+{
+	char path[PATH_MAX];
+	unsigned int j;
+
+	snprintf(path, sizeof(path), "/dev/uio%u", uio->idx);
+
+	for (j = 0; j < uio->nb_maps && j < max_addresses; ++j) {
+		struct uio_map *map = &uio->maps[j];
+
+		if (!map->addr || map->addr == MAP_FAILED)
+			continue;
+
+		munmap(map->addr, map->size);
+		RTE_LOG(DEBUG, PMD, "%s munmap map%u (%ld@0x%lx) to %p\n",
+			path, j, map->size, map->offset, map->addr);
+	}
+}
diff --git a/drivers/net/xen/uio.h b/drivers/net/xen/uio.h
new file mode 100644
index 0000000..737b984
--- /dev/null
+++ b/drivers/net/xen/uio.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _DRIVERS_NET_XEN_UIO_H_
+#define _DRIVERS_NET_XEN_UIO_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define MAX_UIO_NAME 64
+
+/* from include/linux/uio_driver.h */
+#ifndef MAX_UIO_MAPS
+#define MAX_UIO_MAPS 5
+#endif
+
+struct uio_map {
+	void *addr;
+	uint64_t offset;
+	uint64_t size;
+	uint64_t phaddr;
+};
+
+struct uio_resource {
+	unsigned int idx;
+	char name[MAX_UIO_NAME];
+	char version[MAX_UIO_NAME];
+	size_t nb_maps;
+	struct uio_map maps[MAX_UIO_MAPS];
+};
+
+int sysfs_device_get_uio_num(const char *device);
+int sysfs_uio_get_info(struct uio_resource *uio);
+int sysfs_uio_get_mappings(struct uio_resource *uio);
+
+int uio_map_addresses(struct uio_resource *uio, unsigned int max_addresses);
+void uio_unmap_addresses(struct uio_resource *uio, unsigned int max_addresses);
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+static inline void *
+uio_get_map_addr(struct uio_resource *uio, unsigned int idx)
+{
+	if (idx >= uio->nb_maps)
+		return NULL;
+
+	return (char *)uio->maps[idx].addr + uio->maps[idx].offset;
+}
+
+#endif /* _DRIVERS_NET_XEN_UIO_H_ */
diff --git a/drivers/net/xen/xen_adapter_info.h b/drivers/net/xen/xen_adapter_info.h
new file mode 100644
index 0000000..80f918d
--- /dev/null
+++ b/drivers/net/xen/xen_adapter_info.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef XEN_ADAPTER_INFO_H_
+#define XEN_ADAPTER_INFO_H_
+
+#define MAX_TARGET 256
+
+#define IOCTL_EVTCHN_NOTIFY_GRANT 7
+
+struct gref_addr {
+	grant_ref_t gref;
+	unsigned long paddr;
+};
+
+struct ioctl_evtchn_notify_grant {
+	unsigned int port;
+	int otherend_id;
+	uint16_t count;
+	uint8_t is_rx;
+	union {
+		struct xen_netif_rx_front_ring *rx_ring;
+		struct xen_netif_tx_front_ring *tx_ring;
+	} u;
+	struct netfront_info *info;
+	uint16_t rel_count;
+	grant_ref_t rel_gref[MAX_TARGET];
+	struct gref_addr s[MAX_TARGET];
+};
+
+#define XEN_PMD_UIO_NAME "xen/pmd_uio"
+
+enum {
+	INFO_MAP = 0,
+	RX_RING_MAP,
+	TX_RING_MAP,
+	XEN_MAP_MAX
+};
+
+struct xen_adapter_info {
+	/*global parameters */
+	struct xen_netif_rx_front_ring *rx_ring;
+	struct xen_netif_tx_front_ring *tx_ring;
+	struct netfront_info *info;
+
+	uint8_t is_connected;
+	uint8_t disconnect_count;
+
+	/*adapter specific data*/
+	int otherend_id;
+	unsigned int rx_evtchn;
+	unsigned int tx_evtchn;
+	u_int8_t mac[6];
+
+	/*params of grefs array*/
+	uint16_t rx_grefs_count;
+	uint16_t tx_grefs_count;
+	/* this field has to be the last */
+	grant_ref_t rxtx_grefs[];
+};
+
+#endif /* XEN_ADAPTER_INFO_H_ */
diff --git a/drivers/net/xen/xen_dev.c b/drivers/net/xen/xen_dev.c
new file mode 100644
index 0000000..e32255e
--- /dev/null
+++ b/drivers/net/xen/xen_dev.c
@@ -0,0 +1,489 @@
+/*
+ * Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include "xen_dev.h"
+#include "xen_rxtx.h"
+#include "xen_logs.h"
+
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+
+#include <xen/sys/evtchn.h>
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+
+#define XEN_MAX_RX_PKTLEN  0xFFFF
+#define XEN_MIN_RX_BUFSIZE (2 * PAGE_SIZE)
+
+#define XEN_DEV_PATH "/sys/bus/xen/devices"
+
+static const char *drivername = "xen-netfront PMD";
+
+static int xen_evt_fd = -1;
+
+int
+xen_evtchn_notify_grant_rxtx(struct ioctl_evtchn_notify_grant *ng)
+{
+	int rc;
+
+	if (unlikely(xen_evt_fd < 0))
+		return -1;
+
+	rc = ioctl(xen_evt_fd, IOCTL_EVTCHN_NOTIFY_GRANT, ng);
+	if (rc == -1)
+		rc = errno;
+
+	return rc;
+}
+
+static int
+xen_evtchn_notify_rxtx(unsigned int evtchn)
+{
+	struct ioctl_evtchn_notify notify = { .port = evtchn };
+	int rc;
+
+	if (unlikely(xen_evt_fd < 0))
+		return -1;
+
+	rc = ioctl(xen_evt_fd, IOCTL_EVTCHN_NOTIFY, &notify);
+	if (rc == -1)
+		rc = errno;
+
+	return rc;
+}
+
+static int
+xen_evtchn_notify(struct xen_adapter *xa)
+{
+	int res = 0;
+
+	res += xen_evtchn_notify_rxtx(xa->info_page->tx_evtchn);
+
+	if (xa->info_page->tx_evtchn != xa->info_page->rx_evtchn)
+		res += xen_evtchn_notify_rxtx(xa->info_page->rx_evtchn);
+
+	return res;
+}
+
+static void
+xen_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+	unsigned i;
+
+	PMD_INIT_FUNC_TRACE();
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		struct xen_tx_stats *txs = &((struct xen_tx_queue *)
+				dev->data->tx_queues[i])->tx_stats;
+		if (!txs)
+			continue;
+
+		stats->opackets += txs->opackets;
+		stats->obytes += txs->obytes;
+		stats->oerrors += txs->oerrors;
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		struct xen_rx_stats *rxs = &((struct xen_rx_queue *)
+				dev->data->rx_queues[i])->rx_stats;
+		if (!rxs)
+			continue;
+
+		stats->ipackets += rxs->ipackets;
+		stats->ierrors += rxs->ierrors;
+		stats->ibytes += rxs->ibytes;
+	}
+}
+
+static void
+xen_dev_stats_reset(struct rte_eth_dev *dev)
+{
+	uint16_t i;
+
+	PMD_INIT_FUNC_TRACE();
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		struct xen_tx_stats *txs = &((struct xen_tx_queue *)
+				dev->data->tx_queues[i])->tx_stats;
+		if (!txs)
+			continue;
+
+		txs->opackets = 0;
+		txs->obytes = 0;
+		txs->oerrors = 0;
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		struct xen_rx_stats *rxs = &((struct xen_rx_queue *)
+				dev->data->rx_queues[i])->rx_stats;
+		if (!rxs)
+			continue;
+
+		rxs->ipackets = 0;
+		rxs->ibytes = 0;
+		rxs->ierrors = 0;
+	}
+}
+
+static void
+xen_dev_info_get(struct rte_eth_dev *dev,
+		 struct rte_eth_dev_info *dev_info)
+{
+	PMD_INIT_FUNC_TRACE();
+
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_rx_pktlen = XEN_MAX_RX_PKTLEN;
+	dev_info->max_rx_queues = dev->data->nb_rx_queues;
+	dev_info->max_tx_queues = dev->data->nb_tx_queues;
+	dev_info->min_rx_bufsize = XEN_MIN_RX_BUFSIZE;
+}
+
+static int
+xen_dev_configure(struct rte_eth_dev *dev)
+{
+	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
+	struct xen_adapter *xa = dev->data->dev_private;
+
+	PMD_INIT_FUNC_TRACE();
+
+	if (rxmode->hw_ip_checksum) {
+		RTE_LOG(ERR, PMD, "HW IP checksum not supported");
+		return -EINVAL;
+	}
+
+	xa->vlan_strip = rxmode->hw_vlan_strip;
+
+	return 0;
+}
+
+static void
+xen_dev_close(struct rte_eth_dev *dev __rte_unused)
+{
+	PMD_INIT_FUNC_TRACE();
+}
+
+/**
+ * Atomically writes the link status information into global
+ * structure rte_eth_dev.
+ *
+ * @param dev
+ *   Pointer to the structure rte_eth_dev to update.
+ * @param link New link status to be written into dev->data->dev_link.
+ *
+ * @return
+ *   - On success, zero.
+ *   - On failure, negative value.
+ */
+static inline int
+xen_dev_atomic_write_link_status(struct rte_eth_dev *dev,
+				 struct rte_eth_link *link)
+{
+	struct rte_eth_link *dst = &dev->data->dev_link;
+	struct rte_eth_link *src = link;
+
+	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
+				*(uint64_t *)src) == 0)
+		return -1;
+
+	return 0;
+}
+
+static int
+xen_dev_link_update(struct rte_eth_dev *dev,
+		    int wait_to_complete __rte_unused)
+{
+	struct rte_eth_link link;
+
+	PMD_INIT_FUNC_TRACE();
+
+	link.link_status = 1;
+	link.link_speed = ETH_LINK_SPEED_10G;
+	link.link_duplex = ETH_LINK_FULL_DUPLEX;
+
+	xen_dev_atomic_write_link_status(dev, &link);
+
+	return 0;
+}
+
+static int
+xen_dev_start(struct rte_eth_dev *dev)
+{
+	struct xen_adapter *xa = dev->data->dev_private;
+
+	PMD_INIT_FUNC_TRACE();
+
+	xen_dev_link_update(dev, 0);
+
+	xen_evtchn_notify(xa);
+
+	return 0;
+}
+
+static void
+xen_dev_stop(struct rte_eth_dev *dev __rte_unused)
+{
+	PMD_INIT_FUNC_TRACE();
+}
+
+static int
+wait_uio_init(uint8_t *state, const uint32_t timeout)
+{
+	uint32_t i;
+
+	for (i = 0; i < timeout * 10; i++) {
+		if (*state)
+			return 0;
+		usleep(100000);
+	}
+
+	return -1;
+}
+
+static struct eth_dev_ops xen_eth_dev_ops = {
+	/*dev*/
+	.dev_configure        = xen_dev_configure,
+	.dev_close            = xen_dev_close,
+	.dev_start            = xen_dev_start,
+	.dev_stop             = xen_dev_stop,
+	.dev_infos_get        = xen_dev_info_get,
+	.link_update          = xen_dev_link_update,
+	/*rxtx*/
+	.stats_get            = xen_dev_stats_get,
+	.stats_reset          = xen_dev_stats_reset,
+	.rx_queue_setup       = xen_dev_rx_queue_setup,
+	.rx_queue_release     = xen_dev_rx_queue_release,
+	.tx_queue_setup       = xen_dev_tx_queue_setup,
+	.tx_queue_release     = xen_dev_tx_queue_release,
+};
+
+static int
+xen_uio_connect_netback(const char *name, struct xen_adapter *xa)
+{
+	int err;
+
+	err = uio_map_addresses(&xa->uio, XEN_MAP_MAX);
+	if (err) {
+		RTE_LOG(ERR, PMD, "%s mapping info_page failed (%d)\n", name,
+			err);
+		return -EINVAL;
+	}
+
+	xa->info_page =	uio_get_map_addr(&xa->uio, INFO_MAP);
+
+	if (wait_uio_init(&xa->info_page->is_connected, 8)) {
+		RTE_LOG(ERR, PMD, "%s no connection to xen_netback\n", name);
+		uio_unmap_addresses(&xa->uio, XEN_MAP_MAX);
+		return -ENODEV;
+	}
+
+	PMD_INIT_LOG(DEBUG, "%s rx:%d, rx_evtchn:%d, tx:%d, tx_evtchn:%d\n",
+		     name, (int)xa->info_page->rx_grefs_count,
+		     (int)xa->info_page->rx_evtchn,
+		     (int)xa->info_page->tx_grefs_count,
+		     (int)xa->info_page->tx_evtchn);
+
+	return 0;
+}
+
+static int
+xen_dev_create(const char *name, const unsigned node)
+{
+	int ret = -ENOMEM;
+	struct xen_adapter *internals = NULL;
+	struct rte_eth_dev *eth_dev = NULL;
+	struct rte_eth_dev_data *data = NULL;
+
+	if (!name)
+		return -EINVAL;
+
+	RTE_LOG(INFO, PMD, "%s Creating %s ethdev on socket %u\n", name,
+		drivername, node);
+
+	internals = rte_zmalloc_socket(name, sizeof(*internals), 0, node);
+	if (!internals) {
+		RTE_LOG(ERR, PMD, "%s internals allocation has failed (%d)\n",
+			name, ENOMEM);
+		goto error;
+	}
+
+	ret = sysfs_device_get_uio_num(name);
+	if (ret < 0) {
+		RTE_LOG(ERR, PMD, "%s getting UIO number failed (%d)\n",
+			name, ret);
+		goto error;
+	}
+
+	internals->uio.idx = ret;
+
+	ret = sysfs_uio_get_info(&internals->uio);
+	if (ret < 0) {
+		RTE_LOG(ERR, PMD, "%s getting UIO info failed (%d)\n",
+			name, ret);
+		goto error;
+	}
+	if (strcmp(internals->uio.name, "xen_uio") != 0 ||
+	    strcmp(internals->uio.version, "0.1") != 0) {
+		RTE_LOG(ERR, PMD, "%s invalid UIO name/version (%s/%s)\n",
+			name, internals->uio.name, internals->uio.version);
+		ret = -EINVAL;
+		goto error;
+	}
+
+	ret = sysfs_uio_get_mappings(&internals->uio);
+	if (ret < 0) {
+		RTE_LOG(ERR, PMD, "%s getting UIO mappings failed (%d)\n",
+			name, ret);
+		goto error;
+	}
+
+	eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
+	if (!eth_dev) {
+		RTE_LOG(ERR, PMD, "%s eth_dev allocation has failed (%d)\n",
+			name, ENOMEM);
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	/* use eth_dev->data allocated in rte_eth_dev_allocate() */
+	data = eth_dev->data;
+
+	data->dev_private = internals;
+	data->nb_rx_queues = (uint16_t)1;
+	data->nb_tx_queues = (uint16_t)1;
+	data->rx_mbuf_alloc_failed = 0;
+	data->mtu = ETHER_MTU;
+
+	eth_dev->dev_ops = &xen_eth_dev_ops;
+	eth_dev->data->kdrv = RTE_KDRV_NONE;
+	eth_dev->data->drv_name = drivername;
+	eth_dev->data->numa_node = node;
+	eth_dev->driver = NULL;
+
+	TAILQ_INIT(&eth_dev->link_intr_cbs);
+
+	if (xen_uio_connect_netback(name, internals)) {
+		ret = -ENODEV;
+		goto uninit;
+	}
+
+	/* copy mac-addr */
+	data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0, node);
+	memcpy(&data->mac_addrs->addr_bytes[0],
+	       &internals->info_page->mac[0], ETHER_ADDR_LEN);
+
+	PMD_INIT_LOG(DEBUG, "%s MAC: %02X:%02X:%02X:%02X:%02X:%02X\n", name,
+		     data->mac_addrs->addr_bytes[0],
+		     data->mac_addrs->addr_bytes[1],
+		     data->mac_addrs->addr_bytes[2],
+		     data->mac_addrs->addr_bytes[3],
+		     data->mac_addrs->addr_bytes[4],
+		     data->mac_addrs->addr_bytes[5]);
+
+	return 0;
+
+uninit:
+	rte_eth_dev_release_port(eth_dev);
+error:
+	rte_free(internals);
+	return ret;
+}
+
+static const char *
+sysfs_get_device_driver(const char *device_path, char *buf)
+{
+	char *driver;
+	int count;
+
+	count = readlink(device_path, buf, PATH_MAX);
+	if (count >= PATH_MAX)
+		return NULL;
+
+	/* device doesn't have a driver */
+	if (count < 0)
+		return "";
+
+	buf[count] = '\0';
+	driver = strrchr(buf, '/');
+	if (driver)
+		++driver;
+	else
+		driver = buf;
+
+	return driver;
+}
+
+static int
+rte_xen_netfront_init(const char *name __rte_unused,
+		      const char *args __rte_unused)
+{
+	struct dirent *e;
+	DIR *dir;
+	char dirname[PATH_MAX];
+	unsigned int devices = 0;
+
+	PMD_INIT_FUNC_TRACE();
+
+	xen_evt_fd = open("/dev/" XEN_PMD_UIO_NAME, O_RDWR);
+
+	if (xen_evt_fd == -1) {
+		if (errno != ENOENT)
+			RTE_LOG(ERR, PMD, "cannot open event device %s",
+				"/dev/" XEN_PMD_UIO_NAME);
+		return -1;
+	}
+
+	dir = opendir(XEN_DEV_PATH);
+	if (!dir) {
+		RTE_LOG(ERR, PMD, "%s(): opendir failed: %s\n", __func__,
+			strerror(errno));
+		return -1;
+	}
+
+	while ((e = readdir(dir)) != NULL) {
+		unsigned int devid;
+		char buf[PATH_MAX];
+		const char *driver;
+
+		if (e->d_name[0] == '.')
+			continue;
+
+		if (sscanf(e->d_name, "vif-%d", &devid) != 1)
+			continue;
+
+		snprintf(dirname, sizeof(dirname), "%s/%s/driver",
+			 XEN_DEV_PATH, e->d_name);
+		driver = sysfs_get_device_driver(dirname, buf);
+		if (!driver)
+			continue;
+
+		/* only interested in devices bound to our uio kmod */
+		if (strcmp(driver, "xen_uio")) {
+			RTE_LOG(DEBUG, PMD,
+				"%s skipping device with driver %s\n",
+				e->d_name, driver);
+			continue;
+		}
+
+		if (xen_dev_create(e->d_name, rte_socket_id()) < 0)
+			continue;
+
+		devices++;
+	}
+
+	closedir(dir);
+	return devices ? 0 : -1;
+}
+
+static struct rte_driver rte_xen_netfront_driver = {
+	.name = "rte_xen_netfront",
+	.type = PMD_PDEV,
+	.init = rte_xen_netfront_init,
+};
+
+PMD_REGISTER_DRIVER(rte_xen_netfront_driver);
diff --git a/drivers/net/xen/xen_dev.h b/drivers/net/xen/xen_dev.h
new file mode 100644
index 0000000..df6c747
--- /dev/null
+++ b/drivers/net/xen/xen_dev.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _XEN_ETHDEV_H_
+#define _XEN_ETHDEV_H_
+
+#include "uio.h"
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <sys/user.h>
+
+#include <xen/io/netif.h>
+#include "xen_adapter_info.h"
+
+struct xen_adapter {
+	/* all UIO resources (index, name, version, mappings) for this device */
+	struct uio_resource uio;
+
+	/* adapter info page (the INFO_MAP uio mapping) */
+	struct xen_adapter_info *info_page;
+	uint8_t	    vlan_strip;
+};
+
+int
+xen_evtchn_notify_grant_rxtx(struct ioctl_evtchn_notify_grant *ng);
+
+#endif /* _XEN_ETHDEV_H_ */
diff --git a/drivers/net/xen/xen_logs.h b/drivers/net/xen/xen_logs.h
new file mode 100644
index 0000000..e1f3cc6
--- /dev/null
+++ b/drivers/net/xen/xen_logs.h
@@ -0,0 +1,19 @@
+#ifndef _XEN_LOGS_H_
+#define _XEN_LOGS_H_
+
+#ifdef RTE_LIBRTE_PMD_XEN_DEBUG_INIT
+
+#define PMD_INIT_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+
+#else /* RTE_LIBRTE_PMD_XEN_DEBUG_INIT */
+
+#define PMD_INIT_LOG(level, fmt, args...) \
+	do { } while (0)
+
+#endif /* RTE_LIBRTE_PMD_XEN_DEBUG_INIT */
+
+#define PMD_INIT_FUNC_TRACE() \
+	PMD_INIT_LOG(DEBUG, " >>")
+
+#endif /* _XEN_LOGS_H_ */
diff --git a/drivers/net/xen/xen_rxtx.c b/drivers/net/xen/xen_rxtx.c
new file mode 100644
index 0000000..91d95b5
--- /dev/null
+++ b/drivers/net/xen/xen_rxtx.c
@@ -0,0 +1,757 @@
+/*
+ * Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include "xen_dev.h"
+#include "xen_rxtx.h"
+#include "xen_logs.h"
+
+#include <rte_ethdev.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_malloc.h>
+
+#include <linux/if_ether.h>
+
+#define RTE_MBUF_DATA_DMA_ADDR(mb)             \
+	((uint64_t)((mb)->buf_physaddr + (mb)->data_off))
+
+#if __XEN_LATEST_INTERFACE_VERSION__ > 0x0003020a
+
+#define FRONT_RING_ATTACH(_r, _s, __size) do {   \
+	(_r)->sring = (_s);                      \
+	(_r)->req_prod_pvt = (_s)->req_prod;     \
+	(_r)->rsp_cons = (_s)->rsp_prod;         \
+	(_r)->nr_ents = __RING_SIZE(_s, __size); \
+} while (0)
+
+#endif
+
+static void
+xen_set_rx_ng(struct xen_rx_queue *rxq)
+{
+	rxq->ng_rx.port = rxq->xa->info_page->rx_evtchn;
+	rxq->ng_rx.info = rxq->xa->info_page->info;
+	rxq->ng_rx.u.rx_ring = rxq->xa->info_page->rx_ring;
+	rxq->ng_rx.otherend_id = rxq->xa->info_page->otherend_id;
+}
+
+static void
+xen_set_tx_ng(struct xen_tx_queue *txq)
+{
+	txq->ng_tx.port = txq->xa->info_page->tx_evtchn;
+	txq->ng_tx.info = txq->xa->info_page->info;
+	txq->ng_tx.u.tx_ring = txq->xa->info_page->tx_ring;
+	txq->ng_tx.otherend_id = txq->xa->info_page->otherend_id;
+}
+
+static int
+xen_evtchn_notify_grant_rx(struct xen_rx_queue *rxq)
+{
+	xen_set_rx_ng(rxq);
+	return xen_evtchn_notify_grant_rxtx(&rxq->ng_rx);
+}
+
+static int
+xen_evtchn_notify_grant_tx(struct xen_tx_queue *txq)
+{
+	xen_set_tx_ng(txq);
+	return xen_evtchn_notify_grant_rxtx(&txq->ng_tx);
+}
+
+static int
+xen_dev_rx_send_requests(struct xen_rx_queue *rxq)
+{
+	uint16_t i;
+	struct netif_rx_request *req;
+	RING_IDX req_prod = rxq->ring.req_prod_pvt;
+	RING_IDX prod = req_prod;
+	uint16_t free_space = RING_FREE_REQUESTS(&rxq->ring);
+
+	xen_set_rx_ng(rxq);
+
+	for (i = 0; i < free_space; i++) {
+		struct rte_mbuf *mbuf;
+
+		prod = (req_prod + i) & (RING_SIZE(&rxq->ring) - 1);
+
+		req = RING_GET_REQUEST(&rxq->ring, prod);
+
+		mbuf = rte_pktmbuf_alloc(rxq->mb_pool);
+		if (unlikely(!mbuf)) {
+			PMD_INIT_LOG(ERR, "no mbuf");
+			break; /*skip*/
+		}
+
+		rxq->mbuf[prod] = mbuf;
+
+		/* shift data to start at the beginning of the next page */
+		uint64_t phys_addr = RTE_MBUF_DATA_DMA_ADDR(mbuf);
+		uint64_t phys_addr_shifted =
+			(phys_addr + PAGE_SIZE - 1) &
+			(~((uint64_t)PAGE_SIZE - 1));
+		uint64_t shift =  phys_addr_shifted - phys_addr;
+
+		mbuf->data_off += shift;
+		rxq->ng_rx.s[i].gref = rxq->gref[prod];
+
+		rxq->ng_rx.s[i].paddr = __phys_to_pfn(phys_addr_shifted);
+
+		req->gref = rxq->gref[prod];
+		req->id = prod;
+	}
+
+	rxq->ring.req_prod_pvt = (req_prod + i);
+
+	rxq->ng_rx.count = i;
+
+	if (rxq->ng_rx.count > 0 || rxq->ng_rx.rel_count > 0) {
+		xen_evtchn_notify_grant_rx(rxq);
+		rxq->ng_rx.count = 0;
+		rxq->ng_rx.rel_count = 0;
+	}
+
+	return 0;
+}
+
+static void
+xen_dev_rx_recv_extra(struct xen_rx_queue *rxq, struct netif_extra_info *extra)
+{
+	if (unlikely(!extra)) {
+		PMD_INIT_LOG(ERR, "Invalid rxq state transition: %d",
+			     rxq->state);
+		rxq->estate = RX_RESP_GENERAL;
+	}
+
+	if (unlikely(!extra->type ||
+		     extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+		PMD_INIT_LOG(WARNING, "Invalid extra type: %d", extra->type);
+		rxq->estate = RX_RESP_GENERAL;
+	}
+
+	if (!(extra->flags & XEN_NETIF_EXTRA_FLAG_MORE)) {
+		PMD_INIT_LOG(DEBUG, "No XEN_NETIF_EXTRA_FLAG_MORE");
+		rxq->estate = RX_RESP_GENERAL;
+	}
+}
+
+static uint16_t
+get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
+{
+	if (ethertype == ETHER_TYPE_IPv4)
+		return rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
+	else /* assume ethertype == ETHER_TYPE_IPv6 */
+		return rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
+}
+
+static void
+recalculate_checksum(struct rte_mbuf *mbuf)
+{
+	struct ether_hdr *eth_hdr;
+	struct ipv4_hdr *ipv4_hdr;
+	struct ipv6_hdr *ipv6_hdr;
+	struct udp_hdr *udp_hdr;
+	struct tcp_hdr *tcp_hdr;
+	void *l3_hdr;
+#define l2_len sizeof(struct ether_hdr)
+	uint16_t ethertype, l3_len;
+	uint8_t l4_proto;
+
+	eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
+	ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
+
+	switch (ethertype) {
+	case ETHER_TYPE_IPv4:
+		ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + l2_len);
+		l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4;
+		l4_proto = ipv4_hdr->next_proto_id;
+		break;
+	case ETHER_TYPE_IPv6:
+		ipv6_hdr = (struct ipv6_hdr *)((char *)eth_hdr + l2_len);
+		l3_len = sizeof(struct ipv6_hdr);
+		l4_proto = ipv6_hdr->proto;
+		break;
+	default:
+		l3_len = 0;
+		l4_proto = 0;
+	}
+
+	l3_hdr = (char *)eth_hdr + l2_len;
+
+	if (l4_proto == IPPROTO_TCP) {
+		tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + l3_len);
+		tcp_hdr->cksum = 0;
+		tcp_hdr->cksum = get_udptcp_checksum(l3_hdr, tcp_hdr,
+						     ethertype);
+	} else if (l4_proto == IPPROTO_UDP) {
+		udp_hdr = (struct udp_hdr *)((char *)l3_hdr + l3_len);
+		/* do not recalculate udp cksum if it was 0 */
+		if (udp_hdr->dgram_cksum != 0) {
+			udp_hdr->dgram_cksum = 0;
+			udp_hdr->dgram_cksum = get_udptcp_checksum(l3_hdr,
+								   udp_hdr,
+								   ethertype);
+		}
+	}
+}
+
+static uint16_t
+xen_dev_rx_recv_responses(struct xen_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+			  uint16_t nb_pkts)
+{
+	uint16_t nb_rx;
+	uint16_t i;
+	struct netif_rx_response *rsp;
+	struct netif_extra_info *extra = NULL;
+	RING_IDX rsp_cons = rxq->ring.rsp_cons;
+	RING_IDX cons = rsp_cons;
+	uint16_t work_todo;
+
+	nb_rx = 0;
+	work_todo = RING_HAS_UNCONSUMED_RESPONSES(&rxq->ring);
+	for (i = 0; i < work_todo && nb_rx < nb_pkts; i++) {
+		struct rte_mbuf *mbuf;
+
+		cons = (rsp_cons + i) & (RING_SIZE(&rxq->ring) - 1);
+
+		rsp = RING_GET_RESPONSE(&rxq->ring, cons);
+
+		PMD_INIT_LOG(DEBUG, "id:%u status:%u offset:%u flags:%x",
+			     rsp->id, rsp->status, rsp->offset, rsp->flags);
+
+		rxq->ng_rx.rel_gref[rxq->ng_rx.rel_count] = rxq->gref[cons];
+		rxq->ng_rx.rel_count++;
+
+		if (unlikely(rsp->status < 0 ||
+			     rsp->offset + rsp->status > PAGE_SIZE)) {
+			PMD_INIT_LOG(WARNING, "bad rsp->status: %d offset: %d",
+				     rsp->status, rsp->offset);
+			rte_pktmbuf_free(rxq->mbuf[cons]);
+			rxq->mbuf[cons] = NULL;
+			rxq->state = RX_RESP_GENERAL;
+			rxq->first_frag = rxq->prev_frag = NULL;
+			continue;
+		}
+
+		if (unlikely(rxq->estate & RX_RESP_EXTRA)) {
+			extra = (struct netif_extra_info *)rsp;
+			xen_dev_rx_recv_extra(rxq, extra);
+			rte_pktmbuf_free(rxq->mbuf[cons]);
+			rxq->mbuf[cons] = NULL;
+			continue;
+		}
+
+		if (unlikely(rsp->flags & NETRXF_extra_info)) {
+			PMD_INIT_LOG(DEBUG, "EXTRA_NETRXF_extra_info");
+			rxq->estate = RX_RESP_EXTRA;
+			/* next ring will contain extra info */
+			/* current ring entry is still valid */
+		}
+
+		if (rxq->state == RX_RESP_GENERAL) {
+			/* normal receive */
+			if (likely(!!rxq->mbuf[cons])) {
+				mbuf = rxq->mbuf[cons];
+				mbuf->port = rxq->port_id;
+				mbuf->data_len = mbuf->pkt_len = rsp->status;
+				mbuf->data_off += rsp->offset;
+				if (rxq->xa->vlan_strip)
+					rte_vlan_strip(mbuf);
+
+				if (rsp->flags & NETRXF_more_data) {
+					rxq->state = RX_RESP_CONTINUE;
+					rxq->first_frag =
+						rxq->prev_frag = mbuf;
+				} else {
+					/*send to the upper level*/
+					rx_pkts[nb_rx++] = mbuf;
+					recalculate_checksum(mbuf);
+					rxq->rx_stats.ipackets++;
+					rxq->rx_stats.ibytes +=
+						mbuf->pkt_len;
+				}
+
+				rxq->mbuf[cons] = NULL;
+			} else {
+				PMD_INIT_LOG(WARNING, "no rxq->mbuf[%d]",
+					     cons);
+				rxq->rx_stats.ierrors++;
+			}
+		} else { /* RX_RESP_CONTINUE -- packet is segmented */
+			if (likely(!!rxq->mbuf[cons])) {
+				mbuf = rxq->mbuf[cons];
+				/* mbuf->in_port = rxq->port_id; */
+				mbuf->data_len = mbuf->pkt_len =
+					rsp->status;
+				mbuf->data_off += rsp->offset;
+
+				rxq->first_frag->nb_segs++;
+				rxq->first_frag->pkt_len += mbuf->data_len;
+				rxq->prev_frag->next = mbuf;
+
+				if (rsp->flags & NETRXF_more_data) {
+					rxq->prev_frag = mbuf;
+				} else {
+					rxq->state = RX_RESP_GENERAL;
+					/*send to the upper level*/
+					rx_pkts[nb_rx++] = rxq->first_frag;
+					recalculate_checksum(rxq->first_frag);
+					rxq->rx_stats.ipackets++;
+					rxq->rx_stats.ibytes +=
+						rxq->first_frag->pkt_len;
+					rxq->first_frag = rxq->prev_frag
+						= NULL;
+				}
+
+				rxq->mbuf[cons] = NULL;
+			} else {
+				PMD_INIT_LOG(WARNING, "no cntn rxq->mbuf[%d]",
+					     cons);
+				rxq->rx_stats.ierrors++;
+			}
+		}
+
+		rxq->mbuf[cons] = NULL;
+	}
+	rxq->ring.rsp_cons = (rsp_cons + i);
+
+	return nb_rx;
+}
+
+/*
+ * Release all receive-ring resources: queue every grant reference for
+ * return to the backend, free any mbufs still posted to ring slots,
+ * then issue the notify-grant ioctl so the grants are actually revoked.
+ * Called on queue teardown and on backend reconnect.
+ */
+static void
+xen_rx_queue_release(struct xen_rx_queue *rxq)
+{
+	uint16_t i;
+
+	rxq->ng_rx.count = 0;
+	rxq->ng_rx.rel_count = 0;
+
+	for (i = 0; i < (RING_SIZE(&rxq->ring)); i++) {
+		/* every slot's gref is handed back, whether or not an
+		 * mbuf is currently posted in it
+		 */
+		rxq->ng_rx.rel_gref[rxq->ng_rx.rel_count] =
+			rxq->gref[i];
+		rxq->ng_rx.rel_count++;
+		if (rxq->mbuf[i]) {
+			rte_pktmbuf_free(rxq->mbuf[i]);
+			rxq->mbuf[i] = NULL;
+		}
+	}
+	xen_evtchn_notify_grant_rx(rxq);
+}
+
+/* Ethdev rx_queue_release callback: tear down the ring state and free
+ * the queue structure.  NULL is accepted and ignored.
+ */
+void
+xen_dev_rx_queue_release(void *rxq)
+{
+	struct xen_rx_queue *rx_q = rxq;
+
+	if (!rx_q)
+		return;
+
+	xen_rx_queue_release(rx_q);
+	rte_free(rx_q);
+}
+
+/*
+ * (Re)attach the rx front ring to the shared ring page and repost
+ * receive requests.  Latches the current disconnect counter so the
+ * receive path can detect a backend reconnect and reinitialize.
+ */
+static void
+xen_rx_ring_init(struct xen_rx_queue *rxq)
+{
+	SHARED_RING_INIT(rxq->rxs);
+	FRONT_RING_ATTACH(&rxq->ring, rxq->rxs, PAGE_SIZE);
+	xen_dev_rx_send_requests(rxq);
+	rxq->rx_disconnect_count = rxq->xa->info_page->disconnect_count;
+	xen_set_rx_ng(rxq);
+}
+
+/*
+ * Burst receive callback.  Returns 0 while the backend is not
+ * connected.  If the backend reconnected since the last poll (the
+ * disconnect counter changed), the ring is torn down and reattached
+ * before reading.  After harvesting responses, new receive requests
+ * are posted to keep the ring populated.
+ */
+static uint16_t
+xen_dev_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	uint16_t res = 0;
+
+	struct xen_rx_queue *rxq = (struct xen_rx_queue *)rx_queue;
+
+	if (likely(rxq->xa->info_page->is_connected)) {
+		if (unlikely(rxq->xa->info_page->disconnect_count !=
+					rxq->rx_disconnect_count)) {
+			/* backend went away and came back: reset ring */
+			xen_rx_queue_release(rxq);
+
+			xen_rx_ring_init(rxq);
+		}
+
+		res = xen_dev_rx_recv_responses(rxq, rx_pkts, nb_pkts);
+
+		/* refill the ring with fresh requests */
+		xen_dev_rx_send_requests(rxq);
+	}
+
+	return res;
+}
+
+/**
+ * Set up a receive queue (ethdev rx_queue_setup callback).
+ *
+ * nb_desc, socket_id and rx_conf are ignored: the ring size is fixed
+ * by the shared ring page and the grant references come from the
+ * adapter info page.  Also installs the burst receive function.
+ *
+ * @return 0 on success, -ENOMEM when the info page does not provide
+ *	   enough rx grant references or allocation fails.
+ */
+int
+xen_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+		       uint16_t nb_desc __rte_unused,
+		       unsigned int socket_id __rte_unused,
+		       const struct rte_eth_rxconf *rx_conf __rte_unused,
+		       struct rte_mempool *mp)
+{
+	struct xen_adapter *xa = dev->data->dev_private;
+	struct xen_adapter_info *info = xa->info_page;
+	struct xen_rx_queue *rxq;
+
+	PMD_INIT_FUNC_TRACE();
+	if (info->rx_grefs_count < NET_RX_RING_SIZE) {
+		RTE_LOG(ERR, PMD, "rx ring size greater than rx grefs count\n");
+		return -ENOMEM;
+	}
+
+	rxq = rte_zmalloc("rx_queue", sizeof(struct xen_rx_queue),
+			  RTE_CACHE_LINE_SIZE);
+	if (!rxq) {
+		RTE_LOG(ERR, PMD, "rte_zmalloc for rxq failed!\n");
+		return -ENOMEM;
+	}
+
+	rxq->xa = xa;
+	rxq->queue_id = queue_idx;
+	rxq->port_id = dev->data->port_id;
+	rxq->state = RX_RESP_GENERAL;
+	rxq->estate = RX_RESP_GENERAL;
+	rxq->first_frag = rxq->prev_frag = NULL;
+	rxq->mb_pool = mp;
+	rxq->ng_rx.is_rx = 1;
+	rxq->ng_rx.rel_count = 0;
+	/* rx grefs occupy the leading part of the shared gref array */
+	rxq->gref = &info->rxtx_grefs[0];
+
+	rxq->rxs = uio_get_map_addr(&xa->uio, RX_RING_MAP);
+
+	dev->data->rx_queues[queue_idx] = rxq;
+
+	xen_rx_ring_init(rxq);
+
+	dev->rx_pkt_burst = xen_dev_recv_pkts;
+
+	return 0;
+}
+
+/*
+ * Fill the i-th pending tx ring slot with one request.
+ *
+ * Pops a slot id from the queue's free list; its grant reference and
+ * the page frame number are also recorded in ng_tx for the later
+ * notify-grant ioctl.  Netfront convention: the first request of a
+ * packet (segno == 0) carries the total packet length in req->size,
+ * subsequent requests carry the segment size.
+ */
+static void
+xen_dev_tx_prepare_request(struct xen_tx_queue *txq, uint16_t i,
+			   uint16_t pkt_len, uint16_t size, uint16_t offset,
+			   uint16_t segno, uint16_t flags, unsigned long paddr)
+{
+	RING_IDX prod = (txq->ring.req_prod_pvt + i) &
+		(RING_SIZE(&txq->ring) - 1);
+	struct netif_tx_request *req = RING_GET_REQUEST(&txq->ring, prod);
+	struct slot *slot = STAILQ_FIRST(&txq->slotlist);
+
+	STAILQ_REMOVE_HEAD(&txq->slotlist, list_entry);
+	txq->freeslots--;
+
+	txq->ng_tx.s[i].gref = txq->gref[slot->id];
+	txq->ng_tx.s[i].paddr = paddr;
+
+	req->id = slot->id;
+	req->flags = flags;
+	req->offset = offset;
+	req->gref = txq->gref[slot->id];
+	req->size = segno == 0 ? pkt_len : size;
+
+	PMD_INIT_LOG(DEBUG, "id:%u size:%u offset:%u gref:%u flags:%x",
+		     req->id, req->size, req->offset, req->gref, req->flags);
+}
+
+/*
+ * Build tx ring requests for up to nb_pkts packets and kick the
+ * backend.
+ *
+ * Packets whose first page fragment cannot hold a full Ethernet
+ * header get a freshly allocated header mbuf prepended (transmit
+ * fails otherwise -- see comment below).  Segments crossing a page
+ * boundary are split into two requests.  Stops early when the ring
+ * or the free-slot list runs out of room.
+ *
+ * Returns the number of packets consumed from tx_pkts.
+ */
+static int
+xen_dev_tx_send_requests(struct xen_tx_queue *txq, struct rte_mbuf **tx_pkts,
+			 uint16_t nb_pkts)
+{
+	struct rte_mbuf *mbuf;
+	unsigned long paddr;
+	uint16_t offset;
+	uint16_t flags;
+	uint16_t size;
+	uint16_t i = 0;
+	uint16_t nb_tx = 0;
+	uint16_t pkt_len;
+	uint16_t nsegs;
+	uint16_t free_space = RTE_MIN(RING_FREE_REQUESTS(&txq->ring),
+				      txq->freeslots);
+	struct slot *slot;
+
+	xen_set_tx_ng(txq);
+
+	while (i < free_space && nb_tx < nb_pkts) {
+		slot = STAILQ_FIRST(&txq->slotlist);	/* peek ahead */
+
+		mbuf = tx_pkts[nb_tx];
+
+		if (unlikely(!mbuf)) {
+			PMD_INIT_LOG(WARNING, "no mbuf for req");
+			break;
+		}
+
+		/* worst case each segment is split at a page boundary,
+		 * so reserve two requests per segment (plus one spare)
+		 */
+		if (i + (tx_pkts[nb_tx]->nb_segs * 2 + 1) > free_space) {
+			PMD_INIT_LOG(WARNING, "no ring space for req");
+			txq->tx_stats.oerrors++;
+			break;
+		}
+
+		/* Do VLAN tag insertion */
+		if ((mbuf->ol_flags & PKT_TX_VLAN_PKT) &&
+		    unlikely(rte_vlan_insert(&mbuf) != 0)) {
+			rte_pktmbuf_free(mbuf);
+			txq->tx_stats.oerrors++;
+			++nb_tx;
+			continue;
+		}
+
+		/* if the first segment is < ETH_HLEN transmit will fail */
+		offset = (RTE_MBUF_DATA_DMA_ADDR(mbuf)) &
+					((uint64_t)PAGE_SIZE - 1);
+		if (PAGE_SIZE - offset < ETH_HLEN) {
+			struct rte_mbuf *mbuf_new;
+
+			/* copy the Ethernet header into an mbuf of its
+			 * own and chain the original (minus the header)
+			 * behind it
+			 */
+			mbuf_new = rte_pktmbuf_alloc(txq->mb_pool);
+			if (unlikely(!mbuf_new)) {
+				rte_pktmbuf_free(mbuf);
+				txq->tx_stats.oerrors++;
+				++nb_tx;
+				continue;
+			}
+			rte_memcpy(rte_pktmbuf_mtod(mbuf_new, void *),
+				   rte_pktmbuf_mtod(mbuf, void *),
+				   ETH_HLEN);
+			mbuf_new->pkt_len = mbuf_new->data_len = ETH_HLEN;
+			rte_pktmbuf_adj(mbuf, ETH_HLEN);
+
+			mbuf_new->pkt_len += mbuf->pkt_len;
+			mbuf_new->nb_segs = mbuf->nb_segs + 1;
+			mbuf_new->next = mbuf;
+
+			mbuf = mbuf_new;
+		}
+
+		/* slot->id is the slot prepare_request will pop first */
+		txq->mbuf[slot->id] = mbuf;
+		pkt_len = mbuf->pkt_len;
+		nsegs = 0;
+
+		/* prepare request for each mbuf segment */
+		do {
+			size = mbuf->data_len;
+			flags = (mbuf->next ? NETTXF_more_data : 0);
+			paddr = __phys_to_pfn(RTE_MBUF_DATA_DMA_ADDR(mbuf));
+			offset = (RTE_MBUF_DATA_DMA_ADDR(mbuf)) &
+				((uint64_t)PAGE_SIZE - 1);
+
+			/* check if additional segmentation is needed */
+			if (size + offset > PAGE_SIZE) {
+				size = PAGE_SIZE - offset;
+				xen_dev_tx_prepare_request(txq, i, pkt_len,
+							   size, offset, nsegs,
+							   NETTXF_more_data,
+							   paddr);
+				paddr++;
+				offset = (offset + size) % PAGE_SIZE;
+				size = mbuf->data_len - size;
+				i++;
+				nsegs++;
+			}
+
+			xen_dev_tx_prepare_request(txq, i, pkt_len, size,
+						   offset, nsegs, flags,
+						   paddr);
+			i++;
+			nsegs++;
+
+		} while ((mbuf = mbuf->next));
+
+		nb_tx++;
+		txq->tx_stats.opackets++;
+		txq->tx_stats.obytes += pkt_len;
+	}
+
+	/* publish the requests and notify/release grants in one ioctl */
+	txq->ring.req_prod_pvt += i;
+	txq->ng_tx.count = i;
+	if (txq->ng_tx.count > 0 || txq->ng_tx.rel_count > 0) {
+		xen_evtchn_notify_grant_tx(txq);
+		txq->ng_tx.rel_count = 0;
+		txq->ng_tx.count = 0;
+	}
+
+	return nb_tx;
+}
+
+/*
+ * Harvest tx completions: return each completed slot id to the free
+ * list, queue its grant reference for release and free the
+ * transmitted mbuf.  Always returns 0.
+ */
+static int
+xen_dev_tx_recv_responses(struct xen_tx_queue *txq)
+{
+	uint16_t i, id;
+	struct netif_tx_response *rsp;
+	RING_IDX rsp_cons = txq->ring.rsp_cons;
+	RING_IDX cons;
+	uint16_t work_todo;
+
+	work_todo = RING_HAS_UNCONSUMED_RESPONSES(&txq->ring);
+	for (i = 0; i < work_todo; i++) {
+		cons = (rsp_cons + i) & (RING_SIZE(&txq->ring) - 1);
+
+		rsp = RING_GET_RESPONSE(&txq->ring, cons);
+		id = rsp->id;
+
+		STAILQ_INSERT_TAIL(&txq->slotlist, &txq->slots[id], list_entry);
+		txq->freeslots++;
+
+		if (unlikely(rsp->status == NETIF_RSP_NULL))
+			PMD_INIT_LOG(WARNING, "NETIF_RSP_NULL");
+
+		txq->ng_tx.rel_gref[txq->ng_tx.rel_count] = txq->gref[id];
+		txq->ng_tx.rel_count++;
+
+		if (likely(!!txq->mbuf[id])) {
+			rte_pktmbuf_free(txq->mbuf[id]);
+			txq->mbuf[id] = NULL;
+		}
+	}
+	/* note: rsp_cons is NOT masked here; the ring macros expect the
+	 * free-running index
+	 */
+	txq->ring.rsp_cons = (rsp_cons + i);
+
+	return 0;
+}
+
+/*
+ * Free all in-flight tx mbufs and queue their grant references for
+ * release, then notify the backend.
+ *
+ * NOTE(review): unlike xen_rx_queue_release(), only slots that still
+ * hold an mbuf have their gref queued for release here -- presumably
+ * the remaining tx grefs are not currently granted; verify against
+ * the gnttab ioctl semantics.
+ */
+static void
+xen_tx_queue_release(struct xen_tx_queue *txq)
+{
+	uint16_t i;
+
+	txq->ng_tx.count = 0;
+	txq->ng_tx.rel_count = 0;
+
+	for (i = 0; i < (RING_SIZE(&txq->ring)); i++) {
+		if (txq->mbuf[i]) {
+			rte_pktmbuf_free(txq->mbuf[i]);
+			txq->mbuf[i] = NULL;
+			txq->ng_tx.rel_gref[txq->ng_tx.rel_count] =
+				txq->gref[i];
+			txq->ng_tx.rel_count++;
+		}
+	}
+	xen_evtchn_notify_grant_tx(txq);
+}
+
+/* Ethdev tx_queue_release callback: tear down the ring state, free the
+ * slot array and the queue structure.  NULL is accepted and ignored.
+ */
+void
+xen_dev_tx_queue_release(void *txq)
+{
+	struct xen_tx_queue *tx_q = txq;
+
+	if (!tx_q)
+		return;
+
+	xen_tx_queue_release(tx_q);
+	rte_free(tx_q->slots);
+	rte_free(tx_q);
+}
+
+/*
+ * (Re)attach the tx front ring to the shared ring page, drain any
+ * stale responses, and latch the disconnect counter so the transmit
+ * path can detect a backend reconnect.
+ */
+static void
+xen_tx_ring_init(struct xen_tx_queue *txq)
+{
+	SHARED_RING_INIT(txq->txs);
+	FRONT_RING_ATTACH(&txq->ring, txq->txs, PAGE_SIZE);
+	xen_dev_tx_recv_responses(txq);
+	txq->tx_disconnect_count = txq->xa->info_page->disconnect_count;
+	xen_set_tx_ng(txq);
+}
+
+/*
+ * Burst transmit callback.  Returns 0 (nothing sent) while the backend
+ * is not connected.  If the backend reconnected since the last call,
+ * the ring is torn down and reattached first.  Completions are
+ * harvested before new requests are posted so freed slots can be
+ * reused in the same call.
+ */
+static uint16_t
+xen_dev_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	uint16_t res = 0;
+
+	struct xen_tx_queue *txq = (struct xen_tx_queue *)tx_queue;
+
+	if (likely(txq->xa->info_page->is_connected)) {
+		if (unlikely(txq->xa->info_page->disconnect_count !=
+					txq->tx_disconnect_count)) {
+			/* backend went away and came back: reset ring */
+			xen_tx_queue_release(txq);
+
+			xen_tx_ring_init(txq);
+		}
+
+		xen_dev_tx_recv_responses(txq);
+
+		res = xen_dev_tx_send_requests(txq, tx_pkts, nb_pkts);
+	}
+
+	return res;
+}
+
+/**
+ * Set up a transmit queue (ethdev tx_queue_setup callback).
+ *
+ * Requires all checksum offloads to be disabled (ETH_TXQ_FLAGS_NOXSUMS).
+ * A small private mempool is created for the header-copy workaround in
+ * the tx path.  nb_desc is ignored: the ring size is fixed by the
+ * shared ring page.
+ *
+ * @return 0 on success, -EINVAL for unsupported txq_flags, -ENOMEM on
+ *	   allocation failure or insufficient tx grant references.
+ */
+int
+xen_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+		       uint16_t nb_desc __rte_unused,
+		       unsigned int socket_id,
+		       const struct rte_eth_txconf *tx_conf)
+{
+	struct xen_adapter *xa = dev->data->dev_private;
+	struct xen_adapter_info *info = xa->info_page;
+	struct xen_tx_queue *txq;
+	char pool_name[RTE_MEMPOOL_NAMESIZE];
+	uint16_t i;
+
+	PMD_INIT_FUNC_TRACE();
+
+	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS)
+	    != ETH_TXQ_FLAGS_NOXSUMS) {
+		RTE_LOG(ERR, PMD, "TX checksum offload not supported\n");
+		return -EINVAL;
+	}
+
+	if (info->tx_grefs_count < NET_TX_RING_SIZE) {
+		RTE_LOG(ERR, PMD, "tx ring size greater than tx grefs count\n");
+		return -ENOMEM;
+	}
+
+	txq = rte_zmalloc("tx_queue", sizeof(struct xen_tx_queue),
+			  RTE_CACHE_LINE_SIZE);
+	if (!txq) {
+		RTE_LOG(ERR, PMD, "rte_zmalloc for txq failed!\n");
+		return -ENOMEM;
+	}
+
+	txq->xa = xa;
+	txq->queue_id = queue_idx;
+	txq->port_id = dev->data->port_id;
+
+	/* include the queue index in the name so a second tx queue on
+	 * the same port does not collide with an existing mempool
+	 */
+	snprintf(pool_name, RTE_MEMPOOL_NAMESIZE, "mbuf_%u_%u_tx",
+		 txq->port_id, queue_idx);
+	txq->mb_pool = rte_mempool_create(pool_name, NET_TX_RING_SIZE - 1,
+				RTE_PKTMBUF_HEADROOM, 0,
+				sizeof(struct rte_pktmbuf_pool_private),
+				rte_pktmbuf_pool_init, NULL,
+				rte_pktmbuf_init, NULL,
+				socket_id, 0);
+	if (!txq->mb_pool) {
+		RTE_LOG(ERR, PMD, "Could not initialize tx mbuf pool\n");
+		rte_free(txq);
+		return -ENOMEM;
+	}
+
+	txq->txs = uio_get_map_addr(&xa->uio, TX_RING_MAP);
+
+	txq->slots = rte_zmalloc("slots",
+				 info->tx_grefs_count * sizeof(struct slot),
+				 RTE_CACHE_LINE_SIZE);
+	if (!txq->slots) {
+		RTE_LOG(ERR, PMD, "rte_zmalloc for slots failed!\n");
+		/* NOTE: the mempool cannot be destroyed here (no such
+		 * API in this DPDK version); only the queue structure
+		 * is reclaimed.
+		 */
+		rte_free(txq);
+		return -ENOMEM;
+	}
+
+	txq->ng_tx.is_rx = 0;
+	txq->ng_tx.rel_count = 0;
+	/* tx grefs follow the rx grefs in the shared gref array */
+	txq->gref = &info->rxtx_grefs[info->rx_grefs_count];
+	STAILQ_INIT(&txq->slotlist);
+	for (i = 0; i < info->tx_grefs_count; i++) {
+		txq->slots[i].id = i;
+		STAILQ_INSERT_TAIL(&txq->slotlist, &txq->slots[i], list_entry);
+	}
+	txq->freeslots = info->tx_grefs_count;
+
+	dev->data->tx_queues[queue_idx] = txq;
+
+	xen_tx_ring_init(txq);
+
+	dev->tx_pkt_burst = xen_dev_xmit_pkts;
+
+	return 0;
+}
diff --git a/drivers/net/xen/xen_rxtx.h b/drivers/net/xen/xen_rxtx.h
new file mode 100644
index 0000000..eee633c
--- /dev/null
+++ b/drivers/net/xen/xen_rxtx.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+/* Receive/transmit queue definitions for the Xen netfront PMD. */
+
+#ifndef _XEN_RXTX_H_
+#define _XEN_RXTX_H_
+
+#include <rte_mbuf.h>
+#include <rte_tailq.h>
+
+#include <xen/io/netif.h>
+#include "xen_adapter_info.h"
+
+#define DEFAULT_RX_FREE_THRESH   0
+#define DEFAULT_TX_FREE_THRESH   512
+
+/* Use a local 4K page definition; the ring-size macros below and the
+ * tx page-boundary segmentation depend on this granularity.
+ */
+#undef  PAGE_SIZE
+#define PAGE_SHIFT            12
+#define PAGE_SIZE             (1 << PAGE_SHIFT)
+
+#define __phys_to_pfn(paddr)  ((unsigned long)((paddr) >> PAGE_SHIFT))
+#define __pfn_to_phys(pfn)    ((phys_addr_t)(pfn) << PAGE_SHIFT)
+
+/* Ring sizes are fixed by the single shared ring page. */
+#define NET_TX_RING_SIZE      __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
+#define NET_RX_RING_SIZE      __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
+
+/* Per-queue transmit statistics. */
+struct xen_tx_stats {
+	uint64_t opackets;
+	uint64_t obytes;
+	uint64_t oerrors;
+};
+
+/* Per-queue receive statistics. */
+struct xen_rx_stats {
+	uint64_t ipackets;
+	uint64_t ibytes;
+	uint64_t ierrors;
+};
+
+/* Receive state machine: expecting a packet start (GENERAL), a
+ * continuation fragment (CONTINUE), or an extra-info slot (EXTRA).
+ */
+enum rx_resp_state {
+	RX_RESP_GENERAL = 0,
+	RX_RESP_CONTINUE,
+	RX_RESP_EXTRA
+};
+
+struct xen_rx_queue {
+	/** RX queue index. */
+	uint16_t                   queue_id;
+	/** Device port identifier. */
+	uint8_t                    port_id;
+	/** mbuf pool to populate RX ring. */
+	struct rte_mempool         *mb_pool;
+	/** Ptr to dev_private data. */
+	struct xen_adapter         *xa;
+
+	/* Xen specific */
+
+	/** The xen rx front ring shared with the other end. */
+	netif_rx_front_ring_t      ring;
+	struct netif_rx_sring      *rxs;
+	/** Grefs for sharing with the other end. */
+	grant_ref_t                *gref;
+	/** mbufs posted to the ring, indexed by RING_IDX. */
+	struct rte_mbuf            *mbuf[NET_RX_RING_SIZE];
+	/** Packet state machine. */
+	enum rx_resp_state         state;
+	/** Extra-info state machine. */
+	enum rx_resp_state         estate;
+	/** First packet segment. */
+	struct rte_mbuf            *first_frag;
+	/** Previous packet segment. */
+	struct rte_mbuf            *prev_frag;
+	/** Statistics. */
+	struct xen_rx_stats        rx_stats;
+	/** Number of disconnections. */
+	uint8_t                    rx_disconnect_count;
+	/** Notify and gnttab ioctl struct. */
+	struct ioctl_evtchn_notify_grant ng_rx;
+};
+
+struct xen_tx_queue {
+	/** TX queue index. */
+	uint16_t                   queue_id;
+	/** Device port identifier. */
+	uint8_t                    port_id;
+	/** Ptr to dev_private data. */
+	struct xen_adapter         *xa;
+
+	/* Xen specific */
+
+	/** The xen tx front ring shared with the other end. */
+	netif_tx_front_ring_t      ring;
+	struct netif_tx_sring      *txs;
+	/** Grefs for sharing with the other end. */
+	grant_ref_t                *gref;
+	/** In-flight mbufs, indexed by slot id. */
+	struct rte_mbuf            *mbuf[NET_TX_RING_SIZE];
+	/** Pool for the header-copy workaround in the tx path. */
+	struct rte_mempool         *mb_pool;
+	/** Statistics. */
+	struct xen_tx_stats        tx_stats;
+	/** Number of disconnections. */
+	uint8_t                    tx_disconnect_count;
+	/** Notify and gnttab ioctl struct. */
+	struct ioctl_evtchn_notify_grant ng_tx;
+	/** List of unused slots in gref[]. */
+	STAILQ_HEAD(listhead, slot) slotlist;
+	struct slot {
+		STAILQ_ENTRY(slot) list_entry;
+		uint16_t	   id;
+	} *slots;
+	uint16_t		   freeslots;
+};
+
+struct rte_eth_dev;
+struct rte_eth_rxconf;
+struct rte_eth_txconf;
+
+int xen_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+			   uint16_t nb_rx_desc, unsigned int socket_id,
+			   const struct rte_eth_rxconf *rx_conf,
+			   struct rte_mempool *mb_pool);
+
+int xen_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+			   uint16_t nb_tx_desc, unsigned int socket_id,
+			   const struct rte_eth_txconf *tx_conf);
+
+void xen_dev_rx_queue_release(void *rxq);
+void xen_dev_tx_queue_release(void *txq);
+
+#endif /* _XEN_RXTX_H_ */
-- 
2.5.5

  parent reply	other threads:[~2016-03-22  9:56 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-02-15 15:24 [dpdk-dev] [PATCH 1/5] xen: allow choosing dom0 support at runtime Stephen Hemminger
2015-02-15 15:24 ` [dpdk-dev] [PATCH 2/5] enic: fix device to work with Xen DOM0 Stephen Hemminger
2015-03-10  7:08   ` Liu, Jijiang
2015-02-15 15:24 ` [dpdk-dev] [PATCH 3/5] xen: add phys-addr command line argument Stephen Hemminger
2015-02-26  7:55   ` Liu, Jijiang
2015-02-26 16:09     ` Stephen Hemminger
2015-02-15 15:24 ` [dpdk-dev] [PATCH 4/5] xen: add uio driver Stephen Hemminger
2016-03-22  9:55   ` [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver Jan Blunck
2016-03-22  9:55     ` [dpdk-dev] [PATCH v3 1/3] xen: Add UIO kernel driver Jan Blunck
2016-03-22 10:42       ` Thomas Monjalon
2016-03-22 11:04         ` Jan Blunck
2016-03-22 11:27           ` Thomas Monjalon
2016-03-22 14:39             ` Jan Blunck
2016-03-22  9:55     ` Jan Blunck [this message]
2016-03-22 10:07       ` [dpdk-dev] [PATCH v3 2/3] xen: Add netfront poll mode driver David Marchand
2016-03-22 10:42         ` Jan Blunck
2016-03-22  9:55     ` [dpdk-dev] [PATCH v3 3/3] xen: Add documentation Jan Blunck
2016-04-20 14:18     ` [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver Bruce Richardson
2016-05-03  9:38       ` Xie, Huawei
2017-02-05 14:44     ` Thomas Monjalon
2017-02-06 14:27       ` Konrad Rzeszutek Wilk
2015-02-15 15:24 ` [dpdk-dev] [PATCH 5/5] xen: net-front " Stephen Hemminger
2015-07-09  0:10 ` [dpdk-dev] [PATCH 1/5] xen: allow choosing dom0 support at runtime Thomas Monjalon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1458640529-9183-3-git-send-email-jblunck@infradead.org \
    --to=jblunck@infradead.org \
    --cc=dev@dpdk.org \
    --cc=jblunck@brocade.com \
    --cc=shemming@brocade.com \
    --cc=stephen@networkplumber.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).