DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 1/4] xen: allow choosing dom0 support at runtime
@ 2015-02-14 18:06 Stephen Hemminger
  2015-02-14 18:06 ` [dpdk-dev] [PATCH 2/4] xen: add phys-addr command line argument Stephen Hemminger
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Stephen Hemminger @ 2015-02-14 18:06 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

The previous code only allowed building the library and application
so that they ran either on Xen DOM0 or not on DOM0, chosen at build
time. This changes that to a runtime flag.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/librte_eal/common/include/rte_memory.h |  4 +++
 lib/librte_eal/linuxapp/eal/eal_memory.c   |  7 ++++
 lib/librte_ether/rte_ethdev.c              | 22 ++++++++++++
 lib/librte_ether/rte_ethdev.h              | 23 ++++++++++++
 lib/librte_mempool/rte_mempool.c           | 26 +++++++-------
 lib/librte_pmd_e1000/em_rxtx.c             | 30 +++-------------
 lib/librte_pmd_e1000/igb_rxtx.c            | 52 +++++++++------------------
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c          | 58 +++++++++---------------------
 8 files changed, 108 insertions(+), 114 deletions(-)

diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 7f8103f..ab6c1ff 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -176,6 +176,10 @@ unsigned rte_memory_get_nchannel(void);
 unsigned rte_memory_get_nrank(void);
 
 #ifdef RTE_LIBRTE_XEN_DOM0
+
+/**< Internal use only - should DOM0 memory mapping be used */
+extern int is_xen_dom0_supported(void);
+
 /**
  * Return the physical address of elt, which is an element of the pool mp.
  *
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index a67a1b0..4afda2a 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -98,6 +98,13 @@
 #include "eal_filesystem.h"
 #include "eal_hugepages.h"
 
+#ifdef RTE_LIBRTE_XEN_DOM0
+int is_xen_dom0_supported(void)
+{
+	return internal_config.xen_dom0_support;
+}
+#endif
+
 /**
  * @file
  * Huge page mapping under linux
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index ea3a1fb..457e0bc 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -2825,6 +2825,27 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 	}
 	rte_spinlock_unlock(&rte_eth_dev_cb_lock);
 }
+
+const struct rte_memzone *
+rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name,
+			 uint16_t queue_id, size_t size, unsigned align,
+			 int socket_id)
+{
+	char z_name[RTE_MEMZONE_NAMESIZE];
+	const struct rte_memzone *mz;
+
+	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
+		 dev->driver->pci_drv.name, ring_name,
+		 dev->data->port_id, queue_id);
+
+	mz = rte_memzone_lookup(z_name);
+	if (mz)
+		return mz;
+
+	return rte_memzone_reserve_bounded(z_name, size,
+					   socket_id, 0, align, RTE_PGSIZE_2M);
+}
+
 #ifdef RTE_NIC_BYPASS
 int rte_eth_dev_bypass_init(uint8_t port_id)
 {
@@ -3003,6 +3024,7 @@ rte_eth_dev_bypass_wd_reset(uint8_t port_id)
 	(*dev->dev_ops->bypass_wd_reset)(dev);
 	return 0;
 }
+
 #endif
 
 int
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 1200c1c..747acb5 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -3664,6 +3664,29 @@ int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_ty
 int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
 			enum rte_filter_op filter_op, void *arg);
 
+/**
+ * Create memzone for HW rings.
+ * malloc can't be used as the physical address is needed.
+ * If the memzone is already created, then this function returns a ptr
+ * to the old one.
+ *
+ * @param eth_dev
+ *   The *eth_dev* pointer is the address of the *rte_eth_dev* structure
+ * @param name
+ *   The name of the memory zone
+ * @param queue_id
+ *   The index of the queue to add to name
+ * @param size
+ *   The sizeof of the memory area
+ * @param align
+ *   Alignment for resulting memzone. Must be a power of 2.
+ * @param socket_id
+ *   The *socket_id* argument is the socket identifier in case of NUMA.
+ */
+const struct rte_memzone *
+rte_eth_dma_zone_reserve(const struct rte_eth_dev *eth_dev, const char *name,
+			 uint16_t queue_id, size_t size,
+			 unsigned align, int socket_id);
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 4cf6c25..5056a4f 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -372,19 +372,21 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
 		   int socket_id, unsigned flags)
 {
 #ifdef RTE_LIBRTE_XEN_DOM0
-	return (rte_dom0_mempool_create(name, n, elt_size,
-		cache_size, private_data_size,
-		mp_init, mp_init_arg,
-		obj_init, obj_init_arg,
-		socket_id, flags));
-#else
-	return (rte_mempool_xmem_create(name, n, elt_size,
-		cache_size, private_data_size,
-		mp_init, mp_init_arg,
-		obj_init, obj_init_arg,
-		socket_id, flags,
-		NULL, NULL, MEMPOOL_PG_NUM_DEFAULT, MEMPOOL_PG_SHIFT_MAX));
+	if (is_xen_dom0_supported())
+		return (rte_dom0_mempool_create(name, n, elt_size,
+					cache_size, private_data_size,
+					mp_init, mp_init_arg,
+					obj_init, obj_init_arg,
+					socket_id, flags));
+	else
 #endif
+		return (rte_mempool_xmem_create(name, n, elt_size,
+					cache_size, private_data_size,
+					mp_init, mp_init_arg,
+					obj_init, obj_init_arg,
+					socket_id, flags,
+					NULL, NULL, MEMPOOL_PG_NUM_DEFAULT,
+					MEMPOOL_PG_SHIFT_MAX));
 }
 
 /*
diff --git a/lib/librte_pmd_e1000/em_rxtx.c b/lib/librte_pmd_e1000/em_rxtx.c
index aa0b88c..9e09cfa 100644
--- a/lib/librte_pmd_e1000/em_rxtx.c
+++ b/lib/librte_pmd_e1000/em_rxtx.c
@@ -1104,28 +1104,6 @@ eth_em_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 #define	EM_MAX_BUF_SIZE     16384
 #define EM_RCTL_FLXBUF_STEP 1024
 
-static const struct rte_memzone *
-ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
-		uint16_t queue_id, uint32_t ring_size, int socket_id)
-{
-	const struct rte_memzone *mz;
-	char z_name[RTE_MEMZONE_NAMESIZE];
-
-	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
-		dev->driver->pci_drv.name, ring_name, dev->data->port_id,
-		queue_id);
-
-	if ((mz = rte_memzone_lookup(z_name)) != 0)
-		return (mz);
-
-#ifdef RTE_LIBRTE_XEN_DOM0
-	return rte_memzone_reserve_bounded(z_name, ring_size,
-			socket_id, 0, RTE_CACHE_LINE_SIZE, RTE_PGSIZE_2M);
-#else
-	return rte_memzone_reserve(z_name, ring_size, socket_id, 0);
-#endif
-}
-
 static void
 em_tx_queue_release_mbufs(struct em_tx_queue *txq)
 {
@@ -1273,8 +1251,8 @@ eth_em_tx_queue_setup(struct rte_eth_dev *dev,
 	 * resizing in later calls to the queue setup function.
 	 */
 	tsize = sizeof (txq->tx_ring[0]) * EM_MAX_RING_DESC;
-	if ((tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx, tsize,
-			socket_id)) == NULL)
+	if ((tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, tsize,
+					   RTE_CACHE_LINE_SIZE, socket_id)) == NULL)
 		return (-ENOMEM);
 
 	/* Allocate the tx queue data structure. */
@@ -1400,8 +1378,8 @@ eth_em_rx_queue_setup(struct rte_eth_dev *dev,
 
 	/* Allocate RX ring for max possible mumber of hardware descriptors. */
 	rsize = sizeof (rxq->rx_ring[0]) * EM_MAX_RING_DESC;
-	if ((rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx, rsize,
-			socket_id)) == NULL)
+	if ((rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, rsize,
+					   RTE_CACHE_LINE_SIZE, socket_id)) == NULL)
 		return (-ENOMEM);
 
 	/* Allocate the RX queue data structure. */
diff --git a/lib/librte_pmd_e1000/igb_rxtx.c b/lib/librte_pmd_e1000/igb_rxtx.c
index 5c394a9..d36469b 100644
--- a/lib/librte_pmd_e1000/igb_rxtx.c
+++ b/lib/librte_pmd_e1000/igb_rxtx.c
@@ -1109,29 +1109,6 @@ eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 #define IGB_MIN_RING_DESC 32
 #define IGB_MAX_RING_DESC 4096
 
-static const struct rte_memzone *
-ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
-		      uint16_t queue_id, uint32_t ring_size, int socket_id)
-{
-	char z_name[RTE_MEMZONE_NAMESIZE];
-	const struct rte_memzone *mz;
-
-	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
-			dev->driver->pci_drv.name, ring_name,
-				dev->data->port_id, queue_id);
-	mz = rte_memzone_lookup(z_name);
-	if (mz)
-		return mz;
-
-#ifdef RTE_LIBRTE_XEN_DOM0
-	return rte_memzone_reserve_bounded(z_name, ring_size,
-			socket_id, 0, IGB_ALIGN, RTE_PGSIZE_2M);
-#else
-	return rte_memzone_reserve_aligned(z_name, ring_size,
-			socket_id, 0, IGB_ALIGN);
-#endif
-}
-
 static void
 igb_tx_queue_release_mbufs(struct igb_tx_queue *txq)
 {
@@ -1265,8 +1242,8 @@ eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
 	 * resizing in later calls to the queue setup function.
 	 */
 	size = sizeof(union e1000_adv_tx_desc) * IGB_MAX_RING_DESC;
-	tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
-					size, socket_id);
+	tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
+				      IGB_ALIGN, socket_id);
 	if (tz == NULL) {
 		igb_tx_queue_release(txq);
 		return (-ENOMEM);
@@ -1284,12 +1261,14 @@ eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
 	txq->port_id = dev->data->port_id;
 
 	txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
-#ifndef RTE_LIBRTE_XEN_DOM0
-	txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
-#else
-	txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+#ifdef RTE_LIBRTE_XEN_DOM0
+	if (is_xen_dom0_supported())
+		txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+	else
 #endif
-	 txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
+		txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
+
+	txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
 	/* Allocate software ring */
 	txq->sw_ring = rte_zmalloc("txq->sw_ring",
 				   sizeof(struct igb_tx_entry) * nb_desc,
@@ -1414,18 +1393,21 @@ eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
 	 *  resizing in later calls to the queue setup function.
 	 */
 	size = sizeof(union e1000_adv_rx_desc) * IGB_MAX_RING_DESC;
-	rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx, size, socket_id);
+	rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
+				      IGB_ALIGN, socket_id);
 	if (rz == NULL) {
 		igb_rx_queue_release(rxq);
 		return (-ENOMEM);
 	}
 	rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
 	rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
-#ifndef RTE_LIBRTE_XEN_DOM0
-	rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
-#else
-	rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+#ifdef RTE_LIBRTE_XEN_DOM0
+	if (is_xen_dom0_supported())
+		rxq->rx_ring_phys_addr =
+			rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+	else
 #endif
+		rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
 	rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
 
 	/* Allocate software ring. */
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index e6766b3..303144d 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -1656,35 +1656,6 @@ ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 #define IXGBE_MIN_RING_DESC 32
 #define IXGBE_MAX_RING_DESC 4096
 
-/*
- * Create memzone for HW rings. malloc can't be used as the physical address is
- * needed. If the memzone is already created, then this function returns a ptr
- * to the old one.
- */
-static const struct rte_memzone *
-ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
-		      uint16_t queue_id, uint32_t ring_size, int socket_id)
-{
-	char z_name[RTE_MEMZONE_NAMESIZE];
-	const struct rte_memzone *mz;
-
-	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
-			dev->driver->pci_drv.name, ring_name,
-			dev->data->port_id, queue_id);
-
-	mz = rte_memzone_lookup(z_name);
-	if (mz)
-		return mz;
-
-#ifdef RTE_LIBRTE_XEN_DOM0
-	return rte_memzone_reserve_bounded(z_name, ring_size,
-		socket_id, 0, IXGBE_ALIGN, RTE_PGSIZE_2M);
-#else
-	return rte_memzone_reserve_aligned(z_name, ring_size,
-		socket_id, 0, IXGBE_ALIGN);
-#endif
-}
-
 static void
 ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)
 {
@@ -1920,9 +1891,9 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
 	 * handle the maximum ring size is allocated in order to allow for
 	 * resizing in later calls to the queue setup function.
 	 */
-	tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
+	tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
 			sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
-			socket_id);
+			IXGBE_ALIGN, socket_id);
 	if (tz == NULL) {
 		ixgbe_tx_queue_release(txq);
 		return (-ENOMEM);
@@ -1950,11 +1921,14 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
 		txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
 	else
 		txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
-#ifndef	RTE_LIBRTE_XEN_DOM0
-	txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
-#else
-	txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+
+#ifdef RTE_LIBRTE_XEN_DOM0
+	if (is_xen_dom0_supported())
+		txq->tx_ring_phys_addr =
+			rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+	else
 #endif
+		txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
 	txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
 
 	/* Allocate software ring */
@@ -2195,8 +2169,8 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	 * handle the maximum ring size is allocated in order to allow for
 	 * resizing in later calls to the queue setup function.
 	 */
-	rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx,
-				   RX_RING_SZ, socket_id);
+	rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
+				      RX_RING_SZ, IXGBE_ALIGN, socket_id);
 	if (rz == NULL) {
 		ixgbe_rx_queue_release(rxq);
 		return (-ENOMEM);
@@ -2223,11 +2197,13 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 		rxq->rdh_reg_addr =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
 	}
-#ifndef RTE_LIBRTE_XEN_DOM0
-	rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
-#else
-	rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+#ifdef RTE_LIBRTE_XEN_DOM0
+	if (is_xen_dom0_supported())
+		rxq->rx_ring_phys_addr =
+			rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+	else
 #endif
+		rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
 	rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
 
 	/*
-- 
2.1.4

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [dpdk-dev] [PATCH 2/4] xen: add phys-addr command line argument
  2015-02-14 18:06 [dpdk-dev] [PATCH 1/4] xen: allow choosing dom0 support at runtime Stephen Hemminger
@ 2015-02-14 18:06 ` Stephen Hemminger
  2015-02-14 18:06 ` [dpdk-dev] [PATCH 3/4] xen: add uio driver Stephen Hemminger
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Stephen Hemminger @ 2015-02-14 18:06 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Allow overriding the default Xen DOM0 behavior to
use physical addresses instead of mfn.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/librte_eal/common/eal_common_options.c |  5 +++++
 lib/librte_eal/common/eal_internal_cfg.h   |  1 +
 lib/librte_eal/common/eal_options.h        |  2 ++
 lib/librte_eal/common/include/rte_memory.h |  3 +++
 lib/librte_eal/linuxapp/eal/eal_memory.c   |  5 +++++
 lib/librte_mempool/rte_dom0_mempool.c      | 10 ++++++++--
 6 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 67e02dc..1742364 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -83,6 +83,7 @@ eal_long_options[] = {
 	{OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM},
 	{OPT_BASE_VIRTADDR, 1, 0, OPT_BASE_VIRTADDR_NUM},
 	{OPT_XEN_DOM0, 0, 0, OPT_XEN_DOM0_NUM},
+	{OPT_XEN_PHYS_ADDR, 0, 0, OPT_XEN_PHYS_ADDR_NUM},
 	{OPT_CREATE_UIO_DEV, 1, NULL, OPT_CREATE_UIO_DEV_NUM},
 	{OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM},
 	{0, 0, 0, 0}
@@ -491,6 +492,10 @@ eal_parse_common_option(int opt, const char *optarg,
 		}
 		conf->log_level = log;
 		break;
+
+	case OPT_XEN_PHYS_ADDR_NUM:
+		conf->xen_phys_addr_support = 1;
+		break;
 	}
 
 	/* don't know what to do, leave this to caller */
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index e2ecb0d..41b4169 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -65,6 +65,7 @@ struct internal_config {
 	volatile unsigned force_nrank;    /**< force number of ranks */
 	volatile unsigned no_hugetlbfs;   /**< true to disable hugetlbfs */
 	volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
+	volatile unsigned xen_phys_addr_support; /**< support phys addr */
 	volatile unsigned no_pci;         /**< true to disable PCI */
 	volatile unsigned no_hpet;        /**< true to disable HPET */
 	volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index e476f8d..8aee959 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -73,6 +73,8 @@ enum {
 	OPT_BASE_VIRTADDR_NUM,
 #define OPT_XEN_DOM0    "xen-dom0"
 	OPT_XEN_DOM0_NUM,
+#define OPT_XEN_PHYS_ADDR "xen-phys-addr"
+	OPT_XEN_PHYS_ADDR_NUM,
 #define OPT_CREATE_UIO_DEV "create-uio-dev"
 	OPT_CREATE_UIO_DEV_NUM,
 #define OPT_VFIO_INTR    "vfio-intr"
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index ab6c1ff..c3b8a98 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -180,6 +180,9 @@ unsigned rte_memory_get_nrank(void);
 /**< Internal use only - should DOM0 memory mapping be used */
 extern int is_xen_dom0_supported(void);
 
+/**< Internal use only - should DOM0 use physical addresses insted of mfn */
+extern int is_xen_phys_addr_supported(void);
+
 /**
  * Return the physical address of elt, which is an element of the pool mp.
  *
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 4afda2a..a759ac9 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -103,6 +103,11 @@ int is_xen_dom0_supported(void)
 {
 	return internal_config.xen_dom0_support;
 }
+
+int is_xen_phys_addr_supported(void)
+{
+	return internal_config.xen_phys_addr_support;
+}
 #endif
 
 /**
diff --git a/lib/librte_mempool/rte_dom0_mempool.c b/lib/librte_mempool/rte_dom0_mempool.c
index 9ec68fb..ab35826 100644
--- a/lib/librte_mempool/rte_dom0_mempool.c
+++ b/lib/librte_mempool/rte_dom0_mempool.c
@@ -74,8 +74,14 @@ get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num,
     virt_addr =(uintptr_t) mcfg->memseg[memseg_id].addr;
 
     for (i = 0; i != pg_num; i++) {
-        mfn_id = ((uintptr_t)va + i * pg_sz - virt_addr) / RTE_PGSIZE_2M;
-        pa[i] = mcfg->memseg[memseg_id].mfn[mfn_id] * page_size;
+	if (!is_xen_phys_addr_supported()) {
+		mfn_id = ((uintptr_t)va + i * pg_sz -
+				virt_addr) / RTE_PGSIZE_2M;
+		pa[i] = mcfg->memseg[memseg_id].mfn[mfn_id] * page_size;
+	} else {
+		pa[i] = mcfg->memseg[memseg_id].phys_addr + i * pg_sz +
+			(uintptr_t)va - virt_addr;
+	}
     }
 }
 
-- 
2.1.4

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [dpdk-dev] [PATCH 3/4] xen: add uio driver
  2015-02-14 18:06 [dpdk-dev] [PATCH 1/4] xen: allow choosing dom0 support at runtime Stephen Hemminger
  2015-02-14 18:06 ` [dpdk-dev] [PATCH 2/4] xen: add phys-addr command line argument Stephen Hemminger
@ 2015-02-14 18:06 ` Stephen Hemminger
  2015-02-14 18:06 ` [dpdk-dev] [PATCH 4/4] xen: net-front poll mode driver Stephen Hemminger
  2015-02-14 19:25 ` [dpdk-dev] [PATCH 1/4] xen: allow choosing dom0 support at runtime Neil Horman
  3 siblings, 0 replies; 6+ messages in thread
From: Stephen Hemminger @ 2015-02-14 18:06 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

New uio helper kernel driver for use by Xen netfront UIO poll mode driver.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/librte_eal/linuxapp/Makefile          |   3 +
 lib/librte_eal/linuxapp/xen_uio/Makefile  |  55 ++
 lib/librte_eal/linuxapp/xen_uio/xen_uio.c | 837 ++++++++++++++++++++++++++++++
 3 files changed, 895 insertions(+)
 create mode 100644 lib/librte_eal/linuxapp/xen_uio/Makefile
 create mode 100644 lib/librte_eal/linuxapp/xen_uio/xen_uio.c

diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile
index 8fcfdf6..d3893e5 100644
--- a/lib/librte_eal/linuxapp/Makefile
+++ b/lib/librte_eal/linuxapp/Makefile
@@ -41,5 +41,8 @@ endif
 ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
 DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += xen_dom0
 endif
+ifeq ($(CONFIG_RTE_LIBRTE_XEN_PMD),y)
+DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += xen_uio
+endif
 
 include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/linuxapp/xen_uio/Makefile b/lib/librte_eal/linuxapp/xen_uio/Makefile
new file mode 100644
index 0000000..25a9f35
--- /dev/null
+++ b/lib/librte_eal/linuxapp/xen_uio/Makefile
@@ -0,0 +1,55 @@
+#   BSD LICENSE
+#
+#   Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = xen_uio
+MODULE_PATH = drivers/net/xen_uio
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=100
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -Winline -Wall -Werror
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y := xen_uio.c
+
+
+include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/xen_uio/xen_uio.c b/lib/librte_eal/linuxapp/xen_uio/xen_uio.c
new file mode 100644
index 0000000..b25b1f3
--- /dev/null
+++ b/lib/librte_eal/linuxapp/xen_uio/xen_uio.c
@@ -0,0 +1,837 @@
+/*
+ * Virtual network driver for conversing with remote driver backends.
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ * Copyright (c) 2005, XenSource Ltd
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/if_ether.h>
+#include <linux/proc_fs.h>
+
+#include <xen/xenbus.h>
+#include <xen/page.h>
+#include <xen/grant_table.h>
+#include <xen/interface/io/netif.h>
+#include <xen/platform_pci.h>
+
+#include <xen/events.h>
+#include <xen/evtchn.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+#include <linux/uio_driver.h>
+
+#include "../../../librte_pmd_xen/xen_adapter_info.h"
+
+#define GRANT_INVALID_REF 0
+
+#define NET_TX_RING_SIZE \
+	__CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
+#define NET_RX_RING_SIZE \
+	__CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
+
+#define TX_MAX_TARGET \
+	min_t(int, NET_RX_RING_SIZE, 256)
+#define RX_MAX_TARGET \
+	min_t(int, NET_RX_RING_SIZE, 256)
+
+#define RXTX_GREFS (TX_MAX_TARGET + RX_MAX_TARGET)
+
+#define DOMAIN_PROC "xen/domain"
+struct proc_dir_entry *domain_proc;
+char domain_name[9];
+size_t domain_len = sizeof(domain_name);
+static const char *domains[] = { "native", "pv", "hvm", "unknown" };
+
+struct netfront_info *xennet_alloc_resources(struct xenbus_device *xbdev);
+static void xennet_free_resources(struct xenbus_device *xbdev);
+static int xennet_connect_backend(struct netfront_info *info);
+static void xennet_disconnect_backend(struct netfront_info *info,
+		int deffered_free);
+
+/* some helpers */
+static int __gnttab_version(void)
+{
+	int err;
+	struct gnttab_get_version ggv;
+
+	ggv.dom = DOMID_SELF;
+
+	err = HYPERVISOR_grant_table_op(GNTTABOP_get_version, &ggv, 1);
+	if (err >= 0)
+		return (int)ggv.version;
+
+	return err;
+}
+
+static void xennet_end_access(int ref, void *page)
+{
+	/* This frees the page as a side-effect */
+	if (ref != GRANT_INVALID_REF)
+		gnttab_end_foreign_access(ref, 0, (unsigned long)page);
+}
+
+static int xen_net_read_mac(struct xenbus_device *xbdev, u8 *mac)
+{
+	char *macstr;
+	int ret = 0;
+
+	macstr = xenbus_read(XBT_NIL, xbdev->nodename, "mac", NULL);
+	if (IS_ERR(macstr))
+		return PTR_ERR(macstr);
+
+	pr_info("mac addr: %s\n", macstr);
+
+	if (sscanf(macstr, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &mac[0], &mac[1],
+			&mac[2], &mac[3], &mac[4], &mac[5])  != ETH_ALEN) {
+		pr_warn("can't parse mac address\n");
+		ret = -ENOENT;
+	}
+
+	kfree(macstr);
+	return ret;
+}
+
+struct xen_uio_dev {
+	struct uio_info info;
+};
+
+struct netfront_info {
+	struct xenbus_device *xbdev;
+
+	int tx_ring_ref;
+	struct xen_netif_tx_front_ring tx;
+
+	int rx_ring_ref;
+	struct xen_netif_rx_front_ring rx;
+
+	struct xen_netif_tx_sring *txs;
+	struct xen_netif_rx_sring *rxs;
+
+	grant_ref_t gref_rxtx_head;
+
+	struct xen_uio_dev *xen_udev;
+
+	struct xen_adapter_info *shared_info_page;
+};
+
+static int xennet_uio_init(struct xenbus_device *xbdev,
+		struct netfront_info *info)
+{
+	int err;
+	struct xen_uio_dev *udev;
+
+	udev = kzalloc(sizeof(struct xen_uio_dev), GFP_KERNEL);
+	if (!udev)
+		return -ENOMEM;
+
+	info->xen_udev = udev;
+
+	/* fill uio infos */
+	udev->info.name = "xen_uio";
+	udev->info.version = "0.1";
+	udev->info.irq = UIO_IRQ_NONE;
+	udev->info.irq_flags = 0;
+
+	/*share all working info here*/
+	udev->info.mem[INFO_MAP].name = "xennet info page";
+	udev->info.mem[INFO_MAP].memtype = UIO_MEM_LOGICAL;
+	udev->info.mem[INFO_MAP].addr = (phys_addr_t)info->shared_info_page;
+	udev->info.mem[INFO_MAP].size = PAGE_SIZE;
+
+	udev->info.mem[RX_RING_MAP].name = "xennet front rx ring";
+	udev->info.mem[RX_RING_MAP].memtype = UIO_MEM_LOGICAL;
+	udev->info.mem[RX_RING_MAP].addr = (phys_addr_t)info->rxs;
+	udev->info.mem[RX_RING_MAP].size = PAGE_SIZE;
+
+	udev->info.mem[TX_RING_MAP].name = "xennet front tx ring";
+	udev->info.mem[TX_RING_MAP].memtype = UIO_MEM_LOGICAL;
+	udev->info.mem[TX_RING_MAP].addr = (phys_addr_t)info->txs;
+	udev->info.mem[TX_RING_MAP].size = PAGE_SIZE;
+
+	err = uio_register_device(&xbdev->dev, &info->xen_udev->info);
+	if (err) {
+		pr_err("uio register failed: %d\n", err);
+		kfree(info->xen_udev);
+		info->xen_udev = NULL;
+	} else {
+		pr_info("uio device registered with irq %lx\n",
+				info->xen_udev->info.irq);
+	}
+
+	return err;
+}
+
+
+static void xennet_uio_uninit(struct netfront_info *info)
+{
+	if (info->xen_udev)
+		uio_unregister_device(&info->xen_udev->info);
+	info->xen_udev = NULL;
+}
+
+struct netfront_info *xennet_alloc_resources(struct xenbus_device *xbdev)
+{
+	int ret;
+	uint16_t i;
+	int gref = 0;
+	grant_ref_t gref_rxtx_head;
+
+	struct netfront_info *info =
+		kzalloc(sizeof(struct netfront_info), GFP_KERNEL);
+	if (NULL == info)
+		goto exit;
+
+	info->gref_rxtx_head = GRANT_INVALID_REF;
+	info->xbdev = xbdev;
+
+	/* allocate place for tx ring */
+	info->txs = (struct xen_netif_tx_sring *)get_zeroed_page(
+			GFP_NOIO | __GFP_HIGH);
+	if (!info->txs) {
+		ret = -ENOMEM;
+		xenbus_dev_fatal(xbdev, ret, "allocating tx ring page");
+		goto exit;
+	}
+
+	/* allocate place for rx ring */
+	info->rxs = (struct xen_netif_rx_sring *)get_zeroed_page(
+			GFP_NOIO | __GFP_HIGH);
+	if (!info->rxs) {
+		ret = -ENOMEM;
+		xenbus_dev_fatal(xbdev, ret, "allocating rx ring page");
+		goto exit;
+	}
+
+	/* allocate shared with user page (info page) */
+	info->shared_info_page =
+		(struct xen_adapter_info *)__get_free_page(GFP_KERNEL);
+	if (NULL == info->shared_info_page) {
+		pr_alert("xen_uio can't alloc shared page\n");
+		goto exit;
+	}
+
+	/* just assertion */
+	if (((char *)&info->shared_info_page->rxtx_grefs[RXTX_GREFS - 1])
+			- ((char *)info->shared_info_page) > PAGE_SIZE) {
+		pr_err("ASSERT: no mem for grefs\n");
+		goto exit;
+	}
+
+	/* allocate grefs for every tx ring and rx ring slot */
+	ret = gnttab_alloc_grant_references(RXTX_GREFS, &info->gref_rxtx_head);
+	if (ret < 0) {
+		pr_err("xen_uio can't alloc rx and tx grefs\n");
+		goto exit;
+	}
+
+	/* fill in all grefs*/
+	gref_rxtx_head = info->gref_rxtx_head;
+	info->shared_info_page->rx_grefs_count = RX_MAX_TARGET;
+	info->shared_info_page->tx_grefs_count = TX_MAX_TARGET;
+	info->shared_info_page->rx_evtchn = 0;
+	info->shared_info_page->tx_evtchn = 0;
+
+	/*go through the list and collect put all grefs to array*/
+	for (i = 0; i < (RXTX_GREFS); i++) {
+		gref = gnttab_claim_grant_reference(&gref_rxtx_head);
+		if (gref < 0) {
+			pr_err("not expected end of list\n");
+			goto exit;
+		}
+		info->shared_info_page->rxtx_grefs[i] = (grant_ref_t)gref;
+	}
+
+	/*setup shared_info_page*/
+	info->shared_info_page->rx_ring = &info->rx;
+	info->shared_info_page->tx_ring = &info->tx;
+	/*it's not secure - we need here something else*/
+	info->shared_info_page->info = info;
+
+	info->shared_info_page->is_connected = 0;
+	info->shared_info_page->disconnect_count = 0;
+
+	/* share struct by UIO */
+	ret = xennet_uio_init(xbdev, info);
+	if (ret) {
+		pr_err("xennet_uio_init failed\n");
+		goto exit;
+	}
+
+	return info;
+exit:
+	if (info) {
+		if (info->gref_rxtx_head != GRANT_INVALID_REF)
+			gnttab_free_grant_references(info->gref_rxtx_head);
+		if (info->shared_info_page)
+			free_page((unsigned long)info->shared_info_page);
+		if (info->rxs)
+			free_page((unsigned long)info->rxs);
+		if (info->txs)
+			free_page((unsigned long)info->txs);
+		kfree(info);
+	}
+	return NULL;
+}
+
+/*
+ * Counterpart of xennet_alloc_resources(): tear down the UIO device and
+ * release the grant-reference block, the user-shared info page, the
+ * rx/tx shared ring pages and the netfront_info itself.
+ */
+void xennet_free_resources(struct xenbus_device *xbdev)
+{
+	struct netfront_info *info = dev_get_drvdata(&xbdev->dev);
+
+	xennet_uio_uninit(info);
+
+	gnttab_free_grant_references(info->gref_rxtx_head);
+
+	free_page((unsigned long)info->shared_info_page);
+	/*can be deferred free- in that case these pointers are NULL*/
+	if (info->rxs)
+		free_page((unsigned long)info->rxs);
+	if (info->txs)
+		free_page((unsigned long)info->txs);
+
+	kfree(info);
+}
+
+/*
+ * Create the shared tx/rx rings, grant them to the backend and allocate
+ * the event channel(s).  Results (ring refs, event channel ports, MAC,
+ * otherend id) are stored in @info and in the user-visible
+ * shared_info_page; talk_to_netback() publishes them on xenstore.
+ *
+ * Returns 0 on success or a negative errno.  Granted rings are released
+ * by the caller through xennet_disconnect_backend() on failure.
+ */
+static int setup_netfront(struct xenbus_device *xbdev,
+		struct netfront_info *info)
+{
+	unsigned int feature_split_evtchn;
+	int err;
+
+	info->tx_ring_ref = GRANT_INVALID_REF;
+	info->rx_ring_ref = GRANT_INVALID_REF;
+	info->rx.sring = NULL;
+	info->tx.sring = NULL;
+
+	/* share otherend_id with user */
+	info->shared_info_page->otherend_id = xbdev->otherend_id;
+
+	/* absent key means the backend cannot split rx/tx event channels */
+	err = xenbus_scanf(XBT_NIL, xbdev->otherend,
+			"feature-split-event-channels", "%u",
+			&feature_split_evtchn);
+	if (err < 0)
+		feature_split_evtchn = 0;
+
+	/* read mac */
+	err = xen_net_read_mac(xbdev, info->shared_info_page->mac);
+	if (err) {
+		xenbus_dev_fatal(xbdev, err, "parsing %s/mac",
+				xbdev->nodename);
+		goto fail;
+	}
+
+	/* set up queues */
+	SHARED_RING_INIT(info->txs);
+	FRONT_RING_INIT(&info->tx, info->txs, PAGE_SIZE);
+
+	SHARED_RING_INIT(info->rxs);
+	FRONT_RING_INIT(&info->rx, info->rxs, PAGE_SIZE);
+
+	/* xenbus_grant_ring() returns the new grant reference on success */
+	err = xenbus_grant_ring(info->xbdev, virt_to_mfn(info->txs));
+	if (err < 0) {
+		pr_err("xenbus_grant_ring for txs failed!\n");
+		goto fail;
+	}
+	info->tx_ring_ref = err;
+
+	err = xenbus_grant_ring(info->xbdev, virt_to_mfn(info->rxs));
+	if (err < 0) {
+		pr_err("xenbus_grant_ring for rxs failed!\n");
+		goto fail;
+	}
+	info->rx_ring_ref = err;
+
+	/* alloc eventchn */
+	pr_info("feature_split_evtchn: %d\n",
+			(int)feature_split_evtchn);
+
+	err = xenbus_alloc_evtchn(xbdev, &info->shared_info_page->tx_evtchn);
+	if (err)
+		goto fail;
+
+	if (feature_split_evtchn) {
+		err = xenbus_alloc_evtchn(xbdev,
+				&info->shared_info_page->rx_evtchn);
+		if (err)
+			goto fail_split;
+	} else {
+		/* single event channel shared by rx and tx */
+		info->shared_info_page->rx_evtchn =
+			info->shared_info_page->tx_evtchn;
+	}
+
+	return 0;
+fail_split:
+	xenbus_free_evtchn(info->xbdev, info->shared_info_page->tx_evtchn);
+fail:
+	pr_err("setup_netfront failed\n");
+	return err;
+}
+
+/* Common code used when first setting up, and when resuming.
+ *
+ * Sets up rings/event channels via setup_netfront() and then publishes
+ * the ring refs, event channel ports and feature flags to xenstore in
+ * one transaction.  The transaction is retried from "again:" whenever
+ * xenbus_transaction_end() reports -EAGAIN (concurrent xenstore
+ * update).  On any failure the rings are torn down via
+ * xennet_disconnect_backend() with deferred page freeing.
+ */
+static int talk_to_netback(struct xenbus_device *xbdev,
+		struct netfront_info *info)
+{
+	const char *message;
+	struct xenbus_transaction xbt;
+	int err;
+
+	/* Create shared ring, alloc event channel. */
+	err = setup_netfront(xbdev, info);
+	if (err)
+		goto out;
+
+again:
+	err = xenbus_transaction_start(&xbt);
+	if (err) {
+		xenbus_dev_fatal(xbdev, err, "starting transaction");
+		goto destroy_ring;
+	}
+
+	err = xenbus_printf(xbt, xbdev->nodename, "tx-ring-ref",
+			"%u", info->tx_ring_ref);
+	if (err) {
+		message = "writing tx ring-ref";
+		goto abort_transaction;
+	}
+	err = xenbus_printf(xbt, xbdev->nodename, "rx-ring-ref",
+			"%u", info->rx_ring_ref);
+	if (err) {
+		message = "writing rx ring-ref";
+		goto abort_transaction;
+	}
+
+	/* one key for a shared channel, two keys when split */
+	if (info->shared_info_page->tx_evtchn ==
+			info->shared_info_page->rx_evtchn) {
+		err = xenbus_printf(xbt, xbdev->nodename, "event-channel",
+				"%u", info->shared_info_page->tx_evtchn);
+		if (err) {
+			message = "writing event-channel";
+			goto abort_transaction;
+		}
+	} else {
+		err = xenbus_printf(xbt, xbdev->nodename, "event-channel-tx",
+				"%u", info->shared_info_page->tx_evtchn);
+		if (err) {
+			message = "writing event-channel";
+			goto abort_transaction;
+		}
+		err = xenbus_printf(xbt, xbdev->nodename, "event-channel-rx",
+				"%u", info->shared_info_page->rx_evtchn);
+		if (err) {
+			message = "writing event-channel";
+			goto abort_transaction;
+		}
+	}
+
+	err = xenbus_printf(xbt, xbdev->nodename, "request-rx-copy", "%u", 1);
+	if (err) {
+		message = "writing request-rx-copy";
+		goto abort_transaction;
+	}
+
+	err = xenbus_printf(xbt, xbdev->nodename, "feature-rx-notify",
+			"%d", 1);
+	if (err) {
+		message = "writing feature-rx-notify";
+		goto abort_transaction;
+	}
+
+	err = xenbus_printf(xbt, xbdev->nodename, "feature-sg", "%d", 1);
+	if (err) {
+		message = "writing feature-sg";
+		goto abort_transaction;
+	}
+
+	err = xenbus_printf(xbt, xbdev->nodename, "feature-gso-tcpv4",
+			"%d", 1);
+	if (err) {
+		message = "writing feature-gso-tcpv4";
+		goto abort_transaction;
+	}
+
+	err = xenbus_transaction_end(xbt, 0);
+	if (err) {
+		if (err == -EAGAIN)
+			goto again;
+		xenbus_dev_fatal(xbdev, err, "completing transaction");
+		goto destroy_ring;
+	}
+
+	return 0;
+abort_transaction:
+	xenbus_transaction_end(xbt, 1);
+	xenbus_dev_fatal(xbdev, err, "%s", message);
+destroy_ring:
+	xennet_disconnect_backend(info, 1);
+out:
+	pr_err("talk_to_netback failed\n");
+	return err;
+}
+
+/*
+ * Connect to the backend: require the rx-copy receive path (the only
+ * one this driver implements), then negotiate rings/event channels via
+ * talk_to_netback().  Only mark the shared info page as connected once
+ * the negotiation actually succeeded.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int xennet_connect_backend(struct netfront_info *info)
+{
+	int err;
+	unsigned int feature_rx_copy;
+
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, "feature-rx-copy",
+			"%u", &feature_rx_copy);
+	if (err != 1)
+		feature_rx_copy = 0;
+
+	if (!feature_rx_copy) {
+		pr_info("backend does not support copying receive path\n");
+		return -ENODEV;
+	}
+
+	err = talk_to_netback(info->xbdev, info);
+	if (err) {
+		pr_err("talk_to_netback failed!\n");
+		/* do not advertise a connection that was never made */
+		return err;
+	}
+
+	info->shared_info_page->is_connected = 1;
+
+	return 0;
+}
+
+/*
+ * Undo xennet_connect_backend(): free the event channel(s) and end
+ * foreign access to the ring pages.
+ *
+ * @deffered_free: non-zero means also free the ring pages now and NULL
+ * the pointers so xennet_free_resources() skips them ("deferred free");
+ * zero keeps the pages so they can be reused on resume.
+ */
+static void xennet_disconnect_backend(struct netfront_info *info,
+		int deffered_free)
+{
+	if (info->shared_info_page->tx_evtchn !=
+			info->shared_info_page->rx_evtchn) {
+		xenbus_free_evtchn(info->xbdev,
+				info->shared_info_page->rx_evtchn);
+	}
+	xenbus_free_evtchn(info->xbdev, info->shared_info_page->tx_evtchn);
+
+	if (deffered_free) {
+		xennet_end_access(info->tx_ring_ref, info->txs);
+		xennet_end_access(info->rx_ring_ref, info->rxs);
+		info->txs = NULL;
+		info->rxs = NULL;
+	} else {
+		xennet_end_access(info->tx_ring_ref, NULL);
+		xennet_end_access(info->rx_ring_ref, NULL);
+	}
+
+	info->tx_ring_ref = GRANT_INVALID_REF;
+	info->rx_ring_ref = GRANT_INVALID_REF;
+	info->rx.sring = NULL;
+	info->tx.sring = NULL;
+
+	/* let the UIO user observe the disconnect */
+	info->shared_info_page->is_connected = 0;
+	info->shared_info_page->disconnect_count++;
+}
+
+
+/**
+ * Entry point to this code when a new device is created.  Allocate the basic
+ * structures and the ring buffers for communication with the backend, and
+ * inform the backend of the appropriate details for those.
+ */
+static int xennet_probe(struct xenbus_device *xbdev,
+		const struct xenbus_device_id *id)
+{
+	struct netfront_info *info;
+
+	info = xennet_alloc_resources(xbdev);
+	if (!info)
+		return -ENOMEM;	/* failure details already logged */
+
+	dev_set_drvdata(&xbdev->dev, info);
+
+	return 0;
+}
+
+/**
+ * We are reconnecting to the backend, due to a suspend/resume, or a backend
+ * driver restart.  We tear down our netif structure and recreate it, but
+ * leave the device-layer structures intact so that this is transparent to the
+ * rest of the kernel.
+ */
+static int xennet_resume(struct xenbus_device *xbdev)
+{
+	struct netfront_info *info = dev_get_drvdata(&xbdev->dev);
+
+	pr_devel("%s\n", xbdev->nodename);
+
+	/* we can use the same memory region - disable deferred free;
+	 * reconnection happens later from netback_changed() */
+	xennet_disconnect_backend(info, 0);
+
+	return 0;
+}
+
+/**
+ * Callback received when the backend's state changes.
+ */
+static void netback_changed(struct xenbus_device *xbdev,
+		enum xenbus_state backend_state)
+{
+	struct netfront_info *info = dev_get_drvdata(&xbdev->dev);
+
+	pr_devel("%s\n", xenbus_strstate(backend_state));
+
+	switch (backend_state) {
+	case XenbusStateInitialising:
+	case XenbusStateInitialised:
+	case XenbusStateReconfiguring:
+	case XenbusStateReconfigured:
+		break;
+	case XenbusStateUnknown:
+		break;
+
+	case XenbusStateInitWait:
+		/* backend is ready - connect once, while still initialising */
+		if (xbdev->state != XenbusStateInitialising)
+			break;
+		if (xennet_connect_backend(info) != 0) {
+			pr_err("%s\n", xbdev->nodename);
+			break;
+		}
+		xenbus_switch_state(xbdev, XenbusStateConnected);
+		break;
+
+	case XenbusStateConnected:
+		break;
+
+	case XenbusStateClosed:
+		if (xbdev->state == XenbusStateClosed) {
+			/* restart the handshake */
+			xenbus_switch_state(xbdev, XenbusStateInitialising);
+			break;
+		}
+		/* fallthrough - we missed the Closing notification */
+
+	case XenbusStateClosing:
+		xenbus_frontend_closed(xbdev);
+		break;
+	}
+}
+
+/* xenbus device types handled by this driver (terminated by "") */
+static const struct xenbus_device_id netfront_ids[] = {
+	{ "vif" },
+	{ "" }
+};
+
+/* Device removal: disconnect with deferred (full) freeing of the ring
+ * pages, then release everything allocated in xennet_probe(). */
+static int xennet_remove(struct xenbus_device *xbdev)
+{
+	struct netfront_info *info = dev_get_drvdata(&xbdev->dev);
+
+	pr_devel("%s\n", xbdev->nodename);
+
+	xennet_disconnect_backend(info, 1);
+
+	xennet_free_resources(xbdev);
+
+	return 0;
+}
+
+/* xenbus frontend driver glue for the "vif" devices we take over */
+static struct xenbus_driver xenuio_driver = {
+	.ids  = netfront_ids,
+	.probe = xennet_probe,
+	.remove = xennet_remove,
+	.resume = xennet_resume,
+	.otherend_changed = netback_changed,
+};
+
+/* operations that we can't do through the shared memory:
+ * event channel notification and grant handling on behalf of the
+ * user-space PMD */
+static long xennet_ioctl(struct file *file,
+		unsigned int cmd, unsigned long arg) {
+	int rc;
+	void __user *uarg = (void __user *) arg;
+
+	switch (cmd) {
+	case IOCTL_EVTCHN_NOTIFY:
+		{
+			struct ioctl_evtchn_notify notify;
+
+			rc = -EFAULT;
+			if (copy_from_user(&notify, uarg, sizeof(notify)))
+				break;
+			notify_remote_via_evtchn(notify.port);
+			rc = 0;
+		}
+		break;
+	case IOCTL_EVTCHN_NOTIFY_GRANT:
+		{
+			uint16_t i;
+			int notify;
+			struct ioctl_evtchn_notify_grant *ng;
+
+			rc = -EFAULT;
+
+			/* validate the whole structure, not just a
+			 * pointer's worth of it: sizeof(ng) is the size
+			 * of the pointer, sizeof(*ng) is the size of the
+			 * struct being accessed.
+			 * NOTE(review): the struct is then dereferenced
+			 * directly in user memory rather than copied in;
+			 * access_ok() alone does not guarantee the pages
+			 * are present - confirm this is acceptable for
+			 * this UIO-style interface. */
+			if (access_ok(VERIFY_READ, uarg, sizeof(*ng)))
+				ng = uarg;
+			else
+				break;
+
+			/* release grants the user is done with */
+			for (i = 0; i < ng->rel_count; i++) {
+				gnttab_end_foreign_access_ref(ng->rel_gref[i],
+						0);
+			}
+
+			if (ng->count) {
+				union {
+					struct xen_netif_rx_front_ring *rx;
+					struct xen_netif_tx_front_ring *tx;
+				} ring;
+
+				/* grant the new buffers (rx grants are
+				 * writable by the backend, tx read-only) */
+				for (i = 0; i < ng->count; i++) {
+					gnttab_grant_foreign_access_ref(
+						ng->s[i].gref,
+						ng->otherend_id,
+						pfn_to_mfn(ng->s[i].paddr),
+						(!ng->is_rx));
+				}
+
+				if (ng->is_rx) {
+					ring.rx = ng->u.rx_ring;
+					/* sanity check against the kernel's
+					 * own ring pointer */
+					if (&ng->info->rx != ring.rx) {
+						pr_err(
+						"bad info or rx ring addr\n");
+						return -(ENOSYS);
+					}
+					ring.rx->req_prod_pvt += ng->count;
+					RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(
+							ring.rx, notify);
+				} else {
+					ring.tx = ng->u.tx_ring;
+					if (&ng->info->tx != ring.tx) {
+						pr_err(
+						"bad info or tx ring addr\n");
+						return -(ENOSYS);
+					}
+					ring.tx->req_prod_pvt += ng->count;
+					RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(
+							ring.tx, notify);
+				}
+
+				if (notify)
+					notify_remote_via_evtchn(ng->port);
+			}
+
+			rc = 0;
+		}
+		break;
+	default:
+		rc = -ENOSYS;
+		break;
+	}
+	return rc;
+}
+
+/* only ioctl is implemented; all data exchange goes through the
+ * UIO-mapped shared pages */
+static const struct file_operations xennet_fops = {
+	.owner   = THIS_MODULE,
+	.read    = NULL/*xennet_read*/,
+	.write   = NULL/*xennet_write*/,
+	.unlocked_ioctl = xennet_ioctl,
+	.poll    = NULL/*xennet_poll*/,
+	.fasync  = NULL/*xennet_fasync*/,
+	.open    = NULL/*xennet_open*/,
+	.mmap    = NULL/*xennet_mmap*/,
+	.release = NULL/*xennet_release*/,
+	.llseek  = no_llseek,
+};
+
+/* misc char device (/dev/xen/pmd_uio) carrying the ioctl interface */
+static struct miscdevice xennet_miscdev = {
+	.minor        = MISC_DYNAMIC_MINOR,
+	.name         = XEN_PMD_UIO_NAME,
+	.fops         = &xennet_fops,
+};
+
+/*
+ * /proc read handler exposing the domain type string.  Successive reads
+ * consume domain_len until it reaches 0 (EOF); the zero-length read
+ * then re-arms domain_len for the next reader.
+ * NOTE(review): the re-arm uses sizeof(domain_name), not the string's
+ * actual length - confirm domain_len's initial value elsewhere matches.
+ */
+static ssize_t read_domain(struct file *f, char __user *buf,
+		size_t count, loff_t *off)
+{
+	if (count > domain_len)
+		count = domain_len;
+
+	if (copy_to_user(buf, domain_name, count))
+		return -EFAULT;
+
+	domain_len = (count ? domain_len - count : sizeof(domain_name));
+
+	return count;
+}
+
+/* read-only /proc/<DOMAIN_PROC> file operations */
+static const struct file_operations domain_fops = {
+	.owner = THIS_MODULE,
+	.read = read_domain,
+};
+
+/*
+ * Module init: refuse to load outside a Xen domain, publish the domain
+ * type via /proc, register the misc char device and the xenbus
+ * frontend driver.  Each step is unwound if a later one fails.
+ */
+static int __init netif_init(void)
+{
+	int err;
+
+	if (!xen_domain()) {
+		/* pr_err() supplies its own log level; a KERN_INFO
+		 * prefix here would end up inside the message text */
+		pr_err("xen bare hw\n");
+		return -ENODEV;
+	}
+
+	pr_info("xen %s domain\n", domains[xen_domain_type]);
+
+	snprintf(domain_name, sizeof(domain_name),
+			"%s\n", domains[xen_domain_type]);
+
+	if (!xen_feature(XENFEAT_auto_translated_physmap))
+		pr_info("feature auto_translated_physmap is disabled\n");
+
+	pr_info("gnttab version: %d\n", (int)__gnttab_version());
+
+	domain_proc = proc_create(DOMAIN_PROC, S_IRUGO, NULL, &domain_fops);
+	if (domain_proc == NULL) {
+		pr_err("could not create /proc/%s\n", DOMAIN_PROC);
+		return -ENOMEM;
+	}
+
+	pr_info("/proc/%s created\n", DOMAIN_PROC);
+
+	err = misc_register(&xennet_miscdev);
+	if (err != 0) {
+		pr_err("could not register char device\n");
+		goto fail_proc;
+	}
+
+	pr_info("initialising xen virtual ethernet driver\n");
+
+	err = xenbus_register_frontend(&xenuio_driver);
+	if (err != 0)
+		goto fail_misc;
+
+	return 0;
+
+fail_misc:
+	misc_deregister(&xennet_miscdev);
+fail_proc:
+	remove_proc_entry(DOMAIN_PROC, NULL);
+	return err;
+}
+module_init(netif_init);
+
+/* Module exit: unwind netif_init() in reverse order */
+static void __exit netif_exit(void)
+{
+	remove_proc_entry(DOMAIN_PROC, NULL);
+
+	xenbus_unregister_driver(&xenuio_driver);
+
+	misc_deregister(&xennet_miscdev);
+}
+module_exit(netif_exit);
+
+MODULE_DESCRIPTION("Xen virtual network device frontend");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("xen:vif");
+MODULE_ALIAS("xennet");
-- 
2.1.4

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [dpdk-dev] [PATCH 4/4] xen: net-front poll mode driver
  2015-02-14 18:06 [dpdk-dev] [PATCH 1/4] xen: allow choosing dom0 support at runtime Stephen Hemminger
  2015-02-14 18:06 ` [dpdk-dev] [PATCH 2/4] xen: add phys-addr command line argument Stephen Hemminger
  2015-02-14 18:06 ` [dpdk-dev] [PATCH 3/4] xen: add uio driver Stephen Hemminger
@ 2015-02-14 18:06 ` Stephen Hemminger
  2015-02-14 19:31   ` Neil Horman
  2015-02-14 19:25 ` [dpdk-dev] [PATCH 1/4] xen: allow choosing dom0 support at runtime Neil Horman
  3 siblings, 1 reply; 6+ messages in thread
From: Stephen Hemminger @ 2015-02-14 18:06 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

This driver implements a DPDK poll-mode driver providing the same
functionality as the net-front driver in the Linux kernel.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 config/common_linuxapp                |   6 +
 lib/Makefile                          |   1 +
 lib/librte_eal/common/eal_private.h   |   7 +
 lib/librte_eal/linuxapp/eal/eal.c     |   8 +
 lib/librte_pmd_xen/Makefile           |  30 ++
 lib/librte_pmd_xen/virt_dev.c         | 400 +++++++++++++++++++++++++
 lib/librte_pmd_xen/virt_dev.h         |  30 ++
 lib/librte_pmd_xen/xen_adapter_info.h |  64 ++++
 lib/librte_pmd_xen/xen_dev.c          | 369 +++++++++++++++++++++++
 lib/librte_pmd_xen/xen_dev.h          |  96 ++++++
 lib/librte_pmd_xen/xen_logs.h         |  23 ++
 lib/librte_pmd_xen/xen_rxtx.c         | 546 ++++++++++++++++++++++++++++++++++
 lib/librte_pmd_xen/xen_rxtx.h         | 110 +++++++
 mk/rte.app.mk                         |   4 +
 14 files changed, 1694 insertions(+)
 create mode 100644 lib/librte_pmd_xen/Makefile
 create mode 100644 lib/librte_pmd_xen/virt_dev.c
 create mode 100644 lib/librte_pmd_xen/virt_dev.h
 create mode 100644 lib/librte_pmd_xen/xen_adapter_info.h
 create mode 100644 lib/librte_pmd_xen/xen_dev.c
 create mode 100644 lib/librte_pmd_xen/xen_dev.h
 create mode 100644 lib/librte_pmd_xen/xen_logs.h
 create mode 100644 lib/librte_pmd_xen/xen_rxtx.c
 create mode 100644 lib/librte_pmd_xen/xen_rxtx.h

diff --git a/config/common_linuxapp b/config/common_linuxapp
index d428f84..668fc8d 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -232,6 +232,12 @@ CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
 
 #
+# Compile XEN net-front PMD driver
+#
+CONFIG_RTE_LIBRTE_XEN_PMD=n
+CONFIG_RTE_LIBRTE_XEN_DEBUG_INIT=n
+
+#
 # Do prefetch of packet data within PMD driver receive function
 #
 CONFIG_RTE_PMD_PACKET_PREFETCH=y
diff --git a/lib/Makefile b/lib/Makefile
index d617d81..f405e40 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -52,6 +52,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET) += librte_pmd_af_packet
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += librte_pmd_virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += librte_pmd_vmxnet3
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt
+DIRS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += librte_pmd_xen
 DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
 DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 159cd66..0614607 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -128,6 +128,13 @@ int rte_eal_log_init(const char *id, int facility);
  */
 int rte_eal_pci_init(void);
 
+#ifdef RTE_LIBRTE_XEN_PMD
+/**
+ * Init of the xen driver
+ */
+extern int rte_xen_pmd_init(void);
+#endif
+
 #ifdef RTE_LIBRTE_IVSHMEM
 /**
  * Init the memory from IVSHMEM devices
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index f99e158..4e60b7c 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -760,6 +760,14 @@ rte_eal_init(int argc, char **argv)
 		rte_panic("Cannot init IVSHMEM\n");
 #endif
 
+#ifdef RTE_LIBRTE_XEN_PMD
+	ret = rte_xen_pmd_init();
+	if (ret != 0) {
+		RTE_LOG(ERR, PMD, "Cannot init xen PMD\n");
+		return ret;
+	}
+#endif /* RTE_LIBRTE_XEN_PMD */
+
 	if (rte_eal_memory_init() < 0)
 		rte_panic("Cannot init memory\n");
 
diff --git a/lib/librte_pmd_xen/Makefile b/lib/librte_pmd_xen/Makefile
new file mode 100644
index 0000000..d294d03
--- /dev/null
+++ b/lib/librte_pmd_xen/Makefile
@@ -0,0 +1,30 @@
+#
+#   Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+#   All rights reserved.
+#
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_xen.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+VPATH += $(RTE_SDK)/lib/librte_pmd_xen
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += virt_dev.c
+SRCS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += xen_dev.c
+SRCS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += xen_rxtx.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += lib/librte_eal lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += lib/librte_mempool lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += lib/librte_net lib/librte_malloc
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_pmd_xen/virt_dev.c b/lib/librte_pmd_xen/virt_dev.c
new file mode 100644
index 0000000..f824977
--- /dev/null
+++ b/lib/librte_pmd_xen/virt_dev.c
@@ -0,0 +1,400 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include <fcntl.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+
+#include "virt_dev.h"
+
+struct uio_map {
+	void *addr;
+	uint64_t offset;
+	uint64_t size;
+	uint64_t phaddr;
+};
+
+struct uio_resource {
+	TAILQ_ENTRY(uio_resource) next;
+	struct rte_pci_addr pci_addr;
+	char path[PATH_MAX];
+	size_t nb_maps;
+	struct uio_map maps[PCI_MAX_RESOURCE];
+};
+
+/*
+ * Read a single numeric value (any strtoull base, newline-terminated)
+ * from a sysfs attribute file into *val.
+ * Returns 0 on success, -1 on open/read/parse failure (logged).
+ */
+static int
+virt_parse_sysfs_value(const char *filename, uint64_t *val)
+{
+	FILE *f;
+	char buf[BUFSIZ];
+	char *end = NULL;
+
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "cannot open sysfs value %s", filename);
+		return -1;
+	}
+
+	if (fgets(buf, sizeof(buf), f) == NULL) {
+		RTE_LOG(ERR, EAL, "cannot read sysfs value %s", filename);
+		fclose(f);
+		return -1;
+	}
+
+	/* sysfs values are newline-terminated; anything else is a
+	 * parse error */
+	*val = strtoull(buf, &end, 0);
+	if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
+		RTE_LOG(ERR, EAL, "cannot parse sysfs value %s", filename);
+		fclose(f);
+		return -1;
+	}
+
+	fclose(f);
+	return 0;
+}
+
+#define OFF_MAX ((uint64_t)(off_t)-1)
+/*
+ * Walk <devname>/maps/map0..mapN-1 in sysfs and fill maps[] with each
+ * mapping's offset, size and physical address.  Stops at the first
+ * missing map directory.
+ * Returns the number of mappings found, or -1 on parse error.
+ */
+static ssize_t
+virt_uio_get_mappings(const char *devname, struct uio_map maps[],
+		      size_t nb_maps)
+{
+	size_t i;
+	char dirname[PATH_MAX];
+	char filename[PATH_MAX];
+	uint64_t offset, size;
+
+	for (i = 0; i != nb_maps; i++) {
+
+		snprintf(dirname, sizeof(dirname),
+				"%s/maps/map%zu", devname, i);
+
+		/* no more map directories: done */
+		if (access(dirname, F_OK) != 0)
+			break;
+
+		snprintf(filename, sizeof(filename), "%s/offset", dirname);
+		if (virt_parse_sysfs_value(filename, &offset) < 0) {
+			RTE_LOG(ERR, EAL, "cannot parse offset of %s",
+					dirname);
+			return -1;
+		}
+
+		snprintf(filename, sizeof(filename), "%s/size", dirname);
+		if (virt_parse_sysfs_value(filename, &size) < 0) {
+			RTE_LOG(ERR, EAL, "cannot parse size of %s", dirname);
+			return -1;
+		}
+
+		snprintf(filename, sizeof(filename), "%s/addr", dirname);
+		if (virt_parse_sysfs_value(filename, &maps[i].phaddr) < 0) {
+			RTE_LOG(ERR, EAL, "cannot parse addr of %s", dirname);
+			return -1;
+		}
+
+		/* values must fit the narrower off_t/size_t used later */
+		if ((offset > OFF_MAX) || (size > SIZE_MAX)) {
+			RTE_LOG(ERR, EAL,
+					"offset/size exceed system max value");
+			return -1;
+		}
+
+		maps[i].offset = offset;
+		maps[i].size = size;
+	}
+
+	return i;
+}
+
+/*
+ * mmap() @size bytes of @devname at @offset.  If @requested_addr is
+ * non-NULL the mapping must land exactly there, otherwise it fails.
+ * Returns the mapped address or NULL on error.  The fd is closed in
+ * all cases: an established mapping stays valid after close(2), and
+ * keeping the descriptor open would leak one fd per mapping.
+ */
+static void *
+virt_map_resource(void *requested_addr, const char *devname, off_t offset,
+		  size_t size)
+{
+	int fd;
+	void *mapaddr;
+
+	fd = open(devname, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s",
+				devname, strerror(errno));
+		return NULL;
+	}
+
+	mapaddr = mmap(0, size, PROT_READ | PROT_WRITE,
+			MAP_SHARED, fd, offset);
+	if (mapaddr == MAP_FAILED || (requested_addr != NULL &&
+				mapaddr != requested_addr)) {
+		RTE_LOG(ERR, EAL,
+				"cannot mmap(%s(%d), %p, 0x%lx, 0x%lx): %s (%p)",
+				devname, fd, requested_addr,
+				(unsigned long)size, (unsigned long)offset,
+				strerror(errno), mapaddr);
+		close(fd);
+		return NULL;
+	}
+
+	/* mapping persists after close; do not leak the descriptor */
+	close(fd);
+
+	RTE_LOG(DEBUG, EAL, "memory mapped at %p", mapaddr);
+
+	return mapaddr;
+}
+
+/*
+ * Release every mapping previously established by
+ * virt_uio_map_addresses().  Slots that were never mapped (NULL
+ * address or zero length) are skipped.
+ */
+void
+virt_uio_unmap_addresses(void **addresses, size_t *lens, int max_addresses)
+{
+	int idx;
+
+	for (idx = 0; idx < max_addresses; idx++) {
+		if (!addresses[idx] || !lens[idx])
+			continue;
+		munmap(addresses[idx], lens[idx]);
+		RTE_LOG(DEBUG, EAL, "memory umnmapped %p %d",
+				addresses[idx], (int)lens[idx]);
+	}
+}
+
+/*
+ * Locate the uioN entry under @dirname, read its sysfs map descriptions
+ * and mmap each map from /dev/uioN, recording the mapped addresses and
+ * lengths in @addresses/@lens (up to @max_addresses entries).
+ * Returns 0 on success, negative on failure.  Partially created
+ * mappings are left for the caller to release with
+ * virt_uio_unmap_addresses().
+ */
+int
+virt_uio_map_addresses(const char *dirname, void **addresses, size_t *lens,
+		       int max_addresses)
+{
+	int j;
+	DIR *dir;
+	struct dirent *e;
+	char dirname2[PATH_MAX];
+	char devname[PATH_MAX];
+	unsigned uio_num;
+	struct uio_resource *uio_res;
+	struct uio_map *maps;
+	uint64_t pagesz;
+	ssize_t nb_maps;
+	uint64_t offset;
+	void *mapaddr;
+
+	RTE_LOG(DEBUG, EAL, "dirname %s", dirname);
+
+	dir = opendir(dirname);
+
+	if (!dir) {
+		RTE_LOG(ERR, EAL, "Cannot opendir %s", dirname);
+		return -1;
+	}
+
+	/* find the first uio<N> entry and remember its number */
+	while ((e = readdir(dir)) != NULL) {
+
+		int shortprefix_len = sizeof("uio") - 1;
+		char *endptr;
+
+		if (strncmp(e->d_name, "uio", 3) != 0)
+			continue;
+
+		errno = 0;
+		uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10);
+		if (errno == 0 && endptr != e->d_name) {
+			snprintf(dirname2, sizeof(dirname2), "%s/uio%u",
+					dirname, uio_num);
+			break;
+		}
+	}
+	closedir(dir);
+
+	if (!e) {
+		RTE_LOG(ERR, EAL, "dirname %s not managed, skipping",
+				dirname);
+		return -1;
+	}
+
+	uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
+	if (uio_res == NULL) {
+		RTE_LOG(ERR, EAL, "cannot store uio mmap details");
+		return -1;
+	}
+
+	snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
+	snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
+
+	nb_maps = virt_uio_get_mappings(dirname2, uio_res->maps,
+			sizeof(uio_res->maps) / sizeof(uio_res->maps[0]));
+	if (nb_maps < 0) {
+		rte_free(uio_res);
+		return nb_maps;
+	}
+
+	uio_res->nb_maps = nb_maps;
+	pagesz = sysconf(_SC_PAGESIZE);
+	maps = uio_res->maps;
+
+	for (j = 0; j < nb_maps && j < max_addresses; j++) {
+		offset = j * pagesz;
+		mapaddr = virt_map_resource(NULL, devname,
+				(off_t)offset, (size_t)maps[j].size);
+		if (maps[j].addr || !mapaddr) {
+			rte_free(uio_res);
+			return -1;
+		}
+		maps[j].addr = mapaddr;
+		maps[j].offset = offset;
+		addresses[j] = mapaddr;
+		lens[j] = (size_t)maps[j].size;
+	}
+
+	/* uio_res is only scratch space here (it is not registered in
+	 * any list); free it so it does not leak */
+	rte_free(uio_res);
+
+	return 0;
+}
+
+/*
+ * Allocate an rte_eth_dev for @name and, in the primary process, its
+ * private data area of @dev_private_size bytes.  Panics on private
+ * data allocation failure (mirrors the PCI ethdev init path).
+ * Returns the device or NULL if rte_eth_dev_allocate() failed.
+ */
+static struct
+rte_eth_dev *virt_eth_dev_allocate(const char *name,
+				   struct eth_driver *eth_drv,
+				   unsigned dev_private_size)
+{
+	struct rte_eth_dev *eth_dev;
+
+	eth_dev = rte_eth_dev_allocate(name);
+	if (!eth_dev) {
+		RTE_LOG(ERR, EAL, "virt eth_dev allocation was failed (%d)",
+				ENOMEM);
+		return NULL;
+	}
+
+	/* secondary processes share the primary's private data */
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		eth_dev->data->dev_private =
+			rte_zmalloc("eth_dev private data structure",
+				    dev_private_size, RTE_CACHE_LINE_SIZE);
+		if (!eth_dev->data->dev_private)
+			rte_panic("virt eth_dev private data allocation was failed\n");
+	}
+
+	eth_dev->driver = eth_drv;
+	eth_dev->data->rx_mbuf_alloc_failed = 0;
+
+	TAILQ_INIT(&(eth_dev->callbacks));
+
+	return eth_dev;
+}
+
+/*
+ * Create and initialize one virtual ethdev: allocate a fake
+ * rte_pci_device, allocate the rte_eth_dev, then run the generic
+ * eth_dev_init hook followed by the virt-specific hook (which maps the
+ * UIO regions from @dirname).
+ * Returns 0 on success or a negative errno.
+ * NOTE(review): on a hook failure the eth_dev remains allocated while
+ * its pci_dev is freed - there is no ethdev release API in this code
+ * base to undo rte_eth_dev_allocate(); confirm this is acceptable.
+ */
+static int
+virt_eth_dev_init(const char *name,
+		      struct virt_eth_driver *virt_eth_drv,
+		      const char *dirname)
+{
+	int err = -ENOMEM;
+	struct rte_eth_dev *eth_dev;
+	struct eth_driver *eth_drv = &virt_eth_drv->eth_driver;
+	struct rte_pci_device *dev;
+
+	dev = malloc(sizeof(*dev));
+	if (dev == NULL)
+		goto error;
+
+	eth_dev = virt_eth_dev_allocate(name, eth_drv, eth_drv->dev_private_size);
+	if (!eth_dev)
+		goto error;
+
+	/* no real PCI device behind this port */
+	dev->numa_node = -1;
+	dev->driver = &eth_drv->pci_drv;
+	eth_dev->pci_dev = dev;
+
+	if (eth_drv->eth_dev_init) {
+		err = (*eth_drv->eth_dev_init)(eth_drv, eth_dev);
+		if (err) {
+			RTE_LOG(ERR, EAL, "eth_dev_init was failed (%d)", err);
+			goto error;
+		}
+	}
+
+	if (virt_eth_drv->virt_eth_dev_init) {
+		err = (*virt_eth_drv->virt_eth_dev_init)(virt_eth_drv, eth_dev,
+							 dirname);
+		if (err) {
+			RTE_LOG(ERR, EAL, "virt eth_dev_init was failed (%d)",
+					err);
+			goto error;
+		}
+	}
+
+	return 0;
+error:
+	free(dev);
+	return err;
+}
+
+#define PROC_MODULES "/proc/modules"
+/*
+ * Check whether kernel module @module_name is loaded by scanning
+ * /proc/modules (first whitespace-separated token of each line).
+ * Returns 0 if loaded or @module_name is NULL, -1 otherwise.
+ */
+static int
+virt_uio_check_module(const char *module_name)
+{
+	FILE *f;
+	unsigned i;
+	char buf[BUFSIZ];
+
+	if (module_name == NULL)
+		return 0;
+
+	f = fopen(PROC_MODULES, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot open "PROC_MODULES": %s\n",
+				strerror(errno));
+		return -1;
+	}
+
+	while (fgets(buf, sizeof(buf), f) != NULL) {
+
+		/* truncate the line at the first whitespace so buf
+		 * holds only the module name */
+		for (i = 0; i < sizeof(buf) && buf[i] != '\0'; i++) {
+			if (isspace(buf[i]))
+				buf[i] = '\0';
+		}
+
+		if (strncmp(buf, module_name, sizeof(buf)) == 0) {
+			fclose(f);
+			return 0;
+		}
+	}
+
+	fclose(f);
+	return -1;
+}
+
+/*
+ * Register a virtual ethdev driver: verify its kernel module is loaded
+ * (if one is declared), then scan sysfs_unbind_dir and create one
+ * ethdev for every directory the driver recognizes via
+ * is_eth_device_dir().
+ * Returns 0 on success, -1 on any failure.
+ */
+int
+virt_eth_driver_register(struct virt_eth_driver *virt_eth_drv)
+{
+	struct dirent *e;
+	DIR *dir;
+	char dirname[PATH_MAX];
+
+	if (virt_eth_drv->module_name) {
+		RTE_LOG(DEBUG, EAL, "module name: \"%s\", driver name: \"%s\"",
+			virt_eth_drv->module_name,
+			virt_eth_drv->eth_driver.pci_drv.name);
+
+		if (virt_uio_check_module(virt_eth_drv->module_name) != 0) {
+			RTE_LOG(ERR, EAL, "The %s is required by %s driver\n",
+				virt_eth_drv->module_name,
+				virt_eth_drv->eth_driver.pci_drv.name);
+			return -1;
+		}
+	}
+
+	dir = opendir(virt_eth_drv->sysfs_unbind_dir);
+	if (dir == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n", __func__,
+			strerror(errno));
+		return -1;
+	}
+
+	while ((e = readdir(dir)) != NULL) {
+		if (e->d_name[0] == '.')
+			continue;
+
+		/*create or not*/
+		if (!(virt_eth_drv->is_eth_device_dir(e->d_name)))
+			continue;
+
+		snprintf(dirname, sizeof(dirname), "%s/%s/uio",
+			 virt_eth_drv->sysfs_unbind_dir, e->d_name);
+		if (virt_eth_dev_init(e->d_name, virt_eth_drv, dirname) < 0)
+			goto error;
+	}
+	closedir(dir);
+	return 0;
+
+error:
+	closedir(dir);
+	return -1;
+}
diff --git a/lib/librte_pmd_xen/virt_dev.h b/lib/librte_pmd_xen/virt_dev.h
new file mode 100644
index 0000000..73223ee
--- /dev/null
+++ b/lib/librte_pmd_xen/virt_dev.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _VIRT_ETHDEV_H_
+#define _VIRT_ETHDEV_H_
+
+struct virt_eth_driver;
+
+typedef int (*virt_is_eth_device_dir_t)(const char *dir);
+typedef int (*virt_eth_dev_init_t)(struct virt_eth_driver *virt_eth_drv,
+				       struct rte_eth_dev *dev, const char *dirname);
+
+struct virt_eth_driver {
+	struct eth_driver	     eth_driver;
+	const char		     *sysfs_bind_dir;
+	const char                   *sysfs_unbind_dir;
+	virt_is_eth_device_dir_t is_eth_device_dir;
+	virt_eth_dev_init_t      virt_eth_dev_init;
+	const char	             *module_name;
+};
+
+int virt_eth_driver_register(struct virt_eth_driver *virt_eth_drv);
+int virt_uio_map_addresses(const char *dirname, void **addresses,
+			       size_t *lens, int max_addresses);
+void virt_uio_unmap_addresses(void **addresses,
+				  size_t *lens, int max_addresses);
+
+#endif /* _VIRT_ETHDEV_H_ */
diff --git a/lib/librte_pmd_xen/xen_adapter_info.h b/lib/librte_pmd_xen/xen_adapter_info.h
new file mode 100644
index 0000000..15d71ac
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_adapter_info.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef XEN_ADAPTER_INFO_H_
+#define XEN_ADAPTER_INFO_H_
+
+#define MAX_TARGET 256
+
+#define IOCTL_EVTCHN_NOTIFY_GRANT 7
+
+struct gref_addr {
+	grant_ref_t gref;
+	unsigned long paddr;
+};
+
+struct ioctl_evtchn_notify_grant {
+	unsigned int port;
+	int otherend_id;
+	uint16_t count;
+	uint8_t is_rx;
+	union {
+		struct xen_netif_rx_front_ring *rx_ring;
+		struct xen_netif_tx_front_ring *tx_ring;
+	} u;
+	struct netfront_info *info;
+	uint16_t rel_count;
+	grant_ref_t rel_gref[MAX_TARGET];
+	struct gref_addr s[MAX_TARGET];
+};
+
+#define XEN_PMD_UIO_NAME "xen/pmd_uio"
+
+enum {
+	INFO_MAP = 0,
+	RX_RING_MAP,
+	TX_RING_MAP,
+	XEN_MAP_MAX
+};
+
+struct xen_adapter_info {
+	/*global parameters */
+	struct xen_netif_rx_front_ring *rx_ring;
+	struct xen_netif_tx_front_ring *tx_ring;
+	struct netfront_info *info;
+
+	uint8_t is_connected;
+	uint8_t disconnect_count;
+
+	/*adapter specific data*/
+	int otherend_id;
+	unsigned int rx_evtchn;
+	unsigned int tx_evtchn;
+	u_int8_t mac[6];
+
+	/*params of grefs array*/
+	uint16_t rx_grefs_count;
+	uint16_t tx_grefs_count;
+	/* this field has to be the last */
+	grant_ref_t rxtx_grefs[];
+};
+
+#endif /* XEN_ADAPTER_INFO_H_ */
diff --git a/lib/librte_pmd_xen/xen_dev.c b/lib/librte_pmd_xen/xen_dev.c
new file mode 100644
index 0000000..8e94d38
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_dev.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include "xen_dev.h"
+#include "xen_rxtx.h"
+#include "virt_dev.h"
+
+#include <stdio.h>
+
+#include <sys/ioctl.h>
+#include <xen/sys/evtchn.h>
+
+#define XEN_MAX_RX_PKTLEN  0xFFFF
+#define XEN_MIN_RX_BUFSIZE (2 * PAGE_SIZE)
+
+static int xen_evt_fd = -1;
+
+extern int rte_xen_pmd_init(void);
+
+/* Refresh the RX notify-grant ioctl block from the shared info page. */
+void
+xen_set_rx_ng(struct xen_rx_queue *rxq)
+{
+	struct xen_adapter_info *ip = rxq->xa->info_page;
+
+	rxq->ng_rx.port = ip->rx_evtchn;
+	rxq->ng_rx.info = ip->info;
+	rxq->ng_rx.u.rx_ring = ip->rx_ring;
+	rxq->ng_rx.otherend_id = ip->otherend_id;
+}
+
+/* Refresh the TX notify-grant ioctl block from the shared info page. */
+void
+xen_set_tx_ng(struct xen_tx_queue *txq)
+{
+	struct xen_adapter_info *ip = txq->xa->info_page;
+
+	txq->ng_tx.port = ip->tx_evtchn;
+	txq->ng_tx.info = ip->info;
+	txq->ng_tx.u.tx_ring = ip->tx_ring;
+	txq->ng_tx.otherend_id = ip->otherend_id;
+}
+
+/* Issue the combined notify+grant ioctl.
+ * Returns 0 on success, the (positive) errno value on failure. */
+static int
+xen_evtchn_notify_grant_rxtx(struct ioctl_evtchn_notify_grant *ng)
+{
+	if (ioctl(xen_evt_fd, IOCTL_EVTCHN_NOTIFY_GRANT, ng) != 0)
+		return errno;
+
+	return 0;
+}
+
+/* Notify the backend about new RX grants; -1 if the event device is
+ * not open. */
+int
+xen_evtchn_notify_grant_rx(struct xen_rx_queue *rxq)
+{
+	if (unlikely(xen_evt_fd < 0))
+		return -1;
+
+	xen_set_rx_ng(rxq);
+
+	return xen_evtchn_notify_grant_rxtx(&rxq->ng_rx);
+}
+
+/* Notify the backend about new TX grants; -1 if the event device is
+ * not open. */
+int
+xen_evtchn_notify_grant_tx(struct xen_tx_queue *txq)
+{
+	if (unlikely(xen_evt_fd < 0))
+		return -1;
+
+	xen_set_tx_ng(txq);
+
+	return xen_evtchn_notify_grant_rxtx(&txq->ng_tx);
+}
+
+/* Plain event-channel kick (no grant work); -1 if the device is not
+ * open, otherwise the ioctl result. */
+static int
+xen_evtchn_notify_rxtx(unsigned int evtchn)
+{
+	struct ioctl_evtchn_notify notify;
+
+	if (xen_evt_fd < 0)
+		return -1;
+
+	notify.port = evtchn;
+
+	return ioctl(xen_evt_fd, IOCTL_EVTCHN_NOTIFY, &notify);
+}
+
+/* Kick both event channels (once only when RX and TX share one). */
+static int
+xen_evtchn_notify(struct xen_adapter *xa)
+{
+	const struct xen_adapter_info *ip = xa->info_page;
+	int res;
+
+	res = xen_evtchn_notify_rxtx(ip->tx_evtchn);
+	if (ip->rx_evtchn != ip->tx_evtchn)
+		res += xen_evtchn_notify_rxtx(ip->rx_evtchn);
+
+	return res;
+}
+
+/* Accumulate the per-queue software counters into *stats. */
+static void
+xen_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+	unsigned i;
+
+	PMD_INIT_FUNC_TRACE();
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		const struct xen_tx_queue *txq = dev->data->tx_queues[i];
+
+		/* Test the queue pointer itself: the previous check of
+		 * "&txq->tx_stats != NULL" could never be false (the
+		 * address of an embedded member is non-NULL) and was UB
+		 * when the queue pointer was NULL. */
+		if (txq == NULL)
+			continue;
+
+		stats->opackets += txq->tx_stats.opackets;
+		stats->obytes += txq->tx_stats.obytes;
+		stats->oerrors += txq->tx_stats.oerrors;
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		const struct xen_rx_queue *rxq = dev->data->rx_queues[i];
+
+		if (rxq == NULL)
+			continue;
+
+		stats->ipackets += rxq->rx_stats.ipackets;
+		stats->ierrors += rxq->rx_stats.ierrors;
+		stats->ibytes += rxq->rx_stats.ibytes;
+	}
+}
+
+/* Zero the per-queue software counters. */
+static void
+xen_dev_stats_reset(struct rte_eth_dev *dev)
+{
+	uint16_t i;
+
+	PMD_INIT_FUNC_TRACE();
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		struct xen_tx_queue *txq = dev->data->tx_queues[i];
+
+		/* Check the queue pointer, not the address of its
+		 * embedded stats member (which is never NULL). */
+		if (txq == NULL)
+			continue;
+
+		txq->tx_stats.opackets = 0;
+		txq->tx_stats.obytes = 0;
+		txq->tx_stats.oerrors = 0;
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		struct xen_rx_queue *rxq = dev->data->rx_queues[i];
+
+		if (rxq == NULL)
+			continue;
+
+		rxq->rx_stats.ipackets = 0;
+		rxq->rx_stats.ibytes = 0;
+		rxq->rx_stats.ierrors = 0;
+	}
+}
+
+/* Report the fixed capabilities of a vif: one RX/TX queue pair and a
+ * single MAC address. */
+static void
+xen_dev_info_get(__attribute__((unused)) struct rte_eth_dev *dev,
+		struct rte_eth_dev_info *dev_info)
+{
+	PMD_INIT_FUNC_TRACE();
+
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_rx_queues = 1;
+	dev_info->max_tx_queues = 1;
+	dev_info->max_rx_pktlen = XEN_MAX_RX_PKTLEN;
+	dev_info->min_rx_bufsize = XEN_MIN_RX_BUFSIZE;
+}
+
+/* Nothing to configure: ring parameters are fixed by the backend. */
+static int
+xen_dev_configure(__attribute__((unused)) struct rte_eth_dev *dev)
+{
+	PMD_INIT_FUNC_TRACE();
+	return 0;
+}
+
+/* No per-close teardown is performed here. */
+static void
+xen_dev_close(__attribute__((unused)) struct rte_eth_dev *dev)
+{
+	PMD_INIT_FUNC_TRACE();
+}
+
+/* Network frontends appear in sysfs as "vif-<id>" directories. */
+static int
+_xen_is_eth_device_dir(const char *dir)
+{
+	int devid;
+
+	if (sscanf(dir, "vif-%d", &devid) != 1)
+		return 0;
+
+	return 1;
+}
+
+/**
+ * Atomically writes the link status information into global
+ * structure rte_eth_dev.
+ *
+ * @param dev
+ *   Pointer to the rte_eth_dev structure whose dev_link is updated.
+ * @param link
+ *   Pointer to the new link status to store.
+ *
+ * @return
+ *   - On success, zero.
+ *   - On failure, negative value.
+ */
+static inline int
+xen_dev_atomic_write_link_status(struct rte_eth_dev *dev,
+		struct rte_eth_link *link)
+{
+	struct rte_eth_link *dst = &(dev->data->dev_link);
+	struct rte_eth_link *src = link;
+
+	/* Single atomic 64-bit store of the whole struct, using the
+	 * current value as the cmpset "expected" argument; it fails
+	 * only if dev_link changes between that read and the cmpset. */
+	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
+				*(uint64_t *)src) == 0)
+		return -1;
+
+	return 0;
+}
+
+/* The paravirtual link is always up: report a fixed 1G full-duplex
+ * link.  The struct is fully initialized because the whole 64-bit
+ * value is copied into dev->data->dev_link; the previous code left
+ * the remaining fields indeterminate. */
+static int
+xen_dev_link_update(struct rte_eth_dev *dev,
+		__attribute__((unused)) int wait_to_complete)
+{
+	struct rte_eth_link link = {
+		.link_status = 1,
+		.link_speed = ETH_LINK_SPEED_1000,
+		.link_duplex = ETH_LINK_FULL_DUPLEX,
+	};
+
+	PMD_INIT_FUNC_TRACE();
+
+	xen_dev_atomic_write_link_status(dev, &link);
+
+	return 0;
+}
+
+/* Publish the (always-up) link state, then kick the event channels so
+ * the backend starts servicing the rings. */
+static int
+xen_dev_start(struct rte_eth_dev *dev)
+{
+	struct xen_adapter *adapter = VA_XA(dev);
+
+	PMD_INIT_FUNC_TRACE();
+
+	xen_dev_link_update(dev, 0);
+	xen_evtchn_notify(adapter);
+
+	return 0;
+}
+
+/* Nothing to do on stop. */
+static void
+xen_dev_stop(__attribute__((unused)) struct rte_eth_dev *dev)
+{
+	PMD_INIT_FUNC_TRACE();
+}
+
+/* Poll the backend-written "connected" byte until it becomes non-zero,
+ * for at most @timeout seconds (checking every 100 ms).
+ *
+ * The byte lives in a page written by the kernel module / other
+ * domain, so it is read through a volatile pointer: without it the
+ * compiler may hoist the load out of the loop and spin forever.
+ *
+ * Returns 0 once set, -1 on timeout. */
+static int
+wait_uio_init(uint8_t *state, const uint32_t timeout)
+{
+	const volatile uint8_t *connected = state;
+	uint32_t i;
+
+	for (i = 0; i < timeout * 10; i++) {
+		if (*connected)
+			return 0;
+		usleep(100000);
+	}
+
+	return -1;
+}
+
+/* Minimal ethdev callback table: no MTU, promiscuous-mode or MAC
+ * management callbacks are provided. */
+static struct eth_dev_ops xen_eth_dev_ops = {
+	/*dev*/
+	.dev_configure        = xen_dev_configure,
+	.dev_close            = xen_dev_close,
+	.dev_start            = xen_dev_start,
+	.dev_stop             = xen_dev_stop,
+	.dev_infos_get        = xen_dev_info_get,
+	.link_update          = xen_dev_link_update,
+	/*rxtx*/
+	.stats_get            = xen_dev_stats_get,
+	.stats_reset          = xen_dev_stats_reset,
+	.rx_queue_setup       = xen_dev_rx_queue_setup,
+	.rx_queue_release     = xen_dev_rx_queue_release,
+	.tx_queue_setup       = xen_dev_tx_queue_setup,
+	.tx_queue_release     = xen_dev_tx_queue_release,
+};
+
+/* Per-device init: map the UIO resources exported by the xen_uio
+ * module, wait for the backend connection and publish the MAC
+ * address.  Returns 0 on success, -1 on failure (mappings released). */
+static int
+xen_dev_init(struct virt_eth_driver *virt_eth_drv __attribute__((unused)),
+	     struct rte_eth_dev *eth_dev, const char *dirname)
+{
+	int err = 0;
+
+	struct xen_adapter *xa = VA_XA(eth_dev);
+
+	PMD_INIT_FUNC_TRACE();
+
+	err = virt_uio_map_addresses(dirname, xa->uio_res, xa->uio_len,
+				     XEN_MAP_MAX);
+	if (err != 0) {
+		PMD_INIT_LOG(ERR, "virt_uio_map_addresses failed (%d)", err);
+		return -1;
+	}
+
+	eth_dev->dev_ops = &xen_eth_dev_ops;
+
+	xa->info_page =
+		(struct xen_adapter_info *)xa->uio_res[INFO_MAP];
+
+	if (wait_uio_init(&xa->info_page->is_connected, 3)) {
+		PMD_INIT_LOG(ERR, "no connection to xen_netback");
+		goto err_unmap;
+	}
+
+	PMD_INIT_LOG(DEBUG, "rx: %d,rx_evtchn: %d,tx: %d,tx_evtchn: %d",
+		     (int)xa->info_page->rx_grefs_count,
+		     (int)xa->info_page->rx_evtchn,
+		     (int)xa->info_page->tx_grefs_count,
+		     (int)xa->info_page->tx_evtchn);
+
+	/* copy mac-addr; the allocation was previously unchecked and
+	 * a failure would have crashed in memcpy */
+	eth_dev->data->mac_addrs = rte_malloc("xen", ETHER_ADDR_LEN, 0);
+	if (eth_dev->data->mac_addrs == NULL) {
+		PMD_INIT_LOG(ERR, "cannot allocate mac_addrs");
+		goto err_unmap;
+	}
+	memcpy(&eth_dev->data->mac_addrs->addr_bytes[0],
+	       &xa->info_page->mac[0], ETHER_ADDR_LEN);
+
+	return 0;
+
+err_unmap:
+	virt_uio_unmap_addresses(xa->uio_res, xa->uio_len, XEN_MAP_MAX);
+	return -1;
+}
+
+/* Registration record for the virt_ethdev scan: where to look for vif
+ * devices in sysfs and how to initialize each one. */
+static struct virt_eth_driver rte_xen_pmd = {
+	.eth_driver = {
+		.pci_drv = {
+			.name = "rte_xen_pmd",
+			.id_table = NULL,	/* not a real PCI device */
+		},
+		.dev_private_size = sizeof(struct xen_adapter),
+	},
+	.sysfs_unbind_dir = "/sys/bus/xen/devices",
+	.sysfs_bind_dir = "/sys/bus/xen/drivers",
+	.is_eth_device_dir = _xen_is_eth_device_dir,
+	.virt_eth_dev_init = xen_dev_init,
+	.module_name = "xen_uio",
+};
+
+/* Entry point: open the xen_uio event device and register the driver.
+ * A missing device node just means the module is not loaded, which is
+ * not an error for the application. */
+int
+rte_xen_pmd_init(void)
+{
+	PMD_INIT_FUNC_TRACE();
+
+	xen_evt_fd = open("/dev/"XEN_PMD_UIO_NAME, O_RDWR);
+	if (xen_evt_fd >= 0)
+		return virt_eth_driver_register(&rte_xen_pmd);
+
+	if (errno != ENOENT)
+		PMD_INIT_LOG(ERR, "cannot open event device %s",
+				"/dev/"XEN_PMD_UIO_NAME);
+
+	return 0;
+}
diff --git a/lib/librte_pmd_xen/xen_dev.h b/lib/librte_pmd_xen/xen_dev.h
new file mode 100644
index 0000000..cde6343
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_dev.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _XEN_ETHDEV_H_
+#define _XEN_ETHDEV_H_
+
+#include <assert.h>
+#include <sys/user.h>
+#include <inttypes.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include <xen/io/netif.h>
+
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+#include <rte_spinlock.h>
+#include <rte_memzone.h>
+
+#include "xen_logs.h"
+
+#include "xen_adapter_info.h"
+
+typedef uint64_t u64;
+
+/* Shared pages are Xen's fixed 4 KiB; override any system PAGE_SIZE. */
+#undef  PAGE_SIZE
+#define PAGE_SHIFT            12
+#define PAGE_SIZE             (1 << PAGE_SHIFT)
+
+#define __phys_to_pfn(paddr)  ((unsigned long)((paddr) >> PAGE_SHIFT))
+#define __pfn_to_phys(pfn)    ((phys_addr_t)(pfn) << PAGE_SHIFT)
+
+/* Slot counts of the single-page shared rings. */
+#define NET_TX_RING_SIZE      __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
+#define NET_RX_RING_SIZE      __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
+
+#define RX_MAX_TARGET         min_t(int, NET_RX_RING_SIZE, 256)
+#define TX_MAX_TARGET         min_t(int, NET_TX_RING_SIZE, 256)
+
+#if __XEN_LATEST_INTERFACE_VERSION__ > 0x0003020a
+
+/* Attach a front ring to an already-initialized shared ring: unlike
+ * FRONT_RING_INIT, producer/consumer indexes are taken over from the
+ * shared ring instead of being reset. */
+#define FRONT_RING_ATTACH(_r, _s, __size) do {   \
+	(_r)->sring = (_s);                      \
+	(_r)->req_prod_pvt = (_s)->req_prod;     \
+	(_r)->rsp_cons = (_s)->rsp_prod;         \
+	(_r)->nr_ents = __RING_SIZE(_s, __size); \
+} while (0)
+
+#endif
+
+/* dev_private of a rte_eth_dev holds our struct xen_adapter. */
+#define VA_XA(eth_dev) \
+	(struct xen_adapter *)((eth_dev->data->dev_private))
+
+/* Type-safe minimum, Linux-kernel style.  The original expansion used
+ * "_x > _y ? _x : _y", which computes the MAXIMUM - making
+ * RX_MAX_TARGET/TX_MAX_TARGET the full ring size instead of capping
+ * them at 256 (MAX_TARGET, the notify-grant array bound). */
+#define min_t(t, x, y) ({   \
+	t _x = (x);         \
+	t _y = (y);         \
+	_x < _y ? _x : _y; })
+
+/* Per-device private data (lives in rte_eth_dev.dev_private). */
+struct xen_adapter {
+	/* it's a place for all uio resources */
+	void *uio_res[XEN_MAP_MAX];
+	size_t uio_len[XEN_MAP_MAX];
+
+	/*pointer to the info page (the INFO_MAP uio mapping)*/
+	struct xen_adapter_info *info_page;
+
+	/* Back-pointers to dev->data->{rx,tx}_queues, set on the first
+	 * queue setup. */
+	void **rx_queues;
+	void **tx_queues;
+};
+
+#include "xen_rxtx.h"
+
+void xen_set_rx_ng(struct xen_rx_queue *rxq);
+void xen_set_tx_ng(struct xen_tx_queue *txq);
+int xen_evtchn_notify_grant_rx(struct xen_rx_queue *rxq);
+int xen_evtchn_notify_grant_tx(struct xen_tx_queue *txq);
+
+/*rx*/
+int xen_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+		uint16_t nb_rx_desc, unsigned int socket_id,
+		const struct rte_eth_rxconf *rx_conf,
+		struct rte_mempool *mb_pool);
+void xen_dev_rx_queue_release(void *rxq);
+uint16_t xen_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts);
+
+/*tx*/
+int xen_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+		uint16_t nb_tx_desc, unsigned int socket_id,
+		const struct rte_eth_txconf *tx_conf);
+void xen_dev_tx_queue_release(void *txq);
+
+#endif /* _XEN_ETHDEV_H_ */
diff --git a/lib/librte_pmd_xen/xen_logs.h b/lib/librte_pmd_xen/xen_logs.h
new file mode 100644
index 0000000..2334db0
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_logs.h
@@ -0,0 +1,23 @@
+#ifndef _XEN_LOGS_H_
+#define _XEN_LOGS_H_
+
+
+#ifdef RTE_LIBRTE_XEN_DEBUG_INIT
+
+/* Init/debug trace: prefixed with the calling function name and
+ * flushed immediately so messages survive a crash during probe. */
+#define PMD_INIT_LOG(level, fmt, args...) \
+	do { \
+		RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args);\
+		fflush(stdout);\
+	} while (0)
+
+#else
+
+/* Compiled out unless RTE_LIBRTE_XEN_DEBUG_INIT is enabled. */
+#define PMD_INIT_LOG(level, fmt, args...) \
+	do { } while (0)
+
+#endif
+
+#define PMD_INIT_FUNC_TRACE() \
+	PMD_INIT_LOG(DEBUG, " >>")
+
+#endif /* _XEN_LOGS_H_ */
diff --git a/lib/librte_pmd_xen/xen_rxtx.c b/lib/librte_pmd_xen/xen_rxtx.c
new file mode 100644
index 0000000..c45e67a
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_rxtx.c
@@ -0,0 +1,546 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include "xen_dev.h"
+#include "xen_rxtx.h"
+#include "virt_dev.h"
+
+/* Physical address of the first payload byte of an mbuf. */
+#define RTE_MBUF_DATA_DMA_ADDR(mb)             \
+	(uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
+
+static void
+xen_rx_ring_init(struct xen_rx_queue *rxq)
+{
+	/* (Re)attach to the shared RX ring; used at queue setup and
+	 * after a backend reconnect. */
+	SHARED_RING_INIT(rxq->rxs);
+	FRONT_RING_ATTACH(&rxq->ring, rxq->rxs, PAGE_SIZE);
+	/* Prime the fresh ring with receive buffers. */
+	xen_dev_rx_send_requests(rxq);
+	/* Cache the epoch so the RX path can detect the next reconnect. */
+	rxq->rx_disconnect_count = rxq->xa->info_page->disconnect_count;
+	xen_set_rx_ng(rxq);
+}
+
+static void
+xen_tx_ring_init(struct xen_tx_queue *txq)
+{
+	/* (Re)attach to the shared TX ring; used at queue setup and
+	 * after a backend reconnect. */
+	SHARED_RING_INIT(txq->txs);
+	FRONT_RING_ATTACH(&txq->ring, txq->txs, PAGE_SIZE);
+	/* Reap anything the backend already completed. */
+	xen_dev_tx_recv_responses(txq);
+	/* Cache the epoch so the TX path can detect the next reconnect. */
+	txq->tx_disconnect_count = txq->xa->info_page->disconnect_count;
+	xen_set_tx_ng(txq);
+}
+
+int
+xen_dev_rx_send_requests(struct xen_rx_queue *rxq)
+{
+	/* Post one RX request (one granted mbuf page) per free ring
+	 * slot, then notify the backend through the notify-grant
+	 * ioctl.  On mbuf shortage it simply posts fewer slots.
+	 * Always returns 0. */
+	uint16_t i;
+	struct netif_rx_request *req;
+	RING_IDX req_prod = rxq->ring.req_prod_pvt;
+	RING_IDX prod = req_prod;
+	uint16_t free_space = RING_FREE_REQUESTS(&rxq->ring);
+
+	xen_set_rx_ng(rxq);
+
+	for (i = 0; i < free_space; i++) {
+		struct rte_mbuf *mbuf;
+
+		/* Ring indexes grow without bound; mask to a slot. */
+		prod = (req_prod + i) & (RING_SIZE(&rxq->ring) - 1);
+
+		req = RING_GET_REQUEST(&rxq->ring, prod);
+
+		mbuf = rte_pktmbuf_alloc(rxq->mb_pool);
+		if (unlikely(!mbuf)) {
+			PMD_INIT_LOG(ERR, "no mbuf");
+			break; /*skip*/
+		}
+
+		mbuf->ol_flags |= PKT_RX_IPV4_HDR;
+		rxq->mbuf[prod] = mbuf;
+
+		/*set data at the begin of the next page: a grant covers
+		 * exactly one page, so align the payload up to the next
+		 * page boundary*/
+		uint64_t phys_addr = RTE_MBUF_DATA_DMA_ADDR(mbuf);
+		uint64_t phys_addr_shifted =
+			(phys_addr + PAGE_SIZE - 1) &
+			(~((uint64_t)PAGE_SIZE - 1));
+		uint64_t shift =  phys_addr_shifted - phys_addr;
+
+		mbuf->data_off += shift;
+		rxq->ng_rx.s[i].gref = rxq->gref[prod];
+
+		/* Page frame number handed to the xen_uio ioctl. */
+		rxq->ng_rx.s[i].paddr = __phys_to_pfn(phys_addr_shifted);
+
+		req->gref = rxq->gref[prod];
+		req->id = prod;
+	}
+
+	rxq->ring.req_prod_pvt = (req_prod + i);
+
+	/* One ioctl grants the i new pages, releases the previously
+	 * queued rel_gref entries and kicks the event channel. */
+	rxq->ng_rx.count = i;
+	xen_evtchn_notify_grant_rx(rxq);
+	rxq->ng_rx.rel_count = 0;
+
+	return 0;
+}
+
+/* Validate an "extra info" response slot; any malformed case drops
+ * the queue back to the normal receive state. */
+static void
+xen_dev_rx_recv_extra(struct xen_rx_queue *rxq, struct netif_extra_info *extra)
+{
+	if (unlikely(!extra)) {
+		PMD_INIT_LOG(ERR, "Invalid rxq state transition: %d",
+				rxq->state);
+		rxq->state = RX_RESP_GENERAL;
+		/* Must return here: the checks below dereference the
+		 * pointer, so falling through was a NULL dereference. */
+		return;
+	}
+
+	if (unlikely(!extra->type ||
+				extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+		PMD_INIT_LOG(WARNING, "Invalid extra type: %d", extra->type);
+		rxq->state = RX_RESP_GENERAL;
+	}
+
+	if (!(extra->flags & XEN_NETIF_EXTRA_FLAG_MORE)) {
+		PMD_INIT_LOG(DEBUG, "No XEN_NETIF_EXTRA_FLAG_MORE");
+		rxq->state = RX_RESP_GENERAL;
+	}
+}
+
+static uint16_t
+xen_dev_rx_recv_responses(struct xen_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts)
+{
+	/* Drain up to nb_pkts complete packets from the RX ring.
+	 *
+	 * rxq->state drives reassembly:
+	 *   RX_RESP_GENERAL  - slot starts a new packet
+	 *   RX_RESP_EXTRA    - slot carries a netif_extra_info descriptor
+	 *   RX_RESP_CONTINUE - slot continues a segmented packet
+	 *
+	 * Every consumed slot's gref is queued in ng_rx.rel_gref for
+	 * release by the next notify-grant ioctl (done by the caller's
+	 * refill).  Returns the number of packets stored in rx_pkts. */
+	uint16_t nb_rx;
+	uint16_t i;
+	struct netif_rx_response *rsp;
+	struct netif_extra_info *extra = NULL;
+	RING_IDX rsp_cons = rxq->ring.rsp_cons;
+	RING_IDX cons = rsp_cons;
+	uint16_t work_todo;
+
+	nb_rx = 0;
+	work_todo = RING_HAS_UNCONSUMED_RESPONSES(&rxq->ring);
+	for (i = 0; i < work_todo && nb_rx < nb_pkts; i++) {
+		struct rte_mbuf *mbuf;
+
+		cons = (rsp_cons + i) & (RING_SIZE(&rxq->ring) - 1);
+
+		rsp = RING_GET_RESPONSE(&rxq->ring, cons);
+
+		PMD_INIT_LOG(DEBUG, "id:%u status:%u offset:%u flags:%x",
+				rsp->id, rsp->status, rsp->offset, rsp->flags);
+
+		/* The slot is consumed whatever happens below: queue
+		 * its gref for release. */
+		rxq->ng_rx.rel_gref[rxq->ng_rx.rel_count] = rxq->gref[cons];
+		rxq->ng_rx.rel_count++;
+
+		if (unlikely(rsp->status < 0)) {
+			/* Error slot: drop any half-assembled packet. */
+			PMD_INIT_LOG(WARNING, "bad rsp->status: %d",
+					rsp->status);
+			rte_pktmbuf_free(rxq->mbuf[cons]);
+			rxq->mbuf[cons] = NULL;
+			rxq->state = RX_RESP_GENERAL;
+			rxq->first_frag = rxq->prev_frag = NULL;
+			continue;
+		}
+
+		switch (rxq->state) {
+		case RX_RESP_GENERAL: /* normal receiving */
+			if (unlikely(rsp->flags & NETRXF_extra_info)) {
+				PMD_INIT_LOG(DEBUG,
+						"EXTRA_NETRXF_extra_info");
+				rxq->state = RX_RESP_EXTRA;
+				rte_pktmbuf_free(rxq->mbuf[cons]);
+				rxq->mbuf[cons] = NULL;
+				break;
+			}
+			/* normal receive */
+			if (rxq->mbuf[cons]) {
+				mbuf = rxq->mbuf[cons];
+				mbuf->port = rxq->port_id;
+				/* rsp->status is the data length for
+				 * non-error responses. */
+				mbuf->data_len = mbuf->pkt_len = rsp->status;
+				mbuf->data_off += rsp->offset;
+
+				if (rsp->flags & NETRXF_more_data) {
+					rxq->state = RX_RESP_CONTINUE;
+					rxq->first_frag =
+						rxq->prev_frag = mbuf;
+				} else {
+					/*send to the upper level*/
+					rx_pkts[nb_rx++] = mbuf;
+					rxq->rx_stats.ipackets++;
+					rxq->rx_stats.ibytes +=
+						mbuf->pkt_len;
+				}
+
+				rxq->mbuf[cons] = NULL;
+			} else {
+				PMD_INIT_LOG(WARNING, "no rxq->mbuf[%d]",
+						cons);
+				rxq->rx_stats.ierrors++;
+			}
+			break;
+
+		case RX_RESP_EXTRA: /* extra */
+			extra = (struct netif_extra_info *)rsp;
+			xen_dev_rx_recv_extra(rxq, extra);
+			rte_pktmbuf_free(rxq->mbuf[cons]);
+			rxq->mbuf[cons] = NULL;
+			break;
+
+		case RX_RESP_CONTINUE: /* packet is segmented */
+			if (rxq->mbuf[cons]) {
+				mbuf = rxq->mbuf[cons];
+				/* mbuf->in_port = rxq->port_id; */
+				mbuf->data_len = mbuf->pkt_len =
+					rsp->status;
+				mbuf->data_off += rsp->offset;
+
+				/* Chain onto the packet under assembly. */
+				rxq->first_frag->nb_segs++;
+				rxq->first_frag->pkt_len += mbuf->data_len;
+				rxq->prev_frag->next = mbuf;
+
+				if (rsp->flags & NETRXF_more_data)
+					rxq->prev_frag = mbuf;
+				else {
+					rxq->state = RX_RESP_GENERAL;
+					/*send to the upper level*/
+					rx_pkts[nb_rx++] = rxq->first_frag;
+					rxq->rx_stats.ipackets++;
+					rxq->rx_stats.ibytes += rxq->first_frag->pkt_len;
+					rxq->first_frag = rxq->prev_frag = NULL;
+				}
+
+				rxq->mbuf[cons] = NULL;
+			} else {
+				PMD_INIT_LOG(WARNING, "no cntn rxq->mbuf[%d]",
+						cons);
+				rxq->rx_stats.ierrors++;
+			}
+			break;
+		}
+
+		/* NOTE(review): redundant - every branch above already
+		 * cleared this slot. */
+		rxq->mbuf[cons] = NULL;
+	}
+	rxq->ring.rsp_cons = (rsp_cons + i);
+
+	return nb_rx;
+}
+
+/* rx_pkt_burst callback: drain responses, then refill the ring. */
+uint16_t
+xen_dev_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	struct xen_rx_queue *rxq = rx_queue;
+	uint16_t nb_rx = 0;
+
+	if (likely(rxq->xa->info_page->is_connected)) {
+		/* Backend reconnected since our last pass: rebuild the
+		 * ring from scratch before using it. */
+		if (unlikely(rxq->rx_disconnect_count !=
+					rxq->xa->info_page->disconnect_count)) {
+			xen_rx_queue_release(rxq);
+			xen_rx_ring_init(rxq);
+		}
+
+		nb_rx = xen_dev_rx_recv_responses(rxq, rx_pkts, nb_pkts);
+
+		/* Re-post buffers for the slots just consumed. */
+		xen_dev_rx_send_requests(rxq);
+	}
+
+	return nb_rx;
+}
+
+void
+xen_rx_queue_release(struct xen_rx_queue *rxq)
+{
+	/* Queue every ring slot's gref for release and free all posted
+	 * mbufs; leaves the queue ready for xen_rx_ring_init(). */
+	uint16_t i;
+
+	rxq->ng_rx.count = 0;	/* nothing new to grant */
+	rxq->ng_rx.rel_count = 0;
+
+	for (i = 0; i < (RING_SIZE(&rxq->ring)); i++) {
+		rxq->ng_rx.rel_gref[rxq->ng_rx.rel_count] =
+			rxq->gref[i];
+		rxq->ng_rx.rel_count++;
+		if (NULL != rxq->mbuf[i]) {
+			rte_pktmbuf_free(rxq->mbuf[i]);
+			rxq->mbuf[i] = NULL;
+		}
+	}
+	/* One ioctl releases all the queued grant refs. */
+	xen_evtchn_notify_grant_rx(rxq);
+}
+
+/* ethdev rx_queue_release callback: tear down and free the queue. */
+void
+xen_dev_rx_queue_release(void *rxq)
+{
+	struct xen_rx_queue *rx_q = rxq;
+
+	if (rx_q == NULL)
+		return;
+
+	xen_rx_queue_release(rx_q);
+	rte_free(rx_q);
+}
+
+int
+xen_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+		__attribute__((unused)) uint16_t nb_desc,
+		__attribute__((unused)) unsigned int socket_id,
+		__attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
+		struct rte_mempool *mp)
+{
+	/* Create the (single) RX queue on top of the UIO-mapped shared
+	 * ring.  Descriptor count, socket and rxconf are ignored: ring
+	 * size and placement are fixed by the kernel module. */
+	struct xen_rx_queue *rxq;
+	struct xen_adapter *xa = VA_XA(dev);
+	struct xen_adapter_info *info = xa->info_page;
+
+	/* One gref per ring slot is required. */
+	if (NET_RX_RING_SIZE > info->rx_grefs_count) {
+		PMD_INIT_LOG(ERR, "rx ring size greater than rx grefs count");
+		return -ENOMEM;
+	}
+
+	rxq = rte_zmalloc("rx_queue", sizeof(struct xen_rx_queue),
+			  RTE_CACHE_LINE_SIZE);
+	if (NULL == rxq) {
+		PMD_INIT_LOG(ERR, "rte_zmalloc for rxq failed!");
+		return -ENOMEM;
+	}
+
+	rxq->xa = xa;
+	rxq->queue_id = queue_idx;
+	rxq->port_id = dev->data->port_id;
+	rxq->state = RX_RESP_GENERAL;
+	rxq->first_frag = rxq->prev_frag = NULL;
+	rxq->mb_pool = mp;
+	rxq->ng_rx.is_rx = 1;
+	rxq->ng_rx.rel_count = 0;
+	/* RX grefs occupy the first part of the shared grefs array. */
+	rxq->gref = &info->rxtx_grefs[0];
+
+	rxq->rxs = (struct netif_rx_sring *)xa->uio_res[RX_RING_MAP];
+
+	dev->data->rx_queues[queue_idx] = rxq;
+	if (!xa->rx_queues)
+		xa->rx_queues = dev->data->rx_queues;
+
+	xen_rx_ring_init(rxq);
+
+	dev->rx_pkt_burst = xen_dev_recv_pkts;
+
+	return 0;
+}
+
+static void
+xen_dev_tx_prepare_request(struct xen_tx_queue *txq, uint16_t i, uint16_t size,
+		uint16_t offset, uint16_t flags, unsigned long paddr)
+{
+	/* Fill the i-th pending TX request (ring slot req_prod_pvt + i)
+	 * and the matching entry of the notify-grant scatter list. */
+	RING_IDX prod = (txq->ring.req_prod_pvt+i) & (RING_SIZE(&txq->ring)-1);
+	struct netif_tx_request *req = RING_GET_REQUEST(&txq->ring, prod);
+
+	txq->ng_tx.s[i].gref = txq->gref[prod];
+	txq->ng_tx.s[i].paddr = paddr;
+
+	req->id = prod;
+	req->flags = flags;
+	req->offset = offset;
+	req->gref = txq->gref[prod];
+	/* txq->mbuf[] is only set for a packet's head slot, where the
+	 * request carries the whole packet length. */
+	req->size = (txq->mbuf[prod] ? txq->mbuf[prod]->pkt_len : size);
+
+	PMD_INIT_LOG(DEBUG, "id:%u size:%u offset:%u gref:%u flags:%x",
+		req->id, req->size, req->offset, req->gref, req->flags);
+}
+
+static int
+xen_dev_tx_send_requests(struct xen_tx_queue *txq, struct rte_mbuf **tx_pkts,
+		uint16_t nb_pkts)
+{
+	/* Translate up to nb_pkts mbuf chains into TX ring requests.
+	 * Each segment becomes one request, or two when its data
+	 * crosses a page boundary (a grant covers a single page).
+	 * Returns the number of packets fully queued. */
+	struct rte_mbuf *mbuf;
+	unsigned long paddr;
+	uint16_t offset;
+	uint16_t flags;
+	uint16_t size;
+	uint16_t i = 0;
+	uint16_t nb_tx = 0;
+	uint16_t free_space = RING_FREE_REQUESTS(&txq->ring);
+
+	xen_set_tx_ng(txq);
+
+	while (i < free_space && nb_tx < nb_pkts) {
+
+		RING_IDX prod = (txq->ring.req_prod_pvt + i) &
+			(RING_SIZE(&txq->ring) - 1);
+		/* Head slot keeps the mbuf so the request size can be
+		 * the whole pkt_len and the completion can free it. */
+		txq->mbuf[prod] = mbuf = tx_pkts[nb_tx];
+
+		if (unlikely(NULL == mbuf)) {
+			PMD_INIT_LOG(WARNING, "no mbuf for req");
+			break;
+		}
+
+		/* each segment could be splited because of offset
+		 * so it must be twice */
+		if (i + (tx_pkts[nb_tx]->nb_segs * 2) > free_space)
+			break;
+
+		/* prepare request for each mbuf segment */
+		do {
+			size = mbuf->data_len;
+			flags = (mbuf->next ? NETTXF_more_data : 0);
+			paddr = __phys_to_pfn(RTE_MBUF_DATA_DMA_ADDR(mbuf));
+			offset = (RTE_MBUF_DATA_DMA_ADDR(mbuf)) &
+				((uint64_t)PAGE_SIZE - 1);
+
+			/* check if additional segmentation is needed */
+			if (size + offset > PAGE_SIZE) {
+				/* Emit the part up to the page end ... */
+				size = PAGE_SIZE - offset;
+				xen_dev_tx_prepare_request(txq, i, size,
+					offset, NETTXF_more_data, paddr);
+				/* NOTE(review): paddr is a PFN (from
+				 * __phys_to_pfn) but is advanced here by
+				 * a byte count; advancing by one page
+				 * frame looks intended - verify against
+				 * the xen_uio module's expectations. */
+				paddr += size;
+				offset = (offset + size) % PAGE_SIZE;
+				size = mbuf->data_len - size;
+				i++;
+			}
+
+			/* ... then the (remaining) segment data. */
+			xen_dev_tx_prepare_request(txq, i, size,
+					offset, flags, paddr);
+			i++;
+
+		} while ((mbuf = mbuf->next));
+
+		nb_tx++;
+		txq->tx_stats.opackets++;
+		/* prod still indexes this packet's head slot here. */
+		txq->tx_stats.obytes += txq->mbuf[prod]->pkt_len;
+	}
+
+	txq->ring.req_prod_pvt += i;
+	/* Grant the i new slots and kick the backend in one ioctl. */
+	txq->ng_tx.count = i;
+	xen_evtchn_notify_grant_tx(txq);
+	txq->ng_tx.rel_count = 0;
+
+	return nb_tx;
+}
+
+int
+xen_dev_tx_recv_responses(struct xen_tx_queue *txq)
+{
+	/* Reap TX completions: queue each finished slot's grant ref
+	 * for release and free the mbuf kept in the packet's head
+	 * slot.  Always returns 0. */
+	uint16_t i;
+	struct netif_tx_response *rsp;
+	RING_IDX rsp_cons = txq->ring.rsp_cons;
+	RING_IDX cons;
+	uint16_t work_todo;
+
+	work_todo = RING_HAS_UNCONSUMED_RESPONSES(&txq->ring);
+	for (i = 0; i < work_todo; i++) {
+		cons = (rsp_cons + i) & (RING_SIZE(&txq->ring) - 1);
+
+		rsp = RING_GET_RESPONSE(&txq->ring, cons);
+
+		if (unlikely(rsp->status == NETIF_RSP_NULL))
+			PMD_INIT_LOG(WARNING, "NETIF_RSP_NULL");
+
+		txq->ng_tx.rel_gref[txq->ng_tx.rel_count] = txq->gref[cons];
+		txq->ng_tx.rel_count++;
+
+		/* Only a packet's head slot holds an mbuf pointer. */
+		if (likely(txq->mbuf[cons] != NULL)) {
+			rte_pktmbuf_free(txq->mbuf[cons]);
+			txq->mbuf[cons] = NULL;
+		}
+	}
+	txq->ring.rsp_cons = (rsp_cons + i);
+
+	return 0;
+}
+
+/* tx_pkt_burst callback: reap completions, then post new packets. */
+uint16_t
+xen_dev_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct xen_tx_queue *txq = tx_queue;
+	uint16_t nb_tx = 0;
+
+	if (likely(txq->xa->info_page->is_connected)) {
+		/* Backend reconnected since our last pass: rebuild the
+		 * ring from scratch before using it. */
+		if (unlikely(txq->tx_disconnect_count !=
+					txq->xa->info_page->disconnect_count)) {
+			xen_tx_queue_release(txq);
+			xen_tx_ring_init(txq);
+		}
+
+		/* Reap completions first to maximize free slots. */
+		xen_dev_tx_recv_responses(txq);
+
+		nb_tx = xen_dev_tx_send_requests(txq, tx_pkts, nb_pkts);
+	}
+
+	return nb_tx;
+}
+
+void
+xen_tx_queue_release(struct xen_tx_queue *txq)
+{
+	/* Free all in-flight mbufs and queue their grant refs for
+	 * release; leaves the queue ready for xen_tx_ring_init(). */
+	uint16_t i;
+
+	txq->ng_tx.count = 0;	/* nothing new to grant */
+	txq->ng_tx.rel_count = 0;
+
+	for (i = 0; i < (RING_SIZE(&txq->ring)); i++) {
+		/* Unlike the RX side, only slots still holding an mbuf
+		 * have their gref queued for release here. */
+		if (NULL != txq->mbuf[i]) {
+			rte_pktmbuf_free(txq->mbuf[i]);
+			txq->mbuf[i] = NULL;
+			txq->ng_tx.rel_gref[txq->ng_tx.rel_count] =
+				txq->gref[i];
+			txq->ng_tx.rel_count++;
+		}
+	}
+	xen_evtchn_notify_grant_tx(txq);
+}
+
+/* ethdev tx_queue_release callback: tear down and free the queue. */
+void
+xen_dev_tx_queue_release(void *txq)
+{
+	struct xen_tx_queue *tx_q = txq;
+
+	if (tx_q == NULL)
+		return;
+
+	xen_tx_queue_release(tx_q);
+	rte_free(tx_q);
+}
+
+int
+xen_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+		__attribute__((unused)) uint16_t nb_desc,
+		__attribute__((unused)) unsigned int socket_id,
+		__attribute__((unused)) const struct rte_eth_txconf *tx_conf)
+{
+	/* Create the (single) TX queue on top of the UIO-mapped shared
+	 * ring.  Descriptor count, socket and txconf are ignored: ring
+	 * size and placement are fixed by the kernel module. */
+	struct xen_tx_queue *txq;
+	struct xen_adapter *xa = VA_XA(dev);
+	struct xen_adapter_info *info = xa->info_page;
+
+	/* One gref per ring slot is required. */
+	if (NET_TX_RING_SIZE > info->tx_grefs_count) {
+		PMD_INIT_LOG(ERR, "tx ring size greater than tx grefs count");
+		return -ENOMEM;
+	}
+
+	txq = rte_zmalloc("tx_queue", sizeof(struct xen_tx_queue),
+			  RTE_CACHE_LINE_SIZE);
+	if (NULL == txq) {
+		PMD_INIT_LOG(ERR, "rte_zmalloc for txq failed!");
+		return -ENOMEM;
+	}
+
+	txq->txs = (struct netif_tx_sring *)xa->uio_res[TX_RING_MAP];
+
+	txq->xa = xa;
+	txq->queue_id = queue_idx;
+	txq->port_id = dev->data->port_id;
+	txq->ng_tx.is_rx = 0;
+	txq->ng_tx.rel_count = 0;
+	/* TX grefs follow the RX grefs in the shared grefs array. */
+	txq->gref = &info->rxtx_grefs[info->rx_grefs_count];
+
+	dev->data->tx_queues[queue_idx] = txq;
+	if (!xa->tx_queues)
+		xa->tx_queues = dev->data->tx_queues;
+
+	xen_tx_ring_init(txq);
+
+	dev->tx_pkt_burst = xen_dev_xmit_pkts;
+
+	return 0;
+}
diff --git a/lib/librte_pmd_xen/xen_rxtx.h b/lib/librte_pmd_xen/xen_rxtx.h
new file mode 100644
index 0000000..eea41c8
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_rxtx.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _XEN_RXTX_H_
+#define _XEN_RXTX_H_
+
+#define DEFAULT_RX_FREE_THRESH   0
+#define DEFAULT_TX_FREE_THRESH   512
+
+/* Software TX counters, reported via the stats_get callback. */
+struct xen_tx_stats {
+	uint64_t opackets;
+	uint64_t obytes;
+	uint64_t oerrors;
+};
+
+/* Software RX counters, reported via the stats_get callback. */
+struct xen_rx_stats {
+	uint64_t ipackets;
+	uint64_t ibytes;
+	uint64_t ierrors;
+};
+
+/* RX reassembly state machine (see xen_dev_rx_recv_responses). */
+enum rx_resp_state {
+	RX_RESP_GENERAL = 0,	/* expecting a packet's first slot */
+	RX_RESP_CONTINUE,	/* previous slot had NETRXF_more_data */
+	RX_RESP_EXTRA		/* previous slot announced extra info */
+};
+
+struct xen_rx_queue {
+	/** RX queue index. */
+	uint16_t                   queue_id;
+	/** Device port identifier. */
+	uint8_t                    port_id;
+	/** mbuf pool to populate RX ring. */
+	struct rte_mempool         *mb_pool;
+	/** Ptr to dev_private data. */
+	struct xen_adapter         *xa;
+
+	/* Xen specific */
+
+	/** Pointer to the xen rx ring shared with other end. */
+	netif_rx_front_ring_t      ring;
+	struct netif_rx_sring      *rxs;
+	/** Grefs for sharing with the other end. */
+	grant_ref_t                *gref;
+	/** Allocated for RING_IDX mbufs. */
+	struct rte_mbuf            *mbuf[NET_RX_RING_SIZE];
+	/** state machine */
+	enum rx_resp_state         state;
+	/** First packet segment. */
+	struct rte_mbuf            *first_frag;
+	/** Previous packet segment. */
+	struct rte_mbuf            *prev_frag;
+	/** Statistics. */
+	struct xen_rx_stats        rx_stats;
+	/** Number of disconnections. */
+	uint8_t                    rx_disconnect_count;
+	/** Notify and gnttab ioctl struct. */
+	struct ioctl_evtchn_notify_grant ng_rx;
+};
+
+struct xen_tx_queue {
+	/** TX queue index. */
+	uint16_t                   queue_id;
+	/** Device port identifier. */
+	uint8_t                    port_id;
+	/** Ptr to dev_private data */
+	struct xen_adapter         *xa;
+
+	/* Xen specific */
+
+	/** Pointer to the xen tx ring shared with other end. */
+	netif_tx_front_ring_t      ring;
+	struct netif_tx_sring      *txs;
+	/** Grefs for sharing with the other end. */
+	grant_ref_t                *gref;
+	/** Allocated for RING_IDX mbufs. */
+	struct rte_mbuf            *mbuf[NET_TX_RING_SIZE];
+	/** Statistics. */
+	struct xen_tx_stats        tx_stats;
+	/** Number of disconnections. */
+	uint8_t                    tx_disconnect_count;
+	/** Notify and gnttab ioctl struct. */
+	struct ioctl_evtchn_notify_grant ng_tx;
+};
+
+int xen_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+		uint16_t nb_rx_desc, unsigned int socket_id,
+		const struct rte_eth_rxconf *rx_conf,
+		struct rte_mempool *mb_pool);
+
+int xen_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+		uint16_t nb_tx_desc, unsigned int socket_id,
+		const struct rte_eth_txconf *tx_conf);
+
+void xen_dev_rx_queue_release(void *rxq);
+void xen_dev_tx_queue_release(void *txq);
+void xen_rx_queue_release(struct xen_rx_queue *rxq);
+void xen_tx_queue_release(struct xen_tx_queue *txq);
+
+int xen_dev_rx_send_requests(struct xen_rx_queue *rxq);
+int xen_dev_tx_recv_responses(struct xen_tx_queue *txq);
+
+uint16_t xen_dev_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+		uint16_t nb_pkts);
+uint16_t xen_dev_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts);
+
+#endif /* _XEN_RXTX_H_ */
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 334cb25..5d0927c 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -192,6 +192,10 @@ LDLIBS += -lrte_pmd_xenvirt
 LDLIBS += -lxenstore
 endif
 
+ifeq ($(CONFIG_RTE_LIBRTE_XEN_PMD),y)
+LDLIBS += -lrte_pmd_xen
+endif
+
 ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n)
 # plugins (link only if static libraries)
 
-- 
2.1.4

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [dpdk-dev] [PATCH 1/4] xen: allow choosing dom0 support at runtime
  2015-02-14 18:06 [dpdk-dev] [PATCH 1/4] xen: allow choosing dom0 support at runtime Stephen Hemminger
                   ` (2 preceding siblings ...)
  2015-02-14 18:06 ` [dpdk-dev] [PATCH 4/4] xen: net-front poll mode driver Stephen Hemminger
@ 2015-02-14 19:25 ` Neil Horman
  3 siblings, 0 replies; 6+ messages in thread
From: Neil Horman @ 2015-02-14 19:25 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Stephen Hemminger

On Sat, Feb 14, 2015 at 01:06:45PM -0500, Stephen Hemminger wrote:
> The previous code would only allow building library and application
> so that it ran on Xen DOM0 or not on DOM0. This changes that to
> a runtime flag.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>  lib/librte_eal/common/include/rte_memory.h |  4 +++
>  lib/librte_eal/linuxapp/eal/eal_memory.c   |  7 ++++
>  lib/librte_ether/rte_ethdev.c              | 22 ++++++++++++
>  lib/librte_ether/rte_ethdev.h              | 23 ++++++++++++
>  lib/librte_mempool/rte_mempool.c           | 26 +++++++-------
>  lib/librte_pmd_e1000/em_rxtx.c             | 30 +++-------------
>  lib/librte_pmd_e1000/igb_rxtx.c            | 52 +++++++++------------------
>  lib/librte_pmd_ixgbe/ixgbe_rxtx.c          | 58 +++++++++---------------------
>  8 files changed, 108 insertions(+), 114 deletions(-)
> 
> diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
> index 7f8103f..ab6c1ff 100644
> --- a/lib/librte_eal/common/include/rte_memory.h
> +++ b/lib/librte_eal/common/include/rte_memory.h
> @@ -176,6 +176,10 @@ unsigned rte_memory_get_nchannel(void);
>  unsigned rte_memory_get_nrank(void);
>  
>  #ifdef RTE_LIBRTE_XEN_DOM0
> +
> +/**< Internal use only - should DOM0 memory mapping be used */
> +extern int is_xen_dom0_supported(void);
> +
>  /**
>   * Return the physical address of elt, which is an element of the pool mp.
>   *
> diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
> index a67a1b0..4afda2a 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_memory.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
> @@ -98,6 +98,13 @@
>  #include "eal_filesystem.h"
>  #include "eal_hugepages.h"
>  
> +#ifdef RTE_LIBRTE_XEN_DOM0
> +int is_xen_dom0_supported(void)
> +{
> +	return internal_config.xen_dom0_support;
> +}
> +#endif
> +
>  /**
>   * @file
>   * Huge page mapping under linux
> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> index ea3a1fb..457e0bc 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -2825,6 +2825,27 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
>  	}
>  	rte_spinlock_unlock(&rte_eth_dev_cb_lock);
>  }
> +
> +const struct rte_memzone *
> +rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name,
> +			 uint16_t queue_id, size_t size, unsigned align,
> +			 int socket_id)
> +{
> +	char z_name[RTE_MEMZONE_NAMESIZE];
> +	const struct rte_memzone *mz;
> +
> +	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
> +		 dev->driver->pci_drv.name, ring_name,
> +		 dev->data->port_id, queue_id);
> +
> +	mz = rte_memzone_lookup(z_name);
> +	if (mz)
> +		return mz;
> +
> +	return rte_memzone_reserve_bounded(z_name, size,
> +					   socket_id, 0, align, RTE_PGSIZE_2M);
> +}
> +
>  #ifdef RTE_NIC_BYPASS
>  int rte_eth_dev_bypass_init(uint8_t port_id)
>  {
> @@ -3003,6 +3024,7 @@ rte_eth_dev_bypass_wd_reset(uint8_t port_id)
>  	(*dev->dev_ops->bypass_wd_reset)(dev);
>  	return 0;
>  }
> +
Nit: I think you meant to remove that space.

>  #endif
>  
>  int
> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> index 1200c1c..747acb5 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -3664,6 +3664,29 @@ int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_ty
>  int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
>  			enum rte_filter_op filter_op, void *arg);
>  
> +/**
> + * Create memzone for HW rings.
> + * malloc can't be used as the physical address is needed.
> + * If the memzone is already created, then this function returns a ptr
> + * to the old one.
> + *
> + * @param eth_dev
> + *   The *eth_dev* pointer is the address of the *rte_eth_dev* structure
> + * @param name
> + *   The name of the memory zone
> + * @param queue_id
> + *   The index of the queue to add to name
> + * @param size
> + *   The size of the memory area
> + * @param align
> + *   Alignment for resulting memzone. Must be a power of 2.
> + * @param socket_id
> + *   The *socket_id* argument is the socket identifier in case of NUMA.
> + */
> +const struct rte_memzone *
> +rte_eth_dma_zone_reserve(const struct rte_eth_dev *eth_dev, const char *name,
> +			 uint16_t queue_id, size_t size,
> +			 unsigned align, int socket_id);
I think this is an exported function, right?  It needs to be added to the version
map.

neil

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [dpdk-dev] [PATCH 4/4] xen: net-front poll mode driver
  2015-02-14 18:06 ` [dpdk-dev] [PATCH 4/4] xen: net-front poll mode driver Stephen Hemminger
@ 2015-02-14 19:31   ` Neil Horman
  0 siblings, 0 replies; 6+ messages in thread
From: Neil Horman @ 2015-02-14 19:31 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Stephen Hemminger

On Sat, Feb 14, 2015 at 01:06:48PM -0500, Stephen Hemminger wrote:
> This driver implements DPDK driver that has the same functionality
> as net-front driver in Linux kernel.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>  config/common_linuxapp                |   6 +
>  lib/Makefile                          |   1 +
>  lib/librte_eal/common/eal_private.h   |   7 +
>  lib/librte_eal/linuxapp/eal/eal.c     |   8 +
>  lib/librte_pmd_xen/Makefile           |  30 ++
>  lib/librte_pmd_xen/virt_dev.c         | 400 +++++++++++++++++++++++++
>  lib/librte_pmd_xen/virt_dev.h         |  30 ++
>  lib/librte_pmd_xen/xen_adapter_info.h |  64 ++++
>  lib/librte_pmd_xen/xen_dev.c          | 369 +++++++++++++++++++++++
>  lib/librte_pmd_xen/xen_dev.h          |  96 ++++++
>  lib/librte_pmd_xen/xen_logs.h         |  23 ++
>  lib/librte_pmd_xen/xen_rxtx.c         | 546 ++++++++++++++++++++++++++++++++++
>  lib/librte_pmd_xen/xen_rxtx.h         | 110 +++++++
>  mk/rte.app.mk                         |   4 +
>  14 files changed, 1694 insertions(+)
>  create mode 100644 lib/librte_pmd_xen/Makefile
>  create mode 100644 lib/librte_pmd_xen/virt_dev.c
>  create mode 100644 lib/librte_pmd_xen/virt_dev.h
>  create mode 100644 lib/librte_pmd_xen/xen_adapter_info.h
>  create mode 100644 lib/librte_pmd_xen/xen_dev.c
>  create mode 100644 lib/librte_pmd_xen/xen_dev.h
>  create mode 100644 lib/librte_pmd_xen/xen_logs.h
>  create mode 100644 lib/librte_pmd_xen/xen_rxtx.c
>  create mode 100644 lib/librte_pmd_xen/xen_rxtx.h
><snip>
> +
> +int
> +rte_xen_pmd_init(void)
> +{
> +	PMD_INIT_FUNC_TRACE();
> +
> +	xen_evt_fd = open("/dev/"XEN_PMD_UIO_NAME, O_RDWR);
> +
> +	if (xen_evt_fd == -1) {
> +		if (errno != ENOENT)
> +			PMD_INIT_LOG(ERR, "cannot open event device %s",
> +					"/dev/"XEN_PMD_UIO_NAME);
> +		return 0;
> +	}
> +
> +	return virt_eth_driver_register(&rte_xen_pmd);
> +}
It looks like you've created a new method of registering a pmd here?  Why not
use the existing REGISTER_PMD_DRIVER macro?  It seems like this method will
break the DSO build.

Neil

> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2015-02-14 19:32 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-14 18:06 [dpdk-dev] [PATCH 1/4] xen: allow choosing dom0 support at runtime Stephen Hemminger
2015-02-14 18:06 ` [dpdk-dev] [PATCH 2/4] xen: add phys-addr command line argument Stephen Hemminger
2015-02-14 18:06 ` [dpdk-dev] [PATCH 3/4] xen: add uio driver Stephen Hemminger
2015-02-14 18:06 ` [dpdk-dev] [PATCH 4/4] xen: net-front poll mode driver Stephen Hemminger
2015-02-14 19:31   ` Neil Horman
2015-02-14 19:25 ` [dpdk-dev] [PATCH 1/4] xen: allow choosing dom0 support at runtime Neil Horman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).