* [dpdk-dev] [PATCH 1/5] xen: allow choosing dom0 support at runtime
@ 2015-02-15 15:24 Stephen Hemminger
2015-02-15 15:24 ` [dpdk-dev] [PATCH 2/5] enic: fix device to work with Xen DOM0 Stephen Hemminger
` (4 more replies)
0 siblings, 5 replies; 23+ messages in thread
From: Stephen Hemminger @ 2015-02-15 15:24 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger
Previously the library and application had to be built either to run
on Xen DOM0 or to run without it. Replace that build-time choice with
a runtime flag.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
v2 -- fix i40e as well
lib/librte_eal/common/include/rte_memory.h | 4 +++
lib/librte_eal/linuxapp/eal/eal_memory.c | 7 ++++
lib/librte_ether/rte_ethdev.c | 22 ++++++++++++
lib/librte_ether/rte_ethdev.h | 23 ++++++++++++
lib/librte_mempool/rte_mempool.c | 26 +++++++-------
lib/librte_pmd_e1000/em_rxtx.c | 30 +++-------------
lib/librte_pmd_e1000/igb_rxtx.c | 52 +++++++++------------------
lib/librte_pmd_i40e/i40e_ethdev.c | 16 +++++----
lib/librte_pmd_i40e/i40e_fdir.c | 8 +++--
lib/librte_pmd_i40e/i40e_rxtx.c | 57 +++++++++++++++++------------
lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 58 +++++++++---------------------
11 files changed, 156 insertions(+), 147 deletions(-)
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 7f8103f..ab6c1ff 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -176,6 +176,10 @@ unsigned rte_memory_get_nchannel(void);
unsigned rte_memory_get_nrank(void);
#ifdef RTE_LIBRTE_XEN_DOM0
+
+/**< Internal use only - should DOM0 memory mapping be used */
+extern int is_xen_dom0_supported(void);
+
/**
* Return the physical address of elt, which is an element of the pool mp.
*
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index a67a1b0..4afda2a 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -98,6 +98,13 @@
#include "eal_filesystem.h"
#include "eal_hugepages.h"
+#ifdef RTE_LIBRTE_XEN_DOM0
+int is_xen_dom0_supported(void)
+{
+ return internal_config.xen_dom0_support;
+}
+#endif
+
/**
* @file
* Huge page mapping under linux
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index ea3a1fb..457e0bc 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -2825,6 +2825,27 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
}
rte_spinlock_unlock(&rte_eth_dev_cb_lock);
}
+
+const struct rte_memzone *
+rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name,
+ uint16_t queue_id, size_t size, unsigned align,
+ int socket_id)
+{
+ char z_name[RTE_MEMZONE_NAMESIZE];
+ const struct rte_memzone *mz;
+
+ snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
+ dev->driver->pci_drv.name, ring_name,
+ dev->data->port_id, queue_id);
+
+ mz = rte_memzone_lookup(z_name);
+ if (mz)
+ return mz;
+
+ return rte_memzone_reserve_bounded(z_name, size,
+ socket_id, 0, align, RTE_PGSIZE_2M);
+}
+
#ifdef RTE_NIC_BYPASS
int rte_eth_dev_bypass_init(uint8_t port_id)
{
@@ -3003,6 +3024,7 @@ rte_eth_dev_bypass_wd_reset(uint8_t port_id)
(*dev->dev_ops->bypass_wd_reset)(dev);
return 0;
}
+
#endif
int
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 1200c1c..747acb5 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -3664,6 +3664,29 @@ int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_ty
int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
enum rte_filter_op filter_op, void *arg);
+/**
+ * Create memzone for HW rings.
+ * malloc can't be used as the physical address is needed.
+ * If the memzone is already created, then this function returns a ptr
+ * to the old one.
+ *
+ * @param eth_dev
+ * The *eth_dev* pointer is the address of the *rte_eth_dev* structure
+ * @param name
+ * The name of the memory zone
+ * @param queue_id
+ * The index of the queue to add to name
+ * @param size
+ * The size of the memory area
+ * @param align
+ * Alignment for resulting memzone. Must be a power of 2.
+ * @param socket_id
+ * The *socket_id* argument is the socket identifier in case of NUMA.
+ */
+const struct rte_memzone *
+rte_eth_dma_zone_reserve(const struct rte_eth_dev *eth_dev, const char *name,
+ uint16_t queue_id, size_t size,
+ unsigned align, int socket_id);
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 4cf6c25..5056a4f 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -372,19 +372,21 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
int socket_id, unsigned flags)
{
#ifdef RTE_LIBRTE_XEN_DOM0
- return (rte_dom0_mempool_create(name, n, elt_size,
- cache_size, private_data_size,
- mp_init, mp_init_arg,
- obj_init, obj_init_arg,
- socket_id, flags));
-#else
- return (rte_mempool_xmem_create(name, n, elt_size,
- cache_size, private_data_size,
- mp_init, mp_init_arg,
- obj_init, obj_init_arg,
- socket_id, flags,
- NULL, NULL, MEMPOOL_PG_NUM_DEFAULT, MEMPOOL_PG_SHIFT_MAX));
+ if (is_xen_dom0_supported())
+ return (rte_dom0_mempool_create(name, n, elt_size,
+ cache_size, private_data_size,
+ mp_init, mp_init_arg,
+ obj_init, obj_init_arg,
+ socket_id, flags));
+ else
#endif
+ return (rte_mempool_xmem_create(name, n, elt_size,
+ cache_size, private_data_size,
+ mp_init, mp_init_arg,
+ obj_init, obj_init_arg,
+ socket_id, flags,
+ NULL, NULL, MEMPOOL_PG_NUM_DEFAULT,
+ MEMPOOL_PG_SHIFT_MAX));
}
/*
diff --git a/lib/librte_pmd_e1000/em_rxtx.c b/lib/librte_pmd_e1000/em_rxtx.c
index aa0b88c..9e09cfa 100644
--- a/lib/librte_pmd_e1000/em_rxtx.c
+++ b/lib/librte_pmd_e1000/em_rxtx.c
@@ -1104,28 +1104,6 @@ eth_em_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
#define EM_MAX_BUF_SIZE 16384
#define EM_RCTL_FLXBUF_STEP 1024
-static const struct rte_memzone *
-ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
- uint16_t queue_id, uint32_t ring_size, int socket_id)
-{
- const struct rte_memzone *mz;
- char z_name[RTE_MEMZONE_NAMESIZE];
-
- snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
- dev->driver->pci_drv.name, ring_name, dev->data->port_id,
- queue_id);
-
- if ((mz = rte_memzone_lookup(z_name)) != 0)
- return (mz);
-
-#ifdef RTE_LIBRTE_XEN_DOM0
- return rte_memzone_reserve_bounded(z_name, ring_size,
- socket_id, 0, RTE_CACHE_LINE_SIZE, RTE_PGSIZE_2M);
-#else
- return rte_memzone_reserve(z_name, ring_size, socket_id, 0);
-#endif
-}
-
static void
em_tx_queue_release_mbufs(struct em_tx_queue *txq)
{
@@ -1273,8 +1251,8 @@ eth_em_tx_queue_setup(struct rte_eth_dev *dev,
* resizing in later calls to the queue setup function.
*/
tsize = sizeof (txq->tx_ring[0]) * EM_MAX_RING_DESC;
- if ((tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx, tsize,
- socket_id)) == NULL)
+ if ((tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, tsize,
+ RTE_CACHE_LINE_SIZE, socket_id)) == NULL)
return (-ENOMEM);
/* Allocate the tx queue data structure. */
@@ -1400,8 +1378,8 @@ eth_em_rx_queue_setup(struct rte_eth_dev *dev,
/* Allocate RX ring for max possible mumber of hardware descriptors. */
rsize = sizeof (rxq->rx_ring[0]) * EM_MAX_RING_DESC;
- if ((rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx, rsize,
- socket_id)) == NULL)
+ if ((rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, rsize,
+ RTE_CACHE_LINE_SIZE, socket_id)) == NULL)
return (-ENOMEM);
/* Allocate the RX queue data structure. */
diff --git a/lib/librte_pmd_e1000/igb_rxtx.c b/lib/librte_pmd_e1000/igb_rxtx.c
index 5c394a9..d36469b 100644
--- a/lib/librte_pmd_e1000/igb_rxtx.c
+++ b/lib/librte_pmd_e1000/igb_rxtx.c
@@ -1109,29 +1109,6 @@ eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
#define IGB_MIN_RING_DESC 32
#define IGB_MAX_RING_DESC 4096
-static const struct rte_memzone *
-ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
- uint16_t queue_id, uint32_t ring_size, int socket_id)
-{
- char z_name[RTE_MEMZONE_NAMESIZE];
- const struct rte_memzone *mz;
-
- snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
- dev->driver->pci_drv.name, ring_name,
- dev->data->port_id, queue_id);
- mz = rte_memzone_lookup(z_name);
- if (mz)
- return mz;
-
-#ifdef RTE_LIBRTE_XEN_DOM0
- return rte_memzone_reserve_bounded(z_name, ring_size,
- socket_id, 0, IGB_ALIGN, RTE_PGSIZE_2M);
-#else
- return rte_memzone_reserve_aligned(z_name, ring_size,
- socket_id, 0, IGB_ALIGN);
-#endif
-}
-
static void
igb_tx_queue_release_mbufs(struct igb_tx_queue *txq)
{
@@ -1265,8 +1242,8 @@ eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
* resizing in later calls to the queue setup function.
*/
size = sizeof(union e1000_adv_tx_desc) * IGB_MAX_RING_DESC;
- tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
- size, socket_id);
+ tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
+ IGB_ALIGN, socket_id);
if (tz == NULL) {
igb_tx_queue_release(txq);
return (-ENOMEM);
@@ -1284,12 +1261,14 @@ eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
txq->port_id = dev->data->port_id;
txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
-#ifndef RTE_LIBRTE_XEN_DOM0
- txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
-#else
- txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+#ifdef RTE_LIBRTE_XEN_DOM0
+ if (is_xen_dom0_supported())
+ txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+ else
#endif
- txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
+ txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
+
+ txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
/* Allocate software ring */
txq->sw_ring = rte_zmalloc("txq->sw_ring",
sizeof(struct igb_tx_entry) * nb_desc,
@@ -1414,18 +1393,21 @@ eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
* resizing in later calls to the queue setup function.
*/
size = sizeof(union e1000_adv_rx_desc) * IGB_MAX_RING_DESC;
- rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx, size, socket_id);
+ rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
+ IGB_ALIGN, socket_id);
if (rz == NULL) {
igb_rx_queue_release(rxq);
return (-ENOMEM);
}
rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
-#ifndef RTE_LIBRTE_XEN_DOM0
- rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
-#else
- rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+#ifdef RTE_LIBRTE_XEN_DOM0
+ if (is_xen_dom0_supported())
+ rxq->rx_ring_phys_addr =
+ rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+ else
#endif
+ rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
/* Allocate software ring. */
diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c b/lib/librte_pmd_i40e/i40e_ethdev.c
index 9fa6bec..44a012f 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev.c
@@ -1979,11 +1979,12 @@ i40e_allocate_dma_mem_d(__attribute__((unused)) struct i40e_hw *hw,
id++;
snprintf(z_name, sizeof(z_name), "i40e_dma_%"PRIu64, id);
#ifdef RTE_LIBRTE_XEN_DOM0
- mz = rte_memzone_reserve_bounded(z_name, size, 0, 0, alignment,
- RTE_PGSIZE_2M);
-#else
- mz = rte_memzone_reserve_aligned(z_name, size, 0, 0, alignment);
+ if (is_xen_dom0_supported())
+ mz = rte_memzone_reserve_bounded(z_name, size, 0, 0, alignment,
+ RTE_PGSIZE_2M);
+ else
#endif
+ mz = rte_memzone_reserve_aligned(z_name, size, 0, 0, alignment);
if (!mz)
return I40E_ERR_NO_MEMORY;
@@ -1991,10 +1992,11 @@ i40e_allocate_dma_mem_d(__attribute__((unused)) struct i40e_hw *hw,
mem->size = size;
mem->va = mz->addr;
#ifdef RTE_LIBRTE_XEN_DOM0
- mem->pa = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr);
-#else
- mem->pa = mz->phys_addr;
+ if (is_xen_dom0_supported())
+ mem->pa = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr);
+ else
#endif
+ mem->pa = mz->phys_addr;
return I40E_SUCCESS;
}
diff --git a/lib/librte_pmd_i40e/i40e_fdir.c b/lib/librte_pmd_i40e/i40e_fdir.c
index 68511c8..4c5b185 100644
--- a/lib/librte_pmd_i40e/i40e_fdir.c
+++ b/lib/librte_pmd_i40e/i40e_fdir.c
@@ -274,10 +274,12 @@ i40e_fdir_setup(struct i40e_pf *pf)
}
pf->fdir.prg_pkt = mz->addr;
#ifdef RTE_LIBRTE_XEN_DOM0
- pf->fdir.dma_addr = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr);
-#else
- pf->fdir.dma_addr = (uint64_t)mz->phys_addr;
+ if (is_xen_dom0_supported())
+ pf->fdir.dma_addr = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr);
+ else
#endif
+ pf->fdir.dma_addr = (uint64_t)mz->phys_addr;
+
pf->fdir.match_counter_index = I40E_COUNTER_INDEX_FDIR(hw->pf_id);
PMD_DRV_LOG(INFO, "FDIR setup successfully, with programming queue %u.",
vsi->base_queue);
diff --git a/lib/librte_pmd_i40e/i40e_rxtx.c b/lib/librte_pmd_i40e/i40e_rxtx.c
index 2beae3c..bd01dc0 100644
--- a/lib/librte_pmd_i40e/i40e_rxtx.c
+++ b/lib/librte_pmd_i40e/i40e_rxtx.c
@@ -1796,10 +1796,11 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
memset(rz->addr, 0, ring_size);
#ifdef RTE_LIBRTE_XEN_DOM0
- rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
-#else
- rxq->rx_ring_phys_addr = (uint64_t)rz->phys_addr;
+ if (is_xen_dom0_supported())
+ rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+ else
#endif
+ rxq->rx_ring_phys_addr = (uint64_t)rz->phys_addr;
rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
@@ -2079,10 +2080,11 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
txq->tx_deferred_start = tx_conf->tx_deferred_start;
#ifdef RTE_LIBRTE_XEN_DOM0
- txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
-#else
- txq->tx_ring_phys_addr = (uint64_t)tz->phys_addr;
+ if (is_xen_dom0_supported())
+ txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+ else
#endif
+ txq->tx_ring_phys_addr = (uint64_t)tz->phys_addr;
txq->tx_ring = (struct i40e_tx_desc *)tz->addr;
/* Allocate software ring */
@@ -2147,12 +2149,13 @@ i40e_ring_dma_zone_reserve(struct rte_eth_dev *dev,
return mz;
#ifdef RTE_LIBRTE_XEN_DOM0
- return rte_memzone_reserve_bounded(z_name, ring_size,
- socket_id, 0, I40E_ALIGN, RTE_PGSIZE_2M);
-#else
- return rte_memzone_reserve_aligned(z_name, ring_size,
- socket_id, 0, I40E_ALIGN);
-#endif
+ if (is_xen_dom0_supported())
+ return rte_memzone_reserve_bounded(z_name, ring_size,
+ socket_id, 0, I40E_ALIGN, RTE_PGSIZE_2M);
+ else
+#endif
+ return rte_memzone_reserve_aligned(z_name, ring_size,
+ socket_id, 0, I40E_ALIGN);
}
const struct rte_memzone *
@@ -2164,12 +2167,14 @@ i40e_memzone_reserve(const char *name, uint32_t len, int socket_id)
if (mz)
return mz;
#ifdef RTE_LIBRTE_XEN_DOM0
- mz = rte_memzone_reserve_bounded(name, len,
- socket_id, 0, I40E_ALIGN, RTE_PGSIZE_2M);
-#else
- mz = rte_memzone_reserve_aligned(name, len,
- socket_id, 0, I40E_ALIGN);
+ if (is_xen_dom0_supported())
+ mz = rte_memzone_reserve_bounded(name, len,
+ socket_id, 0, I40E_ALIGN, RTE_PGSIZE_2M);
+ else
#endif
+ mz = rte_memzone_reserve_aligned(name, len,
+ socket_id, 0, I40E_ALIGN);
+
return mz;
}
@@ -2573,10 +2578,13 @@ i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
txq->vsi = pf->fdir.fdir_vsi;
#ifdef RTE_LIBRTE_XEN_DOM0
- txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
-#else
- txq->tx_ring_phys_addr = (uint64_t)tz->phys_addr;
+ if (is_xen_dom0_supported())
+ txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id,
+ tz->phys_addr);
+ else
#endif
+ txq->tx_ring_phys_addr = (uint64_t)tz->phys_addr;
+
txq->tx_ring = (struct i40e_tx_desc *)tz->addr;
/*
* don't need to allocate software ring and reset for the fdir
@@ -2633,10 +2641,13 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
rxq->vsi = pf->fdir.fdir_vsi;
#ifdef RTE_LIBRTE_XEN_DOM0
- rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
-#else
- rxq->rx_ring_phys_addr = (uint64_t)rz->phys_addr;
+ if (is_xen_dom0_supported())
+ rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id,
+ rz->phys_addr);
+ else
#endif
+ rxq->rx_ring_phys_addr = (uint64_t)rz->phys_addr;
+
rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
/*
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index e6766b3..303144d 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -1656,35 +1656,6 @@ ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
#define IXGBE_MIN_RING_DESC 32
#define IXGBE_MAX_RING_DESC 4096
-/*
- * Create memzone for HW rings. malloc can't be used as the physical address is
- * needed. If the memzone is already created, then this function returns a ptr
- * to the old one.
- */
-static const struct rte_memzone *
-ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
- uint16_t queue_id, uint32_t ring_size, int socket_id)
-{
- char z_name[RTE_MEMZONE_NAMESIZE];
- const struct rte_memzone *mz;
-
- snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
- dev->driver->pci_drv.name, ring_name,
- dev->data->port_id, queue_id);
-
- mz = rte_memzone_lookup(z_name);
- if (mz)
- return mz;
-
-#ifdef RTE_LIBRTE_XEN_DOM0
- return rte_memzone_reserve_bounded(z_name, ring_size,
- socket_id, 0, IXGBE_ALIGN, RTE_PGSIZE_2M);
-#else
- return rte_memzone_reserve_aligned(z_name, ring_size,
- socket_id, 0, IXGBE_ALIGN);
-#endif
-}
-
static void
ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)
{
@@ -1920,9 +1891,9 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
* handle the maximum ring size is allocated in order to allow for
* resizing in later calls to the queue setup function.
*/
- tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
+ tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
- socket_id);
+ IXGBE_ALIGN, socket_id);
if (tz == NULL) {
ixgbe_tx_queue_release(txq);
return (-ENOMEM);
@@ -1950,11 +1921,14 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
else
txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
-#ifndef RTE_LIBRTE_XEN_DOM0
- txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
-#else
- txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+
+#ifdef RTE_LIBRTE_XEN_DOM0
+ if (is_xen_dom0_supported())
+ txq->tx_ring_phys_addr =
+ rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+ else
#endif
+ txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
/* Allocate software ring */
@@ -2195,8 +2169,8 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
* handle the maximum ring size is allocated in order to allow for
* resizing in later calls to the queue setup function.
*/
- rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx,
- RX_RING_SZ, socket_id);
+ rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
+ RX_RING_SZ, IXGBE_ALIGN, socket_id);
if (rz == NULL) {
ixgbe_rx_queue_release(rxq);
return (-ENOMEM);
@@ -2223,11 +2197,13 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
rxq->rdh_reg_addr =
IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
}
-#ifndef RTE_LIBRTE_XEN_DOM0
- rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
-#else
- rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+#ifdef RTE_LIBRTE_XEN_DOM0
+ if (is_xen_dom0_supported())
+ rxq->rx_ring_phys_addr =
+ rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+ else
#endif
+ rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
/*
--
2.1.4
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH 2/5] enic: fix device to work with Xen DOM0
2015-02-15 15:24 [dpdk-dev] [PATCH 1/5] xen: allow choosing dom0 support at runtime Stephen Hemminger
@ 2015-02-15 15:24 ` Stephen Hemminger
2015-03-10 7:08 ` Liu, Jijiang
2015-02-15 15:24 ` [dpdk-dev] [PATCH 3/5] xen: add phys-addr command line argument Stephen Hemminger
` (3 subsequent siblings)
4 siblings, 1 reply; 23+ messages in thread
From: Stephen Hemminger @ 2015-02-15 15:24 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger
It is possible to passthrough a PCI device when running in Xen
Paravirt mode. The device driver has to accommodate by using
memory zones differently. This patch models the memory allocation
for ENIC device based on changes already done for ixgbe and igb.
Build tested only; has not been tested on ENIC hardware.
---
v2 -- this patch is added
lib/librte_pmd_enic/enic_main.c | 19 ++++++++++++++++---
lib/librte_pmd_enic/vnic/vnic_dev.c | 19 +++++++++++++++----
2 files changed, 31 insertions(+), 7 deletions(-)
diff --git a/lib/librte_pmd_enic/enic_main.c b/lib/librte_pmd_enic/enic_main.c
index 48fdca2..0be5172 100644
--- a/lib/librte_pmd_enic/enic_main.c
+++ b/lib/librte_pmd_enic/enic_main.c
@@ -537,8 +537,14 @@ enic_alloc_consistent(__rte_unused void *priv, size_t size,
const struct rte_memzone *rz;
*dma_handle = 0;
- rz = rte_memzone_reserve_aligned((const char *)name,
- size, 0, 0, ENIC_ALIGN);
+#ifdef RTE_LIBRTE_XEN_DOM0
+ if (is_xen_dom0_supported())
+ rz = rte_memzone_reserve_bounded((char *)name, size,
+ 0, 0, ENIC_ALIGN, RTE_PGSIZE_2M);
+ else
+#endif
+ rz = rte_memzone_reserve_aligned((char *)name, size,
+ 0, 0, ENIC_ALIGN);
if (!rz) {
pr_err("%s : Failed to allocate memory requested for %s",
__func__, name);
@@ -546,7 +552,14 @@ enic_alloc_consistent(__rte_unused void *priv, size_t size,
}
vaddr = rz->addr;
- *dma_handle = (dma_addr_t)rz->phys_addr;
+
+#ifdef RTE_LIBRTE_XEN_DOM0
+ if (is_xen_dom0_supported())
+ *dma_handle = rte_mem_phy2mch(rz->memseg_id,
+ rz->phys_addr);
+ else
+#endif
+ *dma_handle = (dma_addr_t)rz->phys_addr;
return vaddr;
}
diff --git a/lib/librte_pmd_enic/vnic/vnic_dev.c b/lib/librte_pmd_enic/vnic/vnic_dev.c
index 6407994..e660aaf 100644
--- a/lib/librte_pmd_enic/vnic/vnic_dev.c
+++ b/lib/librte_pmd_enic/vnic/vnic_dev.c
@@ -276,9 +276,14 @@ int vnic_dev_alloc_desc_ring(__attribute__((unused)) struct vnic_dev *vdev,
vnic_dev_desc_ring_size(ring, desc_count, desc_size);
- rz = rte_memzone_reserve_aligned(z_name,
- ring->size_unaligned, socket_id,
- 0, ENIC_ALIGN);
+#ifdef RTE_LIBRTE_XEN_DOM0
+ if (is_xen_dom0_supported())
+ rz = rte_memzone_reserve_bounded(z_name, ring->size_unaligned,
+ socket_id, 0, ENIC_ALIGN, RTE_PGSIZE_2M);
+ else
+#endif
+ rz = rte_memzone_reserve_aligned(z_name, ring->size_unaligned,
+ socket_id, 0, ENIC_ALIGN);
if (!rz) {
pr_err("Failed to allocate ring (size=%d), aborting\n",
(int)ring->size);
@@ -292,7 +297,13 @@ int vnic_dev_alloc_desc_ring(__attribute__((unused)) struct vnic_dev *vdev,
return -ENOMEM;
}
- ring->base_addr_unaligned = (dma_addr_t)rz->phys_addr;
+#ifdef RTE_LIBRTE_XEN_DOM0
+ if (is_xen_dom0_supported())
+ ring->base_addr_unaligned = rte_mem_phy2mch(rz->memseg_id,
+ rz->phys_addr);
+ else
+#endif
+ ring->base_addr_unaligned = (dma_addr_t)rz->phys_addr;
ring->base_addr = ALIGN(ring->base_addr_unaligned,
ring->base_align);
--
2.1.4
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH 3/5] xen: add phys-addr command line argument
2015-02-15 15:24 [dpdk-dev] [PATCH 1/5] xen: allow choosing dom0 support at runtime Stephen Hemminger
2015-02-15 15:24 ` [dpdk-dev] [PATCH 2/5] enic: fix device to work with Xen DOM0 Stephen Hemminger
@ 2015-02-15 15:24 ` Stephen Hemminger
2015-02-26 7:55 ` Liu, Jijiang
2015-02-15 15:24 ` [dpdk-dev] [PATCH 4/5] xen: add uio driver Stephen Hemminger
` (2 subsequent siblings)
4 siblings, 1 reply; 23+ messages in thread
From: Stephen Hemminger @ 2015-02-15 15:24 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger
Allow overriding default Xen DOM0 behavior to
use physical addresses instead of mfn
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
v2 -- no changes
lib/librte_eal/common/eal_common_options.c | 5 +++++
lib/librte_eal/common/eal_internal_cfg.h | 1 +
lib/librte_eal/common/eal_options.h | 2 ++
lib/librte_eal/common/include/rte_memory.h | 3 +++
lib/librte_eal/linuxapp/eal/eal_memory.c | 5 +++++
lib/librte_mempool/rte_dom0_mempool.c | 10 ++++++++--
6 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 67e02dc..1742364 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -83,6 +83,7 @@ eal_long_options[] = {
{OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM},
{OPT_BASE_VIRTADDR, 1, 0, OPT_BASE_VIRTADDR_NUM},
{OPT_XEN_DOM0, 0, 0, OPT_XEN_DOM0_NUM},
+ {OPT_XEN_PHYS_ADDR, 0, 0, OPT_XEN_PHYS_ADDR_NUM},
{OPT_CREATE_UIO_DEV, 1, NULL, OPT_CREATE_UIO_DEV_NUM},
{OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM},
{0, 0, 0, 0}
@@ -491,6 +492,10 @@ eal_parse_common_option(int opt, const char *optarg,
}
conf->log_level = log;
break;
+
+ case OPT_XEN_PHYS_ADDR_NUM:
+ conf->xen_phys_addr_support = 1;
+ break;
}
/* don't know what to do, leave this to caller */
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index e2ecb0d..41b4169 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -65,6 +65,7 @@ struct internal_config {
volatile unsigned force_nrank; /**< force number of ranks */
volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */
volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
+ volatile unsigned xen_phys_addr_support; /**< support phys addr */
volatile unsigned no_pci; /**< true to disable PCI */
volatile unsigned no_hpet; /**< true to disable HPET */
volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index e476f8d..8aee959 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -73,6 +73,8 @@ enum {
OPT_BASE_VIRTADDR_NUM,
#define OPT_XEN_DOM0 "xen-dom0"
OPT_XEN_DOM0_NUM,
+#define OPT_XEN_PHYS_ADDR "xen-phys-addr"
+ OPT_XEN_PHYS_ADDR_NUM,
#define OPT_CREATE_UIO_DEV "create-uio-dev"
OPT_CREATE_UIO_DEV_NUM,
#define OPT_VFIO_INTR "vfio-intr"
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index ab6c1ff..c3b8a98 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -180,6 +180,9 @@ unsigned rte_memory_get_nrank(void);
/**< Internal use only - should DOM0 memory mapping be used */
extern int is_xen_dom0_supported(void);
+/**< Internal use only - should DOM0 use physical addresses instead of mfn */
+extern int is_xen_phys_addr_supported(void);
+
/**
* Return the physical address of elt, which is an element of the pool mp.
*
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 4afda2a..a759ac9 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -103,6 +103,11 @@ int is_xen_dom0_supported(void)
{
return internal_config.xen_dom0_support;
}
+
+int is_xen_phys_addr_supported(void)
+{
+ return internal_config.xen_phys_addr_support;
+}
#endif
/**
diff --git a/lib/librte_mempool/rte_dom0_mempool.c b/lib/librte_mempool/rte_dom0_mempool.c
index 9ec68fb..ab35826 100644
--- a/lib/librte_mempool/rte_dom0_mempool.c
+++ b/lib/librte_mempool/rte_dom0_mempool.c
@@ -74,8 +74,14 @@ get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num,
virt_addr =(uintptr_t) mcfg->memseg[memseg_id].addr;
for (i = 0; i != pg_num; i++) {
- mfn_id = ((uintptr_t)va + i * pg_sz - virt_addr) / RTE_PGSIZE_2M;
- pa[i] = mcfg->memseg[memseg_id].mfn[mfn_id] * page_size;
+ if (!is_xen_phys_addr_supported()) {
+ mfn_id = ((uintptr_t)va + i * pg_sz -
+ virt_addr) / RTE_PGSIZE_2M;
+ pa[i] = mcfg->memseg[memseg_id].mfn[mfn_id] * page_size;
+ } else {
+ pa[i] = mcfg->memseg[memseg_id].phys_addr + i * pg_sz +
+ (uintptr_t)va - virt_addr;
+ }
}
}
--
2.1.4
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH 4/5] xen: add uio driver
2015-02-15 15:24 [dpdk-dev] [PATCH 1/5] xen: allow choosing dom0 support at runtime Stephen Hemminger
2015-02-15 15:24 ` [dpdk-dev] [PATCH 2/5] enic: fix device to work with Xen DOM0 Stephen Hemminger
2015-02-15 15:24 ` [dpdk-dev] [PATCH 3/5] xen: add phys-addr command line argument Stephen Hemminger
@ 2015-02-15 15:24 ` Stephen Hemminger
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver Jan Blunck
2015-02-15 15:24 ` [dpdk-dev] [PATCH 5/5] xen: net-front " Stephen Hemminger
2015-07-09 0:10 ` [dpdk-dev] [PATCH 1/5] xen: allow choosing dom0 support at runtime Thomas Monjalon
4 siblings, 1 reply; 23+ messages in thread
From: Stephen Hemminger @ 2015-02-15 15:24 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger
New uio helper kernel driver for Xen netfront UIO poll
mode driver.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
v2 -- use PMD_REGISTER
lib/librte_eal/linuxapp/Makefile | 3 +
lib/librte_eal/linuxapp/xen_uio/Makefile | 55 ++
lib/librte_eal/linuxapp/xen_uio/xen_uio.c | 837 ++++++++++++++++++++++++++++++
3 files changed, 895 insertions(+)
create mode 100644 lib/librte_eal/linuxapp/xen_uio/Makefile
create mode 100644 lib/librte_eal/linuxapp/xen_uio/xen_uio.c
diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile
index 8fcfdf6..d3893e5 100644
--- a/lib/librte_eal/linuxapp/Makefile
+++ b/lib/librte_eal/linuxapp/Makefile
@@ -41,5 +41,8 @@ endif
ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += xen_dom0
endif
+ifeq ($(CONFIG_RTE_LIBRTE_XEN_PMD),y)
+DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += xen_uio
+endif
include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/linuxapp/xen_uio/Makefile b/lib/librte_eal/linuxapp/xen_uio/Makefile
new file mode 100644
index 0000000..25a9f35
--- /dev/null
+++ b/lib/librte_eal/linuxapp/xen_uio/Makefile
@@ -0,0 +1,55 @@
+# BSD LICENSE
+#
+# Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = xen_uio
+MODULE_PATH = drivers/net/xen_uio
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=100
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -Winline -Wall -Werror
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y := xen_uio.c
+
+
+include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/xen_uio/xen_uio.c b/lib/librte_eal/linuxapp/xen_uio/xen_uio.c
new file mode 100644
index 0000000..b25b1f3
--- /dev/null
+++ b/lib/librte_eal/linuxapp/xen_uio/xen_uio.c
@@ -0,0 +1,837 @@
+/*
+ * Virtual network driver for conversing with remote driver backends.
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ * Copyright (c) 2005, XenSource Ltd
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/if_ether.h>
+#include <linux/proc_fs.h>
+
+#include <xen/xenbus.h>
+#include <xen/page.h>
+#include <xen/grant_table.h>
+#include <xen/interface/io/netif.h>
+#include <xen/platform_pci.h>
+
+#include <xen/events.h>
+#include <xen/evtchn.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+#include <linux/uio_driver.h>
+
+#include "../../../librte_pmd_xen/xen_adapter_info.h"
+
+#define GRANT_INVALID_REF 0
+
+#define NET_TX_RING_SIZE \
+	__CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
+#define NET_RX_RING_SIZE \
+	__CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
+
+/*
+ * Number of grant references published to user space per direction: one
+ * per ring slot, capped at 256.  Each cap is derived from the size of its
+ * own ring (TX previously used the RX ring size by mistake).
+ */
+#define TX_MAX_TARGET \
+	min_t(int, NET_TX_RING_SIZE, 256)
+#define RX_MAX_TARGET \
+	min_t(int, NET_RX_RING_SIZE, 256)
+
+/* Total grant references allocated for the TX and RX paths together. */
+#define RXTX_GREFS (TX_MAX_TARGET + RX_MAX_TARGET)
+
+#define DOMAIN_PROC "xen/domain"
+struct proc_dir_entry *domain_proc;
+char domain_name[9];
+size_t domain_len = sizeof(domain_name);
+static const char *domains[] = { "native", "pv", "hvm", "unknown" };
+
+struct netfront_info *xennet_alloc_resources(struct xenbus_device *xbdev);
+static void xennet_free_resources(struct xenbus_device *xbdev);
+static int xennet_connect_backend(struct netfront_info *info);
+static void xennet_disconnect_backend(struct netfront_info *info,
+ int deffered_free);
+
+/* some helpers */
+/*
+ * Ask the hypervisor which grant table version this domain uses.
+ * Returns the version on success or the negative hypercall result.
+ */
+static int __gnttab_version(void)
+{
+	struct gnttab_get_version query;
+	int rc;
+
+	query.dom = DOMID_SELF;
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_get_version, &query, 1);
+
+	return (rc < 0) ? rc : (int)query.version;
+}
+
+/*
+ * End foreign access for a ring grant.  Nothing is done for
+ * GRANT_INVALID_REF; otherwise the page is handed to
+ * gnttab_end_foreign_access(), which frees it as a side-effect.
+ */
+static void xennet_end_access(int ref, void *page)
+{
+	if (ref == GRANT_INVALID_REF)
+		return;
+
+	gnttab_end_foreign_access(ref, 0, (unsigned long)page);
+}
+
+/*
+ * Read the "mac" node of this device from xenstore and parse it into the
+ * ETH_ALEN bytes at @mac.  Returns 0 on success, the xenbus_read() error
+ * if the node cannot be read, or -ENOENT if the text is not a MAC address.
+ */
+static int xen_net_read_mac(struct xenbus_device *xbdev, u8 *mac)
+{
+	int nfields;
+	char *text = xenbus_read(XBT_NIL, xbdev->nodename, "mac", NULL);
+
+	if (IS_ERR(text))
+		return PTR_ERR(text);
+
+	pr_info("mac addr: %s\n", text);
+
+	nfields = sscanf(text, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+			 &mac[0], &mac[1], &mac[2],
+			 &mac[3], &mac[4], &mac[5]);
+	/* the string returned by xenbus_read() is ours to release */
+	kfree(text);
+
+	if (nfields != ETH_ALEN) {
+		pr_warn("can't parse mac address\n");
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+/* Heap-allocated holder for the uio_info registered with the UIO core. */
+struct xen_uio_dev {
+ struct uio_info info;
+};
+
+/* Per-device state kept as driver data of one xenbus "vif" device. */
+struct netfront_info {
+ /* xenbus device this state belongs to */
+ struct xenbus_device *xbdev;
+
+ /* grant reference and front ring for the TX shared ring page */
+ int tx_ring_ref;
+ struct xen_netif_tx_front_ring tx;
+
+ /* grant reference and front ring for the RX shared ring page */
+ int rx_ring_ref;
+ struct xen_netif_rx_front_ring rx;
+
+ /* shared ring pages; set to NULL once freed by a deferred free */
+ struct xen_netif_tx_sring *txs;
+ struct xen_netif_rx_sring *rxs;
+
+ /* head of the grant reference list allocated for RX/TX buffers */
+ grant_ref_t gref_rxtx_head;
+
+ /* UIO registration, NULL when no UIO device is registered */
+ struct xen_uio_dev *xen_udev;
+
+ /* page exported to user space through the INFO_MAP UIO mapping */
+ struct xen_adapter_info *shared_info_page;
+};
+
+/*
+ * Register a UIO device that exports three page-sized logical mappings:
+ * the shared info page, the RX ring page and the TX ring page.
+ * On success info->xen_udev owns the registration; on failure it is NULL.
+ * Returns 0, -ENOMEM, or the uio_register_device() error.
+ */
+static int xennet_uio_init(struct xenbus_device *xbdev,
+			   struct netfront_info *info)
+{
+	struct xen_uio_dev *wrapper;
+	struct uio_info *uinfo;
+	int rc;
+
+	wrapper = kzalloc(sizeof(struct xen_uio_dev), GFP_KERNEL);
+	if (wrapper == NULL)
+		return -ENOMEM;
+
+	info->xen_udev = wrapper;
+	uinfo = &wrapper->info;
+
+	/* generic description; the device raises no interrupts */
+	uinfo->name = "xen_uio";
+	uinfo->version = "0.1";
+	uinfo->irq = UIO_IRQ_NONE;
+	uinfo->irq_flags = 0;
+
+	/* page carrying all the working information for user space */
+	uinfo->mem[INFO_MAP].name = "xennet info page";
+	uinfo->mem[INFO_MAP].memtype = UIO_MEM_LOGICAL;
+	uinfo->mem[INFO_MAP].addr = (phys_addr_t)info->shared_info_page;
+	uinfo->mem[INFO_MAP].size = PAGE_SIZE;
+
+	uinfo->mem[RX_RING_MAP].name = "xennet front rx ring";
+	uinfo->mem[RX_RING_MAP].memtype = UIO_MEM_LOGICAL;
+	uinfo->mem[RX_RING_MAP].addr = (phys_addr_t)info->rxs;
+	uinfo->mem[RX_RING_MAP].size = PAGE_SIZE;
+
+	uinfo->mem[TX_RING_MAP].name = "xennet front tx ring";
+	uinfo->mem[TX_RING_MAP].memtype = UIO_MEM_LOGICAL;
+	uinfo->mem[TX_RING_MAP].addr = (phys_addr_t)info->txs;
+	uinfo->mem[TX_RING_MAP].size = PAGE_SIZE;
+
+	rc = uio_register_device(&xbdev->dev, uinfo);
+	if (rc == 0) {
+		pr_info("uio device registered with irq %lx\n",
+			uinfo->irq);
+		return 0;
+	}
+
+	pr_err("uio register failed: %d\n", rc);
+	kfree(wrapper);
+	info->xen_udev = NULL;
+
+	return rc;
+}
+
+
+/*
+ * Undo xennet_uio_init(): unregister the UIO device and release the
+ * holder that was allocated for it.  Does nothing when no UIO device is
+ * registered (info->xen_udev == NULL).
+ */
+static void xennet_uio_uninit(struct netfront_info *info)
+{
+	if (info->xen_udev) {
+		uio_unregister_device(&info->xen_udev->info);
+		/* unregistering does not free the uio_info owner; do it here */
+		kfree(info->xen_udev);
+	}
+	info->xen_udev = NULL;
+}
+
+/*
+ * Allocate everything one frontend device needs before it connects to the
+ * backend: the netfront_info itself, one page each for the TX ring, the RX
+ * ring and the info page shared with user space, RXTX_GREFS grant
+ * references (published in the info page), and the UIO device exporting
+ * the three pages.
+ *
+ * Returns the new netfront_info, or NULL on any failure after releasing
+ * whatever had been allocated so far.
+ */
+struct netfront_info *xennet_alloc_resources(struct xenbus_device *xbdev)
+{
+	int ret;
+	uint16_t i;
+	int gref = 0;
+	grant_ref_t gref_rxtx_head;
+	struct netfront_info *info;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (info == NULL)
+		return NULL;
+
+	info->gref_rxtx_head = GRANT_INVALID_REF;
+	info->xbdev = xbdev;
+
+	/* allocate place for tx ring */
+	info->txs = (struct xen_netif_tx_sring *)get_zeroed_page(
+			GFP_NOIO | __GFP_HIGH);
+	if (!info->txs) {
+		ret = -ENOMEM;
+		xenbus_dev_fatal(xbdev, ret, "allocating tx ring page");
+		goto exit;
+	}
+
+	/* allocate place for rx ring */
+	info->rxs = (struct xen_netif_rx_sring *)get_zeroed_page(
+			GFP_NOIO | __GFP_HIGH);
+	if (!info->rxs) {
+		ret = -ENOMEM;
+		xenbus_dev_fatal(xbdev, ret, "allocating rx ring page");
+		goto exit;
+	}
+
+	/*
+	 * Allocate the info page shared with user space.  It is mapped
+	 * into user space through UIO, so it must be zeroed: otherwise
+	 * stale kernel memory would be readable by the application.
+	 */
+	info->shared_info_page =
+		(struct xen_adapter_info *)get_zeroed_page(GFP_KERNEL);
+	if (NULL == info->shared_info_page) {
+		pr_alert("xen_uio can't alloc shared page\n");
+		goto exit;
+	}
+
+	/*
+	 * Sanity check: the whole gref array, up to the END of its last
+	 * element, has to fit into the single shared page.
+	 */
+	if (((char *)&info->shared_info_page->rxtx_grefs[RXTX_GREFS])
+			- ((char *)info->shared_info_page) > PAGE_SIZE) {
+		pr_err("ASSERT: no mem for grefs\n");
+		goto exit;
+	}
+
+	/* allocate grefs for every tx ring and rx ring slot */
+	ret = gnttab_alloc_grant_references(RXTX_GREFS, &info->gref_rxtx_head);
+	if (ret < 0) {
+		pr_err("xen_uio can't alloc rx and tx grefs\n");
+		goto exit;
+	}
+
+	/* publish the counters; no event channel is allocated yet */
+	info->shared_info_page->rx_grefs_count = RX_MAX_TARGET;
+	info->shared_info_page->tx_grefs_count = TX_MAX_TARGET;
+	info->shared_info_page->rx_evtchn = 0;
+	info->shared_info_page->tx_evtchn = 0;
+
+	/*
+	 * Walk the reference list with a local copy of the head so that
+	 * info->gref_rxtx_head keeps pointing at the start of the list,
+	 * and copy every reference into the shared array.
+	 */
+	gref_rxtx_head = info->gref_rxtx_head;
+	for (i = 0; i < (RXTX_GREFS); i++) {
+		gref = gnttab_claim_grant_reference(&gref_rxtx_head);
+		if (gref < 0) {
+			pr_err("not expected end of list\n");
+			goto exit;
+		}
+		info->shared_info_page->rxtx_grefs[i] = (grant_ref_t)gref;
+	}
+
+	/* setup shared_info_page */
+	info->shared_info_page->rx_ring = &info->rx;
+	info->shared_info_page->tx_ring = &info->tx;
+	/* it's not secure - we need here something else */
+	info->shared_info_page->info = info;
+
+	info->shared_info_page->is_connected = 0;
+	info->shared_info_page->disconnect_count = 0;
+
+	/* share struct by UIO */
+	ret = xennet_uio_init(xbdev, info);
+	if (ret) {
+		pr_err("xennet_uio_init failed\n");
+		goto exit;
+	}
+
+	return info;
+exit:
+	if (info->gref_rxtx_head != GRANT_INVALID_REF)
+		gnttab_free_grant_references(info->gref_rxtx_head);
+	if (info->shared_info_page)
+		free_page((unsigned long)info->shared_info_page);
+	if (info->rxs)
+		free_page((unsigned long)info->rxs);
+	if (info->txs)
+		free_page((unsigned long)info->txs);
+	kfree(info);
+	return NULL;
+}
+
+/*
+ * Release everything owned by the netfront_info stored as driver data of
+ * @xbdev: the UIO registration, the grant reference list, the shared info
+ * page, the ring pages that are still attached, and the structure itself.
+ */
+void xennet_free_resources(struct xenbus_device *xbdev)
+{
+	struct netfront_info *nfi = dev_get_drvdata(&xbdev->dev);
+
+	xennet_uio_uninit(nfi);
+	gnttab_free_grant_references(nfi->gref_rxtx_head);
+	free_page((unsigned long)nfi->shared_info_page);
+
+	/* after a deferred free the ring pointers are already NULL */
+	if (nfi->rxs != NULL)
+		free_page((unsigned long)nfi->rxs);
+	if (nfi->txs != NULL)
+		free_page((unsigned long)nfi->txs);
+
+	kfree(nfi);
+}
+
+/*
+ * Prepare the frontend side of the connection: publish the backend domain
+ * id and the MAC address in the shared info page, initialise both shared
+ * rings, grant the ring pages to the backend and allocate the event
+ * channel(s).  A separate RX channel is allocated only when the backend
+ * advertises "feature-split-event-channels"; otherwise RX reuses the TX
+ * channel.
+ *
+ * Returns 0 on success or a negative error code.
+ *
+ * NOTE(review): the "fail" path does not release ring grants obtained
+ * earlier in this function - confirm whether a caller cleans them up.
+ */
+static int setup_netfront(struct xenbus_device *xbdev,
+ struct netfront_info *info)
+{
+ unsigned int feature_split_evtchn;
+ int err;
+
+ info->tx_ring_ref = GRANT_INVALID_REF;
+ info->rx_ring_ref = GRANT_INVALID_REF;
+ info->rx.sring = NULL;
+ info->tx.sring = NULL;
+
+ /* share otherend_id with user */
+ info->shared_info_page->otherend_id = xbdev->otherend_id;
+
+ /* a missing or unreadable feature node is treated as "not supported" */
+ err = xenbus_scanf(XBT_NIL, xbdev->otherend,
+ "feature-split-event-channels", "%u",
+ &feature_split_evtchn);
+ if (err < 0)
+ feature_split_evtchn = 0;
+
+ /* read mac */
+ err = xen_net_read_mac(xbdev, info->shared_info_page->mac);
+ if (err) {
+ xenbus_dev_fatal(xbdev, err, "parsing %s/mac",
+ xbdev->nodename);
+ goto fail;
+ }
+
+ /* set up queues */
+ SHARED_RING_INIT(info->txs);
+ FRONT_RING_INIT(&info->tx, info->txs, PAGE_SIZE);
+
+ SHARED_RING_INIT(info->rxs);
+ FRONT_RING_INIT(&info->rx, info->rxs, PAGE_SIZE);
+
+ /* a non-negative return value is the grant reference of the ring page */
+ err = xenbus_grant_ring(info->xbdev, virt_to_mfn(info->txs));
+ if (err < 0) {
+ pr_err("xenbus_grant_ring for txs failed!\n");
+ goto fail;
+ }
+ info->tx_ring_ref = err;
+
+ err = xenbus_grant_ring(info->xbdev, virt_to_mfn(info->rxs));
+ if (err < 0) {
+ pr_err("xenbus_grant_ring for rxs failed!\n");
+ goto fail;
+ }
+ info->rx_ring_ref = err;
+
+ /* alloc eventchn */
+ pr_info("feature_split_evtchn: %d\n",
+ (int)feature_split_evtchn);
+
+ err = xenbus_alloc_evtchn(xbdev, &info->shared_info_page->tx_evtchn);
+ if (err)
+ goto fail;
+
+ if (feature_split_evtchn) {
+ err = xenbus_alloc_evtchn(xbdev,
+ &info->shared_info_page->rx_evtchn);
+ if (err)
+ goto fail_split;
+ } else {
+ /* single channel mode: RX shares the TX event channel */
+ info->shared_info_page->rx_evtchn =
+ info->shared_info_page->tx_evtchn;
+ }
+
+ return 0;
+fail_split:
+ xenbus_free_evtchn(info->xbdev, info->shared_info_page->tx_evtchn);
+fail:
+ pr_err("setup_netfront failed\n");
+ return err;
+}
+
+/*
+ * Common code used when first setting up, and when resuming.
+ *
+ * Runs setup_netfront() and then writes the connection parameters (ring
+ * references, event channel(s) and feature flags) to this device's
+ * xenstore directory inside one transaction.  The transaction is restarted
+ * when xenbus_transaction_end() reports -EAGAIN.
+ *
+ * Returns 0 on success or a negative error code.  Failures after
+ * setup_netfront() succeeded go through xennet_disconnect_backend() with
+ * deferred free enabled.
+ */
+static int talk_to_netback(struct xenbus_device *xbdev,
+ struct netfront_info *info)
+{
+ const char *message;
+ struct xenbus_transaction xbt;
+ int err;
+
+ /* Create shared ring, alloc event channel. */
+ err = setup_netfront(xbdev, info);
+ if (err)
+ goto out;
+
+again:
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ xenbus_dev_fatal(xbdev, err, "starting transaction");
+ goto destroy_ring;
+ }
+
+ err = xenbus_printf(xbt, xbdev->nodename, "tx-ring-ref",
+ "%u", info->tx_ring_ref);
+ if (err) {
+ message = "writing tx ring-ref";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, xbdev->nodename, "rx-ring-ref",
+ "%u", info->rx_ring_ref);
+ if (err) {
+ message = "writing rx ring-ref";
+ goto abort_transaction;
+ }
+
+ /* equal channels mean single-channel mode: write one "event-channel" node */
+ if (info->shared_info_page->tx_evtchn ==
+ info->shared_info_page->rx_evtchn) {
+ err = xenbus_printf(xbt, xbdev->nodename, "event-channel",
+ "%u", info->shared_info_page->tx_evtchn);
+ if (err) {
+ message = "writing event-channel";
+ goto abort_transaction;
+ }
+ } else {
+ err = xenbus_printf(xbt, xbdev->nodename, "event-channel-tx",
+ "%u", info->shared_info_page->tx_evtchn);
+ if (err) {
+ message = "writing event-channel";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, xbdev->nodename, "event-channel-rx",
+ "%u", info->shared_info_page->rx_evtchn);
+ if (err) {
+ message = "writing event-channel";
+ goto abort_transaction;
+ }
+ }
+
+ err = xenbus_printf(xbt, xbdev->nodename, "request-rx-copy", "%u", 1);
+ if (err) {
+ message = "writing request-rx-copy";
+ goto abort_transaction;
+ }
+
+ err = xenbus_printf(xbt, xbdev->nodename, "feature-rx-notify",
+ "%d", 1);
+ if (err) {
+ message = "writing feature-rx-notify";
+ goto abort_transaction;
+ }
+
+ err = xenbus_printf(xbt, xbdev->nodename, "feature-sg", "%d", 1);
+ if (err) {
+ message = "writing feature-sg";
+ goto abort_transaction;
+ }
+
+ err = xenbus_printf(xbt, xbdev->nodename, "feature-gso-tcpv4",
+ "%d", 1);
+ if (err) {
+ message = "writing feature-gso-tcpv4";
+ goto abort_transaction;
+ }
+
+ /* commit; -EAGAIN restarts the whole transaction from "again" */
+ err = xenbus_transaction_end(xbt, 0);
+ if (err) {
+ if (err == -EAGAIN)
+ goto again;
+ xenbus_dev_fatal(xbdev, err, "completing transaction");
+ goto destroy_ring;
+ }
+
+ return 0;
+abort_transaction:
+ /* second argument 1 = abort the transaction instead of committing it */
+ xenbus_transaction_end(xbt, 1);
+ xenbus_dev_fatal(xbdev, err, "%s", message);
+destroy_ring:
+ xennet_disconnect_backend(info, 1);
+out:
+ pr_err("talk_to_netback failed\n");
+ return err;
+}
+
+/*
+ * Connect to the backend.  The backend has to advertise "feature-rx-copy";
+ * if it does not, -ENODEV is returned.  Otherwise the connection is
+ * negotiated by talk_to_netback() and, only if that succeeds, the shared
+ * info page is marked as connected.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int xennet_connect_backend(struct netfront_info *info)
+{
+	int err;
+	unsigned int feature_rx_copy;
+
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, "feature-rx-copy",
+			   "%u", &feature_rx_copy);
+	if (err != 1)
+		feature_rx_copy = 0;
+
+	if (!feature_rx_copy) {
+		pr_info("backend does not support copying receive path\n");
+		return -ENODEV;
+	}
+
+	err = talk_to_netback(info->xbdev, info);
+	if (err) {
+		pr_err("talk_to_netback failed!\n");
+		/* do not report a connection that was never established */
+		return err;
+	}
+
+	info->shared_info_page->is_connected = 1;
+
+	return 0;
+}
+
+/*
+ * Tear down the connection state shared with the backend: free the event
+ * channel(s), end foreign access on both ring grants, reset the ring
+ * bookkeeping and mark the shared info page as disconnected (incrementing
+ * disconnect_count).
+ *
+ * When deffered_free is non-zero the ring pages are handed to
+ * xennet_end_access() and info->txs / info->rxs are cleared; otherwise a
+ * NULL page is passed and the pointers are kept.
+ *
+ * NOTE(review): the event channels are freed unconditionally, including
+ * when none was allocated or when an earlier call already freed them -
+ * confirm that xenbus_free_evtchn() tolerates this.
+ */
+static void xennet_disconnect_backend(struct netfront_info *info,
+ int deffered_free)
+{
+ /* a distinct RX channel exists only in split event channel mode */
+ if (info->shared_info_page->tx_evtchn !=
+ info->shared_info_page->rx_evtchn) {
+ xenbus_free_evtchn(info->xbdev,
+ info->shared_info_page->rx_evtchn);
+ }
+ xenbus_free_evtchn(info->xbdev, info->shared_info_page->tx_evtchn);
+
+ if (deffered_free) {
+ xennet_end_access(info->tx_ring_ref, info->txs);
+ xennet_end_access(info->rx_ring_ref, info->rxs);
+ info->txs = NULL;
+ info->rxs = NULL;
+ } else {
+ xennet_end_access(info->tx_ring_ref, NULL);
+ xennet_end_access(info->rx_ring_ref, NULL);
+ }
+
+ info->tx_ring_ref = GRANT_INVALID_REF;
+ info->rx_ring_ref = GRANT_INVALID_REF;
+ info->rx.sring = NULL;
+ info->tx.sring = NULL;
+
+ info->shared_info_page->is_connected = 0;
+ info->shared_info_page->disconnect_count++;
+}
+
+
+/**
+ * Entry point to this code when a new device is created. Allocate the basic
+ * structures and the ring buffers for communication with the backend and
+ * attach them to the device as driver data.
+ *
+ * Returns 0 on success or -ENOMEM when the resources cannot be allocated,
+ * so that the other callbacks never see a device without driver data.
+ */
+static int xennet_probe(struct xenbus_device *xbdev,
+			const struct xenbus_device_id *id)
+{
+	struct netfront_info *info;
+
+	info = xennet_alloc_resources(xbdev);
+	if (info == NULL)
+		return -ENOMEM;
+
+	dev_set_drvdata(&xbdev->dev, info);
+
+	return 0;
+}
+
+/**
+ * Resume callback of the xenbus driver.  Drops the current backend
+ * connection while keeping the ring pages (deferred free disabled), so the
+ * same memory region can be used again.  Always returns 0.
+ */
+static int xennet_resume(struct xenbus_device *xbdev)
+{
+	pr_devel("%s\n", xbdev->nodename);
+
+	xennet_disconnect_backend(dev_get_drvdata(&xbdev->dev), 0);
+
+	return 0;
+}
+
+/**
+ * Callback received when the backend's state changes.
+ *
+ * Only three backend states trigger an action: InitWait connects to the
+ * backend (when the frontend is still Initialising), Closed restarts the
+ * frontend if it is itself already Closed, and Closing (or Closed with a
+ * frontend that is not Closed yet) closes the frontend.
+ */
+static void netback_changed(struct xenbus_device *xbdev,
+ enum xenbus_state backend_state)
+{
+ struct netfront_info *info = dev_get_drvdata(&xbdev->dev);
+
+ pr_devel("%s\n", xenbus_strstate(backend_state));
+
+ switch (backend_state) {
+ case XenbusStateInitialising:
+ case XenbusStateInitialised:
+ case XenbusStateReconfiguring:
+ case XenbusStateReconfigured:
+ break;
+ case XenbusStateUnknown:
+ break;
+
+ case XenbusStateInitWait:
+ if (xbdev->state != XenbusStateInitialising)
+ break;
+ /* on failure only the node name is logged and the state is left alone */
+ if (xennet_connect_backend(info) != 0) {
+ pr_err("%s\n", xbdev->nodename);
+ break;
+ }
+ xenbus_switch_state(xbdev, XenbusStateConnected);
+ break;
+
+ case XenbusStateConnected:
+ break;
+
+ case XenbusStateClosed:
+ if (xbdev->state == XenbusStateClosed) {
+ xenbus_switch_state(xbdev, XenbusStateInitialising);
+ break;
+ }
+ /* fall through: frontend not closed yet, handle like Closing */
+
+ case XenbusStateClosing:
+ xenbus_frontend_closed(xbdev);
+ break;
+ }
+}
+
+/* Device types claimed by this driver; the empty entry ends the table. */
+static const struct xenbus_device_id netfront_ids[] = {
+ { "vif" },
+ { "" }
+};
+
+/*
+ * Remove callback of the xenbus driver: disconnect from the backend with
+ * deferred free of the ring pages, then release all remaining resources
+ * of the device.  Always returns 0.
+ */
+static int xennet_remove(struct xenbus_device *xbdev)
+{
+	pr_devel("%s\n", xbdev->nodename);
+
+	xennet_disconnect_backend(dev_get_drvdata(&xbdev->dev), 1);
+	xennet_free_resources(xbdev);
+
+	return 0;
+}
+
+/* xenbus frontend driver description registered by netif_init(). */
+static struct xenbus_driver xenuio_driver = {
+ .ids = netfront_ids,
+ .probe = xennet_probe,
+ .remove = xennet_remove,
+ .resume = xennet_resume,
+ .otherend_changed = netback_changed,
+};
+
+/*
+ * IOCTL_EVTCHN_NOTIFY_GRANT worker: release the grants listed in
+ * rel_gref[], grant access to the pages listed in s[], advance the
+ * producer index of the selected ring by "count" and notify the backend
+ * if the ring macros ask for it.
+ *
+ * The request is copied into kernel memory before any field is used and
+ * both counters are bounded by MAX_TARGET, the size of the arrays.
+ *
+ * NOTE(review): ng->info and the ring pointer are kernel addresses that
+ * come back from user space and are only checked against each other;
+ * this is still not secure and needs a per-device lookup.
+ */
+static long xennet_notify_grant(void __user *uarg)
+{
+	struct ioctl_evtchn_notify_grant *ng;
+	int notify = 0;
+	uint16_t i;
+	long rc;
+
+	/* the request is several kilobytes large: keep it off the stack */
+	ng = kmalloc(sizeof(*ng), GFP_KERNEL);
+	if (ng == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(ng, uarg, sizeof(*ng))) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	if (ng->rel_count > MAX_TARGET || ng->count > MAX_TARGET) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	for (i = 0; i < ng->rel_count; i++)
+		gnttab_end_foreign_access_ref(ng->rel_gref[i], 0);
+
+	if (ng->count) {
+		for (i = 0; i < ng->count; i++) {
+			/* TX buffers are granted read-only to the backend */
+			gnttab_grant_foreign_access_ref(
+				ng->s[i].gref,
+				ng->otherend_id,
+				pfn_to_mfn(ng->s[i].paddr),
+				(!ng->is_rx));
+		}
+
+		if (ng->is_rx) {
+			struct xen_netif_rx_front_ring *rx = ng->u.rx_ring;
+
+			if (&ng->info->rx != rx) {
+				pr_err("bad info or rx ring addr\n");
+				rc = -ENOSYS;
+				goto out;
+			}
+			rx->req_prod_pvt += ng->count;
+			RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(rx, notify);
+		} else {
+			struct xen_netif_tx_front_ring *tx = ng->u.tx_ring;
+
+			if (&ng->info->tx != tx) {
+				pr_err("bad info or tx ring addr\n");
+				rc = -ENOSYS;
+				goto out;
+			}
+			tx->req_prod_pvt += ng->count;
+			RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(tx, notify);
+		}
+
+		if (notify)
+			notify_remote_via_evtchn(ng->port);
+	}
+
+	rc = 0;
+out:
+	kfree(ng);
+	return rc;
+}
+
+/*
+ * Operations that we can't do through the shared memory.
+ *
+ * IOCTL_EVTCHN_NOTIFY       - notify the remote end of an event channel.
+ * IOCTL_EVTCHN_NOTIFY_GRANT - see xennet_notify_grant().
+ *
+ * Returns 0 on success, -EFAULT when the argument cannot be copied,
+ * -EINVAL / -ENOMEM / -ENOSYS from the grant path, and -ENOSYS for an
+ * unknown command.
+ */
+static long xennet_ioctl(struct file *file,
+			 unsigned int cmd, unsigned long arg)
+{
+	void __user *uarg = (void __user *)arg;
+
+	switch (cmd) {
+	case IOCTL_EVTCHN_NOTIFY:
+	{
+		struct ioctl_evtchn_notify notify;
+
+		if (copy_from_user(&notify, uarg, sizeof(notify)))
+			return -EFAULT;
+		notify_remote_via_evtchn(notify.port);
+		return 0;
+	}
+	case IOCTL_EVTCHN_NOTIFY_GRANT:
+		return xennet_notify_grant(uarg);
+	default:
+		return -ENOSYS;
+	}
+}
+
+/*
+ * File operations of the misc device: only the ioctl entry point is
+ * implemented, every other handler is left unset.
+ */
+static const struct file_operations xennet_fops = {
+ .owner = THIS_MODULE,
+ .read = NULL/*xennet_read*/,
+ .write = NULL/*xennet_write*/,
+ .unlocked_ioctl = xennet_ioctl,
+ .poll = NULL/*xennet_poll*/,
+ .fasync = NULL/*xennet_fasync*/,
+ .open = NULL/*xennet_open*/,
+ .mmap = NULL/*xennet_mmap*/,
+ .release = NULL/*xennet_release*/,
+ .llseek = no_llseek,
+};
+
+/* Misc character device named XEN_PMD_UIO_NAME with a dynamic minor. */
+static struct miscdevice xennet_miscdev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = XEN_PMD_UIO_NAME,
+ .fops = &xennet_fops,
+};
+
+/*
+ * read() handler of /proc/DOMAIN_PROC: return the domain type string
+ * prepared by netif_init().
+ *
+ * The file position in @off selects where to continue, so short reads
+ * deliver consecutive parts of the string and concurrent readers do not
+ * share state.  Only the string itself is returned, without the
+ * terminating NUL or the unused tail of the buffer.
+ *
+ * Returns the number of bytes copied, 0 at end of file, or a negative
+ * error code.
+ */
+static ssize_t read_domain(struct file *f, char __user *buf,
+			   size_t count, loff_t *off)
+{
+	return simple_read_from_buffer(buf, count, off, domain_name,
+				       strlen(domain_name));
+}
+
+/* File operations of the /proc entry: read-only access via read_domain(). */
+static const struct file_operations domain_fops = {
+ .owner = THIS_MODULE,
+ .read = read_domain,
+};
+
+/*
+ * Module entry point.  Refuses to load outside a Xen domain, then creates
+ * the /proc entry reporting the domain type, registers the misc device
+ * used for ioctls and finally registers the xenbus frontend driver.
+ *
+ * Everything registered so far is rolled back when a later step fails.
+ * Returns 0 on success or a negative error code.
+ */
+static int __init netif_init(void)
+{
+	const char *type;
+	int err;
+
+	if (!xen_domain()) {
+		pr_err("xen bare hw\n");
+		return -ENODEV;
+	}
+
+	/* never index past the table if the kernel knows more domain types */
+	if ((unsigned int)xen_domain_type < ARRAY_SIZE(domains))
+		type = domains[xen_domain_type];
+	else
+		type = "unknown";
+
+	pr_info("xen %s domain\n", type);
+
+	snprintf(domain_name, sizeof(domain_name), "%s\n", type);
+
+	if (!xen_feature(XENFEAT_auto_translated_physmap))
+		pr_info("feature auto_translated_physmap is disabled\n");
+
+	pr_info("gnttab version: %d\n", (int)__gnttab_version());
+
+	domain_proc = proc_create(DOMAIN_PROC, S_IRUGO, NULL, &domain_fops);
+	if (domain_proc == NULL) {
+		pr_err("could not create /proc/%s\n", DOMAIN_PROC);
+		return -ENOMEM;
+	}
+
+	pr_info("/proc/%s created\n", DOMAIN_PROC);
+
+	err = misc_register(&xennet_miscdev);
+	if (err != 0) {
+		pr_err("could not register char device\n");
+		goto fail_proc;
+	}
+
+	pr_info("initialising xen virtual ethernet driver\n");
+
+	err = xenbus_register_frontend(&xenuio_driver);
+	if (err != 0)
+		goto fail_misc;
+
+	return 0;
+
+fail_misc:
+	misc_deregister(&xennet_miscdev);
+fail_proc:
+	remove_proc_entry(DOMAIN_PROC, NULL);
+	return err;
+}
+module_init(netif_init);
+
+/*
+ * Module exit: remove the /proc entry, unregister the xenbus driver and
+ * deregister the misc device, in that order.
+ */
+static void __exit netif_exit(void)
+{
+ remove_proc_entry(DOMAIN_PROC, NULL);
+
+ xenbus_unregister_driver(&xenuio_driver);
+
+ misc_deregister(&xennet_miscdev);
+}
+module_exit(netif_exit);
+
+MODULE_DESCRIPTION("Xen virtual network device frontend");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("xen:vif");
+MODULE_ALIAS("xennet");
--
2.1.4
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH 5/5] xen: net-front poll mode driver
2015-02-15 15:24 [dpdk-dev] [PATCH 1/5] xen: allow choosing dom0 support at runtime Stephen Hemminger
` (2 preceding siblings ...)
2015-02-15 15:24 ` [dpdk-dev] [PATCH 4/5] xen: add uio driver Stephen Hemminger
@ 2015-02-15 15:24 ` Stephen Hemminger
2015-07-09 0:10 ` [dpdk-dev] [PATCH 1/5] xen: allow choosing dom0 support at runtime Thomas Monjalon
4 siblings, 0 replies; 23+ messages in thread
From: Stephen Hemminger @ 2015-02-15 15:24 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger
This patch adds a DPDK poll mode driver that provides the same
functionality as the net-front driver in the Linux kernel.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
v2 -- no changes
config/common_linuxapp | 6 +
lib/Makefile | 1 +
lib/librte_pmd_xen/Makefile | 30 ++
lib/librte_pmd_xen/virt_dev.c | 400 +++++++++++++++++++++++++
lib/librte_pmd_xen/virt_dev.h | 30 ++
lib/librte_pmd_xen/xen_adapter_info.h | 64 ++++
lib/librte_pmd_xen/xen_dev.c | 375 +++++++++++++++++++++++
lib/librte_pmd_xen/xen_dev.h | 97 ++++++
lib/librte_pmd_xen/xen_logs.h | 23 ++
lib/librte_pmd_xen/xen_rxtx.c | 546 ++++++++++++++++++++++++++++++++++
lib/librte_pmd_xen/xen_rxtx.h | 110 +++++++
mk/rte.app.mk | 4 +
12 files changed, 1686 insertions(+)
create mode 100644 lib/librte_pmd_xen/Makefile
create mode 100644 lib/librte_pmd_xen/virt_dev.c
create mode 100644 lib/librte_pmd_xen/virt_dev.h
create mode 100644 lib/librte_pmd_xen/xen_adapter_info.h
create mode 100644 lib/librte_pmd_xen/xen_dev.c
create mode 100644 lib/librte_pmd_xen/xen_dev.h
create mode 100644 lib/librte_pmd_xen/xen_logs.h
create mode 100644 lib/librte_pmd_xen/xen_rxtx.c
create mode 100644 lib/librte_pmd_xen/xen_rxtx.h
diff --git a/config/common_linuxapp b/config/common_linuxapp
index d428f84..668fc8d 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -232,6 +232,12 @@ CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
#
+# Compile XEN net-front PMD driver
+#
+CONFIG_RTE_LIBRTE_XEN_PMD=n
+CONFIG_RTE_LIBRTE_XEN_DEBUG_INIT=n
+
+#
# Do prefetch of packet data within PMD driver receive function
#
CONFIG_RTE_PMD_PACKET_PREFETCH=y
diff --git a/lib/Makefile b/lib/Makefile
index d617d81..f405e40 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -52,6 +52,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET) += librte_pmd_af_packet
DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += librte_pmd_virtio
DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += librte_pmd_vmxnet3
DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt
+DIRS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += librte_pmd_xen
DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
diff --git a/lib/librte_pmd_xen/Makefile b/lib/librte_pmd_xen/Makefile
new file mode 100644
index 0000000..d294d03
--- /dev/null
+++ b/lib/librte_pmd_xen/Makefile
@@ -0,0 +1,30 @@
+#
+# Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+# All rights reserved.
+#
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_xen.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+VPATH += $(RTE_SDK)/lib/librte_pmd_xen
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += virt_dev.c
+SRCS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += xen_dev.c
+SRCS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += xen_rxtx.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += lib/librte_eal lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += lib/librte_mempool lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += lib/librte_net lib/librte_malloc
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_pmd_xen/virt_dev.c b/lib/librte_pmd_xen/virt_dev.c
new file mode 100644
index 0000000..f824977
--- /dev/null
+++ b/lib/librte_pmd_xen/virt_dev.c
@@ -0,0 +1,400 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include <fcntl.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+
+#include "virt_dev.h"
+
+/* One UIO memory mapping as described by sysfs and mapped by this file. */
+struct uio_map {
+ /* address returned by mmap(), NULL while not mapped */
+ void *addr;
+ /* offset passed to mmap() on the uio device */
+ uint64_t offset;
+ /* size read from the "size" attribute of the map */
+ uint64_t size;
+ /* value read from the "addr" attribute of the map */
+ uint64_t phaddr;
+};
+
+/*
+ * Mapping details of one uio device.
+ * NOTE(review): "next" and "pci_addr" are not used by the code visible in
+ * this file - presumably kept for symmetry with the EAL PCI code.
+ */
+struct uio_resource {
+ TAILQ_ENTRY(uio_resource) next;
+ struct rte_pci_addr pci_addr;
+ /* path of the device node, e.g. /dev/uioN */
+ char path[PATH_MAX];
+ /* number of valid entries in maps[] */
+ size_t nb_maps;
+ struct uio_map maps[PCI_MAX_RESOURCE];
+};
+
+/*
+ * Read the first line of a sysfs attribute and convert it to an unsigned
+ * 64-bit number (base auto-detected by strtoull).  The number must be
+ * followed directly by a newline.
+ *
+ * Returns 0 and stores the value in *val on success, -1 otherwise.
+ * *val is written as soon as a line could be read, even if the line is
+ * then rejected.
+ */
+static int
+virt_parse_sysfs_value(const char *filename, uint64_t *val)
+{
+	char line[BUFSIZ];
+	char *stop = NULL;
+	int rc = -1;
+	FILE *fp = fopen(filename, "r");
+
+	if (fp == NULL) {
+		RTE_LOG(ERR, EAL, "cannot open sysfs value %s", filename);
+		return -1;
+	}
+
+	if (fgets(line, sizeof(line), fp) == NULL) {
+		RTE_LOG(ERR, EAL, "cannot read sysfs value %s", filename);
+		goto done;
+	}
+
+	*val = strtoull(line, &stop, 0);
+	if (line[0] == '\0' || stop == NULL || *stop != '\n')
+		RTE_LOG(ERR, EAL, "cannot parse sysfs value %s", filename);
+	else
+		rc = 0;
+
+done:
+	fclose(fp);
+	return rc;
+}
+
+/*
+ * NOTE(review): if off_t is a signed type this evaluates to all-ones, so
+ * the "offset > OFF_MAX" check below can never trigger - confirm intent.
+ */
+#define OFF_MAX ((uint64_t)(off_t)-1)
+/*
+ * Read the description of the maps of a uio device from sysfs.
+ *
+ * For map0, map1, ... below "<devname>/maps" the "offset", "size" and
+ * "addr" attributes are parsed into maps[]; scanning stops at the first
+ * map directory that does not exist or after nb_maps entries.
+ *
+ * Returns the number of maps found, or -1 if an attribute cannot be parsed
+ * or a value is out of range.
+ */
+static ssize_t
+virt_uio_get_mappings(const char *devname, struct uio_map maps[],
+ size_t nb_maps)
+{
+ size_t i;
+ char dirname[PATH_MAX];
+ char filename[PATH_MAX];
+ uint64_t offset, size;
+
+ for (i = 0; i != nb_maps; i++) {
+
+ snprintf(dirname, sizeof(dirname),
+ "%s/maps/map%zu", devname, i);
+
+ /* the first missing mapN directory ends the list */
+ if (access(dirname, F_OK) != 0)
+ break;
+
+ snprintf(filename, sizeof(filename), "%s/offset", dirname);
+ if (virt_parse_sysfs_value(filename, &offset) < 0) {
+ RTE_LOG(ERR, EAL, "cannot parse offset of %s",
+ dirname);
+ return -1;
+ }
+
+ snprintf(filename, sizeof(filename), "%s/size", dirname);
+ if (virt_parse_sysfs_value(filename, &size) < 0) {
+ RTE_LOG(ERR, EAL, "cannot parse size of %s", dirname);
+ return -1;
+ }
+
+ snprintf(filename, sizeof(filename), "%s/addr", dirname);
+ if (virt_parse_sysfs_value(filename, &maps[i].phaddr) < 0) {
+ RTE_LOG(ERR, EAL, "cannot parse addr of %s", dirname);
+ return -1;
+ }
+
+ if ((offset > OFF_MAX) || (size > SIZE_MAX)) {
+ RTE_LOG(ERR, EAL,
+ "offset/size exceed system max value");
+ return -1;
+ }
+
+ maps[i].offset = offset;
+ maps[i].size = size;
+ }
+
+ return i;
+}
+
+/*
+ * Map @size bytes at @offset of the device node @devname read/write and
+ * shared.
+ *
+ * @requested_addr is passed to mmap() as a hint; when it is not NULL the
+ * mapping is only accepted if it ends up exactly at that address.
+ *
+ * The file descriptor is closed before returning on every path: the
+ * mapping stays valid without it.  Returns the mapped address, or NULL on
+ * failure (nothing stays mapped in that case).
+ */
+static void *
+virt_map_resource(void *requested_addr, const char *devname, off_t offset,
+		  size_t size)
+{
+	int fd;
+	void *mapaddr;
+
+	fd = open(devname, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s",
+			devname, strerror(errno));
+		return NULL;
+	}
+
+	mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
+		       MAP_SHARED, fd, offset);
+	if (mapaddr == MAP_FAILED || (requested_addr != NULL &&
+				      mapaddr != requested_addr)) {
+		/* log first: munmap()/close() may overwrite errno */
+		RTE_LOG(ERR, EAL,
+			"cannot mmap(%s(%d), %p, 0x%lx, 0x%lx): %s (%p)",
+			devname, fd, requested_addr,
+			(unsigned long)size, (unsigned long)offset,
+			strerror(errno), mapaddr);
+		/* do not keep a mapping placed at the wrong address */
+		if (mapaddr != MAP_FAILED)
+			munmap(mapaddr, size);
+		close(fd);
+		return NULL;
+	}
+
+	close(fd);
+
+	RTE_LOG(DEBUG, EAL, "memory mapped at %p", mapaddr);
+
+	return mapaddr;
+}
+
+/*
+ * Unmap the regions recorded by virt_uio_map_addresses().
+ * Entries with a NULL address or a zero length are skipped.
+ */
+void
+virt_uio_unmap_addresses(void **addresses, size_t *lens, int max_addresses)
+{
+	int j;
+
+	for (j = 0; j < max_addresses; j++) {
+		if (addresses[j] && lens[j]) {
+			munmap(addresses[j], lens[j]);
+			RTE_LOG(DEBUG, EAL, "memory unmapped %p %zu",
+				addresses[j], lens[j]);
+		}
+	}
+}
+
+/*
+ * Locate the uio device below the sysfs directory @dirname and map its
+ * memory regions.
+ *
+ * The first "uioN" entry found in @dirname selects /dev/uioN.  Up to
+ * @max_addresses maps are mapped; map j is mapped at file offset
+ * j * page size, and its address and length are stored in addresses[j]
+ * and lens[j].
+ *
+ * Returns 0 on success and a negative value on failure.  On failure every
+ * region mapped by this call is unmapped again and its addresses[] /
+ * lens[] entries are reset, so nothing is left behind for the caller.
+ */
+int
+virt_uio_map_addresses(const char *dirname, void **addresses, size_t *lens,
+		       int max_addresses)
+{
+	int j;
+	int ret = -1;
+	int found = 0;
+	DIR *dir;
+	struct dirent *e;
+	char dirname2[PATH_MAX];
+	char devname[PATH_MAX];
+	unsigned uio_num = 0;
+	struct uio_resource *uio_res;
+	struct uio_map *maps;
+	uint64_t pagesz;
+	ssize_t nb_maps;
+	uint64_t offset;
+	void *mapaddr;
+
+	RTE_LOG(DEBUG, EAL, "dirname %s", dirname);
+
+	dir = opendir(dirname);
+
+	if (!dir) {
+		RTE_LOG(ERR, EAL, "Cannot opendir %s", dirname);
+		return -1;
+	}
+
+	while ((e = readdir(dir)) != NULL) {
+		const char *digits;
+		char *endptr;
+
+		if (strncmp(e->d_name, "uio", 3) != 0)
+			continue;
+
+		/* the number starts right behind the "uio" prefix */
+		digits = e->d_name + (sizeof("uio") - 1);
+
+		errno = 0;
+		uio_num = strtoull(digits, &endptr, 10);
+		/* accept the entry only if at least one digit was parsed */
+		if (errno == 0 && endptr != digits) {
+			snprintf(dirname2, sizeof(dirname2), "%s/uio%u",
+				 dirname, uio_num);
+			found = 1;
+			break;
+		}
+	}
+	/* "e" points into the stream and must not be used after closedir() */
+	closedir(dir);
+
+	if (!found) {
+		RTE_LOG(ERR, EAL, "dirname %s not managed, skipping",
+			dirname);
+		return -1;
+	}
+
+	/* scratch storage for the sysfs description; released before return */
+	uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
+	if (uio_res == NULL) {
+		RTE_LOG(ERR, EAL, "cannot store uio mmap details");
+		return -1;
+	}
+
+	snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
+	snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
+
+	nb_maps = virt_uio_get_mappings(dirname2, uio_res->maps,
+			sizeof(uio_res->maps) / sizeof(uio_res->maps[0]));
+	if (nb_maps < 0) {
+		ret = nb_maps;
+		goto out;
+	}
+
+	uio_res->nb_maps = nb_maps;
+	pagesz = sysconf(_SC_PAGESIZE);
+	maps = uio_res->maps;
+
+	for (j = 0; j < nb_maps && j < max_addresses; j++) {
+		/* UIO selects map j through an mmap offset of j pages */
+		offset = j * pagesz;
+		mapaddr = virt_map_resource(NULL, devname,
+				(off_t)offset, (size_t)maps[j].size);
+		if (!mapaddr)
+			goto unmap;
+		maps[j].addr = mapaddr;
+		maps[j].offset = offset;
+		addresses[j] = mapaddr;
+		lens[j] = (size_t)maps[j].size;
+	}
+
+	ret = 0;
+	goto out;
+
+unmap:
+	/* roll back the regions mapped before the failure */
+	while (j-- > 0) {
+		munmap(addresses[j], lens[j]);
+		addresses[j] = NULL;
+		lens[j] = 0;
+	}
+out:
+	rte_free(uio_res);
+	return ret;
+}
+
+/*
+ * Allocate an ethdev entry called @name and bind it to @eth_drv.
+ * In the primary process the private data area of @dev_private_size
+ * bytes is allocated as well; failing to do so is fatal (rte_panic).
+ *
+ * Returns the new device, or NULL if no ethdev entry could be allocated.
+ */
+static struct rte_eth_dev *
+virt_eth_dev_allocate(const char *name,
+		      struct eth_driver *eth_drv,
+		      unsigned dev_private_size)
+{
+	struct rte_eth_dev *ethdev = rte_eth_dev_allocate(name);
+
+	if (ethdev == NULL) {
+		RTE_LOG(ERR, EAL, "virt eth_dev allocation was failed (%d)",
+			ENOMEM);
+		return NULL;
+	}
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		void *priv = rte_zmalloc("eth_dev private data structure",
+					 dev_private_size,
+					 RTE_CACHE_LINE_SIZE);
+
+		if (priv == NULL)
+			rte_panic("virt eth_dev private data allocation was failed\n");
+		ethdev->data->dev_private = priv;
+	}
+
+	ethdev->driver = eth_drv;
+	ethdev->data->rx_mbuf_alloc_failed = 0;
+
+	TAILQ_INIT(&(ethdev->callbacks));
+
+	return ethdev;
+}
+
+/*
+ * Create and initialise the ethdev for one virtual device.
+ *
+ * A placeholder rte_pci_device is allocated and attached to the new
+ * ethdev, then the generic eth_dev_init hook and the virt specific
+ * virt_eth_dev_init hook (which also receives @dirname) are run if set.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails, or the error
+ * returned by one of the hooks.
+ *
+ * NOTE(review): on failure the ethdev entry and its private data stay
+ * allocated; no release call for them is visible in this file.
+ */
+static int
+virt_eth_dev_init(const char *name,
+		  struct virt_eth_driver *virt_eth_drv,
+		  const char *dirname)
+{
+	int err = -ENOMEM;
+	struct rte_eth_dev *eth_dev = NULL;
+	struct eth_driver *eth_drv = &virt_eth_drv->eth_driver;
+	struct rte_pci_device *dev;
+
+	/* zeroed so that the fields not set below have a defined value */
+	dev = calloc(1, sizeof(*dev));
+	if (dev == NULL)
+		goto error;
+
+	eth_dev = virt_eth_dev_allocate(name, eth_drv,
+					eth_drv->dev_private_size);
+	if (!eth_dev)
+		goto error;
+
+	dev->numa_node = -1;
+	dev->driver = &eth_drv->pci_drv;
+	eth_dev->pci_dev = dev;
+
+	if (eth_drv->eth_dev_init) {
+		err = (*eth_drv->eth_dev_init)(eth_drv, eth_dev);
+		if (err) {
+			RTE_LOG(ERR, EAL, "eth_dev_init was failed (%d)", err);
+			goto error;
+		}
+	}
+
+	if (virt_eth_drv->virt_eth_dev_init) {
+		err = (*virt_eth_drv->virt_eth_dev_init)(virt_eth_drv, eth_dev,
+							 dirname);
+		if (err) {
+			RTE_LOG(ERR, EAL, "virt eth_dev_init was failed (%d)",
+				err);
+			goto error;
+		}
+	}
+
+	return 0;
+error:
+	/* do not leave the ethdev pointing at the device freed below */
+	if (eth_dev != NULL)
+		eth_dev->pci_dev = NULL;
+	free(dev);
+	return err;
+}
+
+#define PROC_MODULES "/proc/modules"
+/*
+ * Check whether the kernel module @module_name is listed in
+ * /proc/modules.  Only the first whitespace separated field of every
+ * line, i.e. the module name, is compared.
+ *
+ * Returns 0 if the module is listed or if @module_name is NULL (nothing
+ * to check), -1 if it is not listed or the file cannot be opened.
+ */
+static int
+virt_uio_check_module(const char *module_name)
+{
+	FILE *f;
+	unsigned i;
+	char buf[BUFSIZ];
+
+	if (module_name == NULL)
+		return 0;
+
+	f = fopen(PROC_MODULES, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot open "PROC_MODULES": %s\n",
+			strerror(errno));
+		return -1;
+	}
+
+	while (fgets(buf, sizeof(buf), f) != NULL) {
+
+		/* cut the line at its first whitespace character */
+		for (i = 0; i < sizeof(buf) && buf[i] != '\0'; i++) {
+			/* ctype functions need an unsigned char value */
+			if (isspace((unsigned char)buf[i])) {
+				buf[i] = '\0';
+				break;
+			}
+		}
+
+		if (strncmp(buf, module_name, sizeof(buf)) == 0) {
+			fclose(f);
+			return 0;
+		}
+	}
+
+	fclose(f);
+	return -1;
+}
+
+/*
+ * Register a virtual ethernet driver: verify that the kernel module it
+ * names (if any) is loaded, then walk sysfs_unbind_dir and initialise an
+ * ethdev for every entry accepted by the driver's is_eth_device_dir()
+ * callback, using "<sysfs_unbind_dir>/<entry>/uio" as its uio directory.
+ *
+ * Returns 0 on success, -1 if the module is missing, the directory cannot
+ * be opened, or a device fails to initialise (the scan stops there).
+ */
+int
+virt_eth_driver_register(struct virt_eth_driver *virt_eth_drv)
+{
+	const char *modname = virt_eth_drv->module_name;
+	char uio_dir[PATH_MAX];
+	struct dirent *entry;
+	DIR *sysdir;
+	int rc = 0;
+
+	if (modname) {
+		RTE_LOG(DEBUG, EAL, "module name: \"%s\", driver name: \"%s\"",
+			modname,
+			virt_eth_drv->eth_driver.pci_drv.name);
+
+		if (virt_uio_check_module(modname) != 0) {
+			RTE_LOG(ERR, EAL, "The %s is required by %s driver\n",
+				modname,
+				virt_eth_drv->eth_driver.pci_drv.name);
+			return -1;
+		}
+	}
+
+	sysdir = opendir(virt_eth_drv->sysfs_unbind_dir);
+	if (sysdir == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n", __func__,
+			strerror(errno));
+		return -1;
+	}
+
+	while ((entry = readdir(sysdir)) != NULL) {
+		/* skip hidden entries and everything the driver rejects */
+		if (entry->d_name[0] == '.' ||
+		    !(virt_eth_drv->is_eth_device_dir(entry->d_name)))
+			continue;
+
+		snprintf(uio_dir, sizeof(uio_dir), "%s/%s/uio",
+			 virt_eth_drv->sysfs_unbind_dir, entry->d_name);
+		if (virt_eth_dev_init(entry->d_name, virt_eth_drv,
+				      uio_dir) < 0) {
+			rc = -1;
+			break;
+		}
+	}
+
+	closedir(sysdir);
+	return rc;
+}
diff --git a/lib/librte_pmd_xen/virt_dev.h b/lib/librte_pmd_xen/virt_dev.h
new file mode 100644
index 0000000..73223ee
--- /dev/null
+++ b/lib/librte_pmd_xen/virt_dev.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _VIRT_ETHDEV_H_
+#define _VIRT_ETHDEV_H_
+
+struct virt_eth_driver;
+
+typedef int (*virt_is_eth_device_dir_t)(const char *dir);
+typedef int (*virt_eth_dev_init_t)(struct virt_eth_driver *virt_eth_drv,
+ struct rte_eth_dev *dev, const char *dirname);
+
+/* Description of a virtual (non-PCI) ethernet driver handed to
+ * virt_eth_driver_register(). */
+struct virt_eth_driver {
+ /* generic ethdev driver data (name, private data size, ...) */
+ struct eth_driver eth_driver;
+ /* NOTE(review): not referenced by the registration code in this
+ * patch - confirm intended use */
+ const char *sysfs_bind_dir;
+ /* sysfs directory scanned for candidate device entries */
+ const char *sysfs_unbind_dir;
+ /* returns non-zero when a directory entry names a device of this
+ * driver */
+ virt_is_eth_device_dir_t is_eth_device_dir;
+ /* driver specific device initialisation callback */
+ virt_eth_dev_init_t virt_eth_dev_init;
+ /* kernel module that must be loaded, or NULL to skip the check */
+ const char *module_name;
+};
+
+int virt_eth_driver_register(struct virt_eth_driver *virt_eth_drv);
+int virt_uio_map_addresses(const char *dirname, void **addresses,
+ size_t *lens, int max_addresses);
+void virt_uio_unmap_addresses(void **addresses,
+ size_t *lens, int max_addresses);
+
+#endif /* _VIRT_ETHDEV_H_ */
diff --git a/lib/librte_pmd_xen/xen_adapter_info.h b/lib/librte_pmd_xen/xen_adapter_info.h
new file mode 100644
index 0000000..15d71ac
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_adapter_info.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef XEN_ADAPTER_INFO_H_
+#define XEN_ADAPTER_INFO_H_
+
+#define MAX_TARGET 256
+
+#define IOCTL_EVTCHN_NOTIFY_GRANT 7
+
+/* One grant reference together with the memory it should cover. */
+struct gref_addr {
+ grant_ref_t gref;
+ /* NOTE(review): the PMD stores a page frame number here (result of
+ * __phys_to_pfn()), not a byte address */
+ unsigned long paddr;
+};
+
+/* Argument of IOCTL_EVTCHN_NOTIFY_GRANT: one request that carries an
+ * event channel to notify plus batches of grant references to set up
+ * (s[]) and to release (rel_gref[]). */
+struct ioctl_evtchn_notify_grant {
+ /* event channel port, taken from the info page rx/tx_evtchn */
+ unsigned int port;
+ /* copied from the info page otherend_id */
+ int otherend_id;
+ /* number of valid entries in s[] */
+ uint16_t count;
+ /* 1 for an rx queue request, 0 for a tx queue request */
+ uint8_t is_rx;
+ /* ring pointer copied from the info page; which member is valid
+ * follows is_rx */
+ union {
+ struct xen_netif_rx_front_ring *rx_ring;
+ struct xen_netif_tx_front_ring *tx_ring;
+ } u;
+ /* copied from the info page; opaque to the PMD */
+ struct netfront_info *info;
+ /* number of valid entries in rel_gref[] */
+ uint16_t rel_count;
+ /* grant references whose ring slots were consumed */
+ grant_ref_t rel_gref[MAX_TARGET];
+ /* grant reference / page pairs for newly queued ring slots */
+ struct gref_addr s[MAX_TARGET];
+};
+
+#define XEN_PMD_UIO_NAME "xen/pmd_uio"
+
+/* Indexes of the uio mappings kept in struct xen_adapter uio_res[] and
+ * uio_len[]; XEN_MAP_MAX is the number of mappings. */
+enum {
+ INFO_MAP = 0,
+ RX_RING_MAP,
+ TX_RING_MAP,
+ XEN_MAP_MAX
+};
+
+/* Layout of the info page mapped from the uio device (INFO_MAP). */
+struct xen_adapter_info {
+ /*global parameters */
+ /* the three pointers below are only copied into the notify/grant
+ * ioctl argument by the PMD, never dereferenced */
+ struct xen_netif_rx_front_ring *rx_ring;
+ struct xen_netif_tx_front_ring *tx_ring;
+ struct netfront_info *info;
+
+ /* non-zero once the other end is connected; polled by the PMD */
+ uint8_t is_connected;
+ /* compared with the per-queue copies to detect a reconnect */
+ uint8_t disconnect_count;
+
+ /*adapter specific data*/
+ int otherend_id;
+ unsigned int rx_evtchn;
+ unsigned int tx_evtchn;
+ u_int8_t mac[6];
+
+ /*params of grefs array*/
+ uint16_t rx_grefs_count;
+ uint16_t tx_grefs_count;
+ /* this field has to be the last */
+ /* rx grant references come first; the tx ones start at index
+ * rx_grefs_count */
+ grant_ref_t rxtx_grefs[];
+};
+
+#endif /* XEN_ADAPTER_INFO_H_ */
diff --git a/lib/librte_pmd_xen/xen_dev.c b/lib/librte_pmd_xen/xen_dev.c
new file mode 100644
index 0000000..a098cca
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_dev.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include "xen_dev.h"
+#include "xen_rxtx.h"
+#include "virt_dev.h"
+
+#include <stdio.h>
+
+#include <sys/ioctl.h>
+#include <xen/sys/evtchn.h>
+
+#define XEN_MAX_RX_PKTLEN 0xFFFF
+#define XEN_MIN_RX_BUFSIZE (2 * PAGE_SIZE)
+
+static int xen_evt_fd = -1;
+
+/* Refresh the header of the rx notify/grant ioctl argument from the
+ * adapter info page. */
+void
+xen_set_rx_ng(struct xen_rx_queue *rxq)
+{
+ struct ioctl_evtchn_notify_grant *ng = &rxq->ng_rx;
+
+ ng->port = rxq->xa->info_page->rx_evtchn;
+ ng->info = rxq->xa->info_page->info;
+ ng->u.rx_ring = rxq->xa->info_page->rx_ring;
+ ng->otherend_id = rxq->xa->info_page->otherend_id;
+}
+
+/* Refresh the header of the tx notify/grant ioctl argument from the
+ * adapter info page. */
+void
+xen_set_tx_ng(struct xen_tx_queue *txq)
+{
+ struct ioctl_evtchn_notify_grant *ng = &txq->ng_tx;
+
+ ng->port = txq->xa->info_page->tx_evtchn;
+ ng->info = txq->xa->info_page->info;
+ ng->u.tx_ring = txq->xa->info_page->tx_ring;
+ ng->otherend_id = txq->xa->info_page->otherend_id;
+}
+
+/* Issue IOCTL_EVTCHN_NOTIFY_GRANT on the event device.
+ * Returns 0 on success, otherwise the errno value left by ioctl(). */
+static int
+xen_evtchn_notify_grant_rxtx(struct ioctl_evtchn_notify_grant *ng)
+{
+ if (ioctl(xen_evt_fd, IOCTL_EVTCHN_NOTIFY_GRANT, ng) == 0)
+ return 0;
+
+ return errno;
+}
+
+/* Send the pending rx grant batch to the kernel.
+ * Returns -1 when the event device is not open, otherwise the result
+ * of the notify/grant ioctl helper. */
+int
+xen_evtchn_notify_grant_rx(struct xen_rx_queue *rxq)
+{
+ if (!likely(xen_evt_fd >= 0))
+ return -1;
+
+ xen_set_rx_ng(rxq);
+ return xen_evtchn_notify_grant_rxtx(&rxq->ng_rx);
+}
+
+/* Send the pending tx grant batch to the kernel.
+ * Returns -1 when the event device is not open, otherwise the result
+ * of the notify/grant ioctl helper. */
+int
+xen_evtchn_notify_grant_tx(struct xen_tx_queue *txq)
+{
+ if (!likely(xen_evt_fd >= 0))
+ return -1;
+
+ xen_set_tx_ng(txq);
+ return xen_evtchn_notify_grant_rxtx(&txq->ng_tx);
+}
+
+/* Kick one event channel through IOCTL_EVTCHN_NOTIFY.
+ * Returns the ioctl() result, or -1 when the event device is not open. */
+static int
+xen_evtchn_notify_rxtx(unsigned int evtchn)
+{
+ struct ioctl_evtchn_notify notify = { .port = evtchn };
+
+ if (xen_evt_fd >= 0)
+ return ioctl(xen_evt_fd, IOCTL_EVTCHN_NOTIFY, &notify);
+
+ return -1;
+}
+
+/* Notify the tx event channel and, when it is a different port, the rx
+ * event channel as well. Returns the sum of the individual results, so
+ * 0 means every notification succeeded. */
+static int
+xen_evtchn_notify(struct xen_adapter *xa)
+{
+ struct xen_adapter_info *page = xa->info_page;
+ int res = xen_evtchn_notify_rxtx(page->tx_evtchn);
+
+ if (page->tx_evtchn != page->rx_evtchn)
+ res += xen_evtchn_notify_rxtx(page->rx_evtchn);
+
+ return res;
+}
+
+/*
+ * Accumulate the per-queue software counters into *stats.
+ *
+ * The counters are added to whatever *stats already holds; slots of the
+ * queue arrays that have no queue attached are skipped.
+ */
+static void
+xen_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+ unsigned i;
+
+ PMD_INIT_FUNC_TRACE();
+
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ const struct xen_tx_queue *txq = dev->data->tx_queues[i];
+
+ /* test the queue pointer itself: the address of an embedded
+ * member is never NULL */
+ if (txq == NULL)
+ continue;
+
+ stats->opackets += txq->tx_stats.opackets;
+ stats->obytes += txq->tx_stats.obytes;
+ stats->oerrors += txq->tx_stats.oerrors;
+ }
+
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ const struct xen_rx_queue *rxq = dev->data->rx_queues[i];
+
+ if (rxq == NULL)
+ continue;
+
+ stats->ipackets += rxq->rx_stats.ipackets;
+ stats->ierrors += rxq->rx_stats.ierrors;
+ stats->ibytes += rxq->rx_stats.ibytes;
+ }
+}
+
+/*
+ * Zero the software counters of every configured queue.
+ *
+ * Slots of the queue arrays that have no queue attached are skipped.
+ */
+static void
+xen_dev_stats_reset(struct rte_eth_dev *dev)
+{
+ uint16_t i;
+
+ PMD_INIT_FUNC_TRACE();
+
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ struct xen_tx_queue *txq = dev->data->tx_queues[i];
+
+ /* test the queue pointer itself: the address of an embedded
+ * member is never NULL */
+ if (txq == NULL)
+ continue;
+
+ txq->tx_stats.opackets = 0;
+ txq->tx_stats.obytes = 0;
+ txq->tx_stats.oerrors = 0;
+ }
+
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ struct xen_rx_queue *rxq = dev->data->rx_queues[i];
+
+ if (rxq == NULL)
+ continue;
+
+ rxq->rx_stats.ipackets = 0;
+ rxq->rx_stats.ibytes = 0;
+ rxq->rx_stats.ierrors = 0;
+ }
+}
+
+/* Report the fixed limits of the device: a single rx queue, a single
+ * tx queue and a single MAC address. */
+static void
+xen_dev_info_get(struct rte_eth_dev *dev __rte_unused,
+ struct rte_eth_dev_info *dev_info)
+{
+ PMD_INIT_FUNC_TRACE();
+
+ dev_info->min_rx_bufsize = XEN_MIN_RX_BUFSIZE;
+ dev_info->max_rx_pktlen = XEN_MAX_RX_PKTLEN;
+ dev_info->max_mac_addrs = 1;
+ dev_info->max_rx_queues = 1;
+ dev_info->max_tx_queues = 1;
+}
+
+/* dev_configure callback: there is nothing to configure, always
+ * returns 0. */
+static int
+xen_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+ PMD_INIT_FUNC_TRACE();
+
+ return 0;
+}
+
+/* dev_close callback: only traces the call. */
+static void
+xen_dev_close(struct rte_eth_dev *dev __rte_unused)
+{
+ PMD_INIT_FUNC_TRACE();
+}
+
+/* Tell whether a sysfs entry name starts with "vif-<number>".
+ * Returns 1 when it does, 0 otherwise. */
+static int
+_xen_is_eth_device_dir(const char *dir)
+{
+ int devid;
+ int matched = sscanf(dir, "vif-%d", &devid);
+
+ return matched == 1;
+}
+
+/**
+ * Atomically writes the link status information into the global
+ * structure rte_eth_dev.
+ *
+ * The first 64 bits of both link structures are accessed as one
+ * uint64_t and stored with rte_atomic64_cmpset().
+ *
+ * @param dev
+ * Pointer to the structure rte_eth_dev whose dev_link is written.
+ * @param link
+ * Pointer to the link status to be saved.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure (the compare-and-set returned 0), negative value.
+ */
+static inline int
+xen_dev_atomic_write_link_status(struct rte_eth_dev *dev,
+ struct rte_eth_link *link)
+{
+ struct rte_eth_link *dst = &(dev->data->dev_link);
+ struct rte_eth_link *src = link;
+
+ /* the expected value is re-read from dst, so the swap only fails
+ * when dst changes between that read and the compare */
+ if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
+ *(uint64_t *)src) == 0)
+ return -1;
+
+ return 0;
+}
+
+/*
+ * link_update callback: the virtual link is always reported as up,
+ * 1000 Mbps, full duplex. wait_to_complete is ignored.
+ *
+ * Always returns 0.
+ */
+static int
+xen_dev_link_update(struct rte_eth_dev *dev,
+ __attribute__((unused)) int wait_to_complete)
+{
+ struct rte_eth_link link;
+
+ PMD_INIT_FUNC_TRACE();
+
+ /* the structure is published as one 64-bit word, so no byte of it
+ * may be left indeterminate */
+ memset(&link, 0, sizeof(link));
+ link.link_status = 1;
+ link.link_speed = ETH_LINK_SPEED_1000;
+ link.link_duplex = ETH_LINK_FULL_DUPLEX;
+
+ xen_dev_atomic_write_link_status(dev, &link);
+
+ return 0;
+}
+
+/* dev_start callback: publish the link state, then kick the event
+ * channels of the adapter. Always returns 0. */
+static int
+xen_dev_start(struct rte_eth_dev *dev)
+{
+ struct xen_adapter *adapter;
+
+ adapter = VA_XA(dev);
+
+ PMD_INIT_FUNC_TRACE();
+
+ xen_dev_link_update(dev, 0);
+ xen_evtchn_notify(adapter);
+
+ return 0;
+}
+
+/* dev_stop callback: only traces the call. */
+static void
+xen_dev_stop(struct rte_eth_dev *dev __rte_unused)
+{
+ PMD_INIT_FUNC_TRACE();
+}
+
+/* Poll *state every 100 ms until it becomes non-zero.
+ * timeout is given in seconds (ten polls per second).
+ * Returns 0 once *state is set, -1 when the polls are used up. */
+static int
+wait_uio_init(uint8_t *state, const uint32_t timeout)
+{
+ uint32_t polls_left = timeout * 10;
+
+ while (polls_left-- > 0) {
+ if (*state)
+ return 0;
+ usleep(100000);
+ }
+
+ return -1;
+}
+
+/* ethdev callback table installed by xen_dev_init(). The burst
+ * functions are not part of it; they are assigned to the device by the
+ * queue setup callbacks. */
+static struct eth_dev_ops xen_eth_dev_ops = {
+ /*dev*/
+ .dev_configure = xen_dev_configure,
+ .dev_close = xen_dev_close,
+ .dev_start = xen_dev_start,
+ .dev_stop = xen_dev_stop,
+ .dev_infos_get = xen_dev_info_get,
+ .link_update = xen_dev_link_update,
+ /*rxtx*/
+ .stats_get = xen_dev_stats_get,
+ .stats_reset = xen_dev_stats_reset,
+ .rx_queue_setup = xen_dev_rx_queue_setup,
+ .rx_queue_release = xen_dev_rx_queue_release,
+ .tx_queue_setup = xen_dev_tx_queue_setup,
+ .tx_queue_release = xen_dev_tx_queue_release,
+};
+
+/*
+ * Driver specific device initialisation (virt_eth_dev_init callback).
+ *
+ * Maps the uio resources found under dirname, installs the callback
+ * table, waits up to 3 seconds for the info page to report a
+ * connection and copies the MAC address from the info page.
+ *
+ * Returns 0 on success, -1 on failure; the uio mappings are released
+ * again on every failure after they were created.
+ */
+static int
+xen_dev_init(struct virt_eth_driver *virt_eth_drv __attribute__((unused)),
+ struct rte_eth_dev *eth_dev, const char *dirname)
+{
+ int err = 0;
+
+ struct xen_adapter *xa = VA_XA(eth_dev);
+
+ PMD_INIT_FUNC_TRACE();
+
+ err = virt_uio_map_addresses(dirname, xa->uio_res, xa->uio_len,
+ XEN_MAP_MAX);
+ if (err != 0) {
+ PMD_INIT_LOG(ERR, "virt_uio_map_addresses failed (%d)", err);
+ return -1;
+ }
+
+ eth_dev->dev_ops = &xen_eth_dev_ops;
+
+ xa->info_page =
+ (struct xen_adapter_info *)xa->uio_res[INFO_MAP];
+
+ if (wait_uio_init(&xa->info_page->is_connected, 3)) {
+ PMD_INIT_LOG(ERR, "no connection to xen_netback");
+ goto unmap;
+ }
+
+ PMD_INIT_LOG(DEBUG, "rx: %d,rx_evtchn: %d,tx: %d,tx_evtchn: %d",
+ (int)xa->info_page->rx_grefs_count,
+ (int)xa->info_page->rx_evtchn,
+ (int)xa->info_page->tx_grefs_count,
+ (int)xa->info_page->tx_evtchn);
+
+ /* copy mac-addr */
+ eth_dev->data->mac_addrs = rte_malloc("xen", ETHER_ADDR_LEN, 0);
+ if (eth_dev->data->mac_addrs == NULL) {
+ PMD_INIT_LOG(ERR, "cannot allocate memory for the mac address");
+ goto unmap;
+ }
+ memcpy(&eth_dev->data->mac_addrs->addr_bytes[0],
+ &xa->info_page->mac[0], ETHER_ADDR_LEN);
+
+ return 0;
+
+unmap:
+ virt_uio_unmap_addresses(xa->uio_res, xa->uio_len,
+ XEN_MAP_MAX);
+ return -1;
+}
+
+/* Driver description passed to virt_eth_driver_register(): devices are
+ * the "vif-<n>" entries of sysfs_unbind_dir and the "xen_uio" kernel
+ * module has to be loaded. */
+static struct virt_eth_driver rte_xen_pmd = {
+ .eth_driver = {
+ .pci_drv = {
+ .name = "rte_xen_pmd",
+ .id_table = NULL,
+ },
+ .dev_private_size = sizeof(struct xen_adapter),
+ },
+ .sysfs_unbind_dir = "/sys/bus/xen/devices",
+ .sysfs_bind_dir = "/sys/bus/xen/drivers",
+ .is_eth_device_dir = _xen_is_eth_device_dir,
+ .virt_eth_dev_init = xen_dev_init,
+ .module_name = "xen_uio",
+};
+
+/*
+ * Driver init hook: open the event device and register the driver.
+ *
+ * A missing or unopenable event device is not treated as an error (0 is
+ * returned and nothing is registered); only failures other than ENOENT
+ * are logged. Otherwise the result of virt_eth_driver_register() is
+ * returned.
+ */
+static int
+rte_xen_pmd_init(const char *name __rte_unused,
+ const char *param __rte_unused)
+{
+ PMD_INIT_FUNC_TRACE();
+
+ xen_evt_fd = open("/dev/"XEN_PMD_UIO_NAME, O_RDWR);
+
+ if (xen_evt_fd != -1)
+ return virt_eth_driver_register(&rte_xen_pmd);
+
+ if (errno != ENOENT)
+ PMD_INIT_LOG(ERR, "cannot open event device %s",
+ "/dev/"XEN_PMD_UIO_NAME);
+
+ return 0;
+}
+
+/* Driver record handed to PMD_REGISTER_DRIVER(); rte_xen_pmd_init() is
+ * its init hook. */
+static struct rte_driver rte_xen_driver = {
+ .type = PMD_PDEV,
+ .init = rte_xen_pmd_init,
+};
+
+PMD_REGISTER_DRIVER(rte_xen_driver);
diff --git a/lib/librte_pmd_xen/xen_dev.h b/lib/librte_pmd_xen/xen_dev.h
new file mode 100644
index 0000000..b54287c
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_dev.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _XEN_ETHDEV_H_
+#define _XEN_ETHDEV_H_
+
+#include <assert.h>
+#include <sys/user.h>
+#include <inttypes.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include <xen/io/netif.h>
+
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+#include <rte_spinlock.h>
+#include <rte_memzone.h>
+#include <rte_dev.h>
+
+#include "xen_logs.h"
+
+#include "xen_adapter_info.h"
+
+typedef uint64_t u64;
+
+#undef PAGE_SIZE
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1 << PAGE_SHIFT)
+
+#define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT))
+#define __pfn_to_phys(pfn) ((phys_addr_t)(pfn) << PAGE_SHIFT)
+
+#define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
+#define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
+
+#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
+#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256)
+
+#if __XEN_LATEST_INTERFACE_VERSION__ > 0x0003020a
+
+#define FRONT_RING_ATTACH(_r, _s, __size) do { \
+ (_r)->sring = (_s); \
+ (_r)->req_prod_pvt = (_s)->req_prod; \
+ (_r)->rsp_cons = (_s)->rsp_prod; \
+ (_r)->nr_ents = __RING_SIZE(_s, __size); \
+} while (0)
+
+#endif
+
+/* Private adapter data of an ethdev. The argument and the whole
+ * expansion are parenthesised so that VA_XA(p)->field and non-trivial
+ * argument expressions parse as intended. */
+#define VA_XA(eth_dev) \
+ ((struct xen_adapter *)((eth_dev)->data->dev_private))
+
+/* Minimum of x and y after converting both to type t; each argument is
+ * evaluated exactly once. */
+#define min_t(t, x, y) ({ \
+ t _x = (x); \
+ t _y = (y); \
+ _x < _y ? _x : _y; })
+
+/* Per-device private data (dev_private) of the Xen PMD. */
+struct xen_adapter {
+ /* it's a place for all uio resources */
+ /* both arrays are indexed by INFO_MAP / RX_RING_MAP / TX_RING_MAP */
+ void *uio_res[XEN_MAP_MAX];
+ size_t uio_len[XEN_MAP_MAX];
+
+ /*pointer to the info page*/
+ struct xen_adapter_info *info_page;
+
+ /* set to the ethdev queue arrays by the first queue setup call */
+ void **rx_queues;
+ void **tx_queues;
+};
+
+#include "xen_rxtx.h"
+
+void xen_set_rx_ng(struct xen_rx_queue *rxq);
+void xen_set_tx_ng(struct xen_tx_queue *txq);
+int xen_evtchn_notify_grant_rx(struct xen_rx_queue *rxq);
+int xen_evtchn_notify_grant_tx(struct xen_tx_queue *txq);
+
+/*rx*/
+int xen_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+ uint16_t nb_rx_desc, unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *mb_pool);
+void xen_dev_rx_queue_release(void *rxq);
+uint16_t xen_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+
+/*tx*/
+int xen_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+ uint16_t nb_tx_desc, unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf);
+void xen_dev_tx_queue_release(void *txq);
+
+#endif /* _XEN_ETHDEV_H_ */
diff --git a/lib/librte_pmd_xen/xen_logs.h b/lib/librte_pmd_xen/xen_logs.h
new file mode 100644
index 0000000..2334db0
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_logs.h
@@ -0,0 +1,23 @@
+#ifndef _XEN_LOGS_H_
+#define _XEN_LOGS_H_
+
+
+/* PMD_INIT_LOG() prefixes the message with the calling function name
+ * and appends a newline; it expands to an empty statement unless
+ * RTE_LIBRTE_XEN_DEBUG_INIT is defined, so its arguments are not
+ * evaluated in that case. */
+#ifdef RTE_LIBRTE_XEN_DEBUG_INIT
+
+#define PMD_INIT_LOG(level, fmt, args...) \
+ do { \
+ RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args);\
+ fflush(stdout);\
+ } while (0)
+
+#else
+
+#define PMD_INIT_LOG(level, fmt, args...) \
+ do { } while (0)
+
+#endif
+
+/* Trace entry into a function at DEBUG level. */
+#define PMD_INIT_FUNC_TRACE() \
+ PMD_INIT_LOG(DEBUG, " >>")
+
+#endif /* _XEN_LOGS_H_ */
diff --git a/lib/librte_pmd_xen/xen_rxtx.c b/lib/librte_pmd_xen/xen_rxtx.c
new file mode 100644
index 0000000..c45e67a
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_rxtx.c
@@ -0,0 +1,546 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include "xen_dev.h"
+#include "xen_rxtx.h"
+#include "virt_dev.h"
+
+/* Physical address of the first data byte of an mbuf. The expansion is
+ * fully parenthesised so it can be used inside larger expressions. */
+#define RTE_MBUF_DATA_DMA_ADDR(mb) \
+ ((uint64_t)((mb)->buf_physaddr + (mb)->data_off))
+
+/* (Re)initialise the rx ring: reset the shared ring, attach the front
+ * ring to it, queue receive buffers and remember the current
+ * disconnect count so a later reconnect can be detected. */
+static void
+xen_rx_ring_init(struct xen_rx_queue *rxq)
+{
+ SHARED_RING_INIT(rxq->rxs);
+ FRONT_RING_ATTACH(&rxq->ring, rxq->rxs, PAGE_SIZE);
+ /* the ring must be attached before requests can be queued */
+ xen_dev_rx_send_requests(rxq);
+ rxq->rx_disconnect_count = rxq->xa->info_page->disconnect_count;
+ xen_set_rx_ng(rxq);
+}
+
+/* (Re)initialise the tx ring: reset the shared ring, attach the front
+ * ring to it, reap any responses and remember the current disconnect
+ * count so a later reconnect can be detected. */
+static void
+xen_tx_ring_init(struct xen_tx_queue *txq)
+{
+ SHARED_RING_INIT(txq->txs);
+ FRONT_RING_ATTACH(&txq->ring, txq->txs, PAGE_SIZE);
+ /* the ring must be attached before responses can be read */
+ xen_dev_tx_recv_responses(txq);
+ txq->tx_disconnect_count = txq->xa->info_page->disconnect_count;
+ xen_set_tx_ng(txq);
+}
+
+/*
+ * Refill the rx ring.
+ *
+ * For every free request slot an mbuf is allocated, its data start is
+ * moved up to the next page boundary and the slot's grant reference is
+ * published in the ring. The (gref, page frame) pairs are collected in
+ * ng_rx.s[] and handed to the kernel together with the pending release
+ * list through the notify/grant ioctl; the release list is emptied
+ * afterwards.
+ *
+ * Always returns 0; an mbuf allocation failure only shortens the batch.
+ */
+int
+xen_dev_rx_send_requests(struct xen_rx_queue *rxq)
+{
+ uint16_t i;
+ struct netif_rx_request *req;
+ RING_IDX req_prod = rxq->ring.req_prod_pvt;
+ RING_IDX prod = req_prod;
+ uint16_t free_space = RING_FREE_REQUESTS(&rxq->ring);
+
+ xen_set_rx_ng(rxq);
+
+ for (i = 0; i < free_space; i++) {
+ struct rte_mbuf *mbuf;
+
+ /* ring slot index, used for the mbuf[] and gref[] tables */
+ prod = (req_prod + i) & (RING_SIZE(&rxq->ring) - 1);
+
+ req = RING_GET_REQUEST(&rxq->ring, prod);
+
+ mbuf = rte_pktmbuf_alloc(rxq->mb_pool);
+ if (unlikely(!mbuf)) {
+ PMD_INIT_LOG(ERR, "no mbuf");
+ break; /*skip*/
+ }
+
+ /* NOTE(review): every buffer is flagged as IPv4 before any
+ * packet was received - confirm this is intended */
+ mbuf->ol_flags |= PKT_RX_IPV4_HDR;
+ rxq->mbuf[prod] = mbuf;
+
+ /*set data at the begin of the next page*/
+ uint64_t phys_addr = RTE_MBUF_DATA_DMA_ADDR(mbuf);
+ uint64_t phys_addr_shifted =
+ (phys_addr + PAGE_SIZE - 1) &
+ (~((uint64_t)PAGE_SIZE - 1));
+ uint64_t shift = phys_addr_shifted - phys_addr;
+
+ mbuf->data_off += shift;
+ rxq->ng_rx.s[i].gref = rxq->gref[prod];
+
+ /* despite its name, paddr carries the page frame number */
+ rxq->ng_rx.s[i].paddr = __phys_to_pfn(phys_addr_shifted);
+
+ req->gref = rxq->gref[prod];
+ req->id = prod;
+ }
+
+ /* i is the number of requests actually queued */
+ rxq->ring.req_prod_pvt = (req_prod + i);
+
+ rxq->ng_rx.count = i;
+ xen_evtchn_notify_grant_rx(rxq);
+ /* the release list was passed along with the ioctl above */
+ rxq->ng_rx.rel_count = 0;
+
+ return 0;
+}
+
+/*
+ * Handle one extra-info slot of the rx ring.
+ *
+ * The receive state machine goes back to RX_RESP_GENERAL when the slot
+ * is missing, carries an invalid type, or does not announce another
+ * extra-info slot (XEN_NETIF_EXTRA_FLAG_MORE not set).
+ */
+static void
+xen_dev_rx_recv_extra(struct xen_rx_queue *rxq, struct netif_extra_info *extra)
+{
+ if (unlikely(!extra)) {
+ PMD_INIT_LOG(ERR, "Invalid rxq state transition: %d",
+ rxq->state);
+ rxq->state = RX_RESP_GENERAL;
+ /* nothing to inspect; do not dereference a NULL slot */
+ return;
+ }
+
+ if (unlikely(!extra->type ||
+ extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+ PMD_INIT_LOG(WARNING, "Invalid extra type: %d", extra->type);
+ rxq->state = RX_RESP_GENERAL;
+ }
+
+ if (!(extra->flags & XEN_NETIF_EXTRA_FLAG_MORE)) {
+ PMD_INIT_LOG(DEBUG, "No XEN_NETIF_EXTRA_FLAG_MORE");
+ rxq->state = RX_RESP_GENERAL;
+ }
+}
+
+/*
+ * Consume responses from the rx ring and turn them into packets.
+ *
+ * Each consumed slot has its grant reference appended to the release
+ * list (ng_rx.rel_gref[]). A small state machine kept in rxq->state
+ * handles plain packets (RX_RESP_GENERAL), extra-info slots
+ * (RX_RESP_EXTRA) and multi-slot packets (RX_RESP_CONTINUE), chaining
+ * the segments of the latter through first_frag / prev_frag.
+ *
+ * Returns the number of packets stored in rx_pkts (at most nb_pkts).
+ */
+static uint16_t
+xen_dev_rx_recv_responses(struct xen_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ uint16_t nb_rx;
+ uint16_t i;
+ struct netif_rx_response *rsp;
+ struct netif_extra_info *extra = NULL;
+ RING_IDX rsp_cons = rxq->ring.rsp_cons;
+ RING_IDX cons = rsp_cons;
+ uint16_t work_todo;
+
+ nb_rx = 0;
+ work_todo = RING_HAS_UNCONSUMED_RESPONSES(&rxq->ring);
+ for (i = 0; i < work_todo && nb_rx < nb_pkts; i++) {
+ struct rte_mbuf *mbuf;
+
+ /* ring slot index, used for the mbuf[] and gref[] tables */
+ cons = (rsp_cons + i) & (RING_SIZE(&rxq->ring) - 1);
+
+ rsp = RING_GET_RESPONSE(&rxq->ring, cons);
+
+ PMD_INIT_LOG(DEBUG, "id:%u status:%u offset:%u flags:%x",
+ rsp->id, rsp->status, rsp->offset, rsp->flags);
+
+ /* NOTE(review): rel_count is not bounded against MAX_TARGET
+ * here; it relies on being reset after every refill */
+ rxq->ng_rx.rel_gref[rxq->ng_rx.rel_count] = rxq->gref[cons];
+ rxq->ng_rx.rel_count++;
+
+ if (unlikely(rsp->status < 0)) {
+ PMD_INIT_LOG(WARNING, "bad rsp->status: %d",
+ rsp->status);
+ rte_pktmbuf_free(rxq->mbuf[cons]);
+ rxq->mbuf[cons] = NULL;
+ rxq->state = RX_RESP_GENERAL;
+ /* NOTE(review): a partially assembled chain is dropped
+ * here without being freed - looks like a leak, confirm */
+ rxq->first_frag = rxq->prev_frag = NULL;
+ continue;
+ }
+
+ switch (rxq->state) {
+ case RX_RESP_GENERAL: /* normal receiving */
+ if (unlikely(rsp->flags & NETRXF_extra_info)) {
+ PMD_INIT_LOG(DEBUG,
+ "EXTRA_NETRXF_extra_info");
+ rxq->state = RX_RESP_EXTRA;
+ rte_pktmbuf_free(rxq->mbuf[cons]);
+ rxq->mbuf[cons] = NULL;
+ break;
+ }
+ /* normal receive */
+ if (rxq->mbuf[cons]) {
+ mbuf = rxq->mbuf[cons];
+ mbuf->port = rxq->port_id;
+ /* a non-negative status is the data length */
+ mbuf->data_len = mbuf->pkt_len = rsp->status;
+ mbuf->data_off += rsp->offset;
+
+ if (rsp->flags & NETRXF_more_data) {
+ /* first segment of a multi-slot packet */
+ rxq->state = RX_RESP_CONTINUE;
+ rxq->first_frag =
+ rxq->prev_frag = mbuf;
+ } else {
+ /*send to the upper level*/
+ rx_pkts[nb_rx++] = mbuf;
+ rxq->rx_stats.ipackets++;
+ rxq->rx_stats.ibytes +=
+ mbuf->pkt_len;
+ }
+
+ rxq->mbuf[cons] = NULL;
+ } else {
+ PMD_INIT_LOG(WARNING, "no rxq->mbuf[%d]",
+ cons);
+ rxq->rx_stats.ierrors++;
+ }
+ break;
+
+ case RX_RESP_EXTRA: /* extra */
+ /* the slot holds extra info instead of a response */
+ extra = (struct netif_extra_info *)rsp;
+ xen_dev_rx_recv_extra(rxq, extra);
+ rte_pktmbuf_free(rxq->mbuf[cons]);
+ rxq->mbuf[cons] = NULL;
+ break;
+
+ case RX_RESP_CONTINUE: /* packet is segmented */
+ if (rxq->mbuf[cons]) {
+ mbuf = rxq->mbuf[cons];
+ /* mbuf->in_port = rxq->port_id; */
+ mbuf->data_len = mbuf->pkt_len =
+ rsp->status;
+ mbuf->data_off += rsp->offset;
+
+ /* append the segment to the chain under assembly */
+ rxq->first_frag->nb_segs++;
+ rxq->first_frag->pkt_len += mbuf->data_len;
+ rxq->prev_frag->next = mbuf;
+
+ if (rsp->flags & NETRXF_more_data)
+ rxq->prev_frag = mbuf;
+ else {
+ rxq->state = RX_RESP_GENERAL;
+ /*send to the upper level*/
+ rx_pkts[nb_rx++] = rxq->first_frag;
+ rxq->rx_stats.ipackets++;
+ rxq->rx_stats.ibytes += rxq->first_frag->pkt_len;
+ rxq->first_frag = rxq->prev_frag = NULL;
+ }
+
+ rxq->mbuf[cons] = NULL;
+ } else {
+ PMD_INIT_LOG(WARNING, "no cntn rxq->mbuf[%d]",
+ cons);
+ rxq->rx_stats.ierrors++;
+ }
+ break;
+ }
+
+ /* every branch above already cleared the slot */
+ rxq->mbuf[cons] = NULL;
+ }
+ /* i is the number of responses consumed */
+ rxq->ring.rsp_cons = (rsp_cons + i);
+
+ return nb_rx;
+}
+
+/*
+ * rx burst function.
+ *
+ * Returns 0 while the other end is not connected. After a reconnect
+ * (changed disconnect count) the queue is drained and the ring
+ * re-initialised first. Received packets are stored in rx_pkts and the
+ * ring is refilled afterwards.
+ */
+uint16_t
+xen_dev_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct xen_rx_queue *rxq = rx_queue;
+ uint16_t nb_rx;
+
+ if (!likely(rxq->xa->info_page->is_connected))
+ return 0;
+
+ if (unlikely(rxq->xa->info_page->disconnect_count !=
+ rxq->rx_disconnect_count)) {
+ xen_rx_queue_release(rxq);
+ xen_rx_ring_init(rxq);
+ }
+
+ nb_rx = xen_dev_rx_recv_responses(rxq, rx_pkts, nb_pkts);
+ xen_dev_rx_send_requests(rxq);
+
+ return nb_rx;
+}
+
+/* Drop every buffer held by the rx queue: all grant references of the
+ * ring are put on the release list, all attached mbufs are freed, and
+ * the kernel is notified. */
+void
+xen_rx_queue_release(struct xen_rx_queue *rxq)
+{
+ uint16_t slot;
+
+ rxq->ng_rx.count = 0;
+ rxq->ng_rx.rel_count = 0;
+
+ for (slot = 0; slot < (RING_SIZE(&rxq->ring)); slot++) {
+ rxq->ng_rx.rel_gref[rxq->ng_rx.rel_count++] = rxq->gref[slot];
+
+ if (rxq->mbuf[slot] == NULL)
+ continue;
+
+ rte_pktmbuf_free(rxq->mbuf[slot]);
+ rxq->mbuf[slot] = NULL;
+ }
+
+ xen_evtchn_notify_grant_rx(rxq);
+}
+
+/* rx_queue_release callback: release the buffers of the queue and free
+ * the queue itself. A NULL queue is ignored. */
+void
+xen_dev_rx_queue_release(void *rxq)
+{
+ struct xen_rx_queue *queue = rxq;
+
+ if (queue == NULL)
+ return;
+
+ xen_rx_queue_release(queue);
+ rte_free(queue);
+}
+
+/*
+ * rx_queue_setup callback.
+ *
+ * Allocates the queue, binds it to the rx shared ring mapping and to
+ * the rx part of the grant reference table, initialises the ring and
+ * installs the rx burst function. nb_desc, socket_id and rx_conf are
+ * ignored.
+ *
+ * Returns 0 on success, -ENOMEM when the info page offers fewer rx
+ * grant references than the ring has slots or when the allocation
+ * fails.
+ */
+int
+xen_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+ __attribute__((unused)) uint16_t nb_desc,
+ __attribute__((unused)) unsigned int socket_id,
+ __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *mp)
+{
+ struct xen_rx_queue *rxq;
+ struct xen_adapter *xa = VA_XA(dev);
+ struct xen_adapter_info *info = xa->info_page;
+
+ /* one grant reference is needed per ring slot */
+ if (NET_RX_RING_SIZE > info->rx_grefs_count) {
+ PMD_INIT_LOG(ERR, "rx ring size greater than rx grefs count");
+ return -ENOMEM;
+ }
+
+ rxq = rte_zmalloc("rx_queue", sizeof(struct xen_rx_queue),
+ RTE_CACHE_LINE_SIZE);
+ if (NULL == rxq) {
+ PMD_INIT_LOG(ERR, "rte_zmalloc for rxq failed!");
+ return -ENOMEM;
+ }
+
+ rxq->xa = xa;
+ rxq->queue_id = queue_idx;
+ rxq->port_id = dev->data->port_id;
+ rxq->state = RX_RESP_GENERAL;
+ rxq->first_frag = rxq->prev_frag = NULL;
+ rxq->mb_pool = mp;
+ rxq->ng_rx.is_rx = 1;
+ rxq->ng_rx.rel_count = 0;
+ /* the rx grant references are the first entries of the table */
+ rxq->gref = &info->rxtx_grefs[0];
+
+ rxq->rxs = (struct netif_rx_sring *)xa->uio_res[RX_RING_MAP];
+
+ /* NOTE(review): a queue already stored in this slot is overwritten
+ * without being released - confirm the caller releases it first */
+ dev->data->rx_queues[queue_idx] = rxq;
+ if (!xa->rx_queues)
+ xa->rx_queues = dev->data->rx_queues;
+
+ /* needs every field above; also queues the first receive buffers */
+ xen_rx_ring_init(rxq);
+
+ dev->rx_pkt_burst = xen_dev_recv_pkts;
+
+ return 0;
+}
+
+/*
+ * Fill the i-th tx request after the private producer index and record
+ * its grant reference / page pair in ng_tx.s[i].
+ *
+ * The request size is the pkt_len of the mbuf attached to the slot when
+ * there is one, otherwise the size argument.
+ * NOTE(review): presumably only the first slot of a packet has an mbuf
+ * attached, so that slot carries the total packet length - confirm
+ * against the caller and the netif protocol.
+ */
+static void
+xen_dev_tx_prepare_request(struct xen_tx_queue *txq, uint16_t i, uint16_t size,
+ uint16_t offset, uint16_t flags, unsigned long paddr)
+{
+ RING_IDX prod = (txq->ring.req_prod_pvt+i) & (RING_SIZE(&txq->ring)-1);
+ struct netif_tx_request *req = RING_GET_REQUEST(&txq->ring, prod);
+
+ txq->ng_tx.s[i].gref = txq->gref[prod];
+ txq->ng_tx.s[i].paddr = paddr;
+
+ req->id = prod;
+ req->flags = flags;
+ req->offset = offset;
+ req->gref = txq->gref[prod];
+ req->size = (txq->mbuf[prod] ? txq->mbuf[prod]->pkt_len : size);
+
+ PMD_INIT_LOG(DEBUG, "id:%u size:%u offset:%u gref:%u flags:%x",
+ req->id, req->size, req->offset, req->gref, req->flags);
+}
+
+/*
+ * Queue packets on the tx ring.
+ *
+ * Every mbuf segment becomes one request, or two when its data crosses
+ * a page boundary. Queuing stops when the ring cannot take the worst
+ * case number of requests for the next packet. The collected grant
+ * batch is handed to the kernel through the notify/grant ioctl and the
+ * release list is emptied afterwards.
+ *
+ * Returns the number of packets queued.
+ */
+static int
+xen_dev_tx_send_requests(struct xen_tx_queue *txq, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct rte_mbuf *mbuf;
+ unsigned long paddr;
+ uint16_t offset;
+ uint16_t flags;
+ uint16_t size;
+ uint16_t i = 0;
+ uint16_t nb_tx = 0;
+ uint16_t free_space = RING_FREE_REQUESTS(&txq->ring);
+
+ xen_set_tx_ng(txq);
+
+ while (i < free_space && nb_tx < nb_pkts) {
+
+ RING_IDX prod = (txq->ring.req_prod_pvt + i) &
+ (RING_SIZE(&txq->ring) - 1);
+ /* NOTE(review): the slot is written before the checks below;
+ * when the loop breaks it keeps pointing at an mbuf that was
+ * not queued - confirm it cannot be freed later by mistake */
+ txq->mbuf[prod] = mbuf = tx_pkts[nb_tx];
+
+ if (unlikely(NULL == mbuf)) {
+ PMD_INIT_LOG(WARNING, "no mbuf for req");
+ break;
+ }
+
+ /* each segment could be split in two because of its offset,
+ * so reserve twice the number of segments */
+ if (i + (tx_pkts[nb_tx]->nb_segs * 2) > free_space)
+ break;
+
+ /* prepare request for each mbuf segment */
+ do {
+ size = mbuf->data_len;
+ flags = (mbuf->next ? NETTXF_more_data : 0);
+ /* page frame number and offset inside that page */
+ paddr = __phys_to_pfn(RTE_MBUF_DATA_DMA_ADDR(mbuf));
+ offset = (RTE_MBUF_DATA_DMA_ADDR(mbuf)) &
+ ((uint64_t)PAGE_SIZE - 1);
+
+ /* check if additional segmentation is needed */
+ if (size + offset > PAGE_SIZE) {
+ size = PAGE_SIZE - offset;
+ xen_dev_tx_prepare_request(txq, i, size,
+ offset, NETTXF_more_data, paddr);
+ /* NOTE(review): paddr holds a page frame number
+ * but a byte count is added - looks like it should
+ * advance by one page, confirm */
+ paddr += size;
+ offset = (offset + size) % PAGE_SIZE;
+ size = mbuf->data_len - size;
+ i++;
+ }
+
+ xen_dev_tx_prepare_request(txq, i, size,
+ offset, flags, paddr);
+ i++;
+
+ } while ((mbuf = mbuf->next));
+
+ nb_tx++;
+ txq->tx_stats.opackets++;
+ txq->tx_stats.obytes += txq->mbuf[prod]->pkt_len;
+ }
+
+ /* i is the number of requests produced */
+ txq->ring.req_prod_pvt += i;
+ txq->ng_tx.count = i;
+ xen_evtchn_notify_grant_tx(txq);
+ /* the release list was passed along with the ioctl above */
+ txq->ng_tx.rel_count = 0;
+
+ return nb_tx;
+}
+
+/*
+ * Reap completed tx requests.
+ *
+ * For every unconsumed response the slot's grant reference is appended
+ * to the release list and the mbuf attached to the slot, if any, is
+ * freed. Always returns 0.
+ */
+int
+xen_dev_tx_recv_responses(struct xen_tx_queue *txq)
+{
+ RING_IDX base = txq->ring.rsp_cons;
+ uint16_t pending = RING_HAS_UNCONSUMED_RESPONSES(&txq->ring);
+ uint16_t n;
+
+ for (n = 0; n < pending; n++) {
+ RING_IDX slot = (base + n) & (RING_SIZE(&txq->ring) - 1);
+ struct netif_tx_response *rsp =
+ RING_GET_RESPONSE(&txq->ring, slot);
+
+ if (unlikely(rsp->status == NETIF_RSP_NULL))
+ PMD_INIT_LOG(WARNING, "NETIF_RSP_NULL");
+
+ txq->ng_tx.rel_gref[txq->ng_tx.rel_count++] = txq->gref[slot];
+
+ if (likely(txq->mbuf[slot] != NULL)) {
+ rte_pktmbuf_free(txq->mbuf[slot]);
+ txq->mbuf[slot] = NULL;
+ }
+ }
+ txq->ring.rsp_cons = (base + n);
+
+ return 0;
+}
+
+/*
+ * tx burst function.
+ *
+ * Returns 0 while the other end is not connected. After a reconnect
+ * (changed disconnect count) the queue is drained and the ring
+ * re-initialised first. Completed requests are reaped before new
+ * packets are queued; the number of queued packets is returned.
+ */
+uint16_t
+xen_dev_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct xen_tx_queue *txq = tx_queue;
+
+ if (!likely(txq->xa->info_page->is_connected))
+ return 0;
+
+ if (unlikely(txq->xa->info_page->disconnect_count !=
+ txq->tx_disconnect_count)) {
+ xen_tx_queue_release(txq);
+ xen_tx_ring_init(txq);
+ }
+
+ xen_dev_tx_recv_responses(txq);
+
+ return xen_dev_tx_send_requests(txq, tx_pkts, nb_pkts);
+}
+
+/* Drop every buffer held by the tx queue: each slot with an mbuf
+ * attached has the mbuf freed and its grant reference put on the
+ * release list, then the kernel is notified. */
+void
+xen_tx_queue_release(struct xen_tx_queue *txq)
+{
+ uint16_t slot;
+
+ txq->ng_tx.count = 0;
+ txq->ng_tx.rel_count = 0;
+
+ for (slot = 0; slot < (RING_SIZE(&txq->ring)); slot++) {
+ if (txq->mbuf[slot] == NULL)
+ continue;
+
+ rte_pktmbuf_free(txq->mbuf[slot]);
+ txq->mbuf[slot] = NULL;
+ txq->ng_tx.rel_gref[txq->ng_tx.rel_count++] = txq->gref[slot];
+ }
+
+ xen_evtchn_notify_grant_tx(txq);
+}
+
+/* tx_queue_release callback: release the buffers of the queue and free
+ * the queue itself. A NULL queue is ignored. */
+void
+xen_dev_tx_queue_release(void *txq)
+{
+ struct xen_tx_queue *queue = txq;
+
+ if (queue == NULL)
+ return;
+
+ xen_tx_queue_release(queue);
+ rte_free(queue);
+}
+
+/*
+ * tx_queue_setup callback.
+ *
+ * Allocates the queue, binds it to the tx shared ring mapping and to
+ * the tx part of the grant reference table, initialises the ring and
+ * installs the tx burst function. nb_desc, socket_id and tx_conf are
+ * ignored.
+ *
+ * Returns 0 on success, -ENOMEM when the info page offers fewer tx
+ * grant references than the ring has slots or when the allocation
+ * fails.
+ */
+int
+xen_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+ __attribute__((unused)) uint16_t nb_desc,
+ __attribute__((unused)) unsigned int socket_id,
+ __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
+{
+ struct xen_tx_queue *txq;
+ struct xen_adapter *xa = VA_XA(dev);
+ struct xen_adapter_info *info = xa->info_page;
+
+ /* one grant reference is needed per ring slot */
+ if (NET_TX_RING_SIZE > info->tx_grefs_count) {
+ PMD_INIT_LOG(ERR, "tx ring size greater than tx grefs count");
+ return -ENOMEM;
+ }
+
+ txq = rte_zmalloc("tx_queue", sizeof(struct xen_tx_queue),
+ RTE_CACHE_LINE_SIZE);
+ if (NULL == txq) {
+ PMD_INIT_LOG(ERR, "rte_zmalloc for txq failed!");
+ return -ENOMEM;
+ }
+
+ txq->txs = (struct netif_tx_sring *)xa->uio_res[TX_RING_MAP];
+
+ txq->xa = xa;
+ txq->queue_id = queue_idx;
+ txq->port_id = dev->data->port_id;
+ txq->ng_tx.is_rx = 0;
+ txq->ng_tx.rel_count = 0;
+ /* the tx grant references follow the rx ones in the table */
+ txq->gref = &info->rxtx_grefs[info->rx_grefs_count];
+
+ /* NOTE(review): a queue already stored in this slot is overwritten
+ * without being released - confirm the caller releases it first */
+ dev->data->tx_queues[queue_idx] = txq;
+ if (!xa->tx_queues)
+ xa->tx_queues = dev->data->tx_queues;
+
+ /* needs every field above */
+ xen_tx_ring_init(txq);
+
+ dev->tx_pkt_burst = xen_dev_xmit_pkts;
+
+ return 0;
+}
diff --git a/lib/librte_pmd_xen/xen_rxtx.h b/lib/librte_pmd_xen/xen_rxtx.h
new file mode 100644
index 0000000..eea41c8
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_rxtx.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _XEN_RXTX_H_
+#define _XEN_RXTX_H_
+
+#define DEFAULT_RX_FREE_THRESH 0
+#define DEFAULT_TX_FREE_THRESH 512
+
+struct xen_tx_stats {
+ uint64_t opackets;
+ uint64_t obytes;
+ uint64_t oerrors;
+};
+
+struct xen_rx_stats {
+ uint64_t ipackets;
+ uint64_t ibytes;
+ uint64_t ierrors;
+};
+
+enum rx_resp_state {
+ RX_RESP_GENERAL = 0,
+ RX_RESP_CONTINUE,
+ RX_RESP_EXTRA
+};
+
+/* State of one receive queue. */
+struct xen_rx_queue {
+ /** RX queue index. */
+ uint16_t queue_id;
+ /** Device port identifier. */
+ uint8_t port_id;
+ /** mbuf pool to populate RX ring. */
+ struct rte_mempool *mb_pool;
+ /** Ptr to dev_private data. */
+ struct xen_adapter *xa;
+
+ /* Xen specific */
+
+ /** Front end of the xen rx ring shared with other end. */
+ netif_rx_front_ring_t ring;
+ /** Shared rx ring (the RX_RING_MAP uio mapping). */
+ struct netif_rx_sring *rxs;
+ /** Grefs for sharing with the other end. */
+ grant_ref_t *gref;
+ /** mbuf attached to each ring slot, indexed by ring index. */
+ struct rte_mbuf *mbuf[NET_RX_RING_SIZE];
+ /** state machine */
+ enum rx_resp_state state;
+ /** First packet segment. */
+ struct rte_mbuf *first_frag;
+ /** Previous packet segment. */
+ struct rte_mbuf *prev_frag;
+ /** Statistics. */
+ struct xen_rx_stats rx_stats;
+ /** Number of disconnections. */
+ uint8_t rx_disconnect_count;
+ /** Notify and gnttab ioctl struct. */
+ struct ioctl_evtchn_notify_grant ng_rx;
+};
+
+/* State of one transmit queue. */
+struct xen_tx_queue {
+ /** TX queue index. */
+ uint16_t queue_id;
+ /** Device port identifier. */
+ uint8_t port_id;
+ /** Ptr to dev_private data */
+ struct xen_adapter *xa;
+
+ /* Xen specific */
+
+ /** Front end of the xen tx ring shared with other end. */
+ netif_tx_front_ring_t ring;
+ /** Shared tx ring (the TX_RING_MAP uio mapping). */
+ struct netif_tx_sring *txs;
+ /** Grefs for sharing with the other end. */
+ grant_ref_t *gref;
+ /** mbuf attached to each ring slot, indexed by ring index. */
+ struct rte_mbuf *mbuf[NET_TX_RING_SIZE];
+ /** Statistics. */
+ struct xen_tx_stats tx_stats;
+ /** Number of disconnections. */
+ uint8_t tx_disconnect_count;
+ /** Notify and gnttab ioctl struct. */
+ struct ioctl_evtchn_notify_grant ng_tx;
+};
+
+int xen_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+ uint16_t nb_rx_desc, unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *mb_pool);
+
+int xen_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+ uint16_t nb_tx_desc, unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf);
+
+void xen_dev_rx_queue_release(void *rxq);
+void xen_dev_tx_queue_release(void *txq);
+void xen_rx_queue_release(struct xen_rx_queue *rxq);
+void xen_tx_queue_release(struct xen_tx_queue *txq);
+
+int xen_dev_rx_send_requests(struct xen_rx_queue *rxq);
+int xen_dev_tx_recv_responses(struct xen_tx_queue *txq);
+
+uint16_t xen_dev_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
+uint16_t xen_dev_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+
+#endif /* _XEN_RXTX_H_ */
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 334cb25..5d0927c 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -192,6 +192,10 @@ LDLIBS += -lrte_pmd_xenvirt
LDLIBS += -lxenstore
endif
+ifeq ($(CONFIG_RTE_LIBRTE_XEN_PMD),y)
+LDLIBS += -lrte_pmd_xen
+endif
+
ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n)
# plugins (link only if static libraries)
--
2.1.4
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH 3/5] xen: add phys-addr command line argument
2015-02-15 15:24 ` [dpdk-dev] [PATCH 3/5] xen: add phys-addr command line argument Stephen Hemminger
@ 2015-02-26 7:55 ` Liu, Jijiang
2015-02-26 16:09 ` Stephen Hemminger
0 siblings, 1 reply; 23+ messages in thread
From: Liu, Jijiang @ 2015-02-26 7:55 UTC (permalink / raw)
To: Stephen Hemminger, dev; +Cc: Stephen Hemminger
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Stephen Hemminger
> Sent: Sunday, February 15, 2015 11:25 PM
> To: dev@dpdk.org
> Cc: Stephen Hemminger
> Subject: [dpdk-dev] [PATCH 3/5] xen: add phys-addr command line argument
>
> Allow overriding default Xen DOM0 behavior to use physical addresses instead of
> mfn
If the application is built with DOM0 support , the application will run in non Dom0 environment when using the new argument, correct?
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
> v2 -- no changes
>
> lib/librte_eal/common/eal_common_options.c | 5 +++++
> lib/librte_eal/common/eal_internal_cfg.h | 1 +
> lib/librte_eal/common/eal_options.h | 2 ++
> lib/librte_eal/common/include/rte_memory.h | 3 +++
> lib/librte_eal/linuxapp/eal/eal_memory.c | 5 +++++
> lib/librte_mempool/rte_dom0_mempool.c | 10 ++++++++--
> 6 files changed, 24 insertions(+), 2 deletions(-)
>
> diff --git a/lib/librte_eal/common/eal_common_options.c
> b/lib/librte_eal/common/eal_common_options.c
> index 67e02dc..1742364 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -83,6 +83,7 @@ eal_long_options[] = {
> {OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM},
> {OPT_BASE_VIRTADDR, 1, 0, OPT_BASE_VIRTADDR_NUM},
> {OPT_XEN_DOM0, 0, 0, OPT_XEN_DOM0_NUM},
> + {OPT_XEN_PHYS_ADDR, 0, 0, OPT_XEN_PHYS_ADDR_NUM},
> {OPT_CREATE_UIO_DEV, 1, NULL, OPT_CREATE_UIO_DEV_NUM},
> {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM},
> {0, 0, 0, 0}
> @@ -491,6 +492,10 @@ eal_parse_common_option(int opt, const char
> *optarg,
> }
> conf->log_level = log;
> break;
> +
> + case OPT_XEN_PHYS_ADDR_NUM:
> + conf->xen_phys_addr_support = 1;
> + break;
> }
>
> /* don't know what to do, leave this to caller */ diff --git
> a/lib/librte_eal/common/eal_internal_cfg.h
> b/lib/librte_eal/common/eal_internal_cfg.h
> index e2ecb0d..41b4169 100644
> --- a/lib/librte_eal/common/eal_internal_cfg.h
> +++ b/lib/librte_eal/common/eal_internal_cfg.h
> @@ -65,6 +65,7 @@ struct internal_config {
> volatile unsigned force_nrank; /**< force number of ranks */
> volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */
> volatile unsigned xen_dom0_support; /**< support app running on Xen
> Dom0*/
> + volatile unsigned xen_phys_addr_support; /**< support phys addr */
> volatile unsigned no_pci; /**< true to disable PCI */
> volatile unsigned no_hpet; /**< true to disable HPET */
> volatile unsigned vmware_tsc_map; /**< true to use VMware TSC
> mapping diff --git a/lib/librte_eal/common/eal_options.h
> b/lib/librte_eal/common/eal_options.h
> index e476f8d..8aee959 100644
> --- a/lib/librte_eal/common/eal_options.h
> +++ b/lib/librte_eal/common/eal_options.h
> @@ -73,6 +73,8 @@ enum {
> OPT_BASE_VIRTADDR_NUM,
> #define OPT_XEN_DOM0 "xen-dom0"
> OPT_XEN_DOM0_NUM,
> +#define OPT_XEN_PHYS_ADDR "xen-phys-addr"
> + OPT_XEN_PHYS_ADDR_NUM,
> #define OPT_CREATE_UIO_DEV "create-uio-dev"
> OPT_CREATE_UIO_DEV_NUM,
> #define OPT_VFIO_INTR "vfio-intr"
> diff --git a/lib/librte_eal/common/include/rte_memory.h
> b/lib/librte_eal/common/include/rte_memory.h
> index ab6c1ff..c3b8a98 100644
> --- a/lib/librte_eal/common/include/rte_memory.h
> +++ b/lib/librte_eal/common/include/rte_memory.h
> @@ -180,6 +180,9 @@ unsigned rte_memory_get_nrank(void); /**< Internal
> use only - should DOM0 memory mapping be used */ extern int
> is_xen_dom0_supported(void);
>
> +/**< Internal use only - should DOM0 use physical addresses insted of
> +mfn */ extern int is_xen_phys_addr_supported(void);
> +
> /**
> * Return the physical address of elt, which is an element of the pool mp.
> *
> diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c
> b/lib/librte_eal/linuxapp/eal/eal_memory.c
> index 4afda2a..a759ac9 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_memory.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
> @@ -103,6 +103,11 @@ int is_xen_dom0_supported(void) {
> return internal_config.xen_dom0_support; }
> +
> +int is_xen_phys_addr_supported(void)
> +{
> + return internal_config.xen_phys_addr_support;
> +}
> #endif
>
> /**
> diff --git a/lib/librte_mempool/rte_dom0_mempool.c
> b/lib/librte_mempool/rte_dom0_mempool.c
> index 9ec68fb..ab35826 100644
> --- a/lib/librte_mempool/rte_dom0_mempool.c
> +++ b/lib/librte_mempool/rte_dom0_mempool.c
> @@ -74,8 +74,14 @@ get_phys_map(void *va, phys_addr_t pa[], uint32_t
> pg_num,
> virt_addr =(uintptr_t) mcfg->memseg[memseg_id].addr;
>
> for (i = 0; i != pg_num; i++) {
> - mfn_id = ((uintptr_t)va + i * pg_sz - virt_addr) / RTE_PGSIZE_2M;
> - pa[i] = mcfg->memseg[memseg_id].mfn[mfn_id] * page_size;
> + if (!is_xen_phys_addr_supported()) {
> + mfn_id = ((uintptr_t)va + i * pg_sz -
> + virt_addr) / RTE_PGSIZE_2M;
> + pa[i] = mcfg->memseg[memseg_id].mfn[mfn_id] * page_size;
> + } else {
> + pa[i] = mcfg->memseg[memseg_id].phys_addr + i * pg_sz +
> + (uintptr_t)va - virt_addr;
> + }
> }
> }
>
> --
> 2.1.4
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH 3/5] xen: add phys-addr command line argument
2015-02-26 7:55 ` Liu, Jijiang
@ 2015-02-26 16:09 ` Stephen Hemminger
0 siblings, 0 replies; 23+ messages in thread
From: Stephen Hemminger @ 2015-02-26 16:09 UTC (permalink / raw)
To: Liu, Jijiang; +Cc: dev, Stephen Hemminger
On Thu, 26 Feb 2015 07:55:13 +0000
"Liu, Jijiang" <jijiang.liu@intel.com> wrote:
> If the application is built with DOM0 support , the application will run in non Dom0 environment when using the new argument, correct?
Yes.
Our startup script has
if [ -d /proc/xen ] ; then
log_progress_msg "xen_uio"
modprobe xen_uio
if [ "$(cat /proc/xen/domain)" == "pv" ]; then
log_progress_msg "rte_dom0_mm"
modprobe rte_dom0_mm xen_phys_addr=y
set_domU_memsize
XEN_ARGS="--xen-dom0 --xen-phys-addr"
fi
fi
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH 2/5] enic: fix device to work with Xen DOM0
2015-02-15 15:24 ` [dpdk-dev] [PATCH 2/5] enic: fix device to work with Xen DOM0 Stephen Hemminger
@ 2015-03-10 7:08 ` Liu, Jijiang
0 siblings, 0 replies; 23+ messages in thread
From: Liu, Jijiang @ 2015-03-10 7:08 UTC (permalink / raw)
To: Stephen Hemminger, dev; +Cc: Stephen Hemminger
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Stephen Hemminger
> Sent: Sunday, February 15, 2015 11:25 PM
> To: dev@dpdk.org
> Cc: Stephen Hemminger
> Subject: [dpdk-dev] [PATCH 2/5] enic: fix device to work with Xen DOM0
>
> It is possible to passthrough a PCI device when running in Xen Paravirt mode.
> The device driver has to accommodate by using memory zones differently. This
> patch models the memory allocation for ENIC device based on changes already
> done for ixgbe and igb.
>
> Build tested only; has not been tested on ENIC hardware.
> ---
Acked-by: Jijiang Liu <Jijiang.liu@intel.com>
> v2 -- this patch is added
>
> lib/librte_pmd_enic/enic_main.c | 19 ++++++++++++++++---
> lib/librte_pmd_enic/vnic/vnic_dev.c | 19 +++++++++++++++----
> 2 files changed, 31 insertions(+), 7 deletions(-)
>
> diff --git a/lib/librte_pmd_enic/enic_main.c b/lib/librte_pmd_enic/enic_main.c
> index 48fdca2..0be5172 100644
> --- a/lib/librte_pmd_enic/enic_main.c
> +++ b/lib/librte_pmd_enic/enic_main.c
> @@ -537,8 +537,14 @@ enic_alloc_consistent(__rte_unused void *priv, size_t
> size,
> const struct rte_memzone *rz;
> *dma_handle = 0;
>
> - rz = rte_memzone_reserve_aligned((const char *)name,
> - size, 0, 0, ENIC_ALIGN);
> +#ifdef RTE_LIBRTE_XEN_DOM0
> + if (is_xen_dom0_supported())
> + rz = rte_memzone_reserve_bounded((char *)name, size,
> + 0, 0, ENIC_ALIGN, RTE_PGSIZE_2M);
> + else
> +#endif
> + rz = rte_memzone_reserve_aligned((char *)name, size,
> + 0, 0, ENIC_ALIGN);
> if (!rz) {
> pr_err("%s : Failed to allocate memory requested for %s",
> __func__, name);
> @@ -546,7 +552,14 @@ enic_alloc_consistent(__rte_unused void *priv, size_t
> size,
> }
>
> vaddr = rz->addr;
> - *dma_handle = (dma_addr_t)rz->phys_addr;
> +
> +#ifdef RTE_LIBRTE_XEN_DOM0
> + if (is_xen_dom0_supported())
> + *dma_handle = rte_mem_phy2mch(rz->memseg_id,
> + rz->phys_addr);
> + else
> +#endif
> + *dma_handle = (dma_addr_t)rz->phys_addr;
>
> return vaddr;
> }
> diff --git a/lib/librte_pmd_enic/vnic/vnic_dev.c
> b/lib/librte_pmd_enic/vnic/vnic_dev.c
> index 6407994..e660aaf 100644
> --- a/lib/librte_pmd_enic/vnic/vnic_dev.c
> +++ b/lib/librte_pmd_enic/vnic/vnic_dev.c
> @@ -276,9 +276,14 @@ int vnic_dev_alloc_desc_ring(__attribute__((unused))
> struct vnic_dev *vdev,
>
> vnic_dev_desc_ring_size(ring, desc_count, desc_size);
>
> - rz = rte_memzone_reserve_aligned(z_name,
> - ring->size_unaligned, socket_id,
> - 0, ENIC_ALIGN);
> +#ifdef RTE_LIBRTE_XEN_DOM0
> + if (is_xen_dom0_supported())
> + rz = rte_memzone_reserve_bounded(z_name, ring-
> >size_unaligned,
> + socket_id, 0, ENIC_ALIGN,
> RTE_PGSIZE_2M);
> + else
> +#endif
> + rz = rte_memzone_reserve_aligned(z_name, ring-
> >size_unaligned,
> + socket_id, 0, ENIC_ALIGN);
> if (!rz) {
> pr_err("Failed to allocate ring (size=%d), aborting\n",
> (int)ring->size);
> @@ -292,7 +297,13 @@ int vnic_dev_alloc_desc_ring(__attribute__((unused))
> struct vnic_dev *vdev,
> return -ENOMEM;
> }
>
> - ring->base_addr_unaligned = (dma_addr_t)rz->phys_addr;
> +#ifdef RTE_LIBRTE_XEN_DOM0
> + if (is_xen_dom0_supported())
> + ring->base_addr_unaligned = rte_mem_phy2mch(rz-
> >memseg_id,
> + rz->phys_addr);
> + else
> +#endif
> + ring->base_addr_unaligned = (dma_addr_t)rz->phys_addr;
>
> ring->base_addr = ALIGN(ring->base_addr_unaligned,
> ring->base_align);
> --
> 2.1.4
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH 1/5] xen: allow choosing dom0 support at runtime
2015-02-15 15:24 [dpdk-dev] [PATCH 1/5] xen: allow choosing dom0 support at runtime Stephen Hemminger
` (3 preceding siblings ...)
2015-02-15 15:24 ` [dpdk-dev] [PATCH 5/5] xen: net-front " Stephen Hemminger
@ 2015-07-09 0:10 ` Thomas Monjalon
4 siblings, 0 replies; 23+ messages in thread
From: Thomas Monjalon @ 2015-07-09 0:10 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: dev, Stephen Hemminger
Please, could you try to rebase it?
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver
2015-02-15 15:24 ` [dpdk-dev] [PATCH 4/5] xen: add uio driver Stephen Hemminger
@ 2016-03-22 9:55 ` Jan Blunck
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 1/3] xen: Add UIO kernel driver Jan Blunck
` (4 more replies)
0 siblings, 5 replies; 23+ messages in thread
From: Jan Blunck @ 2016-03-22 9:55 UTC (permalink / raw)
To: dev; +Cc: jblunck, shemming
v3 changes:
- removed fake PCI interface
- removed struct virt_eth_driver
- check for UIO name and version
- added basic documentation
Jan Blunck (3):
xen: Add UIO kernel driver
xen: Add netfront poll mode driver
xen: Add documentation
config/common_base | 6 +
doc/guides/nics/overview.rst | 28 +-
doc/guides/nics/xen.rst | 101 ++++
drivers/net/Makefile | 1 +
drivers/net/xen/Makefile | 30 +
drivers/net/xen/uio.c | 245 ++++++++
drivers/net/xen/uio.h | 54 ++
drivers/net/xen/xen_adapter_info.h | 64 ++
drivers/net/xen/xen_dev.c | 489 +++++++++++++++
drivers/net/xen/xen_dev.h | 30 +
drivers/net/xen/xen_logs.h | 19 +
drivers/net/xen/xen_rxtx.c | 757 ++++++++++++++++++++++++
drivers/net/xen/xen_rxtx.h | 131 ++++
lib/librte_eal/linuxapp/Makefile | 1 +
lib/librte_eal/linuxapp/xen_uio/Makefile | 56 ++
lib/librte_eal/linuxapp/xen_uio/compat.h | 47 ++
lib/librte_eal/linuxapp/xen_uio/xen_uio.c | 954 ++++++++++++++++++++++++++++++
17 files changed, 2999 insertions(+), 14 deletions(-)
create mode 100644 doc/guides/nics/xen.rst
create mode 100644 drivers/net/xen/Makefile
create mode 100644 drivers/net/xen/uio.c
create mode 100644 drivers/net/xen/uio.h
create mode 100644 drivers/net/xen/xen_adapter_info.h
create mode 100644 drivers/net/xen/xen_dev.c
create mode 100644 drivers/net/xen/xen_dev.h
create mode 100644 drivers/net/xen/xen_logs.h
create mode 100644 drivers/net/xen/xen_rxtx.c
create mode 100644 drivers/net/xen/xen_rxtx.h
create mode 100644 lib/librte_eal/linuxapp/xen_uio/Makefile
create mode 100644 lib/librte_eal/linuxapp/xen_uio/compat.h
create mode 100644 lib/librte_eal/linuxapp/xen_uio/xen_uio.c
--
2.5.5
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH v3 1/3] xen: Add UIO kernel driver
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver Jan Blunck
@ 2016-03-22 9:55 ` Jan Blunck
2016-03-22 10:42 ` Thomas Monjalon
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 2/3] xen: Add netfront poll mode driver Jan Blunck
` (3 subsequent siblings)
4 siblings, 1 reply; 23+ messages in thread
From: Jan Blunck @ 2016-03-22 9:55 UTC (permalink / raw)
To: dev; +Cc: jblunck, shemming, Stephen Hemminger
New UIO helper kernel driver for Xen netfront UIO poll mode driver.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Jan Blunck <jblunck@infradead.org>
---
lib/librte_eal/linuxapp/Makefile | 1 +
lib/librte_eal/linuxapp/xen_uio/Makefile | 56 ++
lib/librte_eal/linuxapp/xen_uio/compat.h | 47 ++
lib/librte_eal/linuxapp/xen_uio/xen_uio.c | 954 ++++++++++++++++++++++++++++++
4 files changed, 1058 insertions(+)
create mode 100644 lib/librte_eal/linuxapp/xen_uio/Makefile
create mode 100644 lib/librte_eal/linuxapp/xen_uio/compat.h
create mode 100644 lib/librte_eal/linuxapp/xen_uio/xen_uio.c
diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile
index 20d2a91..6b33e87 100644
--- a/lib/librte_eal/linuxapp/Makefile
+++ b/lib/librte_eal/linuxapp/Makefile
@@ -35,5 +35,6 @@ DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal
DIRS-$(CONFIG_RTE_EAL_IGB_UIO) += igb_uio
DIRS-$(CONFIG_RTE_KNI_KMOD) += kni
DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += xen_dom0
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_XEN) += xen_uio
include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/linuxapp/xen_uio/Makefile b/lib/librte_eal/linuxapp/xen_uio/Makefile
new file mode 100644
index 0000000..936e8bf
--- /dev/null
+++ b/lib/librte_eal/linuxapp/xen_uio/Makefile
@@ -0,0 +1,56 @@
+# BSD LICENSE
+#
+# Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = xen_uio
+MODULE_PATH = drivers/net//xen_uio
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=100
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -Winline -Wall -Werror
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+MODULE_CFLAGS += -I$(RTE_SDK)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y := xen_uio.c
+
+
+include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/xen_uio/compat.h b/lib/librte_eal/linuxapp/xen_uio/compat.h
new file mode 100644
index 0000000..b4f30d9
--- /dev/null
+++ b/lib/librte_eal/linuxapp/xen_uio/compat.h
@@ -0,0 +1,47 @@
+/*
+ * Minimal wrappers to allow compiling xen_uio on older kernels.
+ *
+ * Copyright (c) 2016 Brocade Communications Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _XEN_UIO_COMPAT_H_
+#define _XEN_UIO_COMPAT_H_
+
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0)
+#define INVALID_GRANT_HANDLE (~0U)
+
+/*
+ * Stand-in for the four-argument xenbus_grant_ring() on kernels older
+ * than 4.1.
+ *
+ * Forwards to the two-argument xenbus_grant_ring(dev, mfn), so only the page
+ * holding @vaddr is granted and @nr_pages is ignored. A non-negative result
+ * is stored in *@grefs and returned; a negative result is returned unchanged
+ * and *@grefs is left untouched.
+ */
+static inline int compat_xenbus_grant_ring(struct xenbus_device *dev,
+					   void *vaddr,
+					   unsigned int nr_pages,
+					   grant_ref_t *grefs)
+{
+	int gref = xenbus_grant_ring(dev, virt_to_mfn(vaddr));
+
+	if (gref < 0)
+		return gref;
+
+	*grefs = gref;
+	return gref;
+}
+
+#define xenbus_grant_ring(dev, vaddr, nr_pages, grefs) \
+ compat_xenbus_grant_ring(dev, vaddr, nr_pages, grefs)
+
+#endif /* < 4.1.0 */
+
+#endif /* _XEN_UIO_COMPAT_H_ */
diff --git a/lib/librte_eal/linuxapp/xen_uio/xen_uio.c b/lib/librte_eal/linuxapp/xen_uio/xen_uio.c
new file mode 100644
index 0000000..4f35956
--- /dev/null
+++ b/lib/librte_eal/linuxapp/xen_uio/xen_uio.c
@@ -0,0 +1,954 @@
+/*
+ * Virtual network driver for conversing with remote driver backends.
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ * Copyright (c) 2005, XenSource Ltd
+ * Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/if_ether.h>
+#include <linux/proc_fs.h>
+#include <linux/delay.h>
+
+#include <xen/xenbus.h>
+#include <xen/page.h>
+#include <xen/grant_table.h>
+#include <xen/interface/io/netif.h>
+#include <xen/platform_pci.h>
+
+#include <xen/events.h>
+#include <xen/evtchn.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+#include <linux/uio_driver.h>
+
+#include "drivers/net/xen/xen_adapter_info.h"
+#include "compat.h"
+
+#define NET_TX_RING_SIZE \
+ __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
+#define NET_RX_RING_SIZE \
+ __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
+
+/*
+ * Upper bound on the number of tx grant references handed to user space.
+ * Derived from the tx ring size (the original used the rx ring size here,
+ * a copy-paste slip from RX_MAX_TARGET).
+ */
+#define TX_MAX_TARGET \
+	min_t(int, NET_TX_RING_SIZE, 256)
+#define RX_MAX_TARGET \
+ min_t(int, NET_RX_RING_SIZE, 256)
+
+#define RXTX_GREFS (TX_MAX_TARGET + RX_MAX_TARGET)
+
+#define DOMAIN_PROC "xen/domain"
+struct proc_dir_entry *domain_proc;
+char domain_name[9];
+size_t domain_len = sizeof(domain_name);
+static const char * const domains[] = { "native", "pv", "hvm", "unknown" };
+
+struct netfront_info *xennet_alloc_resources(struct xenbus_device *xbdev);
+static void xennet_free_resources(struct xenbus_device *xbdev);
+static int xennet_connect_backend(struct netfront_info *info);
+static void xennet_disconnect_backend(struct netfront_info *info,
+ int deffered_free);
+
+/* some helpers */
+/*
+ * Query the grant table version of this domain (DOMID_SELF).
+ *
+ * Returns the version reported by the GNTTABOP_get_version hypercall, or the
+ * negative hypercall result when the call fails.
+ */
+static int __gnttab_version(void)
+{
+	struct gnttab_get_version query;
+	int rc;
+
+	query.dom = DOMID_SELF;
+
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_get_version, &query, 1);
+	if (rc < 0)
+		return rc;
+
+	return (int)query.version;
+}
+
+/*
+ * End foreign access for grant reference @ref, unless it is
+ * INVALID_GRANT_HANDLE. @page is handed to gnttab_end_foreign_access(),
+ * which releases it as a side effect; callers pass NULL to keep the page.
+ */
+static void xennet_end_access(int ref, void *page)
+{
+	if (ref == INVALID_GRANT_HANDLE)
+		return;
+
+	gnttab_end_foreign_access(ref, 0, (unsigned long)page);
+}
+
+/*
+ * Read the "mac" key below the device's xenstore node and parse it as six
+ * colon-separated hex octets into @mac (ETH_ALEN bytes).
+ *
+ * Returns 0 on success, the xenbus_read() error if the key cannot be read,
+ * or -ENOENT if the string does not contain six octets.
+ */
+static int xen_net_read_mac(struct xenbus_device *xbdev, u8 *mac)
+{
+	char *text;
+	int fields;
+	int rc = 0;
+
+	text = xenbus_read(XBT_NIL, xbdev->nodename, "mac", NULL);
+	if (IS_ERR(text))
+		return PTR_ERR(text);
+
+	pr_info("mac addr: %s\n", text);
+
+	fields = sscanf(text, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+			mac, mac + 1, mac + 2, mac + 3, mac + 4, mac + 5);
+	if (fields != ETH_ALEN) {
+		pr_warn("can't parse mac address\n");
+		rc = -ENOENT;
+	}
+
+	/* the string returned by xenbus_read() is owned by the caller */
+	kfree(text);
+	return rc;
+}
+
+/* Per-device UIO state; allocated and filled in by xennet_uio_init(). */
+struct xen_uio_dev {
+	/* descriptor passed to uio_register_device()/uio_unregister_device() */
+	struct uio_info info;
+};
+
+/* Kernel-side state for one netfront device exported to user space via UIO. */
+struct netfront_info {
+	/* xenbus device this state belongs to */
+	struct xenbus_device *xbdev;
+
+	/* grant reference of the tx shared ring page, or INVALID_GRANT_HANDLE */
+	int tx_ring_ref;
+	/* front ring initialised over txs in setup_netfront() */
+	struct xen_netif_tx_front_ring tx;
+
+	/* grant reference of the rx shared ring page, or INVALID_GRANT_HANDLE */
+	int rx_ring_ref;
+	/* front ring initialised over rxs in setup_netfront() */
+	struct xen_netif_rx_front_ring rx;
+
+	/* shared ring pages; set to NULL once handed over for deferred free */
+	struct xen_netif_tx_sring *txs;
+	struct xen_netif_rx_sring *rxs;
+
+	/* head of the gref list from gnttab_alloc_grant_references() */
+	grant_ref_t gref_rxtx_head;
+
+	/* UIO registration state, NULL when no UIO device is registered */
+	struct xen_uio_dev *xen_udev;
+
+	/* page mapped into user space as the INFO_MAP UIO region */
+	struct xen_adapter_info *shared_info_page;
+};
+
+/*
+ * Allocate the UIO bookkeeping for @info and register a UIO device that
+ * exposes three one-page logical memory regions: the shared info page, the
+ * rx shared ring and the tx shared ring.
+ *
+ * Returns 0 on success, -ENOMEM if the bookkeeping cannot be allocated, or
+ * the uio_register_device() error. On failure info->xen_udev is NULL.
+ */
+static int xennet_uio_init(struct xenbus_device *xbdev,
+			   struct netfront_info *info)
+{
+	struct xen_uio_dev *udev;
+	struct uio_mem *mem;
+	int rc;
+
+	udev = kzalloc(sizeof(struct xen_uio_dev), GFP_KERNEL);
+	if (!udev)
+		return -ENOMEM;
+
+	info->xen_udev = udev;
+
+	/* identification; no interrupt is delivered through UIO */
+	udev->info.name = "xen_uio";
+	udev->info.version = "0.1";
+	udev->info.irq = UIO_IRQ_NONE;
+	udev->info.irq_flags = 0;
+
+	/* region with everything user space needs to drive the rings */
+	mem = &udev->info.mem[INFO_MAP];
+	mem->name = "xennet info page";
+	mem->memtype = UIO_MEM_LOGICAL;
+	mem->addr = (phys_addr_t)info->shared_info_page;
+	mem->size = PAGE_SIZE;
+
+	mem = &udev->info.mem[RX_RING_MAP];
+	mem->name = "xennet front rx ring";
+	mem->memtype = UIO_MEM_LOGICAL;
+	mem->addr = (phys_addr_t)info->rxs;
+	mem->size = PAGE_SIZE;
+
+	mem = &udev->info.mem[TX_RING_MAP];
+	mem->name = "xennet front tx ring";
+	mem->memtype = UIO_MEM_LOGICAL;
+	mem->addr = (phys_addr_t)info->txs;
+	mem->size = PAGE_SIZE;
+
+	rc = uio_register_device(&xbdev->dev, &udev->info);
+	if (rc) {
+		pr_err("uio register failed: %d\n", rc);
+		kfree(udev);
+		info->xen_udev = NULL;
+		return rc;
+	}
+
+	pr_info("uio device registered with irq %lx\n", udev->info.irq);
+	return 0;
+}
+
+
+/*
+ * Unregister the UIO device set up by xennet_uio_init() and release its
+ * bookkeeping structure. Does nothing when no device is registered.
+ */
+static void xennet_uio_uninit(struct netfront_info *info)
+{
+	struct xen_uio_dev *udev = info->xen_udev;
+
+	if (!udev)
+		return;
+
+	uio_unregister_device(&udev->info);
+	info->xen_udev = NULL;
+	/* kzalloc'ed in xennet_uio_init(); dropping the pointer alone leaks it */
+	kfree(udev);
+}
+
+/*
+ * Allocate everything a netfront device needs before it can connect: the
+ * netfront_info itself, one zeroed page each for the tx ring, the rx ring
+ * and the info page shared with user space, RXTX_GREFS grant references
+ * (published in the info page), and the UIO device exporting the pages.
+ *
+ * Returns the new netfront_info, or NULL on any failure, in which case
+ * everything allocated so far has been released again.
+ */
+struct netfront_info *xennet_alloc_resources(struct xenbus_device *xbdev)
+{
+	struct xen_adapter_info *shared;
+	struct netfront_info *info;
+	grant_ref_t gref_rxtx_head;
+	int gref;
+	int ret;
+	int i;
+
+	info = kzalloc(sizeof(struct netfront_info), GFP_KERNEL);
+	if (!info)
+		return NULL;
+
+	info->gref_rxtx_head = INVALID_GRANT_HANDLE;
+	info->xbdev = xbdev;
+
+	/* allocate place for tx ring */
+	info->txs = (struct xen_netif_tx_sring *)get_zeroed_page(
+			GFP_NOIO | __GFP_HIGH);
+	if (!info->txs) {
+		ret = -ENOMEM;
+		xenbus_dev_fatal(xbdev, ret, "allocating tx ring page");
+		goto exit;
+	}
+
+	/* allocate place for rx ring */
+	info->rxs = (struct xen_netif_rx_sring *)get_zeroed_page(
+			GFP_NOIO | __GFP_HIGH);
+	if (!info->rxs) {
+		ret = -ENOMEM;
+		xenbus_dev_fatal(xbdev, ret, "allocating rx ring page");
+		goto exit;
+	}
+
+	/*
+	 * Page shared with user space (info page). It must be zeroed: the
+	 * whole page is mapped through UIO, so stale kernel data in the part
+	 * not overwritten below would otherwise be readable by user space.
+	 */
+	shared = (struct xen_adapter_info *)get_zeroed_page(GFP_KERNEL);
+	info->shared_info_page = shared;
+	if (!shared) {
+		pr_alert("xen_uio can't alloc shared page\n");
+		goto exit;
+	}
+
+	/* the end of the last gref slot must still lie inside the page */
+	if ((char *)&shared->rxtx_grefs[RXTX_GREFS] - (char *)shared
+	    > PAGE_SIZE) {
+		pr_err("ASSERT: no mem for grefs\n");
+		goto exit;
+	}
+
+	/* allocate grefs for every tx ring and rx ring slot */
+	ret = gnttab_alloc_grant_references(RXTX_GREFS, &info->gref_rxtx_head);
+	if (ret < 0) {
+		pr_err("xen_uio can't alloc rx and tx grefs\n");
+		goto exit;
+	}
+
+	shared->rx_grefs_count = RX_MAX_TARGET;
+	shared->tx_grefs_count = TX_MAX_TARGET;
+	shared->rx_evtchn = 0;
+	shared->tx_evtchn = 0;
+
+	/*
+	 * Walk the list with a local copy of the head and copy every gref
+	 * into the shared array; info->gref_rxtx_head keeps the original
+	 * head so the whole list can be freed again later.
+	 */
+	gref_rxtx_head = info->gref_rxtx_head;
+	for (i = 0; i < (RXTX_GREFS); i++) {
+		gref = gnttab_claim_grant_reference(&gref_rxtx_head);
+		if (gref < 0) {
+			pr_err("not expected end of list\n");
+			goto exit;
+		}
+		shared->rxtx_grefs[i] = (grant_ref_t)gref;
+	}
+
+	/* setup shared_info_page */
+	shared->rx_ring = &info->rx;
+	shared->tx_ring = &info->tx;
+	/* it's not secure - we need here something else */
+	shared->info = info;
+
+	shared->is_connected = 0;
+	shared->disconnect_count = 0;
+
+	/* share struct by UIO */
+	ret = xennet_uio_init(xbdev, info);
+	if (ret) {
+		pr_err("xennet_uio_init failed\n");
+		goto exit;
+	}
+
+	return info;
+exit:
+	if (info->gref_rxtx_head != INVALID_GRANT_HANDLE)
+		gnttab_free_grant_references(info->gref_rxtx_head);
+	if (info->shared_info_page)
+		free_page((unsigned long)info->shared_info_page);
+	if (info->rxs)
+		free_page((unsigned long)info->rxs);
+	if (info->txs)
+		free_page((unsigned long)info->txs);
+	kfree(info);
+	return NULL;
+}
+
+/*
+ * Release what xennet_alloc_resources() set up for the device whose
+ * netfront_info is stored as driver data of @xbdev: the UIO device, the
+ * grant reference list, the shared info page, any ring pages still owned
+ * here, and the netfront_info itself.
+ */
+void xennet_free_resources(struct xenbus_device *xbdev)
+{
+	struct netfront_info *nf = dev_get_drvdata(&xbdev->dev);
+
+	xennet_uio_uninit(nf);
+	gnttab_free_grant_references(nf->gref_rxtx_head);
+	free_page((unsigned long)nf->shared_info_page);
+
+	/* ring pointers are NULL when the pages went through a deferred free */
+	if (nf->txs)
+		free_page((unsigned long)nf->txs);
+	if (nf->rxs)
+		free_page((unsigned long)nf->rxs);
+
+	kfree(nf);
+}
+
+/*
+ * Prepare the device for connecting to the backend: publish the backend
+ * domain id and MAC address in the shared info page, initialise both rings,
+ * grant the ring pages to the backend and allocate the event channel(s).
+ *
+ * A separate rx event channel is allocated only when the backend advertises
+ * "feature-split-event-channels"; otherwise rx shares the tx channel.
+ *
+ * Returns 0 on success or a negative error. On failure the grants and the
+ * event channel obtained so far are released again; the ring pages stay
+ * owned by @info.
+ */
+static int setup_netfront(struct xenbus_device *xbdev,
+			  struct netfront_info *info)
+{
+	unsigned int feature_split_evtchn;
+	unsigned int max_queues;
+	grant_ref_t gref;
+	int err;
+
+	info->tx_ring_ref = INVALID_GRANT_HANDLE;
+	info->rx_ring_ref = INVALID_GRANT_HANDLE;
+	info->rx.sring = NULL;
+	info->tx.sring = NULL;
+
+	/*
+	 * xennet_disconnect_backend() with deferred free gives the ring pages
+	 * away and clears these pointers; initialising a ring over NULL would
+	 * oops, so refuse to continue without pages.
+	 */
+	if (!info->txs || !info->rxs) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	/* share otherend_id with user */
+	info->shared_info_page->otherend_id = xbdev->otherend_id;
+
+	err = xenbus_scanf(XBT_NIL, xbdev->otherend,
+			   "multi-queue-max-queues", "%u", &max_queues);
+	if (err < 0)
+		max_queues = 1;
+
+	pr_info("multi-queue-max-queues: %u\n", max_queues);
+
+	err = xenbus_scanf(XBT_NIL, xbdev->otherend,
+			   "feature-split-event-channels", "%u",
+			   &feature_split_evtchn);
+	if (err < 0)
+		feature_split_evtchn = 0;
+
+	/* read mac */
+	err = xen_net_read_mac(xbdev, info->shared_info_page->mac);
+	if (err) {
+		xenbus_dev_fatal(xbdev, err, "parsing %s/mac",
+				 xbdev->nodename);
+		goto fail;
+	}
+
+	/* set up queues */
+	SHARED_RING_INIT(info->txs);
+	FRONT_RING_INIT(&info->tx, info->txs, PAGE_SIZE);
+
+	SHARED_RING_INIT(info->rxs);
+	FRONT_RING_INIT(&info->rx, info->rxs, PAGE_SIZE);
+
+	err = xenbus_grant_ring(info->xbdev, info->txs, 1, &gref);
+	if (err < 0) {
+		pr_err("xenbus_grant_ring for txs failed!\n");
+		goto fail_rings;
+	}
+	info->tx_ring_ref = gref;
+
+	err = xenbus_grant_ring(info->xbdev, info->rxs, 1, &gref);
+	if (err < 0) {
+		pr_err("xenbus_grant_ring for rxs failed!\n");
+		goto fail_grants;
+	}
+	info->rx_ring_ref = gref;
+
+	/* alloc eventchn */
+	pr_info("feature_split_evtchn: %d\n",
+		(int)feature_split_evtchn);
+
+	err = xenbus_alloc_evtchn(xbdev, &info->shared_info_page->tx_evtchn);
+	if (err)
+		goto fail_grants;
+
+	if (feature_split_evtchn) {
+		err = xenbus_alloc_evtchn(xbdev,
+					  &info->shared_info_page->rx_evtchn);
+		if (err)
+			goto fail_split;
+	} else {
+		info->shared_info_page->rx_evtchn =
+			info->shared_info_page->tx_evtchn;
+	}
+
+	return 0;
+fail_split:
+	xenbus_free_evtchn(info->xbdev, info->shared_info_page->tx_evtchn);
+	/* forget the closed port so a later disconnect cannot close it again */
+	info->shared_info_page->tx_evtchn = 0;
+	info->shared_info_page->rx_evtchn = 0;
+fail_grants:
+	/*
+	 * Revoke the grants but keep the pages (NULL page argument): they are
+	 * still owned by info and freed in xennet_free_resources().
+	 * xennet_end_access() ignores INVALID_GRANT_HANDLE.
+	 */
+	xennet_end_access(info->rx_ring_ref, NULL);
+	xennet_end_access(info->tx_ring_ref, NULL);
+	info->rx_ring_ref = INVALID_GRANT_HANDLE;
+	info->tx_ring_ref = INVALID_GRANT_HANDLE;
+fail_rings:
+	info->rx.sring = NULL;
+	info->tx.sring = NULL;
+fail:
+	pr_err("setup_netfront failed\n");
+	return err;
+}
+
+/*
+ * Common code used when first setting up, and when resuming.
+ *
+ * Runs setup_netfront() and then publishes the connection parameters (ring
+ * references, event channel(s) and feature flags) below the device's
+ * xenstore node inside one xenbus transaction. The transaction is restarted
+ * when ending it reports -EAGAIN.
+ *
+ * Returns 0 on success or a negative error. If setup_netfront() fails the
+ * function returns without further teardown; any later failure goes through
+ * xennet_disconnect_backend() with deferred free.
+ */
+static int talk_to_netback(struct xenbus_device *xbdev,
+			   struct netfront_info *info)
+{
+	const char *message;
+	struct xenbus_transaction xbt;
+	int err;
+
+	/* Create shared ring, alloc event channel. */
+	err = setup_netfront(xbdev, info);
+	if (err)
+		goto out;
+
+again:
+	err = xenbus_transaction_start(&xbt);
+	if (err) {
+		xenbus_dev_fatal(xbdev, err, "starting transaction");
+		goto destroy_ring;
+	}
+
+	/* only answer with a queue count if the backend has the key */
+	if (xenbus_exists(XBT_NIL, xbdev->otherend,
+			  "multi-queue-max-queues")) {
+		/* Write the number of queues */
+		err = xenbus_printf(xbt, xbdev->nodename,
+				    "multi-queue-num-queues", "%u", 1);
+		if (err) {
+			message = "writing multi-queue-num-queues";
+			goto abort_transaction;
+		}
+	}
+
+	err = xenbus_printf(xbt, xbdev->nodename, "tx-ring-ref",
+			    "%u", info->tx_ring_ref);
+	if (err) {
+		message = "writing tx ring-ref";
+		goto abort_transaction;
+	}
+	err = xenbus_printf(xbt, xbdev->nodename, "rx-ring-ref",
+			    "%u", info->rx_ring_ref);
+	if (err) {
+		message = "writing rx ring-ref";
+		goto abort_transaction;
+	}
+
+	/* one shared channel: single key; split channels: -tx and -rx keys */
+	if (info->shared_info_page->tx_evtchn ==
+	    info->shared_info_page->rx_evtchn) {
+		err = xenbus_printf(xbt, xbdev->nodename, "event-channel",
+				    "%u", info->shared_info_page->tx_evtchn);
+		if (err) {
+			message = "writing event-channel";
+			goto abort_transaction;
+		}
+	} else {
+		err = xenbus_printf(xbt, xbdev->nodename, "event-channel-tx",
+				    "%u", info->shared_info_page->tx_evtchn);
+		if (err) {
+			message = "writing event-channel";
+			goto abort_transaction;
+		}
+		err = xenbus_printf(xbt, xbdev->nodename, "event-channel-rx",
+				    "%u", info->shared_info_page->rx_evtchn);
+		if (err) {
+			message = "writing event-channel";
+			goto abort_transaction;
+		}
+	}
+
+	err = xenbus_printf(xbt, xbdev->nodename, "request-rx-copy", "%u", 1);
+	if (err) {
+		message = "writing request-rx-copy";
+		goto abort_transaction;
+	}
+
+	err = xenbus_printf(xbt, xbdev->nodename, "feature-rx-notify",
+			    "%d", 1);
+	if (err) {
+		message = "writing feature-rx-notify";
+		goto abort_transaction;
+	}
+
+	err = xenbus_printf(xbt, xbdev->nodename, "feature-sg", "%d", 1);
+	if (err) {
+		message = "writing feature-sg";
+		goto abort_transaction;
+	}
+
+	err = xenbus_printf(xbt, xbdev->nodename, "feature-gso-tcpv4",
+			    "%d", 0);
+	if (err) {
+		message = "writing feature-gso-tcpv4";
+		goto abort_transaction;
+	}
+
+	err = xenbus_transaction_end(xbt, 0);
+	if (err) {
+		/* -EAGAIN: redo the whole transaction from the start */
+		if (err == -EAGAIN)
+			goto again;
+		xenbus_dev_fatal(xbdev, err, "completing transaction");
+		goto destroy_ring;
+	}
+
+	return 0;
+abort_transaction:
+	/* second argument 1 ends the transaction in abort mode */
+	xenbus_transaction_end(xbt, 1);
+	xenbus_dev_fatal(xbdev, err, "%s", message);
+destroy_ring:
+	xennet_disconnect_backend(info, 1);
+out:
+	pr_err("talk_to_netback failed\n");
+	return err;
+}
+
+static int xennet_connect_backend(struct netfront_info *info)
+{
+	int err;
+	unsigned int feature_rx_copy;
+
+	/* Requires a backend whose "feature-rx-copy" key reads non-zero. */
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, "feature-rx-copy",
+			   "%u", &feature_rx_copy);
+	if (err != 1 || !feature_rx_copy) {
+		pr_info("backend does not support copying receive path\n");
+		return -ENODEV;
+	}
+
+	/* Flag the connection only once the handshake has succeeded. */
+	err = talk_to_netback(info->xbdev, info);
+	if (err) {
+		pr_err("talk_to_netback failed!\n");
+		return err;
+	}
+
+	info->shared_info_page->is_connected = 1;
+	return 0;
+}
+
+static void xennet_disconnect_backend(struct netfront_info *info,
+ int deffered_free)
+{
+ xenbus_switch_state(info->xbdev, XenbusStateClosing);
+
+ if (info->shared_info_page->tx_evtchn !=
+ info->shared_info_page->rx_evtchn) {
+ xenbus_free_evtchn(info->xbdev,
+ info->shared_info_page->rx_evtchn);
+ }
+ xenbus_free_evtchn(info->xbdev, info->shared_info_page->tx_evtchn);
+
+ if (deffered_free) {
+ xennet_end_access(info->tx_ring_ref, info->txs);
+ xennet_end_access(info->rx_ring_ref, info->rxs);
+ info->txs = NULL;
+ info->rxs = NULL;
+ } else {
+ xennet_end_access(info->tx_ring_ref, NULL);
+ xennet_end_access(info->rx_ring_ref, NULL);
+ }
+
+ info->tx_ring_ref = INVALID_GRANT_HANDLE;
+ info->rx_ring_ref = INVALID_GRANT_HANDLE;
+ info->rx.sring = NULL;
+ info->tx.sring = NULL;
+
+ info->shared_info_page->is_connected = 0;
+ info->shared_info_page->disconnect_count++;
+}
+
+struct xenbus_backend_state_adapter {
+ struct xenbus_watch watch;
+ int state;
+ struct xenbus_device *xbdev;
+};
+
+static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
+
+static void xenbus_backend_state_changed(struct xenbus_watch *xbw,
+ const char **vec, unsigned int len)
+{
+ struct xenbus_backend_state_adapter *adapter =
+ container_of(xbw, struct xenbus_backend_state_adapter, watch);
+ struct xenbus_device *xbdev = adapter->xbdev;
+
+ xenbus_scanf(XBT_NIL, vec[XS_WATCH_PATH], "", "%i", &adapter->state);
+ dev_dbg(&xbdev->dev, "backend %s %s\n", vec[XS_WATCH_PATH],
+ xenbus_strstate(adapter->state));
+ wake_up(&backend_state_wq);
+}
+
+static void xenbus_wait_for_backend_state(
+ struct xenbus_backend_state_adapter *adapter, int expected)
+{
+ struct xenbus_device *xbdev = adapter->xbdev;
+ long timeout;
+
+ timeout = wait_event_interruptible_timeout(backend_state_wq,
+ adapter->state == expected,
+ 5 * HZ);
+ if (timeout <= 0)
+ dev_info(&xbdev->dev, "backend %s timed out\n",
+ xbdev->otherend);
+}
+
+/*
+ * Let's move through XenbusStateClosing due to bugs in other xen_netfront
+ * implementations that move directly from XenbusStateConnected to
+ * XenbusStateClosed.
+ */
+static int
+xennet_reconnect_frontend(struct xenbus_device *xbdev)
+{
+ struct xenbus_backend_state_adapter adapter = {
+ .state = XenbusStateUnknown,
+ .xbdev = xbdev,
+ };
+ int err;
+
+ dev_dbg(&xbdev->dev, "%s: reconnecting to backend %s\n", __func__,
+ xbdev->otherend);
+
+ err = xenbus_watch_pathfmt(xbdev, &adapter.watch,
+ xenbus_backend_state_changed,
+ "%s/state", xbdev->otherend);
+ if (err)
+ return err;
+
+ xenbus_switch_state(xbdev, XenbusStateClosing);
+ xenbus_wait_for_backend_state(&adapter, XenbusStateClosing);
+
+ xenbus_switch_state(xbdev, XenbusStateClosed);
+ xenbus_wait_for_backend_state(&adapter, XenbusStateClosed);
+
+ xenbus_switch_state(xbdev, XenbusStateInitialising);
+ xenbus_wait_for_backend_state(&adapter, XenbusStateInitWait);
+
+ unregister_xenbus_watch(&adapter.watch);
+ dev_info(&xbdev->dev, "reconnect done on %s\n", xbdev->otherend);
+ kfree(adapter.watch.node);
+ return 0;
+}
+
+/**
+ * Entry point to this code when a new device is created. Allocate the basic
+ * structures and the ring buffers for communication with the backend, and
+ * inform the backend of the appropriate details for those.
+ */
+static int xennet_probe(struct xenbus_device *xbdev,
+ const struct xenbus_device_id *id)
+{
+ struct netfront_info *info;
+ int backend_state = XenbusStateUnknown;
+ int err;
+
+ err = xennet_reconnect_frontend(xbdev);
+ if (err)
+ return err;
+
+ err = xenbus_scanf(XBT_NIL, xbdev->otherend, "state", "%i",
+ &backend_state);
+ if (err != 1)
+ backend_state = XenbusStateUnknown;
+
+ if (backend_state != XenbusStateInitWait) {
+ dev_err(&xbdev->dev, "%s, stuck in state %s\n",
+ xbdev->nodename, xenbus_strstate(backend_state));
+ return -ENODEV;
+ }
+
+ info = xennet_alloc_resources(xbdev);
+ dev_set_drvdata(&xbdev->dev, info);
+ return 0;
+}
+
+/**
+ * We are reconnecting to the backend, due to a suspend/resume, or a backend
+ * driver restart. We tear down our netif structure and recreate it, but
+ * leave the device-layer structures intact so that this is transparent to the
+ * rest of the kernel.
+ */
+static int xennet_resume(struct xenbus_device *xbdev)
+{
+ struct netfront_info *info = dev_get_drvdata(&xbdev->dev);
+
+ pr_devel("%s\n", xbdev->nodename);
+
+ /*we can use the same memory region - disable deffered free*/
+ xennet_disconnect_backend(info, 0);
+
+ return 0;
+}
+
+/**
+ * Callback received when the backend's state changes.
+ */
+static void netback_changed(struct xenbus_device *xbdev,
+ enum xenbus_state backend_state)
+{
+ struct netfront_info *info = dev_get_drvdata(&xbdev->dev);
+
+ pr_devel("%s\n", xenbus_strstate(backend_state));
+
+ switch (backend_state) {
+ case XenbusStateInitialising:
+ case XenbusStateInitialised:
+ case XenbusStateReconfiguring:
+ case XenbusStateReconfigured:
+ break;
+ case XenbusStateUnknown:
+ break;
+ /* Connect only while this frontend is still in Initialising. */
+ case XenbusStateInitWait:
+ if (xbdev->state != XenbusStateInitialising)
+ break;
+ if (xennet_connect_backend(info) != 0) {
+ pr_err("%s\n", xbdev->nodename);
+ break;
+ }
+ xenbus_switch_state(xbdev, XenbusStateConnected);
+ break;
+
+ case XenbusStateConnected:
+ break;
+
+ case XenbusStateClosed:
+ if (xbdev->state == XenbusStateClosed) {
+ xenbus_switch_state(xbdev, XenbusStateInitialising);
+ break;
+ }
+ /* fall through: frontend not Closed yet, handle like Closing */
+ case XenbusStateClosing:
+ xenbus_frontend_closed(xbdev);
+ break;
+ }
+}
+
+static const struct xenbus_device_id netfront_ids[] = {
+ { "vif" },
+ { "" }
+};
+
+static int xennet_remove(struct xenbus_device *xbdev)
+{
+ struct netfront_info *info = dev_get_drvdata(&xbdev->dev);
+
+ pr_devel("%s\n", xbdev->nodename);
+
+ xennet_disconnect_backend(info, 1);
+
+ xennet_free_resources(xbdev);
+
+ return 0;
+}
+
+static struct xenbus_driver xenuio_driver = {
+ .ids = netfront_ids,
+ .probe = xennet_probe,
+ .remove = xennet_remove,
+ .resume = xennet_resume,
+ .otherend_changed = netback_changed,
+#ifndef DEFINE_XENBUS_DRIVER
+ .name = "xen_uio",
+#endif
+ .driver = {
+ .name = "xen_uio",
+ },
+};
+
+/* ioctl entry for operations that cannot be done through the shared memory;
+ * returns 0 on success or a negative errno. */
+static long xennet_ioctl(struct file *file,
+			 unsigned int cmd, unsigned long arg) {
+	int rc;
+	void __user *uarg = (void __user *) arg;
+
+	switch (cmd) {
+	case IOCTL_EVTCHN_NOTIFY:
+		{
+			struct ioctl_evtchn_notify notify;
+
+			rc = -EFAULT;
+			if (copy_from_user(&notify, uarg, sizeof(notify)))
+				break;
+			notify_remote_via_evtchn(notify.port);
+			rc = 0;
+		}
+		break;
+	case IOCTL_EVTCHN_NOTIFY_GRANT:
+		{
+			uint16_t i;
+			int notify;
+			struct ioctl_evtchn_notify_grant *ng;
+
+			rc = -EFAULT;
+			/* check the whole struct, not sizeof(pointer) */
+			if (!access_ok(VERIFY_READ, uarg, sizeof(*ng)))
+				break;
+			/* NOTE(review): *ng is used in place, never copied */
+			ng = uarg;
+			/* both counts index fixed-size arrays inside *ng */
+			if (ng->rel_count > ARRAY_SIZE(ng->rel_gref) ||
+			    ng->count > ARRAY_SIZE(ng->s)) {
+				rc = -EINVAL;
+				break;
+			}
+			for (i = 0; i < ng->rel_count; i++) {
+				gnttab_end_foreign_access_ref(
+					ng->rel_gref[i],
+					(ng->is_rx ? 0 : GNTMAP_readonly));
+			}
+			if (ng->count) {
+				union {
+					struct xen_netif_rx_front_ring *rx;
+					struct xen_netif_tx_front_ring *tx;
+				} ring;
+				for (i = 0; i < ng->count; i++) {
+					gnttab_grant_foreign_access_ref(
+						ng->s[i].gref,
+						ng->otherend_id,
+						pfn_to_mfn(ng->s[i].paddr),
+						(ng->is_rx ? 0 :
+						 GNTMAP_readonly));
+				}
+
+				if (ng->is_rx) {
+					ring.rx = ng->u.rx_ring;
+					if (&ng->info->rx != ring.rx) {
+						pr_err("bad info or rx ring addr\n");
+						return -EINVAL;
+					}
+					ring.rx->req_prod_pvt += ng->count;
+					RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(
+						ring.rx, notify);
+				} else {
+					ring.tx = ng->u.tx_ring;
+					if (&ng->info->tx != ring.tx) {
+						pr_err("bad info or tx ring addr\n");
+						return -EINVAL;
+					}
+					ring.tx->req_prod_pvt += ng->count;
+					RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(
+						ring.tx, notify);
+				}
+				if (notify)
+					notify_remote_via_evtchn(ng->port);
+			}
+			rc = 0;
+		}
+		break;
+	default:
+		rc = -EINVAL;
+		break;
+	}
+	return rc;
+}
+
+static const struct file_operations xennet_fops = {
+ .owner = THIS_MODULE,
+ .read = NULL/*xennet_read*/,
+ .write = NULL/*xennet_write*/,
+ .unlocked_ioctl = xennet_ioctl,
+ .poll = NULL/*xennet_poll*/,
+ .fasync = NULL/*xennet_fasync*/,
+ .open = NULL/*xennet_open*/,
+ .mmap = NULL/*xennet_mmap*/,
+ .release = NULL/*xennet_release*/,
+ .llseek = no_llseek,
+};
+
+static struct miscdevice xennet_miscdev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = XEN_PMD_UIO_NAME,
+ .fops = &xennet_fops,
+};
+
+static ssize_t read_domain(struct file *f, char __user *buf,
+ size_t count, loff_t *off)
+{
+ if (count > domain_len)
+ count = domain_len;
+
+ if (copy_to_user(buf, domain_name, count))
+ return -EFAULT;
+
+ domain_len = (count ? domain_len - count : sizeof(domain_name));
+
+ return count;
+}
+
+static const struct file_operations domain_fops = {
+ .owner = THIS_MODULE,
+ .read = read_domain,
+};
+
+static int __init netif_init(void)
+{
+ int err;
+
+ if (!xen_domain()) {
+ pr_err("xen bare hw\n");
+ return -ENODEV;
+ }
+
+ pr_info("xen %s domain\n", domains[xen_domain_type]);
+
+ snprintf(domain_name, sizeof(domain_name),
+ "%s\n", domains[xen_domain_type]);
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
+ pr_info("feature auto_translated_physmap is disabled\n");
+
+ pr_info("gnttab version: %d\n", (int)__gnttab_version());
+
+ domain_proc = proc_create(DOMAIN_PROC, S_IRUGO, NULL, &domain_fops);
+ if (domain_proc == NULL) {
+ pr_err("could not create /proc/%s\n", DOMAIN_PROC);
+ return -ENOMEM;
+ }
+
+ pr_info("/proc/%s created\n", DOMAIN_PROC);
+
+ err = misc_register(&xennet_miscdev);
+ if (err != 0) {
+ pr_err("could not register char device\n");
+ return err;
+ }
+
+ pr_info("initialising xen virtual ethernet driver\n");
+
+ err = xenbus_register_frontend(&xenuio_driver);
+
+ return err;
+}
+module_init(netif_init);
+
+static void __exit netif_exit(void)
+{
+ remove_proc_entry(DOMAIN_PROC, NULL);
+
+ xenbus_unregister_driver(&xenuio_driver);
+
+ misc_deregister(&xennet_miscdev);
+}
+module_exit(netif_exit);
+
+MODULE_DESCRIPTION("Xen virtual network device frontend");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("xen:vif");
+MODULE_ALIAS("xennet");
--
2.5.5
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH v3 2/3] xen: Add netfront poll mode driver
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver Jan Blunck
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 1/3] xen: Add UIO kernel driver Jan Blunck
@ 2016-03-22 9:55 ` Jan Blunck
2016-03-22 10:07 ` David Marchand
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 3/3] xen: Add documentation Jan Blunck
` (2 subsequent siblings)
4 siblings, 1 reply; 23+ messages in thread
From: Jan Blunck @ 2016-03-22 9:55 UTC (permalink / raw)
To: dev; +Cc: jblunck, shemming, Stephen Hemminger
This implements a poll mode driver that has the same functionality as
the Xen netfront driver in the Linux kernel.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Jan Blunck <jblunck@infradead.org>
---
config/common_base | 6 +
drivers/net/Makefile | 1 +
drivers/net/xen/Makefile | 30 ++
drivers/net/xen/uio.c | 245 ++++++++++++
drivers/net/xen/uio.h | 54 +++
drivers/net/xen/xen_adapter_info.h | 64 ++++
drivers/net/xen/xen_dev.c | 489 ++++++++++++++++++++++++
drivers/net/xen/xen_dev.h | 30 ++
drivers/net/xen/xen_logs.h | 19 +
drivers/net/xen/xen_rxtx.c | 757 +++++++++++++++++++++++++++++++++++++
drivers/net/xen/xen_rxtx.h | 131 +++++++
11 files changed, 1826 insertions(+)
create mode 100644 drivers/net/xen/Makefile
create mode 100644 drivers/net/xen/uio.c
create mode 100644 drivers/net/xen/uio.h
create mode 100644 drivers/net/xen/xen_adapter_info.h
create mode 100644 drivers/net/xen/xen_dev.c
create mode 100644 drivers/net/xen/xen_dev.h
create mode 100644 drivers/net/xen/xen_logs.h
create mode 100644 drivers/net/xen/xen_rxtx.c
create mode 100644 drivers/net/xen/xen_rxtx.h
diff --git a/config/common_base b/config/common_base
index dbd405b..36e4b59 100644
--- a/config/common_base
+++ b/config/common_base
@@ -306,6 +306,12 @@ CONFIG_RTE_LIBRTE_PMD_AF_PACKET=n
CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
#
+# Compile XEN UIO net-front PMD driver
+#
+CONFIG_RTE_LIBRTE_PMD_XEN=n
+CONFIG_RTE_LIBRTE_PMD_XEN_DEBUG_INIT=n
+
+#
# Compile null PMD
#
CONFIG_RTE_LIBRTE_PMD_NULL=y
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 0c3393f..003e51b 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -51,5 +51,6 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += szedata2
DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += xenvirt
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_XEN) += xen
include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/drivers/net/xen/Makefile b/drivers/net/xen/Makefile
new file mode 100644
index 0000000..9e75157
--- /dev/null
+++ b/drivers/net/xen/Makefile
@@ -0,0 +1,30 @@
+#
+# Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+# All rights reserved.
+#
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_xen.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+VPATH += $(RTE_SDK)/drivers/net/xen
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_XEN) += uio.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_XEN) += xen_dev.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_XEN) += xen_rxtx.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XEN) += lib/librte_eal lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XEN) += lib/librte_mempool lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XEN) += lib/librte_net lib/librte_malloc
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/xen/uio.c b/drivers/net/xen/uio.c
new file mode 100644
index 0000000..54e10b9
--- /dev/null
+++ b/drivers/net/xen/uio.c
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/mman.h>
+#include <sys/types.h>
+
+#include <rte_log.h>
+
+#include "uio.h"
+
+#define SYSFS_DEVICES_PATH "/sys/devices"
+#define SYSFS_CLASS_UIO_PATH "/sys/class/uio"
+
+/* Return N of the "uioN" entry below /sys/devices/<device>/uio (0..255),
+ * or a negative errno value when there is none or N is out of range. */
+int
+sysfs_device_get_uio_num(const char *device)
+{
+	DIR *dir;
+	struct dirent *e;
+	char path[PATH_MAX];
+	unsigned long uio_num = 0;
+
+	snprintf(path, sizeof(path), "%s/%s/uio", SYSFS_DEVICES_PATH, device);
+	dir = opendir(path);
+	if (!dir)
+		return -errno;
+
+	while ((e = readdir(dir)) != NULL) {
+		const char *num;
+		char *endptr;
+
+		if (strncmp(e->d_name, "uio", 3) != 0)
+			continue;
+		/* endptr == num: no digits followed the "uio" prefix */
+		num = e->d_name + 3;
+		uio_num = strtoul(num, &endptr, 10);
+		if (endptr == num || *endptr != '\0' || uio_num == ULONG_MAX)
+			continue;
+		RTE_LOG(DEBUG, PMD, "%s uio_num = %lu\n", device, uio_num);
+		break;
+	}
+	closedir(dir);
+
+	if (!e)
+		return -ENODEV;
+	if (uio_num > 255)
+		return -EINVAL;
+
+	return (int)uio_num;
+}
+
+static int
+sysfs_get_buffer(const char *filename, char *buf, size_t bufsize)
+{
+ FILE *f;
+ char *ptr;
+
+ f = fopen(filename, "r");
+ if (!f) {
+ RTE_LOG(ERR, EAL, "cannot open sysfs file %s\n", filename);
+ return -1;
+ }
+
+ ptr = fgets(buf, bufsize, f);
+ fclose(f);
+ if (!ptr) {
+ RTE_LOG(ERR, EAL, "cannot read sysfs file %s\n", filename);
+ return -1;
+ }
+
+ /* Jump to the end (on success fgets adds a terminating null byte)
+ * and eat the trailing newline.
+ */
+ ptr += strlen(ptr) - 1;
+ if (*ptr == '\n')
+ *ptr = '\0';
+
+ return 0;
+}
+
+static int
+sysfs_get_value(const char *filename, uint64_t *val)
+{
+ char buf[BUFSIZ];
+ char *end = NULL;
+
+ if (sysfs_get_buffer(filename, buf, sizeof(buf)) < 0)
+ return -1;
+
+ *val = strtoull(buf, &end, 0);
+ if ((buf[0] == '\0') || !end || (*end != '\0')) {
+ RTE_LOG(ERR, EAL, "cannot parse sysfs value %s\n", filename);
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+sysfs_uio_get_info(struct uio_resource *uio)
+{
+ char path[PATH_MAX];
+
+ snprintf(path, sizeof(path), SYSFS_CLASS_UIO_PATH "/uio%u/name",
+ uio->idx);
+ if (sysfs_get_buffer(path, uio->name, ARRAY_SIZE(uio->name)))
+ return -ENODEV;
+
+ snprintf(path, sizeof(path), SYSFS_CLASS_UIO_PATH "/uio%u/version",
+ uio->idx);
+ if (sysfs_get_buffer(path, uio->version, ARRAY_SIZE(uio->version)))
+ return -ENODEV;
+
+ return 0;
+}
+
+#define OFF_MAX ((uint64_t)(off_t)-1)
+static ssize_t
+__uio_get_mappings(const char *name, struct uio_map maps[], size_t nb_maps)
+{
+ size_t i;
+ char dirname[PATH_MAX];
+ char filename[PATH_MAX];
+ uint64_t offset, size;
+
+ for (i = 0; i < nb_maps; i++) {
+ snprintf(dirname, sizeof(dirname), "%s/maps/map%zu", name, i);
+
+ if (access(dirname, F_OK) != 0)
+ break;
+
+ snprintf(filename, sizeof(filename), "%s/offset", dirname);
+ if (sysfs_get_value(filename, &offset) < 0) {
+ RTE_LOG(ERR, EAL, "cannot parse offset of %s\n",
+ dirname);
+ return -1;
+ }
+
+ snprintf(filename, sizeof(filename), "%s/size", dirname);
+ if (sysfs_get_value(filename, &size) < 0) {
+ RTE_LOG(ERR, EAL, "cannot parse size of %s\n", dirname);
+ return -1;
+ }
+
+ snprintf(filename, sizeof(filename), "%s/addr", dirname);
+ if (sysfs_get_value(filename, &maps[i].phaddr) < 0) {
+ RTE_LOG(ERR, EAL, "cannot parse addr of %s\n", dirname);
+ return -1;
+ }
+
+ if ((offset > OFF_MAX) || (size > SIZE_MAX)) {
+ RTE_LOG(ERR, EAL,
+ "offset/size exceed system max value\n");
+ return -1;
+ }
+
+ maps[i].addr = NULL;
+ maps[i].offset = offset;
+ maps[i].size = size;
+ }
+
+ return i;
+}
+
+int
+sysfs_uio_get_mappings(struct uio_resource *uio)
+{
+ char path[PATH_MAX];
+
+ snprintf(path, sizeof(path), SYSFS_CLASS_UIO_PATH "/uio%u", uio->idx);
+ uio->nb_maps = __uio_get_mappings(path, uio->maps,
+ ARRAY_SIZE(uio->maps));
+ return uio->nb_maps;
+}
+
+/* mmap() up to max_addresses maps of /dev/uio<idx>; map N is selected by the
+ * file offset N * page size. Returns 0, or -errno with earlier maps undone. */
+int
+uio_map_addresses(struct uio_resource *uio, unsigned int max_addresses)
+{
+	char path[PATH_MAX];
+	unsigned int j;
+	int fd, err;
+	const uint64_t pagesz = sysconf(_SC_PAGESIZE);
+
+	snprintf(path, sizeof(path), "/dev/uio%u", uio->idx);
+	fd = open(path, O_RDWR);
+	if (fd < 0) {
+		err = errno; /* saved: the calls below may overwrite errno */
+		RTE_LOG(ERR, PMD, "%s can't open file descriptor: %s\n",
+			path, strerror(err));
+		return -err;
+	}
+	for (j = 0; j < uio->nb_maps && j < max_addresses; j++) {
+		struct uio_map *map = &uio->maps[j];
+
+		map->addr = mmap(NULL, map->size, PROT_READ | PROT_WRITE,
+				 MAP_SHARED, fd, j * pagesz);
+		if (map->addr == MAP_FAILED) {
+			err = errno;
+			RTE_LOG(ERR, PMD,
+				"%s mmap map%u (%ld@0x%lx) failed (%s)\n",
+				path, j, map->size, map->offset, strerror(err));
+			uio_unmap_addresses(uio, j);
+			close(fd);
+			return -err;
+		}
+		RTE_LOG(DEBUG, PMD, "%s mmap map%u (%ld@0x%lx) to %p\n",
+			path, j, map->size, map->offset, map->addr);
+	}
+	close(fd);
+	return 0;
+}
+
+/* munmap() the first max_addresses maps that are currently mapped. */
+void
+uio_unmap_addresses(struct uio_resource *uio, unsigned int max_addresses)
+{
+	char path[PATH_MAX];
+	unsigned int j;
+
+	snprintf(path, sizeof(path), "/dev/uio%u", uio->idx);
+	for (j = 0; j < uio->nb_maps && j < max_addresses; ++j) {
+		struct uio_map *map = &uio->maps[j];
+
+		if (!map->addr || map->addr == MAP_FAILED)
+			continue;
+		munmap(map->addr, map->size);
+		RTE_LOG(DEBUG, PMD, "%s munmap map%u (%ld@0x%lx) to %p\n",
+			path, j, map->size, map->offset, map->addr);
+		map->addr = NULL; /* a repeated call must not unmap again */
+	}
+}
diff --git a/drivers/net/xen/uio.h b/drivers/net/xen/uio.h
new file mode 100644
index 0000000..737b984
--- /dev/null
+++ b/drivers/net/xen/uio.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _DRIVERS_NET_XEN_UIO_H_
+#define _DRIVERS_NET_XEN_UIO_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define MAX_UIO_NAME 64
+
+/* from include/linux/uio_driver.h */
+#ifndef MAX_UIO_MAPS
+#define MAX_UIO_MAPS 5
+#endif
+
+struct uio_map {
+ void *addr;
+ uint64_t offset;
+ uint64_t size;
+ uint64_t phaddr;
+};
+
+struct uio_resource {
+ unsigned int idx;
+ char name[MAX_UIO_NAME];
+ char version[MAX_UIO_NAME];
+ size_t nb_maps;
+ struct uio_map maps[MAX_UIO_MAPS];
+};
+
+int sysfs_device_get_uio_num(const char *device);
+int sysfs_uio_get_info(struct uio_resource *uio);
+int sysfs_uio_get_mappings(struct uio_resource *uio);
+
+int uio_map_addresses(struct uio_resource *uio, unsigned int max_addresses);
+void uio_unmap_addresses(struct uio_resource *uio, unsigned int max_addresses);
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+static inline void *
+uio_get_map_addr(struct uio_resource *uio, unsigned int idx)
+{
+ if (idx >= uio->nb_maps)
+ return NULL;
+
+ return (char *)uio->maps[idx].addr + uio->maps[idx].offset;
+}
+
+#endif /* _DRIVERS_NET_XEN_UIO_H_ */
diff --git a/drivers/net/xen/xen_adapter_info.h b/drivers/net/xen/xen_adapter_info.h
new file mode 100644
index 0000000..80f918d
--- /dev/null
+++ b/drivers/net/xen/xen_adapter_info.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef XEN_ADAPTER_INFO_H_
+#define XEN_ADAPTER_INFO_H_
+
+#define MAX_TARGET 256
+
+#define IOCTL_EVTCHN_NOTIFY_GRANT 7
+
+struct gref_addr {
+ grant_ref_t gref;
+ unsigned long paddr;
+};
+
+struct ioctl_evtchn_notify_grant {
+ unsigned int port;
+ int otherend_id;
+ uint16_t count;
+ uint8_t is_rx;
+ union {
+ struct xen_netif_rx_front_ring *rx_ring;
+ struct xen_netif_tx_front_ring *tx_ring;
+ } u;
+ struct netfront_info *info;
+ uint16_t rel_count;
+ grant_ref_t rel_gref[MAX_TARGET];
+ struct gref_addr s[MAX_TARGET];
+};
+
+#define XEN_PMD_UIO_NAME "xen/pmd_uio"
+
+enum {
+ INFO_MAP = 0,
+ RX_RING_MAP,
+ TX_RING_MAP,
+ XEN_MAP_MAX
+};
+
+struct xen_adapter_info {
+ /*global parameters */
+ struct xen_netif_rx_front_ring *rx_ring;
+ struct xen_netif_tx_front_ring *tx_ring;
+ struct netfront_info *info;
+
+ uint8_t is_connected;
+ uint8_t disconnect_count;
+
+ /*adapter specific data*/
+ int otherend_id;
+ unsigned int rx_evtchn;
+ unsigned int tx_evtchn;
+ u_int8_t mac[6];
+
+ /*params of grefs array*/
+ uint16_t rx_grefs_count;
+ uint16_t tx_grefs_count;
+ /* this field has to be the last */
+ grant_ref_t rxtx_grefs[];
+};
+
+#endif /* XEN_ADAPTER_INFO_H_ */
diff --git a/drivers/net/xen/xen_dev.c b/drivers/net/xen/xen_dev.c
new file mode 100644
index 0000000..e32255e
--- /dev/null
+++ b/drivers/net/xen/xen_dev.c
@@ -0,0 +1,489 @@
+/*
+ * Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include "xen_dev.h"
+#include "xen_rxtx.h"
+#include "xen_logs.h"
+
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+
+#include <xen/sys/evtchn.h>
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+
+#define XEN_MAX_RX_PKTLEN 0xFFFF
+#define XEN_MIN_RX_BUFSIZE (2 * PAGE_SIZE)
+
+#define XEN_DEV_PATH "/sys/bus/xen/devices"
+
+static const char *drivername = "xen-netfront PMD";
+
+static int xen_evt_fd = -1;
+
+int
+xen_evtchn_notify_grant_rxtx(struct ioctl_evtchn_notify_grant *ng)
+{
+ int rc;
+
+ if (unlikely(xen_evt_fd < 0))
+ return -1;
+
+ rc = ioctl(xen_evt_fd, IOCTL_EVTCHN_NOTIFY_GRANT, ng);
+ if (rc == -1)
+ rc = errno;
+
+ return rc;
+}
+
+static int
+xen_evtchn_notify_rxtx(unsigned int evtchn)
+{
+	struct ioctl_evtchn_notify notify = { .port = evtchn };
+	int rc;
+
+	if (unlikely(xen_evt_fd < 0))
+		return -1;
+	/* 0 on success; a failed ioctl is reported as its positive errno */
+	rc = ioctl(xen_evt_fd, IOCTL_EVTCHN_NOTIFY, &notify);
+	if (rc == -1)
+		rc = errno;
+
+	return rc;
+}
+
+static int
+xen_evtchn_notify(struct xen_adapter *xa)
+{
+ int res = 0;
+
+ res += xen_evtchn_notify_rxtx(xa->info_page->tx_evtchn);
+
+ if (xa->info_page->tx_evtchn != xa->info_page->rx_evtchn)
+ res += xen_evtchn_notify_rxtx(xa->info_page->rx_evtchn);
+
+ return res;
+}
+
+static void
+xen_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+	unsigned i;
+
+	PMD_INIT_FUNC_TRACE();
+	/* Add the per-queue software counters of every queue to *stats. */
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		struct xen_tx_queue *txq = dev->data->tx_queues[i];
+		/* test the queue pointer, not the address of its stats */
+		if (!txq)
+			continue;
+
+		stats->opackets += txq->tx_stats.opackets;
+		stats->obytes += txq->tx_stats.obytes;
+		stats->oerrors += txq->tx_stats.oerrors;
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		struct xen_rx_queue *rxq = dev->data->rx_queues[i];
+		/* test the queue pointer, not the address of its stats */
+		if (!rxq)
+			continue;
+
+		stats->ipackets += rxq->rx_stats.ipackets;
+		stats->ierrors += rxq->rx_stats.ierrors;
+		stats->ibytes += rxq->rx_stats.ibytes;
+	}
+}
+
+static void
+xen_dev_stats_reset(struct rte_eth_dev *dev)
+{
+	uint16_t i;
+
+	PMD_INIT_FUNC_TRACE();
+	/* Zero the per-queue software counters of every queue. */
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		struct xen_tx_queue *txq = dev->data->tx_queues[i];
+		/* test the queue pointer, not the address of its stats */
+		if (!txq)
+			continue;
+
+		txq->tx_stats.opackets = 0;
+		txq->tx_stats.obytes = 0;
+		txq->tx_stats.oerrors = 0;
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		struct xen_rx_queue *rxq = dev->data->rx_queues[i];
+		/* test the queue pointer, not the address of its stats */
+		if (!rxq)
+			continue;
+
+		rxq->rx_stats.ipackets = 0;
+		rxq->rx_stats.ibytes = 0;
+		rxq->rx_stats.ierrors = 0;
+	}
+}
+
+static void
+xen_dev_info_get(struct rte_eth_dev *dev,
+ struct rte_eth_dev_info *dev_info)
+{
+ PMD_INIT_FUNC_TRACE();
+
+ dev_info->max_mac_addrs = 1;
+ dev_info->max_rx_pktlen = XEN_MAX_RX_PKTLEN;
+ dev_info->max_rx_queues = dev->data->nb_rx_queues;
+ dev_info->max_tx_queues = dev->data->nb_tx_queues;
+ dev_info->min_rx_bufsize = XEN_MIN_RX_BUFSIZE;
+}
+
+/* Reject RX IP checksum offload (unsupported); record the VLAN strip flag. */
+static int
+xen_dev_configure(struct rte_eth_dev *dev)
+{
+	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
+	struct xen_adapter *xa = dev->data->dev_private;
+
+	PMD_INIT_FUNC_TRACE();
+
+	if (rxmode->hw_ip_checksum) {
+		RTE_LOG(ERR, PMD, "HW IP checksum not supported\n");
+		return -EINVAL;
+	}
+
+	xa->vlan_strip = rxmode->hw_vlan_strip;
+	return 0;
+}
+
+static void
+xen_dev_close(struct rte_eth_dev *dev __rte_unused)
+{
+ PMD_INIT_FUNC_TRACE();
+}
+
+/**
+ * Atomically writes the link status information into global
+ * structure rte_eth_dev.
+ *
+ * @param dev
+ * - Pointer to the structure rte_eth_dev to read from.
+ * - Pointer to the buffer to be saved with the link status.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, negative value.
+ */
+static inline int
+xen_dev_atomic_write_link_status(struct rte_eth_dev *dev,
+ struct rte_eth_link *link)
+{
+ struct rte_eth_link *dst = &dev->data->dev_link;
+ struct rte_eth_link *src = link;
+
+ if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
+ *(uint64_t *)src) == 0)
+ return -1;
+
+ return 0;
+}
+
+static int
+xen_dev_link_update(struct rte_eth_dev *dev,
+		    int wait_to_complete __rte_unused)
+{
+	struct rte_eth_link link;
+
+	PMD_INIT_FUNC_TRACE();
+	/* zero what is not assigned below: the struct is published whole */
+	memset(&link, 0, sizeof(link));
+	link.link_status = 1;
+	link.link_speed = ETH_LINK_SPEED_10G;
+	link.link_duplex = ETH_LINK_FULL_DUPLEX;
+
+	xen_dev_atomic_write_link_status(dev, &link);
+	return 0;
+}
+
+static int
+xen_dev_start(struct rte_eth_dev *dev)
+{
+ struct xen_adapter *xa = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ xen_dev_link_update(dev, 0);
+
+ xen_evtchn_notify(xa);
+
+ return 0;
+}
+
+static void
+xen_dev_stop(struct rte_eth_dev *dev __rte_unused)
+{
+ PMD_INIT_FUNC_TRACE();
+}
+
+static int
+wait_uio_init(uint8_t *state, const uint32_t timeout)
+{
+ uint32_t i;
+
+ for (i = 0; i < timeout * 10; i++) {
+ if (*state)
+ return 0;
+ usleep(100000);
+ }
+
+ return -1;
+}
+
+static struct eth_dev_ops xen_eth_dev_ops = {
+ /*dev*/
+ .dev_configure = xen_dev_configure,
+ .dev_close = xen_dev_close,
+ .dev_start = xen_dev_start,
+ .dev_stop = xen_dev_stop,
+ .dev_infos_get = xen_dev_info_get,
+ .link_update = xen_dev_link_update,
+ /*rxtx*/
+ .stats_get = xen_dev_stats_get,
+ .stats_reset = xen_dev_stats_reset,
+ .rx_queue_setup = xen_dev_rx_queue_setup,
+ .rx_queue_release = xen_dev_rx_queue_release,
+ .tx_queue_setup = xen_dev_tx_queue_setup,
+ .tx_queue_release = xen_dev_tx_queue_release,
+};
+
+static int
+xen_uio_connect_netback(const char *name, struct xen_adapter *xa)
+{
+	int err;
+
+	err = uio_map_addresses(&xa->uio, XEN_MAP_MAX);
+	if (err) {
+		RTE_LOG(ERR, PMD, "%s mapping info_page failed (%d)\n", name,
+			err);
+		return -EINVAL;
+	}
+
+	/* NULL when the device exposes no maps: must not be dereferenced */
+	xa->info_page = uio_get_map_addr(&xa->uio, INFO_MAP);
+	if (!xa->info_page ||
+	    wait_uio_init(&xa->info_page->is_connected, 8)) {
+		RTE_LOG(ERR, PMD, "%s no connection to xen_netback\n", name);
+		uio_unmap_addresses(&xa->uio, XEN_MAP_MAX);
+		return -ENODEV;
+	}
+
+	PMD_INIT_LOG(DEBUG, "%s rx:%d, rx_evtchn:%d, tx:%d, tx_evtchn:%d",
+		     name, (int)xa->info_page->rx_grefs_count,
+		     (int)xa->info_page->rx_evtchn,
+		     (int)xa->info_page->tx_grefs_count,
+		     (int)xa->info_page->tx_evtchn);
+	return 0;
+}
+
+/* Create one ethdev for the xen device <name>; returns 0 or a negative errno. */
+static int
+xen_dev_create(const char *name, const unsigned node)
+{
+	int ret = -ENOMEM;
+	struct xen_adapter *internals = NULL;
+	struct rte_eth_dev *eth_dev = NULL;
+	struct rte_eth_dev_data *data = NULL;
+
+	if (!name)
+		return -EINVAL;
+
+	RTE_LOG(INFO, PMD, "%s Creating %s ethdev on socket %u\n", name,
+		drivername, node);
+
+	internals = rte_zmalloc_socket(name, sizeof(*internals), 0, node);
+	if (!internals) {
+		RTE_LOG(ERR, PMD, "%s internals allocation has failed (%d)\n",
+			name, ENOMEM);
+		goto error;
+	}
+
+	ret = sysfs_device_get_uio_num(name);
+	if (ret < 0) {
+		RTE_LOG(ERR, PMD, "%s getting UIO number failed (%d)\n",
+			name, ret);
+		goto error;
+	}
+	internals->uio.idx = ret;
+	ret = sysfs_uio_get_info(&internals->uio);
+	if (ret < 0) {
+		RTE_LOG(ERR, PMD, "%s getting UIO info failed (%d)\n",
+			name, ret);
+		goto error;
+	}
+	if (strcmp(internals->uio.name, "xen_uio") != 0 ||
+	    strcmp(internals->uio.version, "0.1") != 0) {
+		RTE_LOG(ERR, PMD, "%s invalid UIO name/version (%s/%s)\n",
+			name, internals->uio.name, internals->uio.version);
+		ret = -EINVAL;
+		goto error;
+	}
+
+	ret = sysfs_uio_get_mappings(&internals->uio);
+	if (ret < 0) {
+		RTE_LOG(ERR, PMD, "%s getting UIO mappings failed (%d)\n",
+			name, ret);
+		goto error;
+	}
+
+	eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
+	if (!eth_dev) {
+		RTE_LOG(ERR, PMD, "%s eth_dev allocation has failed (%d)\n",
+			name, ENOMEM);
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	/* use eth_dev->data allocated in rte_eth_dev_allocate() */
+	data = eth_dev->data;
+	data->dev_private = internals;
+	data->nb_rx_queues = (uint16_t)1;
+	data->nb_tx_queues = (uint16_t)1;
+	data->rx_mbuf_alloc_failed = 0;
+	data->mtu = ETHER_MTU;
+
+	eth_dev->dev_ops = &xen_eth_dev_ops;
+	eth_dev->data->kdrv = RTE_KDRV_NONE;
+	eth_dev->data->drv_name = drivername;
+	eth_dev->data->numa_node = node;
+	eth_dev->driver = NULL;
+
+	TAILQ_INIT(&eth_dev->link_intr_cbs);
+	if (xen_uio_connect_netback(name, internals)) {
+		ret = -ENODEV;
+		goto uninit;
+	}
+	/* copy mac-addr */
+	data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0, node);
+	if (!data->mac_addrs) { /* drop the maps made while connecting */
+		uio_unmap_addresses(&internals->uio, XEN_MAP_MAX);
+		ret = -ENOMEM;
+		goto uninit;
+	}
+	memcpy(&data->mac_addrs->addr_bytes[0],
+	       &internals->info_page->mac[0], ETHER_ADDR_LEN);
+
+	PMD_INIT_LOG(DEBUG, "%s MAC: %02X:%02X:%02X:%02X:%02X:%02X", name,
+		     data->mac_addrs->addr_bytes[0],
+		     data->mac_addrs->addr_bytes[1],
+		     data->mac_addrs->addr_bytes[2],
+		     data->mac_addrs->addr_bytes[3],
+		     data->mac_addrs->addr_bytes[4],
+		     data->mac_addrs->addr_bytes[5]);
+	return 0;
+
+uninit:
+	rte_eth_dev_release_port(eth_dev);
+error:
+	rte_free(internals);
+	return ret;
+}
+
+static const char *
+sysfs_get_device_driver(const char *device_path, char *buf)
+{
+ char *driver;
+ int count;
+
+ count = readlink(device_path, buf, PATH_MAX);
+ if (count >= PATH_MAX)
+ return NULL;
+
+ /* device doesn't have a driver */
+ if (count < 0)
+ return "";
+
+ buf[count] = '\0';
+ driver = strrchr(buf, '/');
+ if (driver)
+ ++driver;
+ else
+ driver = buf;
+
+ return driver;
+}
+
+/* Create an ethdev for every vif-N device bound to xen_uio; 0 if any, else -1. */
+static int
+rte_xen_netfront_init(const char *name __rte_unused,
+		      const char *args __rte_unused)
+{
+	struct dirent *e;
+	DIR *dir;
+	char dirname[PATH_MAX];
+	unsigned int devices = 0;
+
+	PMD_INIT_FUNC_TRACE();
+
+	xen_evt_fd = open("/dev/" XEN_PMD_UIO_NAME, O_RDWR);
+	if (xen_evt_fd == -1) {
+		if (errno != ENOENT)
+			RTE_LOG(ERR, PMD, "cannot open event device %s\n",
+				"/dev/" XEN_PMD_UIO_NAME);
+		return -1;
+	}
+
+	dir = opendir(XEN_DEV_PATH);
+	if (!dir) {
+		RTE_LOG(ERR, PMD, "%s(): opendir failed: %s\n", __func__,
+			strerror(errno));
+		goto fail;
+	}
+
+	while ((e = readdir(dir)) != NULL) {
+		unsigned int devid;
+		char buf[PATH_MAX];
+		const char *driver;
+
+		if (e->d_name[0] == '.')
+			continue;
+		if (sscanf(e->d_name, "vif-%u", &devid) != 1)
+			continue;
+		snprintf(dirname, sizeof(dirname), "%s/%s/driver",
+			 XEN_DEV_PATH, e->d_name);
+		driver = sysfs_get_device_driver(dirname, buf);
+		if (!driver)
+			continue;
+
+		/* only interested in devices bound to our uio kmod */
+		if (strcmp(driver, "xen_uio")) {
+			RTE_LOG(DEBUG, PMD,
+				"%s skipping device with driver %s\n",
+				e->d_name, driver);
+			continue;
+		}
+		if (xen_dev_create(e->d_name, rte_socket_id()) < 0)
+			continue;
+		devices++;
+	}
+	closedir(dir);
+	if (devices)
+		return 0;
+fail:	/* no device uses the event fd: do not leak the descriptor */
+	close(xen_evt_fd);
+	xen_evt_fd = -1;
+	return -1;
+}
+
+static struct rte_driver rte_xen_netfront_driver = {
+ .name = "rte_xen_netfront",
+ .type = PMD_PDEV,
+ .init = rte_xen_netfront_init,
+};
+
+PMD_REGISTER_DRIVER(rte_xen_netfront_driver);
diff --git a/drivers/net/xen/xen_dev.h b/drivers/net/xen/xen_dev.h
new file mode 100644
index 0000000..df6c747
--- /dev/null
+++ b/drivers/net/xen/xen_dev.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _XEN_ETHDEV_H_
+#define _XEN_ETHDEV_H_
+
+#include "uio.h"
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <sys/user.h>
+
+#include <xen/io/netif.h>
+#include "xen_adapter_info.h"
+
+/* Private data of one Xen netfront device. */
+struct xen_adapter {
+ /* UIO resources of the device */
+ struct uio_resource uio;
+
+ /* pointer to the info page */
+ struct xen_adapter_info *info_page;
+ /* VLAN stripping switch -- NOTE(review): presumably non-zero enables it; confirm against the RX path */
+ uint8_t vlan_strip;
+};
+
+int
+xen_evtchn_notify_grant_rxtx(struct ioctl_evtchn_notify_grant *ng);
+
+#endif /* _XEN_ETHDEV_H_ */
diff --git a/drivers/net/xen/xen_logs.h b/drivers/net/xen/xen_logs.h
new file mode 100644
index 0000000..e1f3cc6
--- /dev/null
+++ b/drivers/net/xen/xen_logs.h
@@ -0,0 +1,19 @@
+#ifndef _XEN_LOGS_H_
+#define _XEN_LOGS_H_
+
+#ifdef RTE_LIBRTE_PMD_XEN_DEBUG_INIT
+
+/* Log "<function>(): <message>\n" at the given level through RTE_LOG. */
+#define PMD_INIT_LOG(level, fmt, args...) \
+ RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+
+#else /* RTE_LIBRTE_PMD_XEN_DEBUG_INIT */
+
+/* Init logging compiled out: expands to an empty statement, arguments are not evaluated. */
+#define PMD_INIT_LOG(level, fmt, args...) \
+ do { } while (0)
+
+#endif /* RTE_LIBRTE_PMD_XEN_DEBUG_INIT */
+
+/* Emit a DEBUG-level " >>" marker through PMD_INIT_LOG (used at function entry). */
+#define PMD_INIT_FUNC_TRACE() \
+ PMD_INIT_LOG(DEBUG, " >>")
+
+#endif /* _XEN_LOGS_H_ */
diff --git a/drivers/net/xen/xen_rxtx.c b/drivers/net/xen/xen_rxtx.c
new file mode 100644
index 0000000..91d95b5
--- /dev/null
+++ b/drivers/net/xen/xen_rxtx.c
@@ -0,0 +1,757 @@
+/*
+ * Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include "xen_dev.h"
+#include "xen_rxtx.h"
+#include "xen_logs.h"
+
+#include <rte_ethdev.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_malloc.h>
+
+#include <linux/if_ether.h>
+
+/* Physical address of the mbuf's current data start (buf_physaddr + data_off). */
+#define RTE_MBUF_DATA_DMA_ADDR(mb) \
+ ((uint64_t)((mb)->buf_physaddr + (mb)->data_off))
+
+/*
+ * NOTE(review): FRONT_RING_ATTACH is only defined locally for Xen interface
+ * versions above 0x0003020a -- presumably older headers already provide
+ * it; confirm against the Xen ring.h in use.
+ */
+#if __XEN_LATEST_INTERFACE_VERSION__ > 0x0003020a
+
+/*
+ * Attach front ring _r to the shared ring _s, taking the private producer
+ * and consumer indices from the shared ring's current values.
+ */
+#define FRONT_RING_ATTACH(_r, _s, __size) do { \
+ (_r)->sring = (_s); \
+ (_r)->req_prod_pvt = (_s)->req_prod; \
+ (_r)->rsp_cons = (_s)->rsp_prod; \
+ (_r)->nr_ents = __RING_SIZE(_s, __size); \
+} while (0)
+
+#endif
+
+/* Refresh the RX notify/grant ioctl request from the adapter's info page. */
+static void
+xen_set_rx_ng(struct xen_rx_queue *rxq)
+{
+	struct xen_adapter_info *info = rxq->xa->info_page;
+	struct ioctl_evtchn_notify_grant *ng = &rxq->ng_rx;
+
+	ng->port = info->rx_evtchn;
+	ng->info = info->info;
+	ng->u.rx_ring = info->rx_ring;
+	ng->otherend_id = info->otherend_id;
+}
+
+/* Refresh the TX notify/grant ioctl request from the adapter's info page. */
+static void
+xen_set_tx_ng(struct xen_tx_queue *txq)
+{
+	struct xen_adapter_info *info = txq->xa->info_page;
+	struct ioctl_evtchn_notify_grant *ng = &txq->ng_tx;
+
+	ng->port = info->tx_evtchn;
+	ng->info = info->info;
+	ng->u.tx_ring = info->tx_ring;
+	ng->otherend_id = info->otherend_id;
+}
+
+/*
+ * Refresh the RX ioctl request and submit it; returns whatever
+ * xen_evtchn_notify_grant_rxtx() returns.
+ */
+static int
+xen_evtchn_notify_grant_rx(struct xen_rx_queue *rxq)
+{
+	struct ioctl_evtchn_notify_grant *ng = &rxq->ng_rx;
+
+	xen_set_rx_ng(rxq);
+
+	return xen_evtchn_notify_grant_rxtx(ng);
+}
+
+/*
+ * Refresh the TX ioctl request and submit it; returns whatever
+ * xen_evtchn_notify_grant_rxtx() returns.
+ */
+static int
+xen_evtchn_notify_grant_tx(struct xen_tx_queue *txq)
+{
+	struct ioctl_evtchn_notify_grant *ng = &txq->ng_tx;
+
+	xen_set_tx_ng(txq);
+
+	return xen_evtchn_notify_grant_rxtx(ng);
+}
+
+/*
+ * Post fresh receive buffers on the RX ring.
+ *
+ * For every free request slot an mbuf is allocated, its data start is moved
+ * up to the next page boundary, and the page frame number is queued together
+ * with the slot's grant reference in rxq->ng_rx. The batch -- plus any grant
+ * releases already accumulated in ng_rx -- is then submitted through
+ * xen_evtchn_notify_grant_rx(). Stops early when no mbuf can be allocated.
+ *
+ * Always returns 0.
+ */
+static int
+xen_dev_rx_send_requests(struct xen_rx_queue *rxq)
+{
+	const RING_IDX first = rxq->ring.req_prod_pvt;
+	const uint16_t room = RING_FREE_REQUESTS(&rxq->ring);
+	uint16_t posted = 0;
+
+	xen_set_rx_ng(rxq);
+
+	while (posted < room) {
+		struct netif_rx_request *req;
+		struct rte_mbuf *mbuf;
+		uint64_t phys_addr;
+		uint64_t page_addr;
+		RING_IDX slot;
+
+		slot = (first + posted) & (RING_SIZE(&rxq->ring) - 1);
+
+		mbuf = rte_pktmbuf_alloc(rxq->mb_pool);
+		if (unlikely(!mbuf)) {
+			PMD_INIT_LOG(ERR, "no mbuf");
+			break; /*skip*/
+		}
+
+		rxq->mbuf[slot] = mbuf;
+
+		/* move the data start to the beginning of the next page */
+		phys_addr = RTE_MBUF_DATA_DMA_ADDR(mbuf);
+		page_addr = (phys_addr + PAGE_SIZE - 1) &
+			    (~((uint64_t)PAGE_SIZE - 1));
+		mbuf->data_off += page_addr - phys_addr;
+
+		rxq->ng_rx.s[posted].gref = rxq->gref[slot];
+		rxq->ng_rx.s[posted].paddr = __phys_to_pfn(page_addr);
+
+		req = RING_GET_REQUEST(&rxq->ring, slot);
+		req->gref = rxq->gref[slot];
+		req->id = slot;
+
+		posted++;
+	}
+
+	rxq->ring.req_prod_pvt = first + posted;
+	rxq->ng_rx.count = posted;
+
+	/* submit only when there is something to grant or to release */
+	if (rxq->ng_rx.count > 0 || rxq->ng_rx.rel_count > 0) {
+		xen_evtchn_notify_grant_rx(rxq);
+		rxq->ng_rx.count = 0;
+		rxq->ng_rx.rel_count = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Process one "extra info" ring slot.
+ *
+ * The payload is not interpreted; the function only decides whether the
+ * queue stays in the extra state. rxq->estate is reset to RX_RESP_GENERAL
+ * when the slot is missing, carries an invalid type, or does not set
+ * XEN_NETIF_EXTRA_FLAG_MORE.
+ */
+static void
+xen_dev_rx_recv_extra(struct xen_rx_queue *rxq, struct netif_extra_info *extra)
+{
+	if (unlikely(!extra)) {
+		PMD_INIT_LOG(ERR, "Invalid rxq state transition: %d",
+			     rxq->state);
+		rxq->estate = RX_RESP_GENERAL;
+		/* nothing to inspect: the checks below would dereference NULL */
+		return;
+	}
+
+	if (unlikely(!extra->type ||
+		     extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+		PMD_INIT_LOG(WARNING, "Invalid extra type: %d", extra->type);
+		rxq->estate = RX_RESP_GENERAL;
+	}
+
+	if (!(extra->flags & XEN_NETIF_EXTRA_FLAG_MORE)) {
+		PMD_INIT_LOG(DEBUG, "No XEN_NETIF_EXTRA_FLAG_MORE");
+		rxq->estate = RX_RESP_GENERAL;
+	}
+}
+
+/*
+ * Compute the UDP/TCP checksum with the helper matching the L3 protocol.
+ * Any ethertype other than ETHER_TYPE_IPv4 is assumed to be IPv6.
+ */
+static uint16_t
+get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
+{
+	if (ethertype != ETHER_TYPE_IPv4)
+		return rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
+
+	return rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
+}
+
+/*
+ * Recompute the TCP or UDP checksum of a received IPv4/IPv6 packet in place.
+ *
+ * The Ethernet header is expected at the start of the mbuf data, directly
+ * followed by the IP header. Packets with another ethertype, or with an L4
+ * protocol other than TCP/UDP, are left untouched. A UDP checksum of 0 is
+ * preserved.
+ *
+ * NOTE(review): the checksum helpers receive plain header pointers into the
+ * first segment; callers also pass chained mbufs -- confirm the helpers do
+ * not read beyond that segment.
+ */
+static void
+recalculate_checksum(struct rte_mbuf *mbuf)
+{
+	/* a local constant instead of a macro that would leak out of here */
+	const size_t l2_len = sizeof(struct ether_hdr);
+	struct ether_hdr *eth_hdr;
+	struct ipv4_hdr *ipv4_hdr;
+	struct ipv6_hdr *ipv6_hdr;
+	struct udp_hdr *udp_hdr;
+	struct tcp_hdr *tcp_hdr;
+	void *l3_hdr;
+	uint16_t ethertype, l3_len;
+	uint8_t l4_proto;
+
+	eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
+	ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
+	l3_hdr = (char *)eth_hdr + l2_len;
+
+	switch (ethertype) {
+	case ETHER_TYPE_IPv4:
+		ipv4_hdr = l3_hdr;
+		/* IHL counts 32-bit words */
+		l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4;
+		l4_proto = ipv4_hdr->next_proto_id;
+		break;
+	case ETHER_TYPE_IPv6:
+		ipv6_hdr = l3_hdr;
+		l3_len = sizeof(struct ipv6_hdr);
+		l4_proto = ipv6_hdr->proto;
+		break;
+	default:
+		/* not IP: no L4 checksum to recompute */
+		return;
+	}
+
+	if (l4_proto == IPPROTO_TCP) {
+		tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + l3_len);
+		tcp_hdr->cksum = 0;
+		tcp_hdr->cksum = get_udptcp_checksum(l3_hdr, tcp_hdr,
+						     ethertype);
+	} else if (l4_proto == IPPROTO_UDP) {
+		udp_hdr = (struct udp_hdr *)((char *)l3_hdr + l3_len);
+		/* do not recalculate udp cksum if it was 0 */
+		if (udp_hdr->dgram_cksum != 0) {
+			udp_hdr->dgram_cksum = 0;
+			udp_hdr->dgram_cksum = get_udptcp_checksum(l3_hdr,
+								   udp_hdr,
+								   ethertype);
+		}
+	}
+}
+
+/*
+ * Harvest RX responses from the ring and turn them into packets.
+ *
+ * Walks the unconsumed responses until the ring is drained or nb_pkts
+ * packets have been stored in rx_pkts. For every consumed slot the grant
+ * reference is queued in ng_rx.rel_gref for release. Responses flagged
+ * NETRXF_more_data are chained onto rxq->first_frag until the last
+ * fragment arrives; slots following a NETRXF_extra_info response are
+ * treated as extra-info slots and their mbuf is freed.
+ *
+ * A response with a bad status/offset drops its mbuf and any partially
+ * assembled packet, and resets the packet state machine.
+ *
+ * Returns the number of packets stored in rx_pkts.
+ */
+static uint16_t
+xen_dev_rx_recv_responses(struct xen_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+			  uint16_t nb_pkts)
+{
+	uint16_t nb_rx;
+	uint16_t i;
+	struct netif_rx_response *rsp;
+	RING_IDX rsp_cons = rxq->ring.rsp_cons;
+	RING_IDX cons = rsp_cons;
+	uint16_t work_todo;
+
+	nb_rx = 0;
+	work_todo = RING_HAS_UNCONSUMED_RESPONSES(&rxq->ring);
+	for (i = 0; i < work_todo && nb_rx < nb_pkts; i++) {
+		struct rte_mbuf *mbuf;
+
+		cons = (rsp_cons + i) & (RING_SIZE(&rxq->ring) - 1);
+
+		rsp = RING_GET_RESPONSE(&rxq->ring, cons);
+
+		PMD_INIT_LOG(DEBUG, "id:%u status:%u offset:%u flags:%x",
+			     rsp->id, rsp->status, rsp->offset, rsp->flags);
+
+		/* queue the slot's grant for release on the next notify */
+		rxq->ng_rx.rel_gref[rxq->ng_rx.rel_count] = rxq->gref[cons];
+		rxq->ng_rx.rel_count++;
+
+		if (unlikely(rsp->status < 0 ||
+			     rsp->offset + rsp->status > PAGE_SIZE)) {
+			PMD_INIT_LOG(WARNING, "bad rsp->status: %d offset: %d",
+				     rsp->status, rsp->offset);
+			rte_pktmbuf_free(rxq->mbuf[cons]);
+			rxq->mbuf[cons] = NULL;
+			/*
+			 * Fragments collected so far are chained on
+			 * first_frag and owned by nobody else: free them
+			 * instead of just forgetting the pointer.
+			 */
+			if (rxq->first_frag)
+				rte_pktmbuf_free(rxq->first_frag);
+			rxq->state = RX_RESP_GENERAL;
+			rxq->first_frag = rxq->prev_frag = NULL;
+			continue;
+		}
+
+		if (unlikely(rxq->estate & RX_RESP_EXTRA)) {
+			/* this slot carries extra info, not packet data */
+			xen_dev_rx_recv_extra(rxq,
+					      (struct netif_extra_info *)rsp);
+			rte_pktmbuf_free(rxq->mbuf[cons]);
+			rxq->mbuf[cons] = NULL;
+			continue;
+		}
+
+		if (unlikely(rsp->flags & NETRXF_extra_info)) {
+			PMD_INIT_LOG(DEBUG, "EXTRA_NETRXF_extra_info");
+			rxq->estate = RX_RESP_EXTRA;
+			/* next ring will contain extra info */
+			/* current ring entry is still valid */
+		}
+
+		if (rxq->state == RX_RESP_GENERAL) {
+			/* normal receive */
+			if (likely(!!rxq->mbuf[cons])) {
+				mbuf = rxq->mbuf[cons];
+				mbuf->port = rxq->port_id;
+				mbuf->data_len = mbuf->pkt_len = rsp->status;
+				mbuf->data_off += rsp->offset;
+				if (rxq->xa->vlan_strip)
+					rte_vlan_strip(mbuf);
+
+				if (rsp->flags & NETRXF_more_data) {
+					rxq->state = RX_RESP_CONTINUE;
+					rxq->first_frag =
+						rxq->prev_frag = mbuf;
+				} else {
+					/*send to the upper level*/
+					rx_pkts[nb_rx++] = mbuf;
+					recalculate_checksum(mbuf);
+					rxq->rx_stats.ipackets++;
+					rxq->rx_stats.ibytes +=
+						mbuf->pkt_len;
+				}
+			} else {
+				PMD_INIT_LOG(WARNING, "no rxq->mbuf[%d]",
+					     cons);
+				rxq->rx_stats.ierrors++;
+			}
+		} else { /* RX_RESP_CONTINUE -- packet is segmented */
+			if (likely(!!rxq->mbuf[cons])) {
+				mbuf = rxq->mbuf[cons];
+				/* mbuf->in_port = rxq->port_id; */
+				mbuf->data_len = mbuf->pkt_len =
+					rsp->status;
+				mbuf->data_off += rsp->offset;
+
+				rxq->first_frag->nb_segs++;
+				rxq->first_frag->pkt_len += mbuf->data_len;
+				rxq->prev_frag->next = mbuf;
+
+				if (rsp->flags & NETRXF_more_data) {
+					rxq->prev_frag = mbuf;
+				} else {
+					rxq->state = RX_RESP_GENERAL;
+					/*send to the upper level*/
+					rx_pkts[nb_rx++] = rxq->first_frag;
+					recalculate_checksum(rxq->first_frag);
+					rxq->rx_stats.ipackets++;
+					rxq->rx_stats.ibytes +=
+						rxq->first_frag->pkt_len;
+					rxq->first_frag = rxq->prev_frag
+						= NULL;
+				}
+			} else {
+				PMD_INIT_LOG(WARNING, "no cntn rxq->mbuf[%d]",
+					     cons);
+				rxq->rx_stats.ierrors++;
+			}
+		}
+
+		/* the slot no longer owns an mbuf, whichever path was taken */
+		rxq->mbuf[cons] = NULL;
+	}
+	rxq->ring.rsp_cons = (rsp_cons + i);
+
+	return nb_rx;
+}
+
+/*
+ * Queue every ring slot's grant reference for release, free all mbufs
+ * still attached to the ring, and submit the release request.
+ */
+static void
+xen_rx_queue_release(struct xen_rx_queue *rxq)
+{
+	uint16_t slot;
+
+	rxq->ng_rx.count = 0;
+	rxq->ng_rx.rel_count = 0;
+
+	for (slot = 0; slot < (RING_SIZE(&rxq->ring)); slot++) {
+		rxq->ng_rx.rel_gref[rxq->ng_rx.rel_count] = rxq->gref[slot];
+		rxq->ng_rx.rel_count++;
+
+		if (!rxq->mbuf[slot])
+			continue;
+
+		rte_pktmbuf_free(rxq->mbuf[slot]);
+		rxq->mbuf[slot] = NULL;
+	}
+
+	xen_evtchn_notify_grant_rx(rxq);
+}
+
+/* ethdev rx_queue_release callback: tear down and free an RX queue (NULL is ignored). */
+void
+xen_dev_rx_queue_release(void *rxq)
+{
+	struct xen_rx_queue *queue = (struct xen_rx_queue *)rxq;
+
+	if (!queue)
+		return;
+
+	xen_rx_queue_release(queue);
+	rte_free(queue);
+}
+
+/*
+ * (Re)initialize the RX ring: reset the shared ring, attach the front ring
+ * to it, post receive buffers, and record the info page's current
+ * disconnect count in the queue.
+ */
+static void
+xen_rx_ring_init(struct xen_rx_queue *rxq)
+{
+ SHARED_RING_INIT(rxq->rxs);
+ FRONT_RING_ATTACH(&rxq->ring, rxq->rxs, PAGE_SIZE);
+ xen_dev_rx_send_requests(rxq);
+ /* remember the count this ring state corresponds to */
+ rxq->rx_disconnect_count = rxq->xa->info_page->disconnect_count;
+ xen_set_rx_ng(rxq);
+}
+
+/*
+ * RX burst callback.
+ *
+ * Returns 0 while the info page reports the device as not connected.
+ * Otherwise the ring is rebuilt first if the disconnect count changed,
+ * then up to nb_pkts packets are received and the ring is refilled.
+ */
+static uint16_t
+xen_dev_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	struct xen_rx_queue *rxq = (struct xen_rx_queue *)rx_queue;
+	uint16_t nb_rx;
+
+	if (!likely(rxq->xa->info_page->is_connected))
+		return 0;
+
+	if (unlikely(rxq->xa->info_page->disconnect_count !=
+		     rxq->rx_disconnect_count)) {
+		xen_rx_queue_release(rxq);
+		xen_rx_ring_init(rxq);
+	}
+
+	nb_rx = xen_dev_rx_recv_responses(rxq, rx_pkts, nb_pkts);
+	xen_dev_rx_send_requests(rxq);
+
+	return nb_rx;
+}
+
+/*
+ * ethdev rx_queue_setup callback.
+ *
+ * Allocates the RX queue, binds it to the adapter, the caller's mbuf pool
+ * and the RX shared ring mapping, initializes the ring, and installs the
+ * RX burst function.
+ *
+ * Returns 0 on success, or -ENOMEM when the info page offers fewer RX
+ * grant references than ring entries or the queue cannot be allocated.
+ */
+int
+xen_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+		       uint16_t nb_desc __rte_unused,
+		       unsigned int socket_id __rte_unused,
+		       const struct rte_eth_rxconf *rx_conf __rte_unused,
+		       struct rte_mempool *mp)
+{
+	struct xen_adapter *xa = dev->data->dev_private;
+	struct xen_adapter_info *info = xa->info_page;
+	struct xen_rx_queue *rxq;
+
+	PMD_INIT_FUNC_TRACE();
+	if (info->rx_grefs_count < NET_RX_RING_SIZE) {
+		RTE_LOG(ERR, PMD, "rx ring size greater than rx grefs count\n");
+		return -ENOMEM;
+	}
+
+	rxq = rte_zmalloc("rx_queue", sizeof(struct xen_rx_queue),
+			  RTE_CACHE_LINE_SIZE);
+	if (!rxq) {
+		RTE_LOG(ERR, PMD, "rte_zmalloc for rxq failed!\n");
+		return -ENOMEM;
+	}
+
+	rxq->xa = xa;
+	rxq->queue_id = queue_idx;
+	rxq->port_id = dev->data->port_id;
+	rxq->state = RX_RESP_GENERAL;
+	rxq->estate = RX_RESP_GENERAL;
+	rxq->first_frag = rxq->prev_frag = NULL;
+	rxq->mb_pool = mp;
+	rxq->ng_rx.is_rx = 1;
+	rxq->ng_rx.rel_count = 0;
+	/* RX grant references sit at the start of the shared gref array */
+	rxq->gref = &info->rxtx_grefs[0];
+
+	rxq->rxs = uio_get_map_addr(&xa->uio, RX_RING_MAP);
+
+	dev->data->rx_queues[queue_idx] = rxq;
+
+	xen_rx_ring_init(rxq);
+
+	dev->rx_pkt_burst = xen_dev_recv_pkts;
+
+	return 0;
+}
+
+/*
+ * Fill the i-th TX request after the private producer index.
+ *
+ * Takes the first slot off the free-slot list, records its grant reference
+ * and page frame in ng_tx.s[i], and writes the ring request. The request
+ * for segment 0 carries the whole packet length, later ones their own
+ * size.
+ */
+static void
+xen_dev_tx_prepare_request(struct xen_tx_queue *txq, uint16_t i,
+			   uint16_t pkt_len, uint16_t size, uint16_t offset,
+			   uint16_t segno, uint16_t flags, unsigned long paddr)
+{
+	struct netif_tx_request *req;
+	struct slot *slot;
+	RING_IDX prod;
+
+	prod = (txq->ring.req_prod_pvt + i) & (RING_SIZE(&txq->ring) - 1);
+	req = RING_GET_REQUEST(&txq->ring, prod);
+
+	/* claim the first unused slot */
+	slot = STAILQ_FIRST(&txq->slotlist);
+	STAILQ_REMOVE_HEAD(&txq->slotlist, list_entry);
+	txq->freeslots--;
+
+	txq->ng_tx.s[i].gref = txq->gref[slot->id];
+	txq->ng_tx.s[i].paddr = paddr;
+
+	req->id = slot->id;
+	req->flags = flags;
+	req->offset = offset;
+	req->gref = txq->gref[slot->id];
+	if (segno == 0)
+		req->size = pkt_len;
+	else
+		req->size = size;
+
+	PMD_INIT_LOG(DEBUG, "id:%u size:%u offset:%u gref:%u flags:%x",
+		     req->id, req->size, req->offset, req->gref, req->flags);
+}
+
+/*
+ * Queue up to nb_pkts packets on the TX ring.
+ *
+ * Every mbuf segment becomes one request, or two when its data crosses a
+ * page boundary. Processing stops at the first NULL mbuf or at the first
+ * packet whose worst-case request count no longer fits. Packets that
+ * cannot be prepared (VLAN insertion or header-mbuf allocation failure)
+ * are freed and still counted as consumed. The prepared requests are
+ * submitted through xen_evtchn_notify_grant_tx().
+ *
+ * Returns the number of entries of tx_pkts consumed.
+ */
+static int
+xen_dev_tx_send_requests(struct xen_tx_queue *txq, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct rte_mbuf *mbuf;
+ unsigned long paddr;
+ uint16_t offset;
+ uint16_t flags;
+ uint16_t size;
+ uint16_t i = 0;
+ uint16_t nb_tx = 0;
+ uint16_t pkt_len;
+ uint16_t nsegs;
+ /* bounded by both free ring entries and unused gref slots */
+ uint16_t free_space = RTE_MIN(RING_FREE_REQUESTS(&txq->ring),
+ txq->freeslots);
+ struct slot *slot;
+
+ xen_set_tx_ng(txq);
+
+ while (i < free_space && nb_tx < nb_pkts) {
+ slot = STAILQ_FIRST(&txq->slotlist); /* peek ahead */
+
+ mbuf = tx_pkts[nb_tx];
+
+ if (unlikely(!mbuf)) {
+ PMD_INIT_LOG(WARNING, "no mbuf for req");
+ break;
+ }
+
+ /* each segment may be split in two because of its offset,
+ * so reserve twice as many requests (plus one)
+ */
+ if (i + (tx_pkts[nb_tx]->nb_segs * 2 + 1) > free_space) {
+ PMD_INIT_LOG(WARNING, "no ring space for req");
+ txq->tx_stats.oerrors++;
+ break;
+ }
+
+ /* Do VLAN tag insertion */
+ if ((mbuf->ol_flags & PKT_TX_VLAN_PKT) &&
+ unlikely(rte_vlan_insert(&mbuf) != 0)) {
+ rte_pktmbuf_free(mbuf);
+ txq->tx_stats.oerrors++;
+ ++nb_tx;
+ continue;
+ }
+
+ /* if the first segment is < ETH_HLEN transmit will fail */
+ offset = (RTE_MBUF_DATA_DMA_ADDR(mbuf)) &
+ ((uint64_t)PAGE_SIZE - 1);
+ if (PAGE_SIZE - offset < ETH_HLEN) {
+ struct rte_mbuf *mbuf_new;
+
+ /* move the Ethernet header into a separate leading mbuf */
+ mbuf_new = rte_pktmbuf_alloc(txq->mb_pool);
+ if (unlikely(!mbuf_new)) {
+ rte_pktmbuf_free(mbuf);
+ txq->tx_stats.oerrors++;
+ ++nb_tx;
+ continue;
+ }
+ rte_memcpy(rte_pktmbuf_mtod(mbuf_new, void *),
+ rte_pktmbuf_mtod(mbuf, void *),
+ ETH_HLEN);
+ mbuf_new->pkt_len = mbuf_new->data_len = ETH_HLEN;
+ rte_pktmbuf_adj(mbuf, ETH_HLEN);
+
+ mbuf_new->pkt_len += mbuf->pkt_len;
+ mbuf_new->nb_segs = mbuf->nb_segs + 1;
+ mbuf_new->next = mbuf;
+
+ mbuf = mbuf_new;
+ }
+
+ /* the chain head is stored under the id of the slot peeked above */
+ txq->mbuf[slot->id] = mbuf;
+ pkt_len = mbuf->pkt_len;
+ nsegs = 0;
+
+ /* prepare request for each mbuf segment */
+ do {
+ size = mbuf->data_len;
+ flags = (mbuf->next ? NETTXF_more_data : 0);
+ paddr = __phys_to_pfn(RTE_MBUF_DATA_DMA_ADDR(mbuf));
+ offset = (RTE_MBUF_DATA_DMA_ADDR(mbuf)) &
+ ((uint64_t)PAGE_SIZE - 1);
+
+ /* check if additional segmentation is needed */
+ if (size + offset > PAGE_SIZE) {
+ /* first part: up to the end of the current page */
+ size = PAGE_SIZE - offset;
+ xen_dev_tx_prepare_request(txq, i, pkt_len,
+ size, offset, nsegs,
+ NETTXF_more_data,
+ paddr);
+ /* remainder continues on the following page frame */
+ paddr++;
+ offset = (offset + size) % PAGE_SIZE;
+ size = mbuf->data_len - size;
+ i++;
+ nsegs++;
+ }
+
+ xen_dev_tx_prepare_request(txq, i, pkt_len, size,
+ offset, nsegs, flags,
+ paddr);
+ i++;
+ nsegs++;
+
+ } while ((mbuf = mbuf->next));
+
+ nb_tx++;
+ txq->tx_stats.opackets++;
+ txq->tx_stats.obytes += pkt_len;
+ }
+
+ txq->ring.req_prod_pvt += i;
+ txq->ng_tx.count = i;
+ /* submit only when there is something to grant or to release */
+ if (txq->ng_tx.count > 0 || txq->ng_tx.rel_count > 0) {
+ xen_evtchn_notify_grant_tx(txq);
+ txq->ng_tx.rel_count = 0;
+ txq->ng_tx.count = 0;
+ }
+
+ return nb_tx;
+}
+
+/*
+ * Reap TX responses from the ring.
+ *
+ * For every unconsumed response the slot named by the response id goes
+ * back onto the free-slot list, its grant reference is queued for release
+ * and an mbuf stored under that id is freed.
+ *
+ * Always returns 0.
+ */
+static int
+xen_dev_tx_recv_responses(struct xen_tx_queue *txq)
+{
+	const RING_IDX first = txq->ring.rsp_cons;
+	const uint16_t pending = RING_HAS_UNCONSUMED_RESPONSES(&txq->ring);
+	uint16_t done;
+
+	for (done = 0; done < pending; done++) {
+		struct netif_tx_response *rsp;
+		RING_IDX cons;
+		uint16_t id;
+
+		cons = (first + done) & (RING_SIZE(&txq->ring) - 1);
+		rsp = RING_GET_RESPONSE(&txq->ring, cons);
+		id = rsp->id;
+
+		/* the slot is available for new requests again */
+		STAILQ_INSERT_TAIL(&txq->slotlist, &txq->slots[id], list_entry);
+		txq->freeslots++;
+
+		if (unlikely(rsp->status == NETIF_RSP_NULL))
+			PMD_INIT_LOG(WARNING, "NETIF_RSP_NULL");
+
+		txq->ng_tx.rel_gref[txq->ng_tx.rel_count] = txq->gref[id];
+		txq->ng_tx.rel_count++;
+
+		if (likely(!!txq->mbuf[id])) {
+			rte_pktmbuf_free(txq->mbuf[id]);
+			txq->mbuf[id] = NULL;
+		}
+	}
+
+	txq->ring.rsp_cons = first + done;
+
+	return 0;
+}
+
+/*
+ * Free every mbuf still stored in the TX queue, queue the grant reference
+ * of each such entry for release, and submit the release request.
+ */
+static void
+xen_tx_queue_release(struct xen_tx_queue *txq)
+{
+	uint16_t slot;
+
+	txq->ng_tx.count = 0;
+	txq->ng_tx.rel_count = 0;
+
+	for (slot = 0; slot < (RING_SIZE(&txq->ring)); slot++) {
+		if (!txq->mbuf[slot])
+			continue;
+
+		rte_pktmbuf_free(txq->mbuf[slot]);
+		txq->mbuf[slot] = NULL;
+
+		txq->ng_tx.rel_gref[txq->ng_tx.rel_count] = txq->gref[slot];
+		txq->ng_tx.rel_count++;
+	}
+
+	xen_evtchn_notify_grant_tx(txq);
+}
+
+/*
+ * ethdev tx_queue_release callback: tear down a TX queue and free its slot
+ * array and the queue itself (NULL is ignored).
+ */
+void
+xen_dev_tx_queue_release(void *txq)
+{
+	struct xen_tx_queue *queue = (struct xen_tx_queue *)txq;
+
+	if (!queue)
+		return;
+
+	xen_tx_queue_release(queue);
+	rte_free(queue->slots);
+	rte_free(queue);
+}
+
+/*
+ * (Re)initialize the TX ring: reset the shared ring, attach the front ring
+ * to it, process any responses, and record the info page's current
+ * disconnect count in the queue.
+ */
+static void
+xen_tx_ring_init(struct xen_tx_queue *txq)
+{
+ SHARED_RING_INIT(txq->txs);
+ FRONT_RING_ATTACH(&txq->ring, txq->txs, PAGE_SIZE);
+ xen_dev_tx_recv_responses(txq);
+ /* remember the count this ring state corresponds to */
+ txq->tx_disconnect_count = txq->xa->info_page->disconnect_count;
+ xen_set_tx_ng(txq);
+}
+
+/*
+ * TX burst callback.
+ *
+ * Returns 0 while the info page reports the device as not connected.
+ * Otherwise the ring is rebuilt first if the disconnect count changed,
+ * then completed requests are reaped and up to nb_pkts packets queued.
+ */
+static uint16_t
+xen_dev_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct xen_tx_queue *txq = (struct xen_tx_queue *)tx_queue;
+
+	if (!likely(txq->xa->info_page->is_connected))
+		return 0;
+
+	if (unlikely(txq->xa->info_page->disconnect_count !=
+		     txq->tx_disconnect_count)) {
+		xen_tx_queue_release(txq);
+		xen_tx_ring_init(txq);
+	}
+
+	xen_dev_tx_recv_responses(txq);
+
+	return xen_dev_tx_send_requests(txq, tx_pkts, nb_pkts);
+}
+
+/*
+ * ethdev tx_queue_setup callback.
+ *
+ * Allocates the TX queue, a small private mbuf pool (used by the TX path
+ * when the Ethernet header must be moved into its own mbuf) and the
+ * free-slot list covering all TX grant references, then initializes the
+ * ring and installs the TX burst function.
+ *
+ * Returns 0 on success, -EINVAL when the caller did not disable all TX
+ * checksum offloads, or -ENOMEM when there are too few TX grant references
+ * or an allocation fails. The queue structure is freed again on failure.
+ */
+int
+xen_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+		       uint16_t nb_desc __rte_unused,
+		       unsigned int socket_id,
+		       const struct rte_eth_txconf *tx_conf)
+{
+	struct xen_adapter *xa = dev->data->dev_private;
+	struct xen_adapter_info *info = xa->info_page;
+	struct xen_tx_queue *txq;
+	char pool_name[RTE_MEMPOOL_NAMESIZE];
+	uint16_t i;
+
+	PMD_INIT_FUNC_TRACE();
+
+	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS)
+	    != ETH_TXQ_FLAGS_NOXSUMS) {
+		RTE_LOG(ERR, PMD, "TX checksum offload not supported\n");
+		return -EINVAL;
+	}
+
+	if (info->tx_grefs_count < NET_TX_RING_SIZE) {
+		RTE_LOG(ERR, PMD, "tx ring size greater than tx grefs count\n");
+		return -ENOMEM;
+	}
+
+	txq = rte_zmalloc("tx_queue", sizeof(struct xen_tx_queue),
+			  RTE_CACHE_LINE_SIZE);
+	if (!txq) {
+		RTE_LOG(ERR, PMD, "rte_zmalloc for txq failed!\n");
+		return -ENOMEM;
+	}
+
+	txq->xa = xa;
+	txq->queue_id = queue_idx;
+	txq->port_id = dev->data->port_id;
+
+	/*
+	 * NOTE(review): the element size passed here is RTE_PKTMBUF_HEADROOM;
+	 * confirm it leaves room for the mbuf header plus the ETH_HLEN bytes
+	 * the TX path copies into mbufs from this pool.
+	 */
+	snprintf(pool_name, RTE_MEMPOOL_NAMESIZE, "mbuf_%u_tx", txq->port_id);
+	txq->mb_pool = rte_mempool_create(pool_name, NET_TX_RING_SIZE - 1,
+					  RTE_PKTMBUF_HEADROOM, 0,
+					  sizeof(struct rte_pktmbuf_pool_private),
+					  rte_pktmbuf_pool_init, NULL,
+					  rte_pktmbuf_init, NULL,
+					  socket_id, 0);
+	if (!txq->mb_pool) {
+		RTE_LOG(ERR, PMD, "Could not initialize tx mbuf pool\n");
+		goto err_free_txq;
+	}
+
+	txq->txs = uio_get_map_addr(&xa->uio, TX_RING_MAP);
+
+	txq->slots = rte_zmalloc("slots",
+				 info->tx_grefs_count * sizeof(struct slot),
+				 RTE_CACHE_LINE_SIZE);
+	if (!txq->slots) {
+		RTE_LOG(ERR, PMD, "rte_zmalloc for slots failed!\n");
+		goto err_free_txq;
+	}
+
+	txq->ng_tx.is_rx = 0;
+	txq->ng_tx.rel_count = 0;
+	/* TX grant references follow the RX ones in the shared gref array */
+	txq->gref = &info->rxtx_grefs[info->rx_grefs_count];
+	/*
+	 * NOTE(review): slot ids run up to tx_grefs_count - 1 and are used
+	 * to index txq->mbuf[], which has NET_TX_RING_SIZE entries; confirm
+	 * tx_grefs_count can never exceed NET_TX_RING_SIZE.
+	 */
+	STAILQ_INIT(&txq->slotlist);
+	for (i = 0; i < info->tx_grefs_count; i++) {
+		txq->slots[i].id = i;
+		STAILQ_INSERT_TAIL(&txq->slotlist, &txq->slots[i], list_entry);
+	}
+	txq->freeslots = info->tx_grefs_count;
+
+	dev->data->tx_queues[queue_idx] = txq;
+
+	xen_tx_ring_init(txq);
+
+	dev->tx_pkt_burst = xen_dev_xmit_pkts;
+
+	return 0;
+
+err_free_txq:
+	/*
+	 * NOTE(review): an mbuf pool created above is not released here;
+	 * confirm whether the targeted DPDK release can free a mempool.
+	 */
+	rte_free(txq);
+	return -ENOMEM;
+}
diff --git a/drivers/net/xen/xen_rxtx.h b/drivers/net/xen/xen_rxtx.h
new file mode 100644
index 0000000..eee633c
--- /dev/null
+++ b/drivers/net/xen/xen_rxtx.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _XEN_RXTX_H_
+#define _XEN_RXTX_H_
+
+#include <rte_mbuf.h>
+#include <rte_tailq.h>
+
+#include <xen/io/netif.h>
+#include "xen_adapter_info.h"
+
+#define DEFAULT_RX_FREE_THRESH 0
+#define DEFAULT_TX_FREE_THRESH 512
+
+#undef PAGE_SIZE
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1 << PAGE_SHIFT)
+
+#define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT))
+#define __pfn_to_phys(pfn) ((phys_addr_t)(pfn) << PAGE_SHIFT)
+
+#define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
+#define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
+
+/* Per-queue transmit counters. */
+struct xen_tx_stats {
+ uint64_t opackets; /* transmitted packets */
+ uint64_t obytes; /* transmitted bytes */
+ uint64_t oerrors; /* transmit errors */
+};
+
+/* Per-queue receive counters. */
+struct xen_rx_stats {
+ uint64_t ipackets; /* received packets */
+ uint64_t ibytes; /* received bytes */
+ uint64_t ierrors; /* receive errors */
+};
+
+/*
+ * States of the RX response state machines.
+ * NOTE(review): meanings below are inferred from the names -- confirm
+ * against the RX path in xen_rxtx.c.
+ */
+enum rx_resp_state {
+ RX_RESP_GENERAL = 0, /* presumably: next response starts a new packet */
+ RX_RESP_CONTINUE, /* presumably: next response continues a segmented packet */
+ RX_RESP_EXTRA /* presumably: next ring slot carries extra info */
+};
+
+/* State of one receive queue. */
+struct xen_rx_queue {
+ /** RX queue index. */
+ uint16_t queue_id;
+ /** Device port identifier. */
+ uint8_t port_id;
+ /** mbuf pool to populate RX ring. */
+ struct rte_mempool *mb_pool;
+ /** Ptr to dev_private data. */
+ struct xen_adapter *xa;
+
+ /* Xen specific */
+
+ /** Front ring and pointer to the xen rx ring shared with other end. */
+ netif_rx_front_ring_t ring;
+ struct netif_rx_sring *rxs;
+ /** Grefs for sharing with the other end. */
+ grant_ref_t *gref;
+ /** mbufs posted to the ring, one entry per ring slot. */
+ struct rte_mbuf *mbuf[NET_RX_RING_SIZE];
+ /** packet state machine */
+ enum rx_resp_state state;
+ /** extra state machine */
+ enum rx_resp_state estate;
+ /** First packet segment. */
+ struct rte_mbuf *first_frag;
+ /** Previous packet segment. */
+ struct rte_mbuf *prev_frag;
+ /** Statistics. */
+ struct xen_rx_stats rx_stats;
+ /** Number of disconnections. */
+ uint8_t rx_disconnect_count;
+ /** Notify and gnttab ioctl struct. */
+ struct ioctl_evtchn_notify_grant ng_rx;
+};
+
+/* State of one transmit queue. */
+struct xen_tx_queue {
+ /** TX queue index. */
+ uint16_t queue_id;
+ /** Device port identifier. */
+ uint8_t port_id;
+ /** Ptr to dev_private data */
+ struct xen_adapter *xa;
+
+ /* Xen specific */
+
+ /** Front ring and pointer to the xen tx ring shared with other end. */
+ netif_tx_front_ring_t ring;
+ struct netif_tx_sring *txs;
+ /** Grefs for sharing with the other end. */
+ grant_ref_t *gref;
+ /** mbufs in flight, indexed by slot id. */
+ struct rte_mbuf *mbuf[NET_TX_RING_SIZE];
+ /** Used for workaround in tx path */
+ struct rte_mempool *mb_pool;
+ /** Statistics. */
+ struct xen_tx_stats tx_stats;
+ /** Number of disconnections. */
+ uint8_t tx_disconnect_count;
+ /** Notify and gnttab ioctl struct. */
+ struct ioctl_evtchn_notify_grant ng_tx;
+ /** List of unused slots in gref[] */
+ STAILQ_HEAD(listhead, slot) slotlist;
+ /** Backing array for the entries of slotlist. */
+ struct slot {
+ STAILQ_ENTRY(slot) list_entry;
+ uint16_t id;
+ } *slots;
+ /** Number of entries currently on slotlist. */
+ uint16_t freeslots;
+};
+
+struct rte_eth_dev;
+struct rte_eth_rxconf;
+struct rte_eth_txconf;
+
+int xen_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+ uint16_t nb_rx_desc, unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *mb_pool);
+
+int xen_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+ uint16_t nb_tx_desc, unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf);
+
+void xen_dev_rx_queue_release(void *rxq);
+void xen_dev_tx_queue_release(void *txq);
+
+#endif /* _XEN_RXTX_H_ */
--
2.5.5
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH v3 3/3] xen: Add documentation
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver Jan Blunck
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 1/3] xen: Add UIO kernel driver Jan Blunck
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 2/3] xen: Add netfront poll mode driver Jan Blunck
@ 2016-03-22 9:55 ` Jan Blunck
2016-04-20 14:18 ` [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver Bruce Richardson
2017-02-05 14:44 ` Thomas Monjalon
4 siblings, 0 replies; 23+ messages in thread
From: Jan Blunck @ 2016-03-22 9:55 UTC (permalink / raw)
To: dev; +Cc: jblunck, shemming
Add basic documentation for Xen Netfront PMD compilation and testpmd
invocation.
Signed-off-by: Jan Blunck <jblunck@infradead.org>
---
doc/guides/nics/overview.rst | 28 ++++++------
doc/guides/nics/xen.rst | 101 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 115 insertions(+), 14 deletions(-)
create mode 100644 doc/guides/nics/xen.rst
diff --git a/doc/guides/nics/overview.rst b/doc/guides/nics/overview.rst
index 2d4f014..de07390 100644
--- a/doc/guides/nics/overview.rst
+++ b/doc/guides/nics/overview.rst
@@ -74,18 +74,18 @@ Most of these differences are summarized below.
.. table:: Features availability in networking drivers
- ==================== = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
- Feature a b b b c e e i i i i i i i i i i f f m m m n n p r s v v v x
- f n n o x 1 n 4 4 4 4 g g x x x x m m l l p f u c i z i i m e
- p x x n g 0 i 0 0 0 0 b b g g g g 1 1 x x i p l a n e r r x n
- a 2 2 d b 0 c e e e e v b b b b 0 0 4 5 p l p g d t t n v
- c x x i e 0 . v v f e e e e k k e a i i e i
- k v n . f f . v v . t o o t r
- e f g . . . f f . a . 3 t
+ ==================== = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
+ Feature a b b b c e e i i i i i i i i i i f f m m m n n p r s v v v x x
+ f n n o x 1 n 4 4 4 4 g g x x x x m m l l p f u c i z i i m e e
+ p x x n g 0 i 0 0 0 0 b b g g g g 1 1 x x i p l a n e r r x n n
+ a 2 2 d b 0 c e e e e v b b b b 0 0 4 5 p l p g d t t n v
+ c x x i e 0 . v v f e e e e k k e a i i e i
+ k v n . f f . v v . t o o t r
+ e f g . . . f f . a . 3 t
t v v v v v 2 v
e e e e e e
c c c c c c
- ==================== = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
+ ==================== = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
link status X X X X
link status event X X
Rx interrupt X X X X
@@ -125,23 +125,23 @@ Most of these differences are summarized below.
inner L4 checksum X X
packet type parsing X X X
timesync X X
- basic stats X X X X X X
+ basic stats X X X X X X X
extended stats X X X X
stats per queue X X
EEPROM dump
registers dump
multiprocess aware X X X X
BSD nic_uio X X X X X
- Linux UIO X X X X X
+ Linux UIO X X X X X X
Linux VFIO X X X X X
other kdrv X
ARMv7
ARMv8
Power8
TILE-Gx
- x86-32 X X X X X
- x86-64 X X X X X X
+ x86-32 X X X X X X
+ x86-64 X X X X X X X
usage doc X X
design doc
perf doc
- ==================== = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
+ ==================== = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
diff --git a/doc/guides/nics/xen.rst b/doc/guides/nics/xen.rst
new file mode 100644
index 0000000..4e4531e
--- /dev/null
+++ b/doc/guides/nics/xen.rst
@@ -0,0 +1,101 @@
+.. BSD LICENSE
+ Copyright(c) 2016 Brocade Communications Systems, Inc.
+
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Brocade Communications Systems nor the names of
+ its contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+XEN Netfront Poll Mode Driver
+=============================
+
+The Xen netfront poll mode driver connects to the Xen netback driver, offering the same functionality as the
+Xen netfront driver in the Linux kernel.
+
+Supported Features
+------------------
+
+- Single TX/RX queue pair
+- Single MAC address
+
+Config File Options
+-------------------
+
+
+- ``CONFIG_RTE_LIBRTE_PMD_XEN`` (default **n**)
+
+ Toggle compilation of the ``librte_pmd_xen`` driver.
+
+
+Linux
+-----
+
+This section demonstrates how to load the Xen netfront PMD.
+
+#. Load ``xen_uio`` driver:
+
+ .. code-block:: console
+
+ modprobe uio
+ insmod ./x86_64-native-linuxapp-gcc/kmod/xen_uio.ko
+
+#. Bind the Xen network device vif-0 to the ``xen_uio`` driver:
+
+ .. code-block:: console
+
+ echo vif-0 > /sys/bus/xen/drivers/vif/unbind
+ echo vif-0 > /sys/bus/xen/drivers/xen_uio/bind
+
+#. Start ``testpmd`` with basic parameters:
+
+ .. code-block:: console
+
+ testpmd -c 0x3 -n 4 -- -i --txqflags=0x0e00 --mbuf-size=9000 --total-num-mbufs=2048
+
+ Example output:
+
+ .. code-block:: console
+
+ ...
+ EAL: Master lcore 0 is ready (tid=6e4128c0;cpuset=[0])
+ PMD: vif-0 skipping device with driver vif
+ PMD: vif-1 Creating xen-netfront PMD ethdev on socket 0
+ PMD: vif-1 uio_num = 0
+ PMD: /dev/uio0 mmap map0 (4096@0x0) to 0x7f366e419000
+ PMD: /dev/uio0 mmap map1 (4096@0x0) to 0x7f366e418000
+ PMD: /dev/uio0 mmap map2 (4096@0x0) to 0x7f366e411000
+ EAL: lcore 1 is ready (tid=4c5ff700;cpuset=[1])
+ Interactive-mode selected
+ Configuring Port 0 (socket 0)
+ rte_eth_dev_config_restore: port 0: MAC address array not supported
+ rte_eth_promiscuous_disable: Function not supported
+ rte_eth_allmulticast_disable: Function not supported
+ Port 0: 00:16:3E:0A:CC:6D
+ Checking link statuses...
+ Port 0 Link Up - speed 10000 Mbps - full-duplex
+ Done
+ rte_eth_promiscuous_enable: Function not supported
+ testpmd>
--
2.5.5
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH v3 2/3] xen: Add netfront poll mode driver
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 2/3] xen: Add netfront poll mode driver Jan Blunck
@ 2016-03-22 10:07 ` David Marchand
2016-03-22 10:42 ` Jan Blunck
0 siblings, 1 reply; 23+ messages in thread
From: David Marchand @ 2016-03-22 10:07 UTC (permalink / raw)
To: Jan Blunck; +Cc: dev, jblunck, Stephen Hemminger, Stephen Hemminger
Hello,
On Tue, Mar 22, 2016 at 10:55 AM, Jan Blunck <jblunck@infradead.org> wrote:
> +static struct eth_dev_ops xen_eth_dev_ops = {
> + /*dev*/
> + .dev_configure = xen_dev_configure,
> + .dev_close = xen_dev_close,
> + .dev_start = xen_dev_start,
> + .dev_stop = xen_dev_stop,
> + .dev_infos_get = xen_dev_info_get,
> + .link_update = xen_dev_link_update,
> + /*rxtx*/
> + .stats_get = xen_dev_stats_get,
> + .stats_reset = xen_dev_stats_reset,
> + .rx_queue_setup = xen_dev_rx_queue_setup,
> + .rx_queue_release = xen_dev_rx_queue_release,
> + .tx_queue_setup = xen_dev_tx_queue_setup,
> + .tx_queue_release = xen_dev_tx_queue_release,
> +};
Is there anything preventing it from being const ?
--
David Marchand
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH v3 2/3] xen: Add netfront poll mode driver
2016-03-22 10:07 ` David Marchand
@ 2016-03-22 10:42 ` Jan Blunck
0 siblings, 0 replies; 23+ messages in thread
From: Jan Blunck @ 2016-03-22 10:42 UTC (permalink / raw)
To: David Marchand; +Cc: dev, jblunck, Stephen Hemminger, Stephen Hemminger
On Tue, Mar 22, 2016 at 11:07 AM, David Marchand
<david.marchand@6wind.com> wrote:
> Hello,
>
> On Tue, Mar 22, 2016 at 10:55 AM, Jan Blunck <jblunck@infradead.org> wrote:
>> +static struct eth_dev_ops xen_eth_dev_ops = {
>> + /*dev*/
>> + .dev_configure = xen_dev_configure,
>> + .dev_close = xen_dev_close,
>> + .dev_start = xen_dev_start,
>> + .dev_stop = xen_dev_stop,
>> + .dev_infos_get = xen_dev_info_get,
>> + .link_update = xen_dev_link_update,
>> + /*rxtx*/
>> + .stats_get = xen_dev_stats_get,
>> + .stats_reset = xen_dev_stats_reset,
>> + .rx_queue_setup = xen_dev_rx_queue_setup,
>> + .rx_queue_release = xen_dev_rx_queue_release,
>> + .tx_queue_setup = xen_dev_tx_queue_setup,
>> + .tx_queue_release = xen_dev_tx_queue_release,
>> +};
>
> Is there anything preventing it from being const ?
>
I don't think so. Will constify it for the next round.
Thanks,
Jan
>
> --
> David Marchand
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH v3 1/3] xen: Add UIO kernel driver
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 1/3] xen: Add UIO kernel driver Jan Blunck
@ 2016-03-22 10:42 ` Thomas Monjalon
2016-03-22 11:04 ` Jan Blunck
0 siblings, 1 reply; 23+ messages in thread
From: Thomas Monjalon @ 2016-03-22 10:42 UTC (permalink / raw)
To: jblunck, shemming; +Cc: dev, Jan Blunck, Stephen Hemminger
2016-03-22 10:55, Jan Blunck:
> New UIO helper kernel driver for Xen netfront UIO poll mode driver.
>
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> Signed-off-by: Jan Blunck <jblunck@infradead.org>
Is it contributed upstream?
Is there something common with igb_uio?
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH v3 1/3] xen: Add UIO kernel driver
2016-03-22 10:42 ` Thomas Monjalon
@ 2016-03-22 11:04 ` Jan Blunck
2016-03-22 11:27 ` Thomas Monjalon
0 siblings, 1 reply; 23+ messages in thread
From: Jan Blunck @ 2016-03-22 11:04 UTC (permalink / raw)
To: Thomas Monjalon; +Cc: jblunck, Stephen Hemminger, dev, Stephen Hemminger
On Tue, Mar 22, 2016 at 11:42 AM, Thomas Monjalon
<thomas.monjalon@6wind.com> wrote:
> 2016-03-22 10:55, Jan Blunck:
>> New UIO helper kernel driver for Xen netfront UIO poll mode driver.
>>
>> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
>> Signed-off-by: Jan Blunck <jblunck@infradead.org>
>
> Is it contributed upstream?
No. Haven't planned that yet.
> Is there something common with igb_uio?
AFAIK igb_uio is mapping the bars via UIO. The Xen netfront driver
isn't simulating a PCI interface. Instead it directly maps the TX/RX
ring.
Overlap is limited to reading sysfs files. Don't know if that is worth
being extracted into a standalone header.
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH v3 1/3] xen: Add UIO kernel driver
2016-03-22 11:04 ` Jan Blunck
@ 2016-03-22 11:27 ` Thomas Monjalon
2016-03-22 14:39 ` Jan Blunck
0 siblings, 1 reply; 23+ messages in thread
From: Thomas Monjalon @ 2016-03-22 11:27 UTC (permalink / raw)
To: Jan Blunck; +Cc: jblunck, Stephen Hemminger, dev, Stephen Hemminger
2016-03-22 12:04, Jan Blunck:
> On Tue, Mar 22, 2016 at 11:42 AM, Thomas Monjalon
> <thomas.monjalon@6wind.com> wrote:
> > 2016-03-22 10:55, Jan Blunck:
> >> New UIO helper kernel driver for Xen netfront UIO poll mode driver.
> >>
> >> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> >> Signed-off-by: Jan Blunck <jblunck@infradead.org>
> >
> > Is it contributed upstream?
>
> No. Haven't planned that yet.
Integrating new kernel modules in DPDK, without upstream work,
is not planned.
> > Is there something common with igb_uio?
>
> AFAIK igb_uio is mapping the bars via UIO. The Xen netfront driver
> isn't simulating a PCI interface. Instead it directly maps the TX/RX
> ring.
>
> Overlap is limited to reading sysfs files. Don't know if that is worth
> being extracted into a standalone header.
No it is not worth extracting. I'm just trying to understand what this
module is, because the log doesn't say much.
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH v3 1/3] xen: Add UIO kernel driver
2016-03-22 11:27 ` Thomas Monjalon
@ 2016-03-22 14:39 ` Jan Blunck
0 siblings, 0 replies; 23+ messages in thread
From: Jan Blunck @ 2016-03-22 14:39 UTC (permalink / raw)
To: Thomas Monjalon; +Cc: jblunck, Stephen Hemminger, dev, Stephen Hemminger
On Tue, Mar 22, 2016 at 12:27 PM, Thomas Monjalon
<thomas.monjalon@6wind.com> wrote:
> 2016-03-22 12:04, Jan Blunck:
>> On Tue, Mar 22, 2016 at 11:42 AM, Thomas Monjalon
>> <thomas.monjalon@6wind.com> wrote:
>> > 2016-03-22 10:55, Jan Blunck:
>> >> New UIO helper kernel driver for Xen netfront UIO poll mode driver.
>> >>
>> >> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
>> >> Signed-off-by: Jan Blunck <jblunck@infradead.org>
>> >
>> > Is it contributed upstream?
>>
>> No. Haven't planned that yet.
>
> Integrating new kernel modules in DPDK, without upstream work,
> is not planned.
>
I'll look into the existing Linux kernel netfront driver and check if
upstream is actually willing to integrate the UIO part.
>> > Is there something common with igb_uio?
>>
>> AFAIK igb_uio is mapping the bars via UIO. The Xen netfront driver
>> isn't simulating a PCI interface. Instead it directly maps the TX/RX
>> ring.
>>
>> Overlap is limited to reading sysfs files. Don't know if that is worth
>> being extracted into a standalone header.
>
> No it is not worth extracting. I'm just trying to understand what this
> module is, because the log doesn't say much.
>
I can address that for the upstream submission and drop it from this series.
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver Jan Blunck
` (2 preceding siblings ...)
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 3/3] xen: Add documentation Jan Blunck
@ 2016-04-20 14:18 ` Bruce Richardson
2016-05-03 9:38 ` Xie, Huawei
2017-02-05 14:44 ` Thomas Monjalon
4 siblings, 1 reply; 23+ messages in thread
From: Bruce Richardson @ 2016-04-20 14:18 UTC (permalink / raw)
To: Jan Blunck; +Cc: dev, jblunck, shemming
On Tue, Mar 22, 2016 at 10:55:26AM +0100, Jan Blunck wrote:
> v3 changes:
> - removed fake PCI interface
> - removed struct virt_eth_driver
> - check for UIO name and version
> - added basic documentation
>
> Jan Blunck (3):
> xen: Add UIO kernel driver
> xen: Add netfront poll mode driver
> xen: Add documentation
>
Hi Jan,
any update on this series?
/Bruce
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver
2016-04-20 14:18 ` [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver Bruce Richardson
@ 2016-05-03 9:38 ` Xie, Huawei
0 siblings, 0 replies; 23+ messages in thread
From: Xie, Huawei @ 2016-05-03 9:38 UTC (permalink / raw)
To: Richardson, Bruce, Jan Blunck; +Cc: dev, jblunck, shemming
On 4/20/2016 10:19 PM, Bruce Richardson wrote:
> On Tue, Mar 22, 2016 at 10:55:26AM +0100, Jan Blunck wrote:
>> v3 changes:
>> - removed fake PCI interface
>> - removed struct virt_eth_driver
>> - check for UIO name and version
>> - added basic documentation
>>
>> Jan Blunck (3):
>> xen: Add UIO kernel driver
>> xen: Add netfront poll mode driver
>> xen: Add documentation
>>
> Hi Jan,
>
> any update on this series?
>
> /Bruce
>
Jan and Bruce:
I will find time to review this starting from this week. It takes time.
Please stay tuned.
/huawei
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver Jan Blunck
` (3 preceding siblings ...)
2016-04-20 14:18 ` [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver Bruce Richardson
@ 2017-02-05 14:44 ` Thomas Monjalon
2017-02-06 14:27 ` Konrad Rzeszutek Wilk
4 siblings, 1 reply; 23+ messages in thread
From: Thomas Monjalon @ 2017-02-05 14:44 UTC (permalink / raw)
To: Jan Blunck; +Cc: dev, Jianfeng Tan
Hi Jan,
2016-03-22 10:55, Jan Blunck:
> v3 changes:
> - removed fake PCI interface
> - removed struct virt_eth_driver
> - check for UIO name and version
> - added basic documentation
>
> Jan Blunck (3):
> xen: Add UIO kernel driver
> xen: Add netfront poll mode driver
> xen: Add documentation
Any news about this series?
It is a long time since last discussion.
There is a new maintainer for Xen-related things: Jianfeng.
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver
2017-02-05 14:44 ` Thomas Monjalon
@ 2017-02-06 14:27 ` Konrad Rzeszutek Wilk
0 siblings, 0 replies; 23+ messages in thread
From: Konrad Rzeszutek Wilk @ 2017-02-06 14:27 UTC (permalink / raw)
To: Thomas Monjalon; +Cc: Jan Blunck, dev, Jianfeng Tan
On Sun, Feb 05, 2017 at 03:44:52PM +0100, Thomas Monjalon wrote:
> Hi Jan,
>
> 2016-03-22 10:55, Jan Blunck:
> > v3 changes:
> > - removed fake PCI interface
> > - removed struct virt_eth_driver
> > - check for UIO name and version
> > - added basic documentation
> >
> > Jan Blunck (3):
> > xen: Add UIO kernel driver
> > xen: Add netfront poll mode driver
> > xen: Add documentation
>
> Any news about this series?
Perhaps reposted with xen-devel being CC-ed?
> It is a long time since last discussion.
> There is a new maintainer for Xen-related things: Jianfeng.
^ permalink raw reply [flat|nested] 23+ messages in thread
end of thread, other threads:[~2017-02-06 14:27 UTC | newest]
Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-15 15:24 [dpdk-dev] [PATCH 1/5] xen: allow choosing dom0 support at runtime Stephen Hemminger
2015-02-15 15:24 ` [dpdk-dev] [PATCH 2/5] enic: fix device to work with Xen DOM0 Stephen Hemminger
2015-03-10 7:08 ` Liu, Jijiang
2015-02-15 15:24 ` [dpdk-dev] [PATCH 3/5] xen: add phys-addr command line argument Stephen Hemminger
2015-02-26 7:55 ` Liu, Jijiang
2015-02-26 16:09 ` Stephen Hemminger
2015-02-15 15:24 ` [dpdk-dev] [PATCH 4/5] xen: add uio driver Stephen Hemminger
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver Jan Blunck
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 1/3] xen: Add UIO kernel driver Jan Blunck
2016-03-22 10:42 ` Thomas Monjalon
2016-03-22 11:04 ` Jan Blunck
2016-03-22 11:27 ` Thomas Monjalon
2016-03-22 14:39 ` Jan Blunck
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 2/3] xen: Add netfront poll mode driver Jan Blunck
2016-03-22 10:07 ` David Marchand
2016-03-22 10:42 ` Jan Blunck
2016-03-22 9:55 ` [dpdk-dev] [PATCH v3 3/3] xen: Add documentation Jan Blunck
2016-04-20 14:18 ` [dpdk-dev] [PATCH v3 0/3] xen: netfront poll mode driver Bruce Richardson
2016-05-03 9:38 ` Xie, Huawei
2017-02-05 14:44 ` Thomas Monjalon
2017-02-06 14:27 ` Konrad Rzeszutek Wilk
2015-02-15 15:24 ` [dpdk-dev] [PATCH 5/5] xen: net-front " Stephen Hemminger
2015-07-09 0:10 ` [dpdk-dev] [PATCH 1/5] xen: allow choosing dom0 support at runtime Thomas Monjalon
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).