DPDK patches and discussions
 help / color / mirror / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download: 
* [dpdk-dev] [PATCH v9 12/12] abi: fix v2.1 abi broken issue
  2015-05-29  8:45  4%       ` [dpdk-dev] [PATCH v9 00/12] Interrupt mode PMD Cunming Liang
  2015-05-29  8:45  2%         ` [dpdk-dev] [PATCH v9 08/12] ethdev: add rx intr enable, disable and ctl functions Cunming Liang
@ 2015-05-29  8:45 11%         ` Cunming Liang
  1 sibling, 0 replies; 200+ results
From: Cunming Liang @ 2015-05-29  8:45 UTC (permalink / raw)
  To: dev; +Cc: shemming, liang-min.wang

RTE_EAL_RX_INTR will be removed in v2.2. It is only used to avoid an unannounced ABI break in v2.1.
Users should make sure they understand the impact before turning on the feature.
There are two abi changes required in this interrupt patch set.
They're 1) struct rte_intr_handle; 2) struct rte_intr_conf.

Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
 drivers/net/e1000/igb_ethdev.c                     | 28 ++++++++-
 drivers/net/ixgbe/ixgbe_ethdev.c                   | 41 ++++++++++++-
 examples/l3fwd-power/main.c                        |  4 +-
 .../bsdapp/eal/include/exec-env/rte_interrupts.h   |  7 +++
 lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 12 ++++
 .../linuxapp/eal/include/exec-env/rte_interrupts.h | 68 +++++++++++++++++++++-
 lib/librte_ether/rte_ethdev.c                      |  2 +
 lib/librte_ether/rte_ethdev.h                      | 32 +++++++++-
 8 files changed, 183 insertions(+), 11 deletions(-)

diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index bbd7b74..6f29222 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -96,7 +96,9 @@ static int  eth_igb_flow_ctrl_get(struct rte_eth_dev *dev,
 static int  eth_igb_flow_ctrl_set(struct rte_eth_dev *dev,
 				struct rte_eth_fc_conf *fc_conf);
 static int eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev);
+#ifdef RTE_EAL_RX_INTR
 static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev);
+#endif
 static int eth_igb_interrupt_get_status(struct rte_eth_dev *dev);
 static int eth_igb_interrupt_action(struct rte_eth_dev *dev);
 static void eth_igb_interrupt_handler(struct rte_intr_handle *handle,
@@ -199,11 +201,15 @@ static int eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev,
 					uint16_t queue_id);
 static int eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev,
 					uint16_t queue_id);
+#ifdef RTE_EAL_RX_INTR
 static void eth_igb_assign_msix_vector(struct e1000_hw *hw, int8_t direction,
 				uint8_t queue, uint8_t msix_vector);
+#endif
 static void eth_igb_configure_msix_intr(struct rte_eth_dev *dev);
+#ifdef RTE_EAL_RX_INTR
 static void eth_igb_write_ivar(struct e1000_hw *hw, uint8_t msix_vector,
 				uint8_t index, uint8_t offset);
+#endif
 
 /*
  * Define VF Stats MACRO for Non "cleared on read" register
@@ -760,7 +766,9 @@ eth_igb_start(struct rte_eth_dev *dev)
 	struct e1000_hw *hw =
 		E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+#ifdef RTE_EAL_RX_INTR
 	uint32_t intr_vector = 0;
+#endif
 	int ret, mask;
 	uint32_t ctrl_ext;
 
@@ -801,6 +809,7 @@ eth_igb_start(struct rte_eth_dev *dev)
 	/* configure PF module if SRIOV enabled */
 	igb_pf_host_configure(dev);
 
+#ifdef RTE_EAL_RX_INTR
 	/* check and configure queue intr-vector mapping */
 	if (dev->data->dev_conf.intr_conf.rxq != 0)
 		intr_vector = dev->data->nb_rx_queues;
@@ -818,6 +827,7 @@ eth_igb_start(struct rte_eth_dev *dev)
 			return -ENOMEM;
 		}
 	}
+#endif
 
 	/* confiugre msix for rx interrupt */
 	eth_igb_configure_msix_intr(dev);
@@ -913,9 +923,11 @@ eth_igb_start(struct rte_eth_dev *dev)
 				     " no intr multiplex\n");
 	}
 
+#ifdef RTE_EAL_RX_INTR
 	/* check if rxq interrupt is enabled */
 	if (dev->data->dev_conf.intr_conf.rxq != 0)
 		eth_igb_rxq_interrupt_setup(dev);
+#endif
 
 	/* enable uio/vfio intr/eventfd mapping */
 	rte_intr_enable(intr_handle);
@@ -1007,12 +1019,14 @@ eth_igb_stop(struct rte_eth_dev *dev)
 	}
 	filter_info->twotuple_mask = 0;
 
+#ifdef RTE_EAL_RX_INTR
 	/* Clean datapath event and queue/vec mapping */
 	rte_intr_efd_disable(intr_handle);
 	if (intr_handle->intr_vec != NULL) {
 		rte_free(intr_handle->intr_vec);
 		intr_handle->intr_vec = NULL;
 	}
+#endif
 }
 
 static void
@@ -1020,7 +1034,9 @@ eth_igb_close(struct rte_eth_dev *dev)
 {
 	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct rte_eth_link link;
+#ifdef RTE_EAL_RX_INTR
 	struct rte_pci_device *pci_dev;
+#endif
 
 	eth_igb_stop(dev);
 	e1000_phy_hw_reset(hw);
@@ -1038,11 +1054,13 @@ eth_igb_close(struct rte_eth_dev *dev)
 
 	igb_dev_clear_queues(dev);
 
+#ifdef RTE_EAL_RX_INTR
 	pci_dev = dev->pci_dev;
 	if (pci_dev->intr_handle.intr_vec) {
 		rte_free(pci_dev->intr_handle.intr_vec);
 		pci_dev->intr_handle.intr_vec = NULL;
 	}
+#endif
 
 	memset(&link, 0, sizeof(link));
 	rte_igb_dev_atomic_write_link_status(dev, &link);
@@ -1867,6 +1885,7 @@ eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev)
 	return 0;
 }
 
+#ifdef RTE_EAL_RX_INTR
 /*
  * It clears the interrupt causes and enables the interrupt.
  * It will be called once only during nic initialized.
@@ -1894,6 +1913,7 @@ static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev)
 
 	return 0;
 }
+#endif
 
 /*
  * It reads ICR and gets interrupt causes, check it and set a bit flag
@@ -3750,6 +3770,7 @@ eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
 	return 0;
 }
 
+#ifdef RTE_EAL_RX_INTR
 static void
 eth_igb_write_ivar(struct e1000_hw *hw, uint8_t  msix_vector,
 			uint8_t index, uint8_t offset)
@@ -3791,6 +3812,7 @@ eth_igb_assign_msix_vector(struct e1000_hw *hw, int8_t direction,
 					((queue & 0x1) << 4) + 8 * direction);
 	}
 }
+#endif
 
 /*
  * Sets up the hardware to generate MSI-X interrupts properly
@@ -3800,18 +3822,21 @@ eth_igb_assign_msix_vector(struct e1000_hw *hw, int8_t direction,
 static void
 eth_igb_configure_msix_intr(struct rte_eth_dev *dev)
 {
+#ifdef RTE_EAL_RX_INTR
 	int queue_id;
 	uint32_t tmpval, regval, intr_mask;
 	struct e1000_hw *hw =
 		E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
 	uint32_t vec = 0;
+#endif
+	struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
 
 	/* won't configure msix register if no mapping is done
 	 * between intr vector and event fd */
 	if (!rte_intr_dp_is_en(intr_handle))
 		return;
 
+#ifdef RTE_EAL_RX_INTR
 	/* set interrupt vector for other causes */
 	if (hw->mac.type == e1000_82575) {
 		tmpval = E1000_READ_REG(hw, E1000_CTRL_EXT);
@@ -3868,6 +3893,7 @@ eth_igb_configure_msix_intr(struct rte_eth_dev *dev)
 	}
 
 	E1000_WRITE_FLUSH(hw);
+#endif
 }
 
 
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 798bb85..8c7bc99 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -174,7 +174,9 @@ static int ixgbe_dev_rss_reta_query(struct rte_eth_dev *dev,
 			uint16_t reta_size);
 static void ixgbe_dev_link_status_print(struct rte_eth_dev *dev);
 static int ixgbe_dev_lsc_interrupt_setup(struct rte_eth_dev *dev);
+#ifdef RTE_EAL_RX_INTR
 static int ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev);
+#endif
 static int ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev);
 static int ixgbe_dev_interrupt_action(struct rte_eth_dev *dev);
 static void ixgbe_dev_interrupt_handler(struct rte_intr_handle *handle,
@@ -210,8 +212,10 @@ static int ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev,
 		uint16_t queue_id);
 static int ixgbevf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev,
 		 uint16_t queue_id);
+#ifdef RTE_EAL_RX_INTR
 static void ixgbevf_set_ivar_map(struct ixgbe_hw *hw, int8_t direction,
 		 uint8_t queue, uint8_t msix_vector);
+#endif
 static void ixgbevf_configure_msix(struct rte_eth_dev *dev);
 
 /* For Eth VMDQ APIs support */
@@ -234,8 +238,10 @@ static int ixgbe_dev_rx_queue_intr_enable(struct rte_eth_dev *dev,
 					uint16_t queue_id);
 static int ixgbe_dev_rx_queue_intr_disable(struct rte_eth_dev *dev,
 					uint16_t queue_id);
+#ifdef RTE_EAL_RX_INTR
 static void ixgbe_set_ivar_map(struct ixgbe_hw *hw, int8_t direction,
 				uint8_t queue, uint8_t msix_vector);
+#endif
 static void ixgbe_configure_msix(struct rte_eth_dev *dev);
 
 static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev,
@@ -1481,7 +1487,9 @@ ixgbe_dev_start(struct rte_eth_dev *dev)
 	struct ixgbe_vf_info *vfinfo =
 		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
 	struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+#ifdef RTE_EAL_RX_INTR
 	uint32_t intr_vector = 0;
+#endif
 	int err, link_up = 0, negotiate = 0;
 	uint32_t speed = 0;
 	int mask = 0;
@@ -1514,6 +1522,7 @@ ixgbe_dev_start(struct rte_eth_dev *dev)
 	/* configure PF module if SRIOV enabled */
 	ixgbe_pf_host_configure(dev);
 
+#ifdef RTE_EAL_RX_INTR
 	/* check and configure queue intr-vector mapping */
 	if (dev->data->dev_conf.intr_conf.rxq != 0)
 		intr_vector = dev->data->nb_rx_queues;
@@ -1532,6 +1541,7 @@ ixgbe_dev_start(struct rte_eth_dev *dev)
 			return -1;
 		}
 	}
+#endif
 
 	/* confiugre msix for sleep until rx interrupt */
 	ixgbe_configure_msix(dev);
@@ -1619,9 +1629,11 @@ skip_link_setup:
 				     " no intr multiplex\n");
 	}
 
+#ifdef RTE_EAL_RX_INTR
 	/* check if rxq interrupt is enabled */
 	if (dev->data->dev_conf.intr_conf.rxq != 0)
 		ixgbe_dev_rxq_interrupt_setup(dev);
+#endif
 
 	/* enable uio/vfio intr/eventfd mapping */
 	rte_intr_enable(intr_handle);
@@ -1727,12 +1739,14 @@ ixgbe_dev_stop(struct rte_eth_dev *dev)
 	memset(filter_info->fivetuple_mask, 0,
 		sizeof(uint32_t) * IXGBE_5TUPLE_ARRAY_SIZE);
 
+#ifdef RTE_EAL_RX_INTR
 	/* Clean datapath event and queue/vec mapping */
 	rte_intr_efd_disable(intr_handle);
 	if (intr_handle->intr_vec != NULL) {
 		rte_free(intr_handle->intr_vec);
 		intr_handle->intr_vec = NULL;
 	}
+#endif
 }
 
 /*
@@ -2335,6 +2349,7 @@ ixgbe_dev_lsc_interrupt_setup(struct rte_eth_dev *dev)
  *  - On success, zero.
  *  - On failure, a negative value.
  */
+#ifdef RTE_EAL_RX_INTR
 static int
 ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev)
 {
@@ -2345,6 +2360,7 @@ ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev)
 
 	return 0;
 }
+#endif
 
 /*
  * It reads ICR and sets flag (IXGBE_EICR_LSC) for the link_update.
@@ -3127,7 +3143,9 @@ ixgbevf_dev_start(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw *hw =
 		IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+#ifdef RTE_EAL_RX_INTR
 	uint32_t intr_vector = 0;
+#endif
 	struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
 
 	int err, mask = 0;
@@ -3160,6 +3178,7 @@ ixgbevf_dev_start(struct rte_eth_dev *dev)
 
 	ixgbevf_dev_rxtx_start(dev);
 
+#ifdef RTE_EAL_RX_INTR
 	/* check and configure queue intr-vector mapping */
 	if (dev->data->dev_conf.intr_conf.rxq != 0)
 		intr_vector = dev->data->nb_rx_queues;
@@ -3177,7 +3196,7 @@ ixgbevf_dev_start(struct rte_eth_dev *dev)
 			return -ENOMEM;
 		}
 	}
-
+#endif
 	ixgbevf_configure_msix(dev);
 
 	if (dev->data->dev_conf.intr_conf.lsc != 0) {
@@ -3223,19 +3242,23 @@ ixgbevf_dev_stop(struct rte_eth_dev *dev)
 	/* disable intr eventfd mapping */
 	rte_intr_disable(intr_handle);
 
+#ifdef RTE_EAL_RX_INTR
 	/* Clean datapath event and queue/vec mapping */
 	rte_intr_efd_disable(intr_handle);
 	if (intr_handle->intr_vec != NULL) {
 		rte_free(intr_handle->intr_vec);
 		intr_handle->intr_vec = NULL;
 	}
+#endif
 }
 
 static void
 ixgbevf_dev_close(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+#ifdef RTE_EAL_RX_INTR
 	struct rte_pci_device *pci_dev;
+#endif
 
 	PMD_INIT_FUNC_TRACE();
 
@@ -3246,11 +3269,13 @@ ixgbevf_dev_close(struct rte_eth_dev *dev)
 	/* reprogram the RAR[0] in case user changed it. */
 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
 
+#ifdef RTE_EAL_RX_INTR
 	pci_dev = dev->pci_dev;
 	if (pci_dev->intr_handle.intr_vec) {
 		rte_free(pci_dev->intr_handle.intr_vec);
 		pci_dev->intr_handle.intr_vec = NULL;
 	}
+#endif
 }
 
 static void ixgbevf_set_vfta_all(struct rte_eth_dev *dev, bool on)
@@ -3834,6 +3859,7 @@ ixgbe_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
 	return 0;
 }
 
+#ifdef RTE_EAL_RX_INTR
 static void
 ixgbevf_set_ivar_map(struct ixgbe_hw *hw, int8_t direction,
 			uint8_t queue, uint8_t msix_vector)
@@ -3902,21 +3928,25 @@ ixgbe_set_ivar_map(struct ixgbe_hw *hw, int8_t direction,
 		}
 	}
 }
+#endif
 
 static void
 ixgbevf_configure_msix(struct rte_eth_dev *dev)
 {
+	struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+#ifdef RTE_EAL_RX_INTR
 	struct ixgbe_hw *hw =
 		IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
 	uint32_t q_idx;
 	uint32_t vector_idx = 0;
+#endif
 
 	/* won't configure msix register if no mapping is done
 	 * between intr vector and event fd */
 	if (!rte_intr_dp_is_en(intr_handle))
 		return;
 
+#ifdef RTE_EAL_RX_INTR
 	/* Configure all RX queues of VF */
 	for (q_idx = 0; q_idx < dev->data->nb_rx_queues; q_idx++) {
 		/* Force all queue use vector 0,
@@ -3927,6 +3957,7 @@ ixgbevf_configure_msix(struct rte_eth_dev *dev)
 
 	/* Configure VF Rx queue ivar */
 	ixgbevf_set_ivar_map(hw, -1, 1, vector_idx);
+#endif
 }
 
 /**
@@ -3937,18 +3968,21 @@ ixgbevf_configure_msix(struct rte_eth_dev *dev)
 static void
 ixgbe_configure_msix(struct rte_eth_dev *dev)
 {
+	struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+#ifdef RTE_EAL_RX_INTR
 	struct ixgbe_hw *hw =
 		IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
 	uint32_t queue_id, vec = 0;
 	uint32_t mask;
 	uint32_t gpie;
+#endif
 
 	/* won't configure msix register if no mapping is done
 	 * between intr vector and event fd */
 	if (!rte_intr_dp_is_en(intr_handle))
 		return;
 
+#ifdef RTE_EAL_RX_INTR
 	/* setup GPIE for MSI-x mode */
 	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
 	gpie |= IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_PBA_SUPPORT |
@@ -4000,6 +4034,7 @@ ixgbe_configure_msix(struct rte_eth_dev *dev)
 		  IXGBE_EIMS_LSC);
 
 	IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
+#endif
 }
 
 static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev,
diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
index 538bb93..86ff3e9 100644
--- a/examples/l3fwd-power/main.c
+++ b/examples/l3fwd-power/main.c
@@ -239,7 +239,7 @@ static struct rte_eth_conf port_conf = {
 	},
 	.intr_conf = {
 		.lsc = 1,
-		.rxq = 1, /**< rxq interrupt feature enabled */
+		.rxq = 1,
 	},
 };
 
@@ -889,7 +889,7 @@ main_loop(__attribute__((unused)) void *dummy)
 	}
 
 	/* add into event wait list */
-	if (port_conf.intr_conf.rxq && event_register(qconf) == 0)
+	if (event_register(qconf) == 0)
 		intr_en = 1;
 	else
 		RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n");
diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h
index fc2c46b..f0f6a3f 100644
--- a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h
+++ b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h
@@ -49,9 +49,16 @@ enum rte_intr_handle_type {
 struct rte_intr_handle {
 	int fd;                          /**< file descriptor */
 	enum rte_intr_handle_type type;  /**< handle type */
+#ifdef RTE_EAL_RX_INTR
+	/**
+	 * RTE_EAL_RX_INTR will be removed from v2.2.
+	 * It's only used to avoid ABI(unannounced) broken in v2.1.
+	 * Make sure being aware of the impact before turning on the feature.
+	 */
 	int max_intr;                    /**< max interrupt requested */
 	uint32_t nb_efd;                 /**< number of available efds */
 	int *intr_vec;               /**< intr vector number array */
+#endif
 };
 
 /**
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index 1b80359..abc2062 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -290,18 +290,26 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
 
 	irq_set = (struct vfio_irq_set *) irq_set_buf;
 	irq_set->argsz = len;
+#ifdef RTE_EAL_RX_INTR
 	if (!intr_handle->max_intr)
 		intr_handle->max_intr = 1;
 	else if (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID)
 		intr_handle->max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1;
 
 	irq_set->count = intr_handle->max_intr;
+#else
+	irq_set->count = 1;
+#endif
 	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *) &irq_set->data;
+#ifdef RTE_EAL_RX_INTR
 	memcpy(fd_ptr, intr_handle->efds, sizeof(intr_handle->efds));
 	fd_ptr[intr_handle->max_intr - 1] = intr_handle->fd;
+#else
+	fd_ptr[0] = intr_handle->fd;
+#endif
 
 	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
 
@@ -875,6 +883,7 @@ rte_eal_intr_init(void)
 	return -ret;
 }
 
+#ifdef RTE_EAL_RX_INTR
 static void
 eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
 {
@@ -917,6 +926,7 @@ eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
 		return;
 	} while (1);
 }
+#endif
 
 static int
 eal_epoll_process_event(struct epoll_event *evs, unsigned int n,
@@ -1054,6 +1064,7 @@ rte_epoll_ctl(int epfd, int op, int fd,
 	return 0;
 }
 
+#ifdef RTE_EAL_RX_INTR
 int
 rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd,
 		int op, unsigned int vec, void *data)
@@ -1165,3 +1176,4 @@ rte_intr_efd_disable(struct rte_intr_handle *intr_handle)
 	intr_handle->nb_efd = 0;
 	intr_handle->max_intr = 0;
 }
+#endif
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
index 7c8a62b..5390b21 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
@@ -38,6 +38,10 @@
 #ifndef _RTE_LINUXAPP_INTERRUPTS_H_
 #define _RTE_LINUXAPP_INTERRUPTS_H_
 
+#ifndef RTE_EAL_RX_INTR
+#include <rte_common.h>
+#endif
+
 #define RTE_MAX_RXTX_INTR_VEC_ID     32
 
 enum rte_intr_handle_type {
@@ -86,12 +90,19 @@ struct rte_intr_handle {
 	};
 	int fd;	 /**< interrupt event file descriptor */
 	enum rte_intr_handle_type type;  /**< handle type */
+#ifdef RTE_EAL_RX_INTR
+	/**
+	 * RTE_EAL_RX_INTR will be removed from v2.2.
+	 * It's only used to avoid ABI(unannounced) broken in v2.1.
+	 * Make sure being aware of the impact before turning on the feature.
+	 */
 	uint32_t max_intr;               /**< max interrupt requested */
 	uint32_t nb_efd;                 /**< number of available efds */
 	int efds[RTE_MAX_RXTX_INTR_VEC_ID];  /**< intr vectors/efds mapping */
 	struct rte_epoll_event elist[RTE_MAX_RXTX_INTR_VEC_ID];
 					 /**< intr vector epoll event */
 	int *intr_vec;                   /**< intr vector number array */
+#endif
 };
 
 #define RTE_EPOLL_PER_THREAD        -1  /**< to hint using per thread epfd */
@@ -162,9 +173,23 @@ rte_intr_tls_epfd(void);
  *   - On success, zero.
  *   - On failure, a negative value.
  */
-int
+#ifdef RTE_EAL_RX_INTR
+extern int
 rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
 		int epfd, int op, unsigned int vec, void *data);
+#else
+static inline int
+rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
+		int epfd, int op, unsigned int vec, void *data)
+{
+	RTE_SET_USED(intr_handle);
+	RTE_SET_USED(epfd);
+	RTE_SET_USED(op);
+	RTE_SET_USED(vec);
+	RTE_SET_USED(data);
+	return -ENOTSUP;
+}
+#endif
 
 /**
  * It enables the fastpath event fds if it's necessary.
@@ -179,8 +204,18 @@ rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
  *   - On success, zero.
  *   - On failure, a negative value.
  */
-int
+#ifdef RTE_EAL_RX_INTR
+extern int
 rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd);
+#else
+static inline int
+rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
+{
+	RTE_SET_USED(intr_handle);
+	RTE_SET_USED(nb_efd);
+	return 0;
+}
+#endif
 
 /**
  * It disable the fastpath event fds.
@@ -189,8 +224,17 @@ rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd);
  * @param intr_handle
  *   Pointer to the interrupt handle.
  */
-void
+#ifdef RTE_EAL_RX_INTR
+extern void
 rte_intr_efd_disable(struct rte_intr_handle *intr_handle);
+#else
+static inline void
+rte_intr_efd_disable(struct rte_intr_handle *intr_handle)
+{
+	RTE_SET_USED(intr_handle);
+	return;
+}
+#endif
 
 /**
  * The fastpath interrupt is enabled or not.
@@ -198,11 +242,20 @@ rte_intr_efd_disable(struct rte_intr_handle *intr_handle);
  * @param intr_handle
  *   Pointer to the interrupt handle.
  */
+#ifdef RTE_EAL_RX_INTR
 static inline int
 rte_intr_dp_is_en(struct rte_intr_handle *intr_handle)
 {
 	return !(!intr_handle->nb_efd);
 }
+#else
+static inline int
+rte_intr_dp_is_en(struct rte_intr_handle *intr_handle)
+{
+	RTE_SET_USED(intr_handle);
+	return 0;
+}
+#endif
 
 /**
  * The interrupt handle instance allows other cause or not.
@@ -211,10 +264,19 @@ rte_intr_dp_is_en(struct rte_intr_handle *intr_handle)
  * @param intr_handle
  *   Pointer to the interrupt handle.
  */
+#ifdef RTE_EAL_RX_INTR
 static inline int
 rte_intr_allow_others(struct rte_intr_handle *intr_handle)
 {
 	return !!(intr_handle->max_intr - intr_handle->nb_efd);
 }
+#else
+static inline int
+rte_intr_allow_others(struct rte_intr_handle *intr_handle)
+{
+	RTE_SET_USED(intr_handle);
+	return 1;
+}
+#endif
 
 #endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 846d7f8..823eb46 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -3282,6 +3282,7 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 	rte_spinlock_unlock(&rte_eth_dev_cb_lock);
 }
 
+#ifdef RTE_EAL_RX_INTR
 int
 rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data)
 {
@@ -3353,6 +3354,7 @@ rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
 
 	return 0;
 }
+#endif
 
 int
 rte_eth_dev_rx_intr_enable(uint8_t port_id,
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index c199d32..8bea68d 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -830,8 +830,10 @@ struct rte_eth_fdir {
 struct rte_intr_conf {
 	/** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */
 	uint16_t lsc;
+#ifdef RTE_EAL_RX_INTR
 	/** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */
 	uint16_t rxq;
+#endif
 };
 
 /**
@@ -2943,8 +2945,20 @@ int rte_eth_dev_rx_intr_disable(uint8_t port_id,
  *   - On success, zero.
  *   - On failure, a negative value.
  */
-int
+#ifdef RTE_EAL_RX_INTR
+extern int
 rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data);
+#else
+static inline int
+rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data)
+{
+	RTE_SET_USED(port_id);
+	RTE_SET_USED(epfd);
+	RTE_SET_USED(op);
+	RTE_SET_USED(data);
+	return -1;
+}
+#endif
 
 /**
  * RX Interrupt control per queue.
@@ -2967,9 +2981,23 @@ rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data);
  *   - On success, zero.
  *   - On failure, a negative value.
  */
-int
+#ifdef RTE_EAL_RX_INTR
+extern int
 rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
 			  int epfd, int op, void *data);
+#else
+static inline int
+rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
+			  int epfd, int op, void *data)
+{
+	RTE_SET_USED(port_id);
+	RTE_SET_USED(queue_id);
+	RTE_SET_USED(epfd);
+	RTE_SET_USED(op);
+	RTE_SET_USED(data);
+	return -1;
+}
+#endif
 
 /**
  * Turn on the LED on the Ethernet device.
-- 
1.8.1.4

^ permalink raw reply	[relevance 11%]

* [dpdk-dev] [PATCH v9 08/12] ethdev: add rx intr enable, disable and ctl functions
  2015-05-29  8:45  4%       ` [dpdk-dev] [PATCH v9 00/12] Interrupt mode PMD Cunming Liang
@ 2015-05-29  8:45  2%         ` Cunming Liang
  2015-05-29  8:45 11%         ` [dpdk-dev] [PATCH v9 12/12] abi: fix v2.1 abi broken issue Cunming Liang
  1 sibling, 0 replies; 200+ results
From: Cunming Liang @ 2015-05-29  8:45 UTC (permalink / raw)
  To: dev; +Cc: shemming, liang-min.wang

The patch adds two dev_ops functions to enable and disable rx queue interrupts.
In addition, it adds rte_eth_dev_rx_intr_ctl/rte_eth_dev_rx_intr_ctl_q to support per-port or per-queue rx intr event set.

Signed-off-by: Danny Zhou <danny.zhou@intel.com>
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
v9 changes
 - remove unnecessary check after rte_eth_dev_is_valid_port.
   the same as http://www.dpdk.org/dev/patchwork/patch/4784

v8 changes
 - add additional check for EEXIST

v7 changes
 - remove rx_intr_vec_get
 - add rx_intr_ctl and rx_intr_ctl_q

v6 changes
 - add rx_intr_vec_get to retrieve the vector num of the queue.

v5 changes
 - Rebase the patchset onto the HEAD

v4 changes
 - Export interrupt enable/disable functions for shared libraries
 - Put new functions at the end of eth_dev_ops to avoid breaking ABI

v3 changes
 - Add return value for interrupt enable/disable functions

 lib/librte_ether/rte_ethdev.c          | 107 +++++++++++++++++++++++++++++++++
 lib/librte_ether/rte_ethdev.h          | 104 ++++++++++++++++++++++++++++++++
 lib/librte_ether/rte_ether_version.map |   4 ++
 3 files changed, 215 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 024fe8b..846d7f8 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -3281,6 +3281,113 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 	}
 	rte_spinlock_unlock(&rte_eth_dev_cb_lock);
 }
+
+int
+rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data)
+{
+	uint32_t vec;
+	struct rte_eth_dev *dev;
+	struct rte_intr_handle *intr_handle;
+	uint16_t qid;
+	int rc;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		PMD_DEBUG_TRACE("Invalid port_id=%u\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	intr_handle = &dev->pci_dev->intr_handle;
+	if (!intr_handle->intr_vec) {
+		PMD_DEBUG_TRACE("RX Intr vector unset\n");
+		return -EPERM;
+	}
+
+	for (qid = 0; qid < dev->data->nb_rx_queues; qid++) {
+		vec = intr_handle->intr_vec[qid];
+		rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data);
+		if (rc && rc != -EEXIST) {
+			PMD_DEBUG_TRACE("p %u q %u rx ctl error"
+					" op %d epfd %d vec %u\n",
+					port_id, qid, op, epfd, vec);
+		}
+	}
+
+	return 0;
+}
+
+int
+rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
+			  int epfd, int op, void *data)
+{
+	uint32_t vec;
+	struct rte_eth_dev *dev;
+	struct rte_intr_handle *intr_handle;
+	int rc;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		PMD_DEBUG_TRACE("Invalid port_id=%u\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (queue_id >= dev->data->nb_rx_queues) {
+		PMD_DEBUG_TRACE("Invalid RX queue_id=%u\n", queue_id);
+		return -EINVAL;
+	}
+
+	intr_handle = &dev->pci_dev->intr_handle;
+	if (!intr_handle->intr_vec) {
+		PMD_DEBUG_TRACE("RX Intr vector unset\n");
+		return -EPERM;
+	}
+
+	vec = intr_handle->intr_vec[queue_id];
+	rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data);
+	if (rc && rc != -EEXIST) {
+		PMD_DEBUG_TRACE("p %u q %u rx ctl error"
+				" op %d epfd %d vec %u\n",
+				port_id, queue_id, op, epfd, vec);
+		return rc;
+	}
+
+	return 0;
+}
+
+int
+rte_eth_dev_rx_intr_enable(uint8_t port_id,
+			   uint16_t queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP);
+	return (*dev->dev_ops->rx_queue_intr_enable)(dev, queue_id);
+}
+
+int
+rte_eth_dev_rx_intr_disable(uint8_t port_id,
+			    uint16_t queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP);
+	return (*dev->dev_ops->rx_queue_intr_disable)(dev, queue_id);
+}
+
 #ifdef RTE_NIC_BYPASS
 int rte_eth_dev_bypass_init(uint8_t port_id)
 {
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 16dbe00..c199d32 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -830,6 +830,8 @@ struct rte_eth_fdir {
 struct rte_intr_conf {
 	/** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */
 	uint16_t lsc;
+	/** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */
+	uint16_t rxq;
 };
 
 /**
@@ -1035,6 +1037,14 @@ typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
 				    const struct rte_eth_txconf *tx_conf);
 /**< @internal Setup a transmit queue of an Ethernet device. */
 
+typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
+				    uint16_t rx_queue_id);
+/**< @internal Enable interrupt of a receive queue of an Ethernet device. */
+
+typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
+				    uint16_t rx_queue_id);
+/**< @internal Disable interrupt of a receive queue of an Ethernet device. */
+
 typedef void (*eth_queue_release_t)(void *queue);
 /**< @internal Release memory resources allocated by given RX/TX queue. */
 
@@ -1386,6 +1396,10 @@ struct eth_dev_ops {
 	/** Get current RSS hash configuration. */
 	rss_hash_conf_get_t rss_hash_conf_get;
 	eth_filter_ctrl_t              filter_ctrl;          /**< common filter control*/
+
+	/** Enable/disable Rx queue interrupt. */
+	eth_rx_enable_intr_t       rx_queue_intr_enable; /**< Enable Rx queue interrupt. */
+	eth_rx_disable_intr_t      rx_queue_intr_disable; /**< Disable Rx queue interrupt.*/
 };
 
 /**
@@ -2868,6 +2882,96 @@ void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 				enum rte_eth_event_type event);
 
 /**
+ * When there is no rx packet coming in Rx Queue for a long time, we can
+ * sleep lcore related to RX Queue for power saving, and enable rx interrupt
+ * to be triggered when rx packet arrives.
+ *
+ * The rte_eth_dev_rx_intr_enable() function enables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_intr_enable(uint8_t port_id,
+			       uint16_t queue_id);
+
+/**
+ * When lcore wakes up from rx interrupt indicating packet coming, disable rx
+ * interrupt and returns to polling mode.
+ *
+ * The rte_eth_dev_rx_intr_disable() function disables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_intr_disable(uint8_t port_id,
+				uint16_t queue_id);
+
+/**
+ * RX Interrupt control per port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param epfd
+ *   Epoll instance fd which the intr vector associated to.
+ *   Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance.
+ * @param op
+ *   The operation be performed for the vector.
+ *   Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}.
+ * @param data
+ *   User raw data.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data);
+
+/**
+ * RX Interrupt control per queue.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param epfd
+ *   Epoll instance fd which the intr vector associated to.
+ *   Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance.
+ * @param op
+ *   The operation be performed for the vector.
+ *   Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}.
+ * @param data
+ *   User raw data.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
+			  int epfd, int op, void *data);
+
+/**
  * Turn on the LED on the Ethernet device.
  * This function turns on the LED on the Ethernet device.
  *
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index a2d25a6..2799b99 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -48,6 +48,10 @@ DPDK_2.0 {
 	rte_eth_dev_rss_hash_update;
 	rte_eth_dev_rss_reta_query;
 	rte_eth_dev_rss_reta_update;
+	rte_eth_dev_rx_intr_ctl;
+	rte_eth_dev_rx_intr_ctl_q;
+	rte_eth_dev_rx_intr_disable;
+	rte_eth_dev_rx_intr_enable;
 	rte_eth_dev_rx_queue_start;
 	rte_eth_dev_rx_queue_stop;
 	rte_eth_dev_set_link_down;
-- 
1.8.1.4

^ permalink raw reply	[relevance 2%]

* [dpdk-dev] [PATCH v9 00/12] Interrupt mode PMD
  2015-05-21  8:55  2%     ` [dpdk-dev] [PATCH v8 00/11] Interrupt mode PMD Cunming Liang
    2015-05-21  8:56  2%       ` [dpdk-dev] [PATCH v8 08/11] ethdev: add rx intr enable, disable and ctl functions Cunming Liang
@ 2015-05-29  8:45  4%       ` Cunming Liang
  2015-05-29  8:45  2%         ` [dpdk-dev] [PATCH v9 08/12] ethdev: add rx intr enable, disable and ctl functions Cunming Liang
  2015-05-29  8:45 11%         ` [dpdk-dev] [PATCH v9 12/12] abi: fix v2.1 abi broken issue Cunming Liang
  2 siblings, 2 replies; 200+ results
From: Cunming Liang @ 2015-05-29  8:45 UTC (permalink / raw)
  To: dev; +Cc: shemming, liang-min.wang

v9 changes
 - code rework to fix open comment
 - bug fix for igb lsc when both lsc and rxq are enabled in vfio-msix
 - new patch to turn off the feature by default so as to avoid breaking the v2.1 ABI

v8 changes
 - remove condition check for only vfio-msix
 - add multiplex intr support when only one intr vector allowed
 - lsc and rxq interrupt runtime enable decision
 - add safe event delete while the event wakeup execution happens

v7 changes
 - decouple epoll event and intr operation
 - add condition check in the case intr vector is disabled
 - renaming some APIs

v6 changes
 - split rte_intr_wait_rx_pkt into two APIs 'wait' and 'set'.
 - rewrite rte_intr_rx_wait/rte_intr_rx_set.
 - using vector number instead of queue_id as interrupt API params.
 - patch reorder and split.

v5 changes
 - Rebase the patchset onto the HEAD
 - Isolate ethdev from EAL for new-added wait-for-rx interrupt function
 - Export wait-for-rx interrupt function for shared libraries
 - Split-off a new patch file for changed struct rte_intr_handle that
   other patches depend on, to avoid breaking git bisect
 - Change sample application to accommodate EAL function spec change
   accordingly

v4 changes
 - Export interrupt enable/disable functions for shared libraries
 - Adjust position of new-added structure fields and functions to
   avoid breaking ABI
 
v3 changes
 - Add return value for interrupt enable/disable functions
 - Move spinlock from PMD to L3fwd-power
 - Remove unnecessary variables in e1000_mac_info
 - Fix miscellaneous review comments
 
v2 changes
 - Fix compilation issue in Makefile for missed header file.
 - Consolidate internal and community review comments of v1 patch set.
 
The patch series introduce low-latency one-shot rx interrupt into DPDK with
polling and interrupt mode switch control example.
 
DPDK userspace interrupt notification and handling mechanism is based on UIO
with below limitation:
1) It is designed to handle LSC interrupt only with inefficient suspended
   pthread wakeup procedure (e.g. UIO wakes up LSC interrupt handling thread
   which then wakes up DPDK polling thread). In this way, it introduces
   non-deterministic wakeup latency for DPDK polling thread as well as packet
   latency if it is used to handle Rx interrupt.
2) UIO only supports a single interrupt vector which has to be shared by
   LSC interrupt and interrupts assigned to dedicated rx queues.
 
This patchset includes below features:
1) Enable one-shot rx queue interrupt in ixgbe PMD(PF & VF) and igb PMD(PF only).
2) Build on top of the VFIO mechanism instead of UIO, so it could support
   up to 64 interrupt vectors for rx queue interrupts.
3) Have 1 DPDK polling thread handle per Rx queue interrupt with a dedicated
   VFIO eventfd, which eliminates non-deterministic pthread wakeup latency in
   user space.
4) Demonstrate interrupts control APIs and userspace NAIP-like polling/interrupt
   switch algorithms in L3fwd-power example.

Known limitations:
1) It does not work for UIO due to a single interrupt eventfd shared by LSC
   and rx queue interrupt handlers causes a mess. [FIXED]
2) LSC interrupt is not supported by VF driver, so it is by default disabled
   in L3fwd-power now. Feel free to turn it on if you want to support both LSC
   and rx queue interrupts on a PF.

Cunming Liang (12):
  eal/linux: add interrupt vectors support in intr_handle
  eal/linux: add rte_epoll_wait/ctl support
  eal/linux: add API to set rx interrupt event monitor
  eal/linux: fix comments typo on vfio msi
  eal/linux: add interrupt vectors handling on VFIO
  eal/linux: standalone intr event fd create support
  eal/bsd: dummy for new intr definition
  ethdev: add rx intr enable, disable and ctl functions
  ixgbe: enable rx queue interrupts for both PF and VF
  igb: enable rx queue interrupts for PF
  l3fwd-power: enable one-shot rx interrupt and polling/interrupt mode
    switch
  abi: fix v2.1 abi broken issue

 drivers/net/e1000/igb_ethdev.c                     | 311 ++++++++++--
 drivers/net/ixgbe/ixgbe_ethdev.c                   | 519 ++++++++++++++++++++-
 drivers/net/ixgbe/ixgbe_ethdev.h                   |   4 +
 examples/l3fwd-power/main.c                        | 207 ++++++--
 lib/librte_eal/bsdapp/eal/eal_interrupts.c         |  19 +
 .../bsdapp/eal/include/exec-env/rte_interrupts.h   |  81 ++++
 lib/librte_eal/bsdapp/eal/rte_eal_version.map      |   5 +
 lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 358 ++++++++++++--
 .../linuxapp/eal/include/exec-env/rte_interrupts.h | 219 +++++++++
 lib/librte_eal/linuxapp/eal/rte_eal_version.map    |   8 +
 lib/librte_ether/rte_ethdev.c                      | 109 +++++
 lib/librte_ether/rte_ethdev.h                      | 132 ++++++
 lib/librte_ether/rte_ether_version.map             |   4 +
 13 files changed, 1851 insertions(+), 125 deletions(-)

-- 
1.8.1.4

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [PATCH v3 01/10] table: added structure for storing table stats
  2015-05-28 19:32  3%         ` Dumitrescu, Cristian
@ 2015-05-28 21:41  3%           ` Stephen Hemminger
  0 siblings, 0 replies; 200+ results
From: Stephen Hemminger @ 2015-05-28 21:41 UTC (permalink / raw)
  To: Dumitrescu, Cristian; +Cc: dev

On Thu, 28 May 2015 19:32:32 +0000
"Dumitrescu, Cristian" <cristian.dumitrescu@intel.com> wrote:

> This is just adding  a new field at the end of an API data structure. Based on input from multiple people and after reviewing the rules listed on http://dpdk.org/doc/guides/rel_notes/abi.html , I think this is an acceptable change. There are other patches in flight on this mailing list that are in the same situation. Any typical/well behaved application will not break due to this change.

Expanding a structure can be okay but:
  1. The allocation will always have to take place within the library.
     If you let the application put the structure on the stack or allocate it on its own, the ABI would break.

  2. The structure must not be used as a return by reference.
     For example, this would break if sizeof(struct my_stats) changed.

     void foo() {
             struct my_stats stats;
	     int i_will_get_clobbered;
	...
		rte_dpdk_get_stats(obj, &stats)
	}

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v3 01/10] table: added structure for storing table stats
  2015-05-26 21:57  0%       ` Stephen Hemminger
@ 2015-05-28 19:32  3%         ` Dumitrescu, Cristian
  2015-05-28 21:41  3%           ` Stephen Hemminger
  0 siblings, 1 reply; 200+ results
From: Dumitrescu, Cristian @ 2015-05-28 19:32 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev



> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Tuesday, May 26, 2015 10:58 PM
> To: Dumitrescu, Cristian
> Cc: Gajdzica, MaciejX T; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 01/10] table: added structure for storing
> table stats
> 
> On Tue, 26 May 2015 21:40:42 +0000
> "Dumitrescu, Cristian" <cristian.dumitrescu@intel.com> wrote:
> 
> >
> >
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Stephen
> > > Hemminger
> > > Sent: Tuesday, May 26, 2015 3:58 PM
> > > To: Gajdzica, MaciejX T
> > > Cc: dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [PATCH v3 01/10] table: added structure for
> storing
> > > table stats
> > >
> > > On Tue, 26 May 2015 14:39:38 +0200
> > > Maciej Gajdzica <maciejx.t.gajdzica@intel.com> wrote:
> > >
> > > > +
> > > >  /** Lookup table interface defining the lookup table operation */
> > > >  struct rte_table_ops {
> > > >  	rte_table_op_create f_create;       /**< Create */
> > > > @@ -194,6 +218,7 @@ struct rte_table_ops {
> > > >  	rte_table_op_entry_add f_add;       /**< Entry add */
> > > >  	rte_table_op_entry_delete f_delete; /**< Entry delete */
> > > >  	rte_table_op_lookup f_lookup;       /**< Lookup */
> > > > +	rte_table_op_stats_read f_stats;	/**< Stats */
> > > >  };
> > >
> > > Another good idea, which is an ABI change.
> >
> > This is simply adding a new API function, this is not changing any function
> prototype. There is no change required in the map file of this library. Is there
> anything we should have done and we did not do?
> >
> 
> But if I built an external set of code which had rte_table_ops (don't worry I
> haven't)
> and that binary ran with the new definition, the core code it table would
> reference
> outside the (old version) of rte_table_ops structure and find garbage.

This is just adding  a new field at the end of an API data structure. Based on input from multiple people and after reviewing the rules listed on http://dpdk.org/doc/guides/rel_notes/abi.html , I think this is an acceptable change. There are other patches in flight on this mailing list that are in the same situation. Any typical/well behaved application will not break due to this change.

^ permalink raw reply	[relevance 3%]

* [dpdk-dev] [PATCH 4/5] rte_sched: hide structure of port hierarchy
  2015-05-27 18:10  3% [dpdk-dev] [PATCH v4 0/5] rte_sched: cleanup and API enhancements Stephen Hemminger
@ 2015-05-27 18:10  3% ` Stephen Hemminger
  0 siblings, 0 replies; 200+ results
From: Stephen Hemminger @ 2015-05-27 18:10 UTC (permalink / raw)
  To: cristian.dumitrescu; +Cc: dev

Right now the scheduler hierarchy is encoded as a bitfield
that is visible as part of the ABI. This creates a barrier
limiting future expansion of the hierarchy.

As a transitional step, hide the actual layout of the hierarchy
and mark the exposed structure as deprecated. This will allow for
expansion in a later release.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/librte_sched/rte_sched.c           | 54 ++++++++++++++++++++++++++++++++++
 lib/librte_sched/rte_sched.h           | 54 ++++++++++------------------------
 lib/librte_sched/rte_sched_version.map |  3 ++
 3 files changed, 73 insertions(+), 38 deletions(-)

diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
index b1655b4..9c9419d 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -184,6 +184,21 @@ enum grinder_state {
 	e_GRINDER_READ_MBUF
 };
 
+/*
+ * Path through the scheduler hierarchy used by the scheduler enqueue
+ * operation to identify the destination queue for the current
+ * packet. Stored in the field pkt.hash.sched of struct rte_mbuf of
+ * each packet, typically written by the classification stage and read
+ * by scheduler enqueue.
+ */
+struct __rte_sched_port_hierarchy {
+	uint32_t queue:2;                /**< Queue ID (0 .. 3) */
+	uint32_t traffic_class:2;        /**< Traffic class ID (0 .. 3)*/
+	uint32_t pipe:20;                /**< Pipe ID */
+	uint32_t subport:6;              /**< Subport ID */
+	uint32_t color:2;                /**< Color */
+};
+
 struct rte_sched_grinder {
 	/* Pipe cache */
 	uint16_t pcache_qmask[RTE_SCHED_GRINDER_PCACHE_SIZE];
@@ -910,6 +925,45 @@ rte_sched_pipe_config(struct rte_sched_port *port,
 	return 0;
 }
 
+void
+rte_sched_port_pkt_write(struct rte_mbuf *pkt,
+			 uint32_t subport, uint32_t pipe, uint32_t traffic_class,
+			 uint32_t queue, enum rte_meter_color color)
+{
+	struct __rte_sched_port_hierarchy *sched
+		= (struct __rte_sched_port_hierarchy *) &pkt->hash.sched;
+
+	sched->color = (uint32_t) color;
+	sched->subport = subport;
+	sched->pipe = pipe;
+	sched->traffic_class = traffic_class;
+	sched->queue = queue;
+}
+
+void
+rte_sched_port_pkt_read_tree_path(const struct rte_mbuf *pkt,
+				  uint32_t *subport, uint32_t *pipe,
+				  uint32_t *traffic_class, uint32_t *queue)
+{
+	const struct __rte_sched_port_hierarchy *sched
+		= (const struct __rte_sched_port_hierarchy *) &pkt->hash.sched;
+
+	*subport = sched->subport;
+	*pipe = sched->pipe;
+	*traffic_class = sched->traffic_class;
+	*queue = sched->queue;
+}
+
+
+enum rte_meter_color
+rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt)
+{
+	const struct __rte_sched_port_hierarchy *sched
+		= (const struct __rte_sched_port_hierarchy *) &pkt->hash.sched;
+
+	return (enum rte_meter_color) sched->color;
+}
+
 int
 rte_sched_subport_read_stats(struct rte_sched_port *port,
 	uint32_t subport_id,
diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h
index e6bba22..f7c0b8e 100644
--- a/lib/librte_sched/rte_sched.h
+++ b/lib/librte_sched/rte_sched.h
@@ -195,17 +195,19 @@ struct rte_sched_port_params {
 #endif
 };
 
-/** Path through the scheduler hierarchy used by the scheduler enqueue operation to
-identify the destination queue for the current packet. Stored in the field hash.sched
-of struct rte_mbuf of each packet, typically written by the classification stage and read by
-scheduler enqueue.*/
+/*
+ * Path through scheduler hierarchy
+ *
+ * Note: direct access to internal bitfields is deprecated to allow for future expansion.
+ * Use rte_sched_port_pkt_read/write API instead
+ */
 struct rte_sched_port_hierarchy {
 	uint32_t queue:2;                /**< Queue ID (0 .. 3) */
 	uint32_t traffic_class:2;        /**< Traffic class ID (0 .. 3)*/
 	uint32_t pipe:20;                /**< Pipe ID */
 	uint32_t subport:6;              /**< Subport ID */
 	uint32_t color:2;                /**< Color */
-};
+} __attribute__ ((deprecated));
 
 /*
  * Configuration
@@ -328,11 +330,6 @@ rte_sched_queue_read_stats(struct rte_sched_port *port,
 	struct rte_sched_queue_stats *stats,
 	uint16_t *qlen);
 
-/*
- * Run-time
- *
- ***/
-
 /**
  * Scheduler hierarchy path write to packet descriptor. Typically called by the
  * packet classification stage.
@@ -348,18 +345,10 @@ rte_sched_queue_read_stats(struct rte_sched_port *port,
  * @param queue
  *   Queue ID within pipe traffic class (0 .. 3)
  */
-static inline void
+void
 rte_sched_port_pkt_write(struct rte_mbuf *pkt,
-	uint32_t subport, uint32_t pipe, uint32_t traffic_class, uint32_t queue, enum rte_meter_color color)
-{
-	struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->hash.sched;
-
-	sched->color = (uint32_t) color;
-	sched->subport = subport;
-	sched->pipe = pipe;
-	sched->traffic_class = traffic_class;
-	sched->queue = queue;
-}
+			 uint32_t subport, uint32_t pipe, uint32_t traffic_class,
+			 uint32_t queue, enum rte_meter_color color);
 
 /**
  * Scheduler hierarchy path read from packet descriptor (struct rte_mbuf). Typically
@@ -378,24 +367,13 @@ rte_sched_port_pkt_write(struct rte_mbuf *pkt,
  *   Queue ID within pipe traffic class (0 .. 3)
  *
  */
-static inline void
-rte_sched_port_pkt_read_tree_path(struct rte_mbuf *pkt, uint32_t *subport, uint32_t *pipe, uint32_t *traffic_class, uint32_t *queue)
-{
-	struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->hash.sched;
-
-	*subport = sched->subport;
-	*pipe = sched->pipe;
-	*traffic_class = sched->traffic_class;
-	*queue = sched->queue;
-}
-
-static inline enum rte_meter_color
-rte_sched_port_pkt_read_color(struct rte_mbuf *pkt)
-{
-	struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->hash.sched;
+void
+rte_sched_port_pkt_read_tree_path(const struct rte_mbuf *pkt,
+				  uint32_t *subport, uint32_t *pipe,
+				  uint32_t *traffic_class, uint32_t *queue);
 
-	return (enum rte_meter_color) sched->color;
-}
+enum rte_meter_color
+rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt);
 
 /**
  * Hierarchical scheduler port enqueue. Writes up to n_pkts to port scheduler and
diff --git a/lib/librte_sched/rte_sched_version.map b/lib/librte_sched/rte_sched_version.map
index 9f74e8b..6626a74 100644
--- a/lib/librte_sched/rte_sched_version.map
+++ b/lib/librte_sched/rte_sched_version.map
@@ -17,6 +17,9 @@ DPDK_2.0 {
 	rte_sched_queue_read_stats;
 	rte_sched_subport_config;
 	rte_sched_subport_read_stats;
+	rte_sched_port_pkt_write;
+	rte_sched_port_pkt_read_tree_path;
+	rte_sched_port_pkt_read_color;
 
 	local: *;
 };
-- 
2.1.4

^ permalink raw reply	[relevance 3%]

* [dpdk-dev] [PATCH v4 0/5] rte_sched: cleanup and API enhancements
@ 2015-05-27 18:10  3% Stephen Hemminger
  2015-05-27 18:10  3% ` [dpdk-dev] [PATCH 4/5] rte_sched: hide structure of port hierarchy Stephen Hemminger
  0 siblings, 1 reply; 200+ results
From: Stephen Hemminger @ 2015-05-27 18:10 UTC (permalink / raw)
  To: cristian.dumitrescu; +Cc: dev

This fixes some small issues with rte_sched API and sets stage
for enhancements in later release. Unfortunately, several things
can not be done now because of the ABI rules.

Stephen Hemminger (5):
  rte_sched: make RED optional at runtime
  rte_sched: don't put tabs in log messages
  rte_sched: use correct log level
  rte_sched: hide structure of port hierarchy
  rte_sched: allow reading without clearing

 app/test/test_sched.c                  |   4 +-
 lib/librte_sched/rte_sched.c           | 157 +++++++++++++++++++++++++--------
 lib/librte_sched/rte_sched.h           |  89 +++++++++----------
 lib/librte_sched/rte_sched_version.map |   5 ++
 4 files changed, 171 insertions(+), 84 deletions(-)

-- 
2.1.4

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH 1/4] kni: add function to query the name of a kni object
       [not found]         ` <5565D195.9040701@bisdn.de>
@ 2015-05-27 15:36  3%       ` Bruce Richardson
  0 siblings, 0 replies; 200+ results
From: Bruce Richardson @ 2015-05-27 15:36 UTC (permalink / raw)
  To: Marc Sune; +Cc: dev

On Wed, May 27, 2015 at 04:15:49PM +0200, Marc Sune wrote:
> 
> 
> On 27/05/15 15:55, Bruce Richardson wrote:
> >On Wed, May 27, 2015 at 03:52:34PM +0200, Marc Sune wrote:
> >>
> >>On 27/05/15 15:47, Bruce Richardson wrote:
> >>>When a KNI object is created, a name is assigned to it which is stored
> >>>internally. There is also an API function to look up a KNI object by
> >>>name, but there is no API to query the current name of an existing
> >>>KNI object. This patch adds just such an API.
> >>>
> >>>Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> >>>---
> >>>  lib/librte_kni/rte_kni.c           |  6 ++++++
> >>>  lib/librte_kni/rte_kni.h           | 10 ++++++++++
> >>>  lib/librte_kni/rte_kni_version.map |  1 +
> >>>  3 files changed, 17 insertions(+)
> >>>
> >>>diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
> >>>index 4e70fa0..c5a0089 100644
> >>>--- a/lib/librte_kni/rte_kni.c
> >>>+++ b/lib/librte_kni/rte_kni.c
> >>>@@ -674,6 +674,12 @@ rte_kni_get(const char *name)
> >>>  	return NULL;
> >>>  }
> >>>+const char *
> >>>+rte_kni_get_name(const struct rte_kni *kni)
> >>>+{
> >>>+	return kni->name;
> >>>+}
> >>Since a pointer to the kni context (struct rte_kni) is exposed to the user
> >>(rte_kni_get() and rte_kni_alloc ()), and the field is directly in the
> >>struct, is this API call really necessary? I would only see this necessary
> >>if the API would only expose a handle, like a port_id for ethdev
> >>
> >>Marc
> >The structure definition is in rte_kni.c, not in the header file, so applications
> >can't read the name directly. In other words, the create API just exposes a handle.
> >[The structure in the header is the conf structure, not the full kni struct]
> 
> Ops, you are right. I overlooked that. What about:
> 
> extern void rte_kni_get_config(const struct rte_kni *kni, struct
> rte_kni_conf* conf);
> 
> which fills in (copies) the fields of conf would allow to recover the
> original configuration, including the name? It is closer
> rte_eth_dev_info_get (unfortunately rte_kni_info_get is taken by the
> deprecated API), and would work if we add more params to rte_kni_conf.
> 
> Thanks
> marc
> 

Given the issues that have been flagged recently around ABI compatibility, I
don't think I'd introduce such an API. If provided like you describe, it makes
the calling application very dependent upon the size and structure of the
conf structure. Having a specific function like this to return specific values
is safer that way.

An alternative is to have a function which returns the conf structure as
a return value, rather than as an in-out arg. That would allow us to add
fields to the end of the structure without breaking applications using that particular
function. This would require us to store the entire conf structure inside the
rte_kni structure though, so we can return a const pointer to it. This is a bit more
invasive of a change.

Overall, I prefer the query-single value option, as I think it's generally the
best-practice for working with structures going forward. [Neil, please correct
me if I'm wrong here!]. If however, you see a current need for looking at the
other fields inside the KNI, I can see about changing things to return the
whole config structure as a const pointer.

/Bruce

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v8 01/11] eal/linux: add interrupt vectors support in intr_handle
  2015-05-22 16:52  5%                   ` Stephen Hemminger
@ 2015-05-27 10:33  4%                     ` Neil Horman
  0 siblings, 0 replies; 200+ results
From: Neil Horman @ 2015-05-27 10:33 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, liang-min.wang

On Fri, May 22, 2015 at 09:52:06AM -0700, Stephen Hemminger wrote:
> On Fri, 22 May 2015 00:05:36 +0000
> Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > On Thu, May 21, 2015 at 11:14:00AM -0700, Stephen Hemminger wrote:
> > > On Thu, 21 May 2015 13:58:46 -0400
> > > Neil Horman <nhorman@tuxdriver.com> wrote:
> > > 
> > > > On Thu, May 21, 2015 at 10:43:00AM -0700, Stephen Hemminger wrote:
> > > > > On Thu, 21 May 2015 06:32:02 -0400
> > > > > Neil Horman <nhorman@tuxdriver.com> wrote:
> > > > > 
> > > > > > On Thu, May 21, 2015 at 04:55:53PM +0800, Cunming Liang wrote:
> > > > > > > The patch adds interrupt vectors support in rte_intr_handle.
> > > > > > > 'vec_en' is set when interrupt vectors are detected and associated event fds are set.
> > > > > > > Those event fds are stored in efds[].
> > > > > > > 'intr_vec' is reserved for device driver to initialize the vector mapping table.
> > > > > > > When the event fds add to a specified epoll instance, 'elist' will hold the rte_epoll_event object pointer.
> > > > > > > 
> > > > > > > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > > > > > > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > > > > > > ---
> > > > > > > v7 changes:
> > > > > > >  - add eptrs[], it's used to store the register rte_epoll_event instances.
> > > > > > >  - add vec_en, to log the vector capability status.
> > > > > > > 
> > > > > > > v6 changes:
> > > > > > >  - add mapping table between irq vector number and queue id.
> > > > > > > 
> > > > > > > v5 changes:
> > > > > > >  - Create this new patch file for changed struct rte_intr_handle that
> > > > > > >    other patches depend on, to avoid breaking git bisect.
> > > > > > > 
> > > > > > >  lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h | 10 ++++++++++
> > > > > > >  1 file changed, 10 insertions(+)
> > > > > > > 
> > > > > > > diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > > > > > index 6a159c7..27174df 100644
> > > > > > > --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > > > > > +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > > > > > @@ -38,6 +38,8 @@
> > > > > > >  #ifndef _RTE_LINUXAPP_INTERRUPTS_H_
> > > > > > >  #define _RTE_LINUXAPP_INTERRUPTS_H_
> > > > > > >  
> > > > > > > +#define RTE_MAX_RXTX_INTR_VEC_ID     32
> > > > > > > +
> > > > > > >  enum rte_intr_handle_type {
> > > > > > >  	RTE_INTR_HANDLE_UNKNOWN = 0,
> > > > > > >  	RTE_INTR_HANDLE_UIO,      /**< uio device handle */
> > > > > > > @@ -48,6 +50,8 @@ enum rte_intr_handle_type {
> > > > > > >  	RTE_INTR_HANDLE_MAX
> > > > > > >  };
> > > > > > >  
> > > > > > > +struct rte_epoll_event;
> > > > > > > +
> > > > > > >  /** Handle for interrupts. */
> > > > > > >  struct rte_intr_handle {
> > > > > > >  	union {
> > > > > > > @@ -57,6 +61,12 @@ struct rte_intr_handle {
> > > > > > >  	};
> > > > > > >  	int fd;	 /**< interrupt event file descriptor */
> > > > > > >  	enum rte_intr_handle_type type;  /**< handle type */
> > > > > > > +	uint32_t max_intr;               /**< max interrupt requested */
> > > > > > > +	uint32_t nb_efd;                 /**< number of available efds */
> > > > > > > +	int efds[RTE_MAX_RXTX_INTR_VEC_ID];  /**< intr vectors/efds mapping */
> > > > > > > +	struct rte_epoll_event *elist[RTE_MAX_RXTX_INTR_VEC_ID];
> > > > > > > +					 /**< intr vector epoll event ptr */
> > > > > > > +	int *intr_vec;                   /**< intr vector number array */
> > > > > > >  };
> > > > > > >    
> > > > > > 
> > > > > > This is going to be ABI breaking if this from test_interrupts.c:
> > > > > > static struct rte_intr_handle intr_handles[TEST_INTERRUPT_HANDLE_MAX];
> > > > > > 
> > > > > > is a plausible way of using this structure.  Even putting the data at the end of
> > > > > > the structure won't help, as the array indicies are off
> > > > > 
> > > > > This needs to go in 2.0 and 2.0 has to have new ABI anyway.
> > > > > 
> > > > We've already released 2.0, I think you mean 2.1, but 2.1 can't have a new ABI
> > > > because we didn't announce it in 1.8.  The earliest we can update the ABI
> > > > (according to the ABI docs) at this point is 2.2, since we need to announce the
> > > > change in 2.1, then make it in 2.2
> > > > 
> > > > Neil
> > > > 
> > > 
> > > Then just skip 2.1 (or make it a trivial doc change only dummy release),
> > > and call it 2.2.
> > > 
> > > I guess we need to proactively say every .x release will have new ABI.
> > > Sorry, this is a project under development.
> > > 
> > Sorry, NAK.  I didn't go through all the trouble of creating an ABI
> > infrastructure just to throw it out the window on some rubber stamp.  We decided
> > on the rules, we need to stick to them.  We have large projects that rely on
> > DPDK now (OVS primarily), and we owe it to them to not just go completely throw
> > out the ABI every release.  We have a process for doing it, lets follow it.
> > 
> > Neil
> > 
> 
> I meant, that close and ship existing 2.1 code base early and open 2.2 early
> to keep things rolling. But in general this project needs x.x.y releases
> with ABI stability, and just admit that x.x releases will not have stable ABI.
> That is reality now.
> 
I'm  not opposed to doing that, though the purpose of the proposed cadence was
to give sufficient notice to downstream consumers of DPDK that ABI changes were
coming.  As long as the time delta between 2.X and 2.X+1 is sufficient for
consumers to have time to react and update their applications I'm ok with it
(which I know is subjective, but I'm willing to experiment there).

> A lot of the ABI problem is that the code does not do a good job of hiding.
> And also does not separate driver ABI from user ABI. There are things like
> structure of PCI and interrupt handles that the user from library point
> of view should not care about, but drivers will need to.
> 
I agree.  I had hoped that implementing an ABI process would help drive
improvements in this area, but it hasn't seemed to yet.

Neil

> 
> 

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [PATCH 2/5] mbuf: use the reserved 16 bits for double vlan
  2015-05-26 15:46  3%           ` Ananyev, Konstantin
@ 2015-05-27  1:07  0%             ` Zhang, Helin
  0 siblings, 0 replies; 200+ results
From: Zhang, Helin @ 2015-05-27  1:07 UTC (permalink / raw)
  To: Ananyev, Konstantin, Stephen Hemminger; +Cc: dev



> -----Original Message-----
> From: Ananyev, Konstantin
> Sent: Tuesday, May 26, 2015 11:46 PM
> To: Stephen Hemminger
> Cc: Zhang, Helin; dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH 2/5] mbuf: use the reserved 16 bits for
> double vlan
> 
> 
> 
> > -----Original Message-----
> > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > Sent: Tuesday, May 26, 2015 4:35 PM
> > To: Ananyev, Konstantin
> > Cc: Zhang, Helin; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH 2/5] mbuf: use the reserved 16 bits for
> > double vlan
> >
> > On Tue, 26 May 2015 15:02:51 +0000
> > "Ananyev, Konstantin" <konstantin.ananyev@intel.com> wrote:
> >
> > > Hi Stephen,
> > >
> > > > -----Original Message-----
> > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Stephen
> > > > Hemminger
> > > > Sent: Tuesday, May 26, 2015 3:55 PM
> > > > To: Zhang, Helin
> > > > Cc: dev@dpdk.org
> > > > Subject: Re: [dpdk-dev] [PATCH 2/5] mbuf: use the reserved 16 bits
> > > > for double vlan
> > > >
> > > > On Tue, 26 May 2015 16:36:37 +0800 Helin Zhang
> > > > <helin.zhang@intel.com> wrote:
> > > >
> > > > > Use the reserved 16 bits in rte_mbuf structure for the outer
> > > > > vlan, also add QinQ offloading flags for both RX and TX sides.
> > > > >
> > > > > Signed-off-by: Helin Zhang <helin.zhang@intel.com>
> > > >
> > > > Yet another change that is much needed, but breaks ABI
> compatibility.
> > >
> > > Why do you think it breaks ABI compatibility?
> > > As I can see, it uses field that was reserved.
> > > Konstantin
> >
> > Because an application maybe assuming something or reusing the
> reserved fields.
> 
> But properly behaving application, shouldn't do that right?
> And for misbehaving ones, why should we care about them?
For any reserved bits, I think all application users should avoid touching it,
as it is reserved for future use, or some special reason. Otherwise,
un-predicted behavior can be expected.

Regards,
Helin

> 
> > Yes, it would be dumb of application to do that but from absolute ABI
> > point of view it is a change.
> 
> So, in theory,  even adding a new field to the end of rte_mbuf is an ABI
> breakage?
> Konstantin

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v3 01/10] table: added structure for storing table stats
  2015-05-26 21:40  0%     ` Dumitrescu, Cristian
@ 2015-05-26 21:57  0%       ` Stephen Hemminger
  2015-05-28 19:32  3%         ` Dumitrescu, Cristian
  0 siblings, 1 reply; 200+ results
From: Stephen Hemminger @ 2015-05-26 21:57 UTC (permalink / raw)
  To: Dumitrescu, Cristian; +Cc: dev

On Tue, 26 May 2015 21:40:42 +0000
"Dumitrescu, Cristian" <cristian.dumitrescu@intel.com> wrote:

> 
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Stephen
> > Hemminger
> > Sent: Tuesday, May 26, 2015 3:58 PM
> > To: Gajdzica, MaciejX T
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v3 01/10] table: added structure for storing
> > table stats
> > 
> > On Tue, 26 May 2015 14:39:38 +0200
> > Maciej Gajdzica <maciejx.t.gajdzica@intel.com> wrote:
> > 
> > > +
> > >  /** Lookup table interface defining the lookup table operation */
> > >  struct rte_table_ops {
> > >  	rte_table_op_create f_create;       /**< Create */
> > > @@ -194,6 +218,7 @@ struct rte_table_ops {
> > >  	rte_table_op_entry_add f_add;       /**< Entry add */
> > >  	rte_table_op_entry_delete f_delete; /**< Entry delete */
> > >  	rte_table_op_lookup f_lookup;       /**< Lookup */
> > > +	rte_table_op_stats_read f_stats;	/**< Stats */
> > >  };
> > 
> > Another good idea, which is an ABI change.
> 
> This is simply adding a new API function, this is not changing any function prototype. There is no change required in the map file of this library. Is there anything we should have done and we did not do?
> 

But if I built an external set of code which had rte_table_ops (don't worry I haven't)
and that binary ran with the new definition, the core code it table would reference
outside the (old version) of rte_table_ops structure and find garbage.

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v3 01/10] table: added structure for storing table stats
  2015-05-26 14:57  3%   ` Stephen Hemminger
@ 2015-05-26 21:40  0%     ` Dumitrescu, Cristian
  2015-05-26 21:57  0%       ` Stephen Hemminger
  0 siblings, 1 reply; 200+ results
From: Dumitrescu, Cristian @ 2015-05-26 21:40 UTC (permalink / raw)
  To: Stephen Hemminger, Gajdzica, MaciejX T; +Cc: dev



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Stephen
> Hemminger
> Sent: Tuesday, May 26, 2015 3:58 PM
> To: Gajdzica, MaciejX T
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 01/10] table: added structure for storing
> table stats
> 
> On Tue, 26 May 2015 14:39:38 +0200
> Maciej Gajdzica <maciejx.t.gajdzica@intel.com> wrote:
> 
> > +
> >  /** Lookup table interface defining the lookup table operation */
> >  struct rte_table_ops {
> >  	rte_table_op_create f_create;       /**< Create */
> > @@ -194,6 +218,7 @@ struct rte_table_ops {
> >  	rte_table_op_entry_add f_add;       /**< Entry add */
> >  	rte_table_op_entry_delete f_delete; /**< Entry delete */
> >  	rte_table_op_lookup f_lookup;       /**< Lookup */
> > +	rte_table_op_stats_read f_stats;	/**< Stats */
> >  };
> 
> Another good idea, which is an ABI change.

This is simply adding a new API function, this is not changing any function prototype. There is no change required in the map file of this library. Is there anything we should have done and we did not do?

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH 2/5] mbuf: use the reserved 16 bits for double vlan
  2015-05-26 15:35  3%         ` Stephen Hemminger
@ 2015-05-26 15:46  3%           ` Ananyev, Konstantin
  2015-05-27  1:07  0%             ` Zhang, Helin
  0 siblings, 1 reply; 200+ results
From: Ananyev, Konstantin @ 2015-05-26 15:46 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev



> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Tuesday, May 26, 2015 4:35 PM
> To: Ananyev, Konstantin
> Cc: Zhang, Helin; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 2/5] mbuf: use the reserved 16 bits for double vlan
> 
> On Tue, 26 May 2015 15:02:51 +0000
> "Ananyev, Konstantin" <konstantin.ananyev@intel.com> wrote:
> 
> > Hi Stephen,
> >
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Stephen Hemminger
> > > Sent: Tuesday, May 26, 2015 3:55 PM
> > > To: Zhang, Helin
> > > Cc: dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [PATCH 2/5] mbuf: use the reserved 16 bits for double vlan
> > >
> > > On Tue, 26 May 2015 16:36:37 +0800
> > > Helin Zhang <helin.zhang@intel.com> wrote:
> > >
> > > > Use the reserved 16 bits in rte_mbuf structure for the outer vlan,
> > > > also add QinQ offloading flags for both RX and TX sides.
> > > >
> > > > Signed-off-by: Helin Zhang <helin.zhang@intel.com>
> > >
> > > Yet another change that is much needed, but breaks ABI compatibility.
> >
> > Why do you think it breaks ABI compatibility?
> > As I can see, it uses field that was reserved.
> > Konstantin
> 
> Because an application maybe assuming something or reusing the reserved fields.

But properly behaving application, shouldn't do that right?
And for misbehaving ones, why should we care about them?

> Yes, it would be dumb of application to do that but from absolute ABI point
> of view it is a change.

So, in theory,  even adding a new field to the end of rte_mbuf is an ABI breakage?
Konstantin

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH 2/5] mbuf: use the reserved 16 bits for double vlan
  2015-05-26 15:02  3%       ` Ananyev, Konstantin
@ 2015-05-26 15:35  3%         ` Stephen Hemminger
  2015-05-26 15:46  3%           ` Ananyev, Konstantin
  0 siblings, 1 reply; 200+ results
From: Stephen Hemminger @ 2015-05-26 15:35 UTC (permalink / raw)
  To: Ananyev, Konstantin; +Cc: dev

On Tue, 26 May 2015 15:02:51 +0000
"Ananyev, Konstantin" <konstantin.ananyev@intel.com> wrote:

> Hi Stephen,
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Stephen Hemminger
> > Sent: Tuesday, May 26, 2015 3:55 PM
> > To: Zhang, Helin
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH 2/5] mbuf: use the reserved 16 bits for double vlan
> > 
> > On Tue, 26 May 2015 16:36:37 +0800
> > Helin Zhang <helin.zhang@intel.com> wrote:
> > 
> > > Use the reserved 16 bits in rte_mbuf structure for the outer vlan,
> > > also add QinQ offloading flags for both RX and TX sides.
> > >
> > > Signed-off-by: Helin Zhang <helin.zhang@intel.com>
> > 
> > Yet another change that is much needed, but breaks ABI compatibility.
> 
> Why do you think it breaks ABI compatibility?
> As I can see, it uses field that was reserved.
> Konstantin

Because an application may be assuming something or reusing the reserved fields.
Yes, it would be dumb of application to do that but from absolute ABI point
of view it is a change.

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [RFC PATCH 1/2] Added ETH_SPEED_CAP bitmap in rte_eth_dev_info
  2015-05-26 15:03  3%   ` Stephen Hemminger
@ 2015-05-26 15:09  0%     ` Marc Sune
  0 siblings, 0 replies; 200+ results
From: Marc Sune @ 2015-05-26 15:09 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev



On 26/05/15 17:03, Stephen Hemminger wrote:
> On Tue, 12 May 2015 01:45:45 +0200
> Marc Sune <marc.sune@bisdn.de> wrote:
>
>> +/**
>> + * Ethernet device information
>> + */
>>   struct rte_eth_dev_info {
>>   	struct rte_pci_device *pci_dev; /**< Device PCI information. */
>>   	const char *driver_name; /**< Device Driver name. */
>> @@ -924,6 +947,7 @@ struct rte_eth_dev_info {
>>   	uint16_t vmdq_queue_base; /**< First queue ID for VMDQ pools. */
>>   	uint16_t vmdq_queue_num;  /**< Queue number for VMDQ pools. */
>>   	uint16_t vmdq_pool_base;  /**< First ID of VMDQ pools. */
>> +	uint16_t speed_capa;  /**< Supported speeds bitmap. */
>>   };
>>   
> Since you are changing size of key structure, this is an ABI change.

Yes. This means target would be 2.2?

I will send the new version anyway to further discuss, and will rebase 
again once necessary.

Marc

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH 2/5] mbuf: use the reserved 16 bits for double vlan
  2015-05-26 14:55  3%     ` Stephen Hemminger
  2015-05-26 15:00  0%       ` Zhang, Helin
@ 2015-05-26 15:02  3%       ` Ananyev, Konstantin
  2015-05-26 15:35  3%         ` Stephen Hemminger
  1 sibling, 1 reply; 200+ results
From: Ananyev, Konstantin @ 2015-05-26 15:02 UTC (permalink / raw)
  To: Stephen Hemminger, Zhang, Helin; +Cc: dev

Hi Stephen,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Stephen Hemminger
> Sent: Tuesday, May 26, 2015 3:55 PM
> To: Zhang, Helin
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 2/5] mbuf: use the reserved 16 bits for double vlan
> 
> On Tue, 26 May 2015 16:36:37 +0800
> Helin Zhang <helin.zhang@intel.com> wrote:
> 
> > Use the reserved 16 bits in rte_mbuf structure for the outer vlan,
> > also add QinQ offloading flags for both RX and TX sides.
> >
> > Signed-off-by: Helin Zhang <helin.zhang@intel.com>
> 
> Yet another change that is much needed, but breaks ABI compatibility.

Why do you think it breaks ABI compatibility?
As I can see, it uses field that was reserved.
Konstantin

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [RFC PATCH 1/2] Added ETH_SPEED_CAP bitmap in rte_eth_dev_info
  @ 2015-05-26 15:03  3%   ` Stephen Hemminger
  2015-05-26 15:09  0%     ` Marc Sune
  0 siblings, 1 reply; 200+ results
From: Stephen Hemminger @ 2015-05-26 15:03 UTC (permalink / raw)
  To: Marc Sune; +Cc: dev

On Tue, 12 May 2015 01:45:45 +0200
Marc Sune <marc.sune@bisdn.de> wrote:

> +/**
> + * Ethernet device information
> + */
>  struct rte_eth_dev_info {
>  	struct rte_pci_device *pci_dev; /**< Device PCI information. */
>  	const char *driver_name; /**< Device Driver name. */
> @@ -924,6 +947,7 @@ struct rte_eth_dev_info {
>  	uint16_t vmdq_queue_base; /**< First queue ID for VMDQ pools. */
>  	uint16_t vmdq_queue_num;  /**< Queue number for VMDQ pools. */
>  	uint16_t vmdq_pool_base;  /**< First ID of VMDQ pools. */
> +	uint16_t speed_capa;  /**< Supported speeds bitmap. */
>  };
>  

Since you are changing size of key structure, this is an ABI change.

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH 2/5] mbuf: use the reserved 16 bits for double vlan
  2015-05-26 14:55  3%     ` Stephen Hemminger
@ 2015-05-26 15:00  0%       ` Zhang, Helin
  2015-05-26 15:02  3%       ` Ananyev, Konstantin
  1 sibling, 0 replies; 200+ results
From: Zhang, Helin @ 2015-05-26 15:00 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev

Hi Stephen

> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Tuesday, May 26, 2015 10:55 PM
> To: Zhang, Helin
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 2/5] mbuf: use the reserved 16 bits for
> double vlan
> 
> On Tue, 26 May 2015 16:36:37 +0800
> Helin Zhang <helin.zhang@intel.com> wrote:
> 
> > Use the reserved 16 bits in rte_mbuf structure for the outer vlan,
> > also add QinQ offloading flags for both RX and TX sides.
> >
> > Signed-off-by: Helin Zhang <helin.zhang@intel.com>
> 
> Yet another change that is much needed, but breaks ABI compatibility.
Even just use the reserved 16 bits? It seems yes.
Would it be acceptable to use the original name of 'reserved' for the outer vlan?
And then announce the name change, and rename it one release after?

Regards,
Helin

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v3 01/10] table: added structure for storing table stats
  @ 2015-05-26 14:57  3%   ` Stephen Hemminger
  2015-05-26 21:40  0%     ` Dumitrescu, Cristian
  0 siblings, 1 reply; 200+ results
From: Stephen Hemminger @ 2015-05-26 14:57 UTC (permalink / raw)
  To: Maciej Gajdzica; +Cc: dev

On Tue, 26 May 2015 14:39:38 +0200
Maciej Gajdzica <maciejx.t.gajdzica@intel.com> wrote:

> +
>  /** Lookup table interface defining the lookup table operation */
>  struct rte_table_ops {
>  	rte_table_op_create f_create;       /**< Create */
> @@ -194,6 +218,7 @@ struct rte_table_ops {
>  	rte_table_op_entry_add f_add;       /**< Entry add */
>  	rte_table_op_entry_delete f_delete; /**< Entry delete */
>  	rte_table_op_lookup f_lookup;       /**< Lookup */
> +	rte_table_op_stats_read f_stats;	/**< Stats */
>  };

Another good idea, which is an ABI change.

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH 2/5] mbuf: use the reserved 16 bits for double vlan
  @ 2015-05-26 14:55  3%     ` Stephen Hemminger
  2015-05-26 15:00  0%       ` Zhang, Helin
  2015-05-26 15:02  3%       ` Ananyev, Konstantin
  0 siblings, 2 replies; 200+ results
From: Stephen Hemminger @ 2015-05-26 14:55 UTC (permalink / raw)
  To: Helin Zhang; +Cc: dev

On Tue, 26 May 2015 16:36:37 +0800
Helin Zhang <helin.zhang@intel.com> wrote:

> Use the reserved 16 bits in rte_mbuf structure for the outer vlan,
> also add QinQ offloading flags for both RX and TX sides.
> 
> Signed-off-by: Helin Zhang <helin.zhang@intel.com>

Yet another change that is much needed, but breaks ABI compatibility.

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v8 01/11] eal/linux: add interrupt vectors support in intr_handle
       [not found]                     ` <40594e9e6e0543afa11e4dbd90e59b22@BRMWP-EXMB11.corp.brocade.com>
@ 2015-05-22 16:52  5%                   ` Stephen Hemminger
  2015-05-27 10:33  4%                     ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Stephen Hemminger @ 2015-05-22 16:52 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev, liang-min.wang

On Fri, 22 May 2015 00:05:36 +0000
Neil Horman <nhorman@tuxdriver.com> wrote:

> On Thu, May 21, 2015 at 11:14:00AM -0700, Stephen Hemminger wrote:
> > On Thu, 21 May 2015 13:58:46 -0400
> > Neil Horman <nhorman@tuxdriver.com> wrote:
> > 
> > > On Thu, May 21, 2015 at 10:43:00AM -0700, Stephen Hemminger wrote:
> > > > On Thu, 21 May 2015 06:32:02 -0400
> > > > Neil Horman <nhorman@tuxdriver.com> wrote:
> > > > 
> > > > > On Thu, May 21, 2015 at 04:55:53PM +0800, Cunming Liang wrote:
> > > > > > The patch adds interrupt vectors support in rte_intr_handle.
> > > > > > 'vec_en' is set when interrupt vectors are detected and associated event fds are set.
> > > > > > Those event fds are stored in efds[].
> > > > > > 'intr_vec' is reserved for device driver to initialize the vector mapping table.
> > > > > > When the event fds add to a specified epoll instance, 'elist' will hold the rte_epoll_event object pointer.
> > > > > > 
> > > > > > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > > > > > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > > > > > ---
> > > > > > v7 changes:
> > > > > >  - add eptrs[], it's used to store the register rte_epoll_event instances.
> > > > > >  - add vec_en, to log the vector capability status.
> > > > > > 
> > > > > > v6 changes:
> > > > > >  - add mapping table between irq vector number and queue id.
> > > > > > 
> > > > > > v5 changes:
> > > > > >  - Create this new patch file for changed struct rte_intr_handle that
> > > > > >    other patches depend on, to avoid breaking git bisect.
> > > > > > 
> > > > > >  lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h | 10 ++++++++++
> > > > > >  1 file changed, 10 insertions(+)
> > > > > > 
> > > > > > diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > > > > index 6a159c7..27174df 100644
> > > > > > --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > > > > +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > > > > @@ -38,6 +38,8 @@
> > > > > >  #ifndef _RTE_LINUXAPP_INTERRUPTS_H_
> > > > > >  #define _RTE_LINUXAPP_INTERRUPTS_H_
> > > > > >  
> > > > > > +#define RTE_MAX_RXTX_INTR_VEC_ID     32
> > > > > > +
> > > > > >  enum rte_intr_handle_type {
> > > > > >  	RTE_INTR_HANDLE_UNKNOWN = 0,
> > > > > >  	RTE_INTR_HANDLE_UIO,      /**< uio device handle */
> > > > > > @@ -48,6 +50,8 @@ enum rte_intr_handle_type {
> > > > > >  	RTE_INTR_HANDLE_MAX
> > > > > >  };
> > > > > >  
> > > > > > +struct rte_epoll_event;
> > > > > > +
> > > > > >  /** Handle for interrupts. */
> > > > > >  struct rte_intr_handle {
> > > > > >  	union {
> > > > > > @@ -57,6 +61,12 @@ struct rte_intr_handle {
> > > > > >  	};
> > > > > >  	int fd;	 /**< interrupt event file descriptor */
> > > > > >  	enum rte_intr_handle_type type;  /**< handle type */
> > > > > > +	uint32_t max_intr;               /**< max interrupt requested */
> > > > > > +	uint32_t nb_efd;                 /**< number of available efds */
> > > > > > +	int efds[RTE_MAX_RXTX_INTR_VEC_ID];  /**< intr vectors/efds mapping */
> > > > > > +	struct rte_epoll_event *elist[RTE_MAX_RXTX_INTR_VEC_ID];
> > > > > > +					 /**< intr vector epoll event ptr */
> > > > > > +	int *intr_vec;                   /**< intr vector number array */
> > > > > >  };
> > > > > >    
> > > > > 
> > > > > This is going to be ABI breaking if this from test_interrupts.c:
> > > > > static struct rte_intr_handle intr_handles[TEST_INTERRUPT_HANDLE_MAX];
> > > > > 
> > > > > is a plausible way of using this structure.  Even putting the data at the end of
> > > > > the structure won't help, as the array indicies are off
> > > > 
> > > > This needs to go in 2.0 and 2.0 has to have new ABI anyway.
> > > > 
> > > We've already released 2.0, I think you mean 2.1, but 2.1 can't have a new ABI
> > > because we didn't announce it in 1.8.  The earliest we can update the ABI
> > > (according to the ABI docs) at this point is 2.2, since we need to announce the
> > > change in 2.1, then make it in 2.2
> > > 
> > > Neil
> > > 
> > 
> > Then just skip 2.1 (or make it a trivial doc change only dummy release),
> > and call it 2.2.
> > 
> > I guess we need to proactively say every .x release will have new ABI.
> > Sorry, this is a project under development.
> > 
> Sorry, NAK.  I didn't go through all the trouble of creating an ABI
> infrastructure just to throw it out the window on some rubber stamp.  We decided
> on the rules, we need to stick to them.  We have large projects that rely on
> DPDK now (OVS primarily), and we owe it to them to not just go completely throw
> out the ABI every release.  We have a process for doing it, lets follow it.
> 
> Neil
> 

I meant, that close and ship existing 2.1 code base early and open 2.2 early
to keep things rolling. But in general this project needs x.x.y releases
with ABI stability, and just admit that x.x releases will not have stable ABI.
That is reality now.

A lot of the ABI problem is that the code does not do a good job of hiding.
And also does not separate driver ABI from user ABI. There are things like
structure of PCI and interrupt handles that the user from library point
of view should not care about, but drivers will need to.

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [PATCH v5 01/18] mbuf: redefine packet_type in rte_mbuf
  @ 2015-05-22 10:09  3%     ` Neil Horman
  0 siblings, 0 replies; 200+ results
From: Neil Horman @ 2015-05-22 10:09 UTC (permalink / raw)
  To: Helin Zhang; +Cc: dev

On Fri, May 22, 2015 at 04:44:07PM +0800, Helin Zhang wrote:
> In order to unify the packet type, the field of 'packet_type' in
> 'struct rte_mbuf' needs to be extended from 16 to 32 bits.
> Accordingly, some fields in 'struct rte_mbuf' are re-organized to
> support this change for Vector PMD. As 'struct rte_kni_mbuf' for
> KNI should be right mapped to 'struct rte_mbuf', it should be
> modified accordingly. In addition, Vector PMD of ixgbe is disabled
> by default, as 'struct rte_mbuf' changed.
> 
> Signed-off-by: Helin Zhang <helin.zhang@intel.com>
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> ---
>  config/common_linuxapp                             |  2 +-
>  .../linuxapp/eal/include/exec-env/rte_kni_common.h |  4 ++--
>  lib/librte_mbuf/rte_mbuf.h                         | 23 +++++++++++++++-------
>  3 files changed, 19 insertions(+), 10 deletions(-)
> 
> v2 changes:
> * Enlarged the packet_type field from 16 bits to 32 bits.
> * Redefined the packet type sub-fields.
> * Updated the 'struct rte_kni_mbuf' for KNI according to the mbuf changes.
> 
> v3 changes:
> * Put the mbuf layout changes into a single patch.
> * Disabled vector ixgbe PMD by default, as mbuf layout changed.
> 
> v5 changes:
> * Re-worded the commit logs.
> 
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 0078dc9..6b067c7 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -167,7 +167,7 @@ CONFIG_RTE_LIBRTE_IXGBE_DEBUG_TX_FREE=n
>  CONFIG_RTE_LIBRTE_IXGBE_DEBUG_DRIVER=n
>  CONFIG_RTE_LIBRTE_IXGBE_PF_DISABLE_STRIP_CRC=n
>  CONFIG_RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC=y
> -CONFIG_RTE_IXGBE_INC_VECTOR=y
> +CONFIG_RTE_IXGBE_INC_VECTOR=n
>  CONFIG_RTE_IXGBE_RX_OLFLAGS_ENABLE=y
>  
>  #
> diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> index 1e55c2d..bd1cc09 100644
> --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> @@ -117,9 +117,9 @@ struct rte_kni_mbuf {
>  	uint16_t data_off;      /**< Start address of data in segment buffer. */
>  	char pad1[4];
>  	uint64_t ol_flags;      /**< Offload features. */
> -	char pad2[2];
> -	uint16_t data_len;      /**< Amount of data in segment buffer. */
> +	char pad2[4];
>  	uint32_t pkt_len;       /**< Total pkt len: sum of all segment data_len. */
> +	uint16_t data_len;      /**< Amount of data in segment buffer. */
>  
>  	/* fields on second cache line */
>  	char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_SIZE)));
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index ab6de67..c2b1463 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -269,17 +269,26 @@ struct rte_mbuf {
>  	/* remaining bytes are set on RX when pulling packet from descriptor */
>  	MARKER rx_descriptor_fields1;
>  
> -	/**
> -	 * The packet type, which is used to indicate ordinary packet and also
> -	 * tunneled packet format, i.e. each number is represented a type of
> -	 * packet.
> +	/*
> +	 * The packet type, which is the combination of outer/inner L2, L3, L4
> +	 * and tunnel types.
>  	 */
> -	uint16_t packet_type;
> +	union {
> +		uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
> +		struct {
> +			uint32_t l2_type:4; /**< (Outer) L2 type. */
> +			uint32_t l3_type:4; /**< (Outer) L3 type. */
> +			uint32_t l4_type:4; /**< (Outer) L4 type. */
> +			uint32_t tun_type:4; /**< Tunnel type. */
> +			uint32_t inner_l2_type:4; /**< Inner L2 type. */
> +			uint32_t inner_l3_type:4; /**< Inner L3 type. */
> +			uint32_t inner_l4_type:4; /**< Inner L4 type. */
> +		};
> +	};
>  
> -	uint16_t data_len;        /**< Amount of data in segment buffer. */
>  	uint32_t pkt_len;         /**< Total pkt len: sum of all segments. */
> +	uint16_t data_len;        /**< Amount of data in segment buffer. */
>  	uint16_t vlan_tci;        /**< VLAN Tag Control Identifier (CPU order) */
> -	uint16_t reserved;
>  	union {
>  		uint32_t rss;     /**< RSS hash result if RSS enabled */
>  		struct {


ABI Compatibility process?

Neil

^ permalink raw reply	[relevance 3%]

* [dpdk-dev] [PATCH v5 18/18] mbuf: remove old packet type bit masks
  2015-05-22  8:44  2% ` [dpdk-dev] [PATCH v5 " Helin Zhang
  @ 2015-05-22  8:44  3%   ` Helin Zhang
  1 sibling, 0 replies; 200+ results
From: Helin Zhang @ 2015-05-22  8:44 UTC (permalink / raw)
  To: dev

As unified packet types are used instead, those old bit masks and
the relevant macros for packet type indication need to be removed.

Signed-off-by: Helin Zhang <helin.zhang@intel.com>
---
 lib/librte_mbuf/rte_mbuf.c | 6 ------
 lib/librte_mbuf/rte_mbuf.h | 6 ------
 2 files changed, 12 deletions(-)

v2 changes:
* Used redefined packet types and enlarged packet_type field in mbuf.
* Redefined the bit masks for packet RX offload flags.

v5 changes:
* Rolled back the bit masks of RX flags, for ABI compatibility.

diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index f506517..78688f7 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -251,14 +251,8 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask)
 	/* case PKT_RX_HBUF_OVERFLOW: return "PKT_RX_HBUF_OVERFLOW"; */
 	/* case PKT_RX_RECIP_ERR: return "PKT_RX_RECIP_ERR"; */
 	/* case PKT_RX_MAC_ERR: return "PKT_RX_MAC_ERR"; */
-	case PKT_RX_IPV4_HDR: return "PKT_RX_IPV4_HDR";
-	case PKT_RX_IPV4_HDR_EXT: return "PKT_RX_IPV4_HDR_EXT";
-	case PKT_RX_IPV6_HDR: return "PKT_RX_IPV6_HDR";
-	case PKT_RX_IPV6_HDR_EXT: return "PKT_RX_IPV6_HDR_EXT";
 	case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP";
 	case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST";
-	case PKT_RX_TUNNEL_IPV4_HDR: return "PKT_RX_TUNNEL_IPV4_HDR";
-	case PKT_RX_TUNNEL_IPV6_HDR: return "PKT_RX_TUNNEL_IPV6_HDR";
 	default: return NULL;
 	}
 }
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 6a26172..aea9ba8 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -91,14 +91,8 @@ extern "C" {
 #define PKT_RX_HBUF_OVERFLOW (0ULL << 0)  /**< Header buffer overflow. */
 #define PKT_RX_RECIP_ERR     (0ULL << 0)  /**< Hardware processing error. */
 #define PKT_RX_MAC_ERR       (0ULL << 0)  /**< MAC error. */
-#define PKT_RX_IPV4_HDR      (1ULL << 5)  /**< RX packet with IPv4 header. */
-#define PKT_RX_IPV4_HDR_EXT  (1ULL << 6)  /**< RX packet with extended IPv4 header. */
-#define PKT_RX_IPV6_HDR      (1ULL << 7)  /**< RX packet with IPv6 header. */
-#define PKT_RX_IPV6_HDR_EXT  (1ULL << 8)  /**< RX packet with extended IPv6 header. */
 #define PKT_RX_IEEE1588_PTP  (1ULL << 9)  /**< RX IEEE1588 L2 Ethernet PT Packet. */
 #define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/
-#define PKT_RX_TUNNEL_IPV4_HDR (1ULL << 11) /**< RX tunnel packet with IPv4 header.*/
-#define PKT_RX_TUNNEL_IPV6_HDR (1ULL << 12) /**< RX tunnel packet with IPv6 header. */
 #define PKT_RX_FDIR_ID       (1ULL << 13) /**< FD id reported if FDIR match. */
 #define PKT_RX_FDIR_FLX      (1ULL << 14) /**< Flexible bytes reported if FDIR match. */
 /* add new RX flags here */
-- 
1.9.3

^ permalink raw reply	[relevance 3%]

* [dpdk-dev] [PATCH v5 00/18] unified packet type
  @ 2015-05-22  8:44  2% ` Helin Zhang
    2015-05-22  8:44  3%   ` [dpdk-dev] [PATCH v5 18/18] mbuf: remove old packet type bit masks Helin Zhang
  0 siblings, 2 replies; 200+ results
From: Helin Zhang @ 2015-05-22  8:44 UTC (permalink / raw)
  To: dev

Currently only 6 bits which are stored in ol_flags are used to indicate
the packet types. This is not enough, as some NIC hardware can recognize
quite a lot of packet types, e.g i40e hardware can recognize more than 150
packet types. Hiding those packet types hides hardware offload capabilities
which could be quite useful for improving performance and for end users. So
unified packet types are needed to support all possible PMDs. The 16-bit
packet_type in mbuf structure can be changed to 32 bits and used for this
purpose. In addition, all packet types stored in the ol_flags field should be
deleted entirely, and 6 bits of ol_flags can be saved as the benefit.

Initially, 32 bits of packet_type can be divided into several sub fields to
indicate different packet type information of a packet. The initial design
is to divide those bits into fields for L2 types, L3 types, L4 types, tunnel
types, inner L2 types, inner L3 types and inner L4 types. All PMDs should
translate the offloaded packet types into these 7 fields of information, for
user applications.

v2 changes:
* Enlarged the packet_type field from 16 bits to 32 bits.
* Redefined the packet type sub-fields.
* Updated the 'struct rte_kni_mbuf' for KNI according to the mbuf changes.
* Used redefined packet types and enlarged packet_type field for all PMDs
  and corresponding applications.
* Removed changes in bond and its relevant application, as there is no need
  at all according to the recent bond changes.

v3 changes:
* Put the mbuf layout changes into a single patch.
* Put vector ixgbe changes right after mbuf changes.
* Disabled vector ixgbe PMD by default, as mbuf layout changed, and then
  re-enabled it after vector ixgbe PMD updated.
* Put the definitions of unified packet type into a single patch.
* Minor bug fixes and enhancements in l3fwd example.

v4 changes:
* Added detailed description of each packet types.
* Supported unified packet type of fm10k.
* Added printing logs of packet types of each received packet for rxonly
  mode in testpmd.
* Removed several useless code lines which block packet type unification from
  app/test/packet_burst_generator.c.

v5 changes:
* Added more detailed description for each packet types, together with examples.
* Rolled back the macro definitions of RX packet flags, for ABI compatibility.

Helin Zhang (18):
  mbuf: redefine packet_type in rte_mbuf
  ixgbe: support unified packet type in vectorized PMD
  mbuf: add definitions of unified packet types
  e1000: replace bit mask based packet type with unified packet type
  ixgbe: replace bit mask based packet type with unified packet type
  i40e: replace bit mask based packet type with unified packet type
  enic: replace bit mask based packet type with unified packet type
  vmxnet3: replace bit mask based packet type with unified packet type
  fm10k: replace bit mask based packet type with unified packet type
  app/test-pipeline: replace bit mask based packet type with unified
    packet type
  app/testpmd: replace bit mask based packet type with unified packet
    type
  app/test: Remove useless code
  examples/ip_fragmentation: replace bit mask based packet type with
    unified packet type
  examples/ip_reassembly: replace bit mask based packet type with
    unified packet type
  examples/l3fwd-acl: replace bit mask based packet type with unified
    packet type
  examples/l3fwd-power: replace bit mask based packet type with unified
    packet type
  examples/l3fwd: replace bit mask based packet type with unified packet
    type
  mbuf: remove old packet type bit masks

 app/test-pipeline/pipeline_hash.c                  |   7 +-
 app/test-pmd/csumonly.c                            |  10 +-
 app/test-pmd/rxonly.c                              | 178 ++++-
 app/test/packet_burst_generator.c                  |  10 -
 examples/ip_fragmentation/main.c                   |   7 +-
 examples/ip_reassembly/main.c                      |   7 +-
 examples/l3fwd-acl/main.c                          |  19 +-
 examples/l3fwd-power/main.c                        |   5 +-
 examples/l3fwd/main.c                              |  71 +-
 .../linuxapp/eal/include/exec-env/rte_kni_common.h |   4 +-
 lib/librte_mbuf/rte_mbuf.c                         |   6 -
 lib/librte_mbuf/rte_mbuf.h                         | 514 +++++++++++++-
 lib/librte_pmd_e1000/igb_rxtx.c                    |  98 ++-
 lib/librte_pmd_enic/enic_main.c                    |  14 +-
 lib/librte_pmd_fm10k/fm10k_rxtx.c                  |  30 +-
 lib/librte_pmd_i40e/i40e_rxtx.c                    | 786 ++++++++++++++-------
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c                  | 139 +++-
 lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c              |  49 +-
 lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c              |   4 +-
 19 files changed, 1498 insertions(+), 460 deletions(-)

-- 
1.9.3

^ permalink raw reply	[relevance 2%]

* Re: [dpdk-dev] [PATCH v8 01/11] eal/linux: add interrupt vectors support in intr_handle
       [not found]                   ` <20150521111400.2a04a196@urahara>
@ 2015-05-22  0:05  4%                 ` Neil Horman
       [not found]                     ` <40594e9e6e0543afa11e4dbd90e59b22@BRMWP-EXMB11.corp.brocade.com>
  1 sibling, 0 replies; 200+ results
From: Neil Horman @ 2015-05-22  0:05 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, liang-min.wang

On Thu, May 21, 2015 at 11:14:00AM -0700, Stephen Hemminger wrote:
> On Thu, 21 May 2015 13:58:46 -0400
> Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > On Thu, May 21, 2015 at 10:43:00AM -0700, Stephen Hemminger wrote:
> > > On Thu, 21 May 2015 06:32:02 -0400
> > > Neil Horman <nhorman@tuxdriver.com> wrote:
> > > 
> > > > On Thu, May 21, 2015 at 04:55:53PM +0800, Cunming Liang wrote:
> > > > > The patch adds interrupt vectors support in rte_intr_handle.
> > > > > 'vec_en' is set when interrupt vectors are detected and associated event fds are set.
> > > > > Those event fds are stored in efds[].
> > > > > 'intr_vec' is reserved for device driver to initialize the vector mapping table.
> > > > > When the event fds add to a specified epoll instance, 'elist' will hold the rte_epoll_event object pointer.
> > > > > 
> > > > > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > > > > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > > > > ---
> > > > > v7 changes:
> > > > >  - add eptrs[], it's used to store the register rte_epoll_event instances.
> > > > >  - add vec_en, to log the vector capability status.
> > > > > 
> > > > > v6 changes:
> > > > >  - add mapping table between irq vector number and queue id.
> > > > > 
> > > > > v5 changes:
> > > > >  - Create this new patch file for changed struct rte_intr_handle that
> > > > >    other patches depend on, to avoid breaking git bisect.
> > > > > 
> > > > >  lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h | 10 ++++++++++
> > > > >  1 file changed, 10 insertions(+)
> > > > > 
> > > > > diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > > > index 6a159c7..27174df 100644
> > > > > --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > > > +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > > > @@ -38,6 +38,8 @@
> > > > >  #ifndef _RTE_LINUXAPP_INTERRUPTS_H_
> > > > >  #define _RTE_LINUXAPP_INTERRUPTS_H_
> > > > >  
> > > > > +#define RTE_MAX_RXTX_INTR_VEC_ID     32
> > > > > +
> > > > >  enum rte_intr_handle_type {
> > > > >  	RTE_INTR_HANDLE_UNKNOWN = 0,
> > > > >  	RTE_INTR_HANDLE_UIO,      /**< uio device handle */
> > > > > @@ -48,6 +50,8 @@ enum rte_intr_handle_type {
> > > > >  	RTE_INTR_HANDLE_MAX
> > > > >  };
> > > > >  
> > > > > +struct rte_epoll_event;
> > > > > +
> > > > >  /** Handle for interrupts. */
> > > > >  struct rte_intr_handle {
> > > > >  	union {
> > > > > @@ -57,6 +61,12 @@ struct rte_intr_handle {
> > > > >  	};
> > > > >  	int fd;	 /**< interrupt event file descriptor */
> > > > >  	enum rte_intr_handle_type type;  /**< handle type */
> > > > > +	uint32_t max_intr;               /**< max interrupt requested */
> > > > > +	uint32_t nb_efd;                 /**< number of available efds */
> > > > > +	int efds[RTE_MAX_RXTX_INTR_VEC_ID];  /**< intr vectors/efds mapping */
> > > > > +	struct rte_epoll_event *elist[RTE_MAX_RXTX_INTR_VEC_ID];
> > > > > +					 /**< intr vector epoll event ptr */
> > > > > +	int *intr_vec;                   /**< intr vector number array */
> > > > >  };
> > > > >    
> > > > 
> > > > This is going to be ABI breaking if this from test_interrupts.c:
> > > > static struct rte_intr_handle intr_handles[TEST_INTERRUPT_HANDLE_MAX];
> > > > 
> > > > is a plausible way of using this structure.  Even putting the data at the end of
> > > > the structure won't help, as the array indices are off
> > > 
> > > This needs to go in 2.0 and 2.0 has to have new ABI anyway.
> > > 
> > We've already released 2.0, I think you mean 2.1, but 2.1 can't have a new ABI
> > because we didn't announce it in 1.8.  The earliest we can update the ABI
> > (according to the ABI docs) at this point is 2.2, since we need to announce the
> > change in 2.1, then make it in 2.2
> > 
> > Neil
> > 
> 
> Then just skip 2.1 (or make it a trivial doc change only dummy release),
> and call it 2.2.
> 
> I guess we need to proactively say every .x release will have new ABI.
> Sorry, this is a project under development.
> 
Sorry, NAK.  I didn't go through all the trouble of creating an ABI
infrastructure just to throw it out the window on some rubber stamp.  We decided
on the rules, we need to stick to them.  We have large projects that rely on
DPDK now (OVS primarily), and we owe it to them to not just completely throw
out the ABI every release.  We have a process for doing it, let's follow it.

Neil

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [PATCH v8 01/11] eal/linux: add interrupt vectors support in intr_handle
  2015-05-21 17:58  4%             ` Neil Horman
@ 2015-05-21 18:21  3%               ` Stephen Hemminger
       [not found]                   ` <20150521111400.2a04a196@urahara>
  1 sibling, 0 replies; 200+ results
From: Stephen Hemminger @ 2015-05-21 18:21 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev, liang-min.wang

On Thu, 21 May 2015 13:58:46 -0400
Neil Horman <nhorman@tuxdriver.com> wrote:

> On Thu, May 21, 2015 at 10:43:00AM -0700, Stephen Hemminger wrote:
> > On Thu, 21 May 2015 06:32:02 -0400
> > Neil Horman <nhorman@tuxdriver.com> wrote:
> > 
> > > On Thu, May 21, 2015 at 04:55:53PM +0800, Cunming Liang wrote:
> > > > The patch adds interrupt vectors support in rte_intr_handle.
> > > > 'vec_en' is set when interrupt vectors are detected and associated event fds are set.
> > > > Those event fds are stored in efds[].
> > > > 'intr_vec' is reserved for device driver to initialize the vector mapping table.
> > > > When the event fds add to a specified epoll instance, 'elist' will hold the rte_epoll_event object pointer.
> > > > 
> > > > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > > > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > > > ---
> > > > v7 changes:
> > > >  - add eptrs[], it's used to store the register rte_epoll_event instances.
> > > >  - add vec_en, to log the vector capability status.
> > > > 
> > > > v6 changes:
> > > >  - add mapping table between irq vector number and queue id.
> > > > 
> > > > v5 changes:
> > > >  - Create this new patch file for changed struct rte_intr_handle that
> > > >    other patches depend on, to avoid breaking git bisect.
> > > > 
> > > >  lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h | 10 ++++++++++
> > > >  1 file changed, 10 insertions(+)
> > > > 
> > > > diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > > index 6a159c7..27174df 100644
> > > > --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > > +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > > @@ -38,6 +38,8 @@
> > > >  #ifndef _RTE_LINUXAPP_INTERRUPTS_H_
> > > >  #define _RTE_LINUXAPP_INTERRUPTS_H_
> > > >  
> > > > +#define RTE_MAX_RXTX_INTR_VEC_ID     32
> > > > +
> > > >  enum rte_intr_handle_type {
> > > >  	RTE_INTR_HANDLE_UNKNOWN = 0,
> > > >  	RTE_INTR_HANDLE_UIO,      /**< uio device handle */
> > > > @@ -48,6 +50,8 @@ enum rte_intr_handle_type {
> > > >  	RTE_INTR_HANDLE_MAX
> > > >  };
> > > >  
> > > > +struct rte_epoll_event;
> > > > +
> > > >  /** Handle for interrupts. */
> > > >  struct rte_intr_handle {
> > > >  	union {
> > > > @@ -57,6 +61,12 @@ struct rte_intr_handle {
> > > >  	};
> > > >  	int fd;	 /**< interrupt event file descriptor */
> > > >  	enum rte_intr_handle_type type;  /**< handle type */
> > > > +	uint32_t max_intr;               /**< max interrupt requested */
> > > > +	uint32_t nb_efd;                 /**< number of available efds */
> > > > +	int efds[RTE_MAX_RXTX_INTR_VEC_ID];  /**< intr vectors/efds mapping */
> > > > +	struct rte_epoll_event *elist[RTE_MAX_RXTX_INTR_VEC_ID];
> > > > +					 /**< intr vector epoll event ptr */
> > > > +	int *intr_vec;                   /**< intr vector number array */
> > > >  };
> > > >    
> > > 
> > > This is going to be ABI breaking if this from test_interrupts.c:
> > > static struct rte_intr_handle intr_handles[TEST_INTERRUPT_HANDLE_MAX];
> > > 
> > > is a plausible way of using this structure.  Even putting the data at the end of
> > > the structure won't help, as the array indices are off
> > 
> > This needs to go in 2.0 and 2.0 has to have new ABI anyway.
> > 
> We've already released 2.0, I think you mean 2.1, but 2.1 can't have a new ABI
> because we didn't announce it in 1.8.  The earliest we can update the ABI
> (according to the ABI docs) at this point is 2.2, since we need to announce the
> change in 2.1, then make it in 2.2
> 
> Neil
> 

Then just skip 2.1 (or make it a trivial doc change only dummy release),
and call it 2.2.

I guess we need to proactively say every .x release will have new ABI.
Sorry, this is a project under development.

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v8 01/11] eal/linux: add interrupt vectors support in intr_handle
       [not found]               ` <20150521104300.00757b4e@urahara>
@ 2015-05-21 17:58  4%             ` Neil Horman
  2015-05-21 18:21  3%               ` Stephen Hemminger
       [not found]                   ` <20150521111400.2a04a196@urahara>
  0 siblings, 2 replies; 200+ results
From: Neil Horman @ 2015-05-21 17:58 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, liang-min.wang

On Thu, May 21, 2015 at 10:43:00AM -0700, Stephen Hemminger wrote:
> On Thu, 21 May 2015 06:32:02 -0400
> Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > On Thu, May 21, 2015 at 04:55:53PM +0800, Cunming Liang wrote:
> > > The patch adds interrupt vectors support in rte_intr_handle.
> > > 'vec_en' is set when interrupt vectors are detected and associated event fds are set.
> > > Those event fds are stored in efds[].
> > > 'intr_vec' is reserved for device driver to initialize the vector mapping table.
> > > When the event fds add to a specified epoll instance, 'elist' will hold the rte_epoll_event object pointer.
> > > 
> > > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > > ---
> > > v7 changes:
> > >  - add eptrs[], it's used to store the register rte_epoll_event instances.
> > >  - add vec_en, to log the vector capability status.
> > > 
> > > v6 changes:
> > >  - add mapping table between irq vector number and queue id.
> > > 
> > > v5 changes:
> > >  - Create this new patch file for changed struct rte_intr_handle that
> > >    other patches depend on, to avoid breaking git bisect.
> > > 
> > >  lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h | 10 ++++++++++
> > >  1 file changed, 10 insertions(+)
> > > 
> > > diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > index 6a159c7..27174df 100644
> > > --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > @@ -38,6 +38,8 @@
> > >  #ifndef _RTE_LINUXAPP_INTERRUPTS_H_
> > >  #define _RTE_LINUXAPP_INTERRUPTS_H_
> > >  
> > > +#define RTE_MAX_RXTX_INTR_VEC_ID     32
> > > +
> > >  enum rte_intr_handle_type {
> > >  	RTE_INTR_HANDLE_UNKNOWN = 0,
> > >  	RTE_INTR_HANDLE_UIO,      /**< uio device handle */
> > > @@ -48,6 +50,8 @@ enum rte_intr_handle_type {
> > >  	RTE_INTR_HANDLE_MAX
> > >  };
> > >  
> > > +struct rte_epoll_event;
> > > +
> > >  /** Handle for interrupts. */
> > >  struct rte_intr_handle {
> > >  	union {
> > > @@ -57,6 +61,12 @@ struct rte_intr_handle {
> > >  	};
> > >  	int fd;	 /**< interrupt event file descriptor */
> > >  	enum rte_intr_handle_type type;  /**< handle type */
> > > +	uint32_t max_intr;               /**< max interrupt requested */
> > > +	uint32_t nb_efd;                 /**< number of available efds */
> > > +	int efds[RTE_MAX_RXTX_INTR_VEC_ID];  /**< intr vectors/efds mapping */
> > > +	struct rte_epoll_event *elist[RTE_MAX_RXTX_INTR_VEC_ID];
> > > +					 /**< intr vector epoll event ptr */
> > > +	int *intr_vec;                   /**< intr vector number array */
> > >  };
> > >    
> > 
> > This is going to be ABI breaking if this from test_interrupts.c:
> > static struct rte_intr_handle intr_handles[TEST_INTERRUPT_HANDLE_MAX];
> > 
> > is a plausible way of using this structure.  Even putting the data at the end of
> > the structure won't help, as the array indices are off
> 
> This needs to go in 2.0 and 2.0 has to have new ABI anyway.
> 
We've already released 2.0, I think you mean 2.1, but 2.1 can't have a new ABI
because we didn't announce it in 1.8.  The earliest we can update the ABI
(according to the ABI docs) at this point is 2.2, since we need to announce the
change in 2.1, then make it in 2.2

Neil

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [RFC PATCHv2 0/2] pktdev as wrapper type
  @ 2015-05-21 12:12  3%             ` Richardson, Bruce
  0 siblings, 0 replies; 200+ results
From: Richardson, Bruce @ 2015-05-21 12:12 UTC (permalink / raw)
  To: Neil Horman, Marc Sune; +Cc: dev



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Neil Horman
> Sent: Wednesday, May 20, 2015 7:47 PM
> To: Marc Sune
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC PATCHv2 0/2] pktdev as wrapper type
> 
> On Wed, May 20, 2015 at 07:01:02PM +0200, Marc Sune wrote:
> >
> >
> > On 20/05/15 12:28, Neil Horman wrote:
> > >On Wed, May 20, 2015 at 12:05:00PM +0200, Marc Sune wrote:
> > >>
> > >>On 20/05/15 10:31, Thomas Monjalon wrote:
> > >>>2015-05-19 12:31, Bruce Richardson:
> > >>>>On Mon, May 11, 2015 at 05:29:39PM +0100, Bruce Richardson wrote:
> > >>>>>Hi all,
> > >>>>>
> > >>>>>after a small amount of offline discussion with Marc Sune, here
> > >>>>>is an alternative proposal for a higher-level interface - aka
> > >>>>>pktdev - to allow a common Rx/Tx API across device types handling
> > >>>>>mbufs [for now, ethdev, ring and KNI]. The key code is in the
> > >>>>>first patch of the set - the second is an example of a trivial
> usecase.
> > >>>>>
> > >>>>>What is different about this to previously:
> > >>>>>* wrapper class, so no changes to any existing ring, ethdev
> > >>>>>implementations
> > >>>>>* use of function pointers for RX/TX with an API that maps to
> ethdev
> > >>>>>   - this means there is little/no additional overhead for ethdev
> calls
> > >>>>>   - inline special case for rings, to accelerate that. Since we
> are at a
> > >>>>>     higher level, we can special case process some things if
> appropriate. This
> > >>>>>     means the impact to ring ops is one (predictable) branch per
> > >>>>>burst
> > >>>>>* elimination of the queue abstraction. For the ring and KNI, there
> is no
> > >>>>>   concept of queues, so we just wrap the functions directly (no
> need even for
> > >>>>>   wrapper functions, the api's match so we can call directly).
> This also
> > >>>>>   means:
> > >>>>>   - adding in features per-queue, is far easier as we don't need
> to worry about
> > >>>>>     having arrays of multiple queues. For example:
> > >>>>>   - adding in buffering on TX (or RX) is easier since again we
> only have a
> > >>>>>     single queue.
> > >>>>>* thread safety is made easier using a wrapper. For a MP ring, we
> can create
> > >>>>>   multiple pktdevs around it, and each thread will then be able to
> use their
> > >>>>>   own copy, with their own buffering etc.
> > >>>>>
> > >>>>>However, at this point, I'm just looking for general feedback on
> > >>>>>this as an approach. I think it's quite flexible - even more so
> > >>>>>than the earlier proposal we had. It's less proscriptive and
> doesn't make any demands on any other libs.
> > >>>>>
> > >>>>>Comments/thoughts welcome.
> > >>>>Any comments on this RFC before I see about investing further time
> > >>>>in it to clean it up a bit and submit as a non-RFC patchset for
> merge in 2.1?
> > >>>I would say there are 2 possible approaches for KNI and ring
> handling:
> > >>>1/ You Bruce, Marc and Keith are advocating for a layer on top of
> > >>>ethdev, ring, KNI and possibly other devices, which uses mbuf. The
> > >>>set of functions is simpler than ethdev but the data structure is
> > >>>mbuf which is related to ethdev layer.
> > >>>2/ Konstantin and Neil talked about keeping mbuf for ethdev layer
> > >>>and related libs only. Ring and KNI could have an ethdev API with a
> > >>>reduced set of implemented functions. Crypto devices could adopt a
> > >>>specific crypto API and an ethdev API at the same time.
> > >>I don't fully understand which APIs you meant by non-ethdev. This
> > >>pktdev wrapper proposal abstracts RX and TX functions only, and all
> > >>of these are using mbufs as the packet buffer abstraction right now
> anyway (ethdev).
> > >>
> > >He's referring to future device classes (like crypto devices), which
> > >ostensibly would make use of the pktdev API.  My argument (and I
> > >think Thomas') is that if a bit of hardware can be made to operate as
> > >a packet sending/receiving device, then its just as reasonable to use
> > >the existing ethdev api rather than some other restricted version of
> > >it (pktdev)
> > >
> > >>This approach does not preclude that different libraries expose
> > >>other API calls. In fact they will have to; setup the port/device
> > >>... It is just a higher level API, so that you don't have to check
> > >>the type of port in your DPDK application I/O loop, minimizing user's
> code.
> > >>
> > >No argument there.  But if thats the case (and I agree that it is),
> > >an application will implicitly have to know what what type of device
> > >it is, because it (the application) will need to understand the
> specific API it is writing to.
> > >
> > >>Or were you in 2) thinking about creating a different "packet buffer"
> > >>abstraction, independent from the ethdev, and then map the different
> > >>port specifics (e.g. mbuf) to this new abstraction?
> > >>
> > >My argument was to just leave the ethdev api alone.  If a device
> > >class can be made to look like a packet forwarding device, then use
> > >the existing ethdev api to implement it.
> > >
> > >>>I feel it's cleaner, more generic and more maintainable to have
> > >>>drivers implementing one or several stable APIs instead of having
> > >>>some restricted wrappers to update.
> > >>This would be a separate library _on top_ of the existing APIs, and
> > >>it has the advantage to simplify the DPDK user's application code
> > >>when an application needs to deal with several types of port, as
> > >>shown in the example that Bruce provided in PATCH #2.
> > >>
> > >But thats already the purpose of the ethdev api.  Different types of
> > >hardware/software can be made to look like the same thing (an ethdev)
> > >from an application standpoint.  Adding this pktdev layer does
> > >nothing but that, add a layer.  If you want restricted functionality
> > >of an interface, thats ok, ethdev offers that ability.  unimplemented
> > >methods in a pmd cause the ethdev api to return EOPNOTSUP to the
> > >calling application, so the application knows when a given ethdev can't
> do some aspect of what an ethdev is.
> >
> > Hi Neil,
> >
> > Thanks for the clarifications. Now I understand the concern Thomas
> > expressed. Using ethdev API (port-ids) was actually my first
> > suggestion
> > here:
> >
> > http://permalink.gmane.org/gmane.comp.networking.dpdk.devel/13545
> >
> > And to be honest, what I was expecting when I was reading for the
> > first time DPDK's APIs. It is indeed an option. However, if we take a
> look at the API:
> >
> > http://www.dpdk.org/doc/api/rte__ethdev_8h.html
> >
> > none of the API calls, except the burst RX/TX and, perhaps, the
> > callbacks, would be used by devices other than NICs. It seems going a
> > bit too far using it, but ofc possible.
> >
> So, I'll make 3 counter-arguments here:
> 
> 1) To your point about the ethdev api being much larger than what a non-
> ethernet device could use, I'll tacitly agree, but indicate that its not
> relevant.  If you want a bit of hardware that isn't a network interface to
> behave like a network interface, then there are going to be a lot of
> aspects of a network interface that it just can't do.  Thats true
> regardless of how you implement that.  In the pktdev model, you prevent
> those operations from being an option at all, while in the current ethdev
> model, you simply get a return code of EOPNOTSUP, and the application does
> the right thing (which is to say, it understands that this hardware
> doesn't need that aspect of network card management and goes on with its
> day).  I assert that, because we already have the ethdev api, its a lower
> time investment to simply reuse it
> 
> 2) To the implication that we aren't working with NICs here, you're
> correct.  As you note in your previous message, the pktdev interface is in
> no way the end all and be all of device model design.  You will need to
> add other api calls to manage the device.  If thats the case, then don't
> shoehorn any one particular aspect of the API to fit a device model that
> the device doesn't conform to.
> Design the API so that it best reflects the hardware behavior.
> 
> 
> 3) An addendum to the point about hardware not being a NIC (and you didn't
> make this point directly above, but I think you may have mentioned it
> previously), sometimes you want a device to behave like another device for
> the purposes of using generic code to talk to several device types.  While
> this is true, this is a case for device translation and use, not for
> carving out parts of an api to make something more generic.  The use case
> I cited previously was an ipsec tunnel.  An ipsec tunnel uses
> cryptography, and crypto device apis to encrypt decrypt packet data.  The
> common way to implement this is to design a crypto api that accepts a
> block of data in a way most conducive to the hardware, and then implement
> a network driver (that uses whatever ethernet api, in this case the ethdev
> api), to integrate with the network datapath.  With this model, the ipsec
> tunnel uses the full range of the ethdev api (or a good deal more of it),
> and the crypto api is optimized to work with crypto acceleration hardware.
> 
> > In essence, rte_ether(rte_ethdev.h) right now has: i) NIC setup;
> > general configuration, queue config, fdir, offloads, hw stuff like
> > leds... ii) RX/TX routines and callbacks iii) Stats and queue stats
> > iv) other utils for ethernet stuff (rte_ether.h)
> >
> The key that I'm taking away here is 'right now'.  Its already written, so
> theres no work involved in implementing it for new devices.
> 
> > i) is clearly HW specific, and does only apply to NICs/ASICs (e.g.
> > FM10k)
> Ok, so it only applies to NIC's, thats fine.  If you want to write a
> driver that leaves those methods for the pmd set to NULL, the ethdev
> library will correctly return EOPNOTSUPP to the calling applications.
> 
> > while ii) and iii) are things that could be abstracted beyond NICs,
> > like KNI, rte_ring, crypto... (iv could be moved into some
> > utils/protocol parsing libraries).
> >
> Right again, so let those device types implement the appropriate portions
> of the pmd driver structure that match to what they support.  Everything
> else is handled by the ethdev library automatically.
> 
> > Perhaps these two groups could be split into two different libraries
> > and then ii) and iii) together would be something like ~ rte_pktdev
> > (stats are missing on the proposed patch), while i) would be
> > rte_ether, or rte_nic if we think it is a better name.
> >
> The point I'm trying to get to is, why split at all?  Theres just no need
> that I can see. The example I would set here is the dummy driver in linux.
> Its a net device that only serves to act as a sink for network packets.
> It still uses the network driver interface, but of the 65-ish methods that
> the netdevice model in linux offers, it implements 8 (or approximately
> 12%).  The other unused method are just that, unused, and thats ok.
> Applications that try to do things like set flow director options, or
> speed/duplex options gets a return code that effectively says "This device
> can't do that", and thats ok.  Thats what we need to be doing here.
> Instead of finding a way to codify the subset of functionality that other
> devices might be able to implement, for those cases where we want other
> hardware to act like a netdevice, lets just let those devices pick and
> choose what to implement, and the interface we already have will
> communicate with applications appropriately.
> 
> Regards
> Neil
> 

Hi Neil,

First off, a note on the naming and the basic concept: this proposal is not trying to make everything look like a NIC; rather, we are trying to make a bunch of different components appear as generic sources/sinks for pkts or mbufs. From my point of view, it's an important difference.

Be that as it may, I'd like to first deal with the whole idea of the application needing to know about the type of the underlying device. For me, this is a critical point. Applications - such as all our sample apps - have essentially two parts:
* an initialization and control part
* a data-path part.
These two parts are very, very different in what they do. The initialization part - which e.g. in testpmd continues on in the form of the cmdline interface as a control part - does the initial setup of devices/rings/etc. and potentially makes use of the full APIs provided by the ethdev interface. It's also not performance critical, as evidenced by the fact that the APIs used there have additional checks for valid input etc. The second, data-path part, is entirely the part that this proposal is targeting. This data-path is completely separate in the application, is highly performance sensitive, and rarely, if ever needs to know or care about the actual source of its data. So the idea behind this library is that you can write your initialization control parts of your app as-now, fully aware of the underlying types involved, and without ever using rx/tx burst. Then when you have the various devices and DPDK objects set up, you spawn your data-path threads and pass each one the set of input and outputs it needs, in the form of your generic packet source objects.

This distinction is also why I'm not particularly interested in the ability to pass in different objects via cmdline, as is done now with pcap/ring PMDs. That's ok when you want the initialization part of your app to be oblivious to the underlying types and see everything under a common abstraction, but when it's only the data path that you want to work with generic packet objects, that's unnecessary, and the initialization path should be able to convert any of the required input/output sources to a generic type using a single API call. [This doesn't rule out specifying different inputs/outputs on the commandline, it's just you can specify them as their native types, rather than hiding them under a common API at the control-path level].

As for what that abstraction should be: there are a number of issues I see with ethdev, as it is right now, as that common abstraction.

1. The use of port-ids. I think port ids are fine for numbering physical ports, but I think pointers are better for passing around objects to be worked on by the data path. What is more concerning [than my opinion on numbers vs pointers :-)] is the fact that we are limited to 256 port ids. Yes, that can be changed, but the impacts are massive. To change the type, we would break the ABI for every single ethdev API, as well as likely other functions too. Furthermore, increasing the size of the port id would require a change to the internals of the mbuf structure, which would lead to the ABI being broken for any function that uses mbufs. By adopting an API, such as proposed, which uses pointers, we avoid the problem, as port ids would only apply to ethdevs.

2. Simplicity. While you say that it's fine for an ethdev not to implement all the functions in the ethdev API, to create a proper PMD like you are proposing involves a good deal more work than using the proposed pktdev abstraction. If it's to appear like a proper NIC to the control paths, as well as the init paths - which seems to be what you imply - you really do need to implement additional functions like queue setup, and start and stop. While it's true that the library can return -ENOTSUP on an unsupported function, I don't believe any of our sample apps are set up to check for this on NIC setup, and therefore I would hazard a guess that real-world customer apps aren't set up to handle it either.

3. Performance for rings. While not applicable for all cases, the performance of the rings under an ethdev abstraction would not be the same as here. For example, when polling on an empty ring for packets, the current time taken by our ring rx/tx functions is literally a few cycles (as tested by the rings autotest). If these functions cannot be inlined, that cycle count goes up to 3x what it is now. [I observed this previously when reworking the rings code, and the code-size led to icc no longer doing inlining. In that case, the gcc code for empty polling was indeed 3 times faster than the icc version. Adding forced inlining made things equal again]. This metric of empty polling may seem trivial i.e. "if there are no packets, why does it matter how long it takes?", but is important in real-world cases where you are pulling packets from multiple sources, and your application is only currently dealing with input on one of them. [Often tested to see how an application behaves in a single-flow situation - a metric our customers do look at]. Even in the non-empty situation, for smaller packet bursts, the overhead of the function call may slow things down. [For larger bursts, e.g. 32, the effect should not be noticeable, I suspect].

The final point I'd make here is that what is proposed is not prescriptive - whatever a future API for handling other device types, such as crypto devices, may look like can be decided separately from this pktdev implementation. Whether one chooses pktdev or ethdev as a common abstraction layer type, the decision of whether or not a particular object type is allowed to be made to look like that common type can be made entirely independently, and based upon whether or not such a type-conversion makes sense.

Regards,
/Bruce

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v8 01/11] eal/linux: add interrupt vectors support in intr_handle
  @ 2015-05-21 10:32  3%         ` Neil Horman
       [not found]               ` <20150521104300.00757b4e@urahara>
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-05-21 10:32 UTC (permalink / raw)
  To: Cunming Liang; +Cc: dev, liang-min.wang, shemming

On Thu, May 21, 2015 at 04:55:53PM +0800, Cunming Liang wrote:
> The patch adds interrupt vectors support in rte_intr_handle.
> 'vec_en' is set when interrupt vectors are detected and associated event fds are set.
> Those event fds are stored in efds[].
> 'intr_vec' is reserved for device driver to initialize the vector mapping table.
> When the event fds add to a specified epoll instance, 'elist' will hold the rte_epoll_event object pointer.
> 
> Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> ---
> v7 changes:
>  - add eptrs[], it's used to store the register rte_epoll_event instances.
>  - add vec_en, to log the vector capability status.
> 
> v6 changes:
>  - add mapping table between irq vector number and queue id.
> 
> v5 changes:
>  - Create this new patch file for changed struct rte_intr_handle that
>    other patches depend on, to avoid breaking git bisect.
> 
>  lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h | 10 ++++++++++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> index 6a159c7..27174df 100644
> --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> @@ -38,6 +38,8 @@
>  #ifndef _RTE_LINUXAPP_INTERRUPTS_H_
>  #define _RTE_LINUXAPP_INTERRUPTS_H_
>  
> +#define RTE_MAX_RXTX_INTR_VEC_ID     32
> +
>  enum rte_intr_handle_type {
>  	RTE_INTR_HANDLE_UNKNOWN = 0,
>  	RTE_INTR_HANDLE_UIO,      /**< uio device handle */
> @@ -48,6 +50,8 @@ enum rte_intr_handle_type {
>  	RTE_INTR_HANDLE_MAX
>  };
>  
> +struct rte_epoll_event;
> +
>  /** Handle for interrupts. */
>  struct rte_intr_handle {
>  	union {
> @@ -57,6 +61,12 @@ struct rte_intr_handle {
>  	};
>  	int fd;	 /**< interrupt event file descriptor */
>  	enum rte_intr_handle_type type;  /**< handle type */
> +	uint32_t max_intr;               /**< max interrupt requested */
> +	uint32_t nb_efd;                 /**< number of available efds */
> +	int efds[RTE_MAX_RXTX_INTR_VEC_ID];  /**< intr vectors/efds mapping */
> +	struct rte_epoll_event *elist[RTE_MAX_RXTX_INTR_VEC_ID];
> +					 /**< intr vector epoll event ptr */
> +	int *intr_vec;                   /**< intr vector number array */
>  };
>  

This is going to be ABI breaking if this from test_interrupts.c:
static struct rte_intr_handle intr_handles[TEST_INTERRUPT_HANDLE_MAX];

is a plausible way of using this structure.  Even putting the data at the end of
the structure won't help, as the array indices are off
Neil

>  #endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */
> -- 
> 1.8.1.4
> 
> 

^ permalink raw reply	[relevance 3%]

* [dpdk-dev] [PATCH v8 08/11] ethdev: add rx intr enable, disable and ctl functions
  2015-05-21  8:55  2%     ` [dpdk-dev] [PATCH v8 00/11] Interrupt mode PMD Cunming Liang
  @ 2015-05-21  8:56  2%       ` Cunming Liang
  2015-05-29  8:45  4%       ` [dpdk-dev] [PATCH v9 00/12] Interrupt mode PMD Cunming Liang
  2 siblings, 0 replies; 200+ results
From: Cunming Liang @ 2015-05-21  8:56 UTC (permalink / raw)
  To: dev; +Cc: shemming, liang-min.wang

The patch adds two dev_ops functions to enable and disable rx queue interrupts.
In addition, it adds rte_eth_dev_rx_intr_ctl/rte_eth_dev_rx_intr_ctl_q to support per port or per queue rx intr event set.

Signed-off-by: Danny Zhou <danny.zhou@intel.com>
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
v8 changes
 - add additional check for EEXIST

v7 changes
 - remove rx_intr_vec_get
 - add rx_intr_ctl and rx_intr_ctl_q

v6 changes
 - add rx_intr_vec_get to retrieve the vector num of the queue.

v5 changes
 - Rebase the patchset onto the HEAD

v4 changes
 - Export interrupt enable/disable functions for shared libraries
 - Put new functions at the end of eth_dev_ops to avoid breaking ABI

v3 changes
 - Add return value for interrupt enable/disable functions

 lib/librte_ether/rte_ethdev.c          | 127 +++++++++++++++++++++++++++++++++
 lib/librte_ether/rte_ethdev.h          | 104 +++++++++++++++++++++++++++
 lib/librte_ether/rte_ether_version.map |   4 ++
 3 files changed, 235 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 024fe8b..1a47d9a 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -3281,6 +3281,133 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 	}
 	rte_spinlock_unlock(&rte_eth_dev_cb_lock);
 }
+
+int
+rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data)
+{
+	uint32_t vec;
+	struct rte_eth_dev *dev;
+	struct rte_intr_handle *intr_handle;
+	uint16_t qid;
+	int rc;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (dev == NULL) {
+		PMD_DEBUG_TRACE("Invalid port device\n");
+		return -ENODEV;
+	}
+
+	intr_handle = &dev->pci_dev->intr_handle;
+	if (!intr_handle->intr_vec) {
+		PMD_DEBUG_TRACE("RX Intr vector unset\n");
+		return -EPERM;
+	}
+
+	for (qid = 0; qid < dev->data->nb_rx_queues; qid++) {
+		vec = intr_handle->intr_vec[qid];
+		rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec,
+				     data, rte_eth_dev_socket_id(port_id));
+		if (rc && rc != -EEXIST) {
+			PMD_DEBUG_TRACE("p %d q %d rx ctl error"
+					" op %d epfd %d vec %u\n",
+					port_id, qid, op, epfd, vec);
+		}
+	}
+
+	return 0;
+}
+
+int
+rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
+			  int epfd, int op, void *data)
+{
+	uint32_t vec;
+	struct rte_eth_dev *dev;
+	struct rte_intr_handle *intr_handle;
+	int rc;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (dev == NULL) {
+		PMD_DEBUG_TRACE("Invalid port device\n");
+		return -ENODEV;
+	}
+
+	if (queue_id >= dev->data->nb_rx_queues) {
+		PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
+		return -EINVAL;
+	}
+
+	intr_handle = &dev->pci_dev->intr_handle;
+	if (!intr_handle->intr_vec) {
+		PMD_DEBUG_TRACE("RX Intr vector unset\n");
+		return -EPERM;
+	}
+
+	vec = intr_handle->intr_vec[queue_id];
+	rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec,
+			     data, rte_eth_dev_socket_id(port_id));
+	if (rc && rc != -EEXIST) {
+		PMD_DEBUG_TRACE("p %d q %d rx ctl error"
+				" op %d epfd %d vec %u\n",
+				port_id, queue_id, op, epfd, vec);
+		return rc;
+	}
+
+	return 0;
+}
+
+int
+rte_eth_dev_rx_intr_enable(uint8_t port_id,
+			   uint16_t queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (dev == NULL) {
+		PMD_DEBUG_TRACE("Invalid port device\n");
+		return -ENODEV;
+	}
+
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP);
+	return (*dev->dev_ops->rx_queue_intr_enable)(dev, queue_id);
+}
+
+int
+rte_eth_dev_rx_intr_disable(uint8_t port_id,
+			    uint16_t queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (dev == NULL) {
+		PMD_DEBUG_TRACE("Invalid port device\n");
+		return -ENODEV;
+	}
+
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP);
+	return (*dev->dev_ops->rx_queue_intr_disable)(dev, queue_id);
+}
+
 #ifdef RTE_NIC_BYPASS
 int rte_eth_dev_bypass_init(uint8_t port_id)
 {
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 4648290..e5efec0 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -829,6 +829,8 @@ struct rte_eth_fdir {
 struct rte_intr_conf {
 	/** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */
 	uint16_t lsc;
+	/** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */
+	uint16_t rxq;
 };
 
 /**
@@ -1034,6 +1036,14 @@ typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
 				    const struct rte_eth_txconf *tx_conf);
 /**< @internal Setup a transmit queue of an Ethernet device. */
 
+typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
+				    uint16_t rx_queue_id);
+/**< @internal Enable interrupt of a receive queue of an Ethernet device. */
+
+typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
+				    uint16_t rx_queue_id);
+/**< @internal Disable interrupt of a receive queue of an Ethernet device. */
+
 typedef void (*eth_queue_release_t)(void *queue);
 /**< @internal Release memory resources allocated by given RX/TX queue. */
 
@@ -1385,6 +1395,10 @@ struct eth_dev_ops {
 	/** Get current RSS hash configuration. */
 	rss_hash_conf_get_t rss_hash_conf_get;
 	eth_filter_ctrl_t              filter_ctrl;          /**< common filter control*/
+
+	/** Enable/disable Rx queue interrupt. */
+	eth_rx_enable_intr_t       rx_queue_intr_enable; /**< Enable Rx queue interrupt. */
+	eth_rx_disable_intr_t      rx_queue_intr_disable; /**< Disable Rx queue interrupt.*/
 };
 
 /**
@@ -2867,6 +2881,96 @@ void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 				enum rte_eth_event_type event);
 
 /**
+ * When there is no rx packet coming in Rx Queue for a long time, we can
+ * sleep lcore related to RX Queue for power saving, and enable rx interrupt
+ * to be triggered when rx packet arrives.
+ *
+ * The rte_eth_dev_rx_intr_enable() function enables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_intr_enable(uint8_t port_id,
+			       uint16_t queue_id);
+
+/**
+ * When lcore wakes up from rx interrupt indicating packet coming, disable rx
+ * interrupt and returns to polling mode.
+ *
+ * The rte_eth_dev_rx_intr_disable() function disables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_intr_disable(uint8_t port_id,
+				uint16_t queue_id);
+
+/**
+ * RX Interrupt control per port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param epfd
+ *   Epoll instance fd which the intr vector is associated to.
+ *   Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance.
+ * @param op
+ *   The operation to be performed for the vector.
+ *   Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}.
+ * @param data
+ *   User raw data.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data);
+
+/**
+ * RX Interrupt control per queue.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param epfd
+ *   Epoll instance fd which the intr vector is associated to.
+ *   Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance.
+ * @param op
+ *   The operation to be performed for the vector.
+ *   Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}.
+ * @param data
+ *   User raw data.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
+			  int epfd, int op, void *data);
+
+/**
  * Turn on the LED on the Ethernet device.
  * This function turns on the LED on the Ethernet device.
  *
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index a2d25a6..2799b99 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -48,6 +48,10 @@ DPDK_2.0 {
 	rte_eth_dev_rss_hash_update;
 	rte_eth_dev_rss_reta_query;
 	rte_eth_dev_rss_reta_update;
+	rte_eth_dev_rx_intr_ctl;
+	rte_eth_dev_rx_intr_ctl_q;
+	rte_eth_dev_rx_intr_disable;
+	rte_eth_dev_rx_intr_enable;
 	rte_eth_dev_rx_queue_start;
 	rte_eth_dev_rx_queue_stop;
 	rte_eth_dev_set_link_down;
-- 
1.8.1.4

^ permalink raw reply	[relevance 2%]

* [dpdk-dev] [PATCH v8 00/11] Interrupt mode PMD
  2015-05-05  5:39  3%   ` [dpdk-dev] From: Cunming Liang <cunming.liang@intel.com> Cunming Liang
  2015-05-05  5:39  2%     ` [dpdk-dev] [PATCH v7 07/10] ethdev: add rx intr enable, disable and ctl functions Cunming Liang
@ 2015-05-21  8:55  2%     ` Cunming Liang
                           ` (2 more replies)
  1 sibling, 3 replies; 200+ results
From: Cunming Liang @ 2015-05-21  8:55 UTC (permalink / raw)
  To: dev; +Cc: shemming, liang-min.wang

v8 changes
 - remove condition check for only vfio-msix
 - add multiplex intr support when only one intr vector allowed
 - lsc and rxq interrupt runtime enable decision
 - add safe event delete while the event wakeup execution happens

v7 changes
 - decouple epoll event and intr operation
 - add condition check in the case intr vector is disabled
 - renaming some APIs

v6 changes
 - split rte_intr_wait_rx_pkt into two APIs 'wait' and 'set'.
 - rewrite rte_intr_rx_wait/rte_intr_rx_set.
 - using vector number instead of queue_id as interrupt API params.
 - patch reorder and split.

v5 changes
 - Rebase the patchset onto the HEAD
 - Isolate ethdev from EAL for new-added wait-for-rx interrupt function
 - Export wait-for-rx interrupt function for shared libraries
 - Split-off a new patch file for changed struct rte_intr_handle that
   other patches depend on, to avoid breaking git bisect
 - Change sample application to accommodate EAL function spec change
   accordingly

v4 changes
 - Export interrupt enable/disable functions for shared libraries
 - Adjust position of new-added structure fields and functions to
   avoid breaking ABI
 
v3 changes
 - Add return value for interrupt enable/disable functions
 - Move spinlock from PMD to L3fwd-power
 - Remove unnecessary variables in e1000_mac_info
 - Fix miscellaneous review comments
 
v2 changes
 - Fix compilation issue in Makefile for missed header file.
 - Consolidate internal and community review comments of v1 patch set.
 
The patch series introduces low-latency one-shot rx interrupt into DPDK with
polling and interrupt mode switch control example.
 
DPDK userspace interrupt notification and handling mechanism is based on UIO
with the limitations below:
1) It is designed to handle LSC interrupt only with inefficient suspended
   pthread wakeup procedure (e.g. UIO wakes up LSC interrupt handling thread
   which then wakes up DPDK polling thread). In this way, it introduces
   non-deterministic wakeup latency for DPDK polling thread as well as packet
   latency if it is used to handle Rx interrupt.
2) UIO only supports a single interrupt vector which has to be shared by
   LSC interrupt and interrupts assigned to dedicated rx queues.
 
This patchset includes below features:
1) Enable one-shot rx queue interrupt in ixgbe PMD(PF & VF) and igb PMD(PF only).
2) Build on top of the VFIO mechanism instead of UIO, so it could support
   up to 64 interrupt vectors for rx queue interrupts.
3) Have 1 DPDK polling thread handle per Rx queue interrupt with a dedicated
   VFIO eventfd, which eliminates non-deterministic pthread wakeup latency in
   user space.
4) Demonstrate interrupts control APIs and userspace NAPI-like polling/interrupt
   switch algorithms in L3fwd-power example.

Known limitations:
1) It does not work for UIO because a single interrupt eventfd shared by LSC
   and rx queue interrupt handlers causes a mess.
2) LSC interrupt is not supported by VF driver, so it is by default disabled
   in L3fwd-power now. Feel free to turn it on if you want to support both LSC
   and rx queue interrupts on a PF.

Cunming Liang (11):
  eal/linux: add interrupt vectors support in intr_handle
  eal/linux: add rte_epoll_wait/ctl support
  eal/linux: add API to set rx interrupt event monitor
  eal/linux: fix comments typo on vfio msi
  eal/linux: add interrupt vectors handling on VFIO
  eal/linux: standalone intr event fd create support
  eal/bsd: dummy for new intr definition
  ethdev: add rx intr enable, disable and ctl functions
  ixgbe: enable rx queue interrupts for both PF and VF
  igb: enable rx queue interrupts for PF
  l3fwd-power: enable one-shot rx interrupt and polling/interrupt mode
    switch

 examples/l3fwd-power/main.c                        | 207 +++++++--
 lib/librte_eal/bsdapp/eal/eal_interrupts.c         |  20 +
 .../bsdapp/eal/include/exec-env/rte_interrupts.h   |  77 ++++
 lib/librte_eal/bsdapp/eal/rte_eal_version.map      |   5 +
 lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 351 +++++++++++++--
 .../linuxapp/eal/include/exec-env/rte_interrupts.h | 160 +++++++
 lib/librte_eal/linuxapp/eal/rte_eal_version.map    |   8 +
 lib/librte_ether/rte_ethdev.c                      | 127 ++++++
 lib/librte_ether/rte_ethdev.h                      | 104 +++++
 lib/librte_ether/rte_ether_version.map             |   4 +
 lib/librte_pmd_e1000/igb_ethdev.c                  | 292 +++++++++++--
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c                | 482 ++++++++++++++++++++-
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h                |   4 +
 13 files changed, 1715 insertions(+), 126 deletions(-)

-- 
1.8.1.4

^ permalink raw reply	[relevance 2%]

* Re: [dpdk-dev] Technical Steering Committee (TSC)
  @ 2015-05-19 20:21  3%         ` O'Driscoll, Tim
  0 siblings, 0 replies; 200+ results
From: O'Driscoll, Tim @ 2015-05-19 20:21 UTC (permalink / raw)
  To: Neil Horman, Thomas Monjalon; +Cc: dev


> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Neil Horman
> 
> On Tue, May 19, 2015 at 05:45:05PM +0200, Thomas Monjalon wrote:
> > 2015-05-19 11:34, Neil Horman:
> > > On Tue, May 19, 2015 at 07:43:14AM -0700, Stephen Hemminger wrote:
> > > > > Composition of the TSC should reflect contributions to the
> project, but be
> > > > > balanced so that no single party has an undue influence. It
> should also be
> > > > > kept to a manageable size(maybe 7?).
> > > > >
> > > > > The TSC should elect its own chair, who would have the deciding
> vote in
> > > > > the event that the TSC was deadlocked. Once in place, the TSC
> should
> > > > > approve any new members.
> > > > >
> > > > > Specific details on membership can be discussed and agreed
> later, if we
> > > > > agree on the creation of a TSC.
> > > >
> > > > TSC should be limited to those individuals and companies that have
> > > > contributed in a non-trivial way to the DPDK distributed code
> base.
> > > > It should not be a users group, or place for network vendors who
> take but
> > > > never give back.
> > > >
> > > +1
> > >
> > > It should also endeavour to only act as a fallback body for any
> issues commonly
> > > handled by the development community (patch acceptance/review, etc)
> >
> > I agree that it should be a fallback.
> > And I'm wondering how useful it would be: have we ever known such
> discussion or
> > conflict without finding a solution or a consensus?
> Well, I suppose the jury is still out on that, since there are ongoing
> problems,
> in the form of patch latency, and such.  But for the most part, no,
> problems
> tend to reach consensus resolution IMO

It's true that there aren't many obvious examples, although as Neil points out there are still some things that are ongoing. One issue that springs to mind where we didn't reach consensus was inclusion of ABI Versioning in 1.8. It was subsequently included in 2.0, but there were people who believed it should have been in 1.8.

The other issue with having to reach consensus on everything is that it tends toward a lowest common denominator approach, and can slow things down. There are times where a clear decision and then everybody moving forward is preferable.

> > By the way, is there a TSC in Linux netdev?
> >
> No, but there are TSC for many projects, including Openshift,
> freedesktop.org,
> etc.
> 
> Neil

^ permalink raw reply	[relevance 3%]

* [dpdk-dev] [PATCH v2 14/19] virtio: move virtio PMD to drivers/net
  @ 2015-05-15 15:56  1%     ` Bruce Richardson
  0 siblings, 0 replies; 200+ results
From: Bruce Richardson @ 2015-05-15 15:56 UTC (permalink / raw)
  To: dev

Move virtio PMD to drivers/net directory

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 drivers/net/Makefile                             |    2 +-
 drivers/net/virtio/Makefile                      |   60 +
 drivers/net/virtio/rte_pmd_virtio_version.map    |    4 +
 drivers/net/virtio/virtio_ethdev.c               | 1504 ++++++++++++++++++++++
 drivers/net/virtio/virtio_ethdev.h               |  124 ++
 drivers/net/virtio/virtio_logs.h                 |   70 +
 drivers/net/virtio/virtio_pci.c                  |  147 +++
 drivers/net/virtio/virtio_pci.h                  |  270 ++++
 drivers/net/virtio/virtio_ring.h                 |  163 +++
 drivers/net/virtio/virtio_rxtx.c                 |  815 ++++++++++++
 drivers/net/virtio/virtqueue.c                   |   70 +
 drivers/net/virtio/virtqueue.h                   |  325 +++++
 lib/Makefile                                     |    1 -
 lib/librte_pmd_virtio/Makefile                   |   60 -
 lib/librte_pmd_virtio/rte_pmd_virtio_version.map |    4 -
 lib/librte_pmd_virtio/virtio_ethdev.c            | 1504 ----------------------
 lib/librte_pmd_virtio/virtio_ethdev.h            |  124 --
 lib/librte_pmd_virtio/virtio_logs.h              |   70 -
 lib/librte_pmd_virtio/virtio_pci.c               |  147 ---
 lib/librte_pmd_virtio/virtio_pci.h               |  270 ----
 lib/librte_pmd_virtio/virtio_ring.h              |  163 ---
 lib/librte_pmd_virtio/virtio_rxtx.c              |  815 ------------
 lib/librte_pmd_virtio/virtqueue.c                |   70 -
 lib/librte_pmd_virtio/virtqueue.h                |  325 -----
 24 files changed, 3553 insertions(+), 3554 deletions(-)
 create mode 100644 drivers/net/virtio/Makefile
 create mode 100644 drivers/net/virtio/rte_pmd_virtio_version.map
 create mode 100644 drivers/net/virtio/virtio_ethdev.c
 create mode 100644 drivers/net/virtio/virtio_ethdev.h
 create mode 100644 drivers/net/virtio/virtio_logs.h
 create mode 100644 drivers/net/virtio/virtio_pci.c
 create mode 100644 drivers/net/virtio/virtio_pci.h
 create mode 100644 drivers/net/virtio/virtio_ring.h
 create mode 100644 drivers/net/virtio/virtio_rxtx.c
 create mode 100644 drivers/net/virtio/virtqueue.c
 create mode 100644 drivers/net/virtio/virtqueue.h
 delete mode 100644 lib/librte_pmd_virtio/Makefile
 delete mode 100644 lib/librte_pmd_virtio/rte_pmd_virtio_version.map
 delete mode 100644 lib/librte_pmd_virtio/virtio_ethdev.c
 delete mode 100644 lib/librte_pmd_virtio/virtio_ethdev.h
 delete mode 100644 lib/librte_pmd_virtio/virtio_logs.h
 delete mode 100644 lib/librte_pmd_virtio/virtio_pci.c
 delete mode 100644 lib/librte_pmd_virtio/virtio_pci.h
 delete mode 100644 lib/librte_pmd_virtio/virtio_ring.h
 delete mode 100644 lib/librte_pmd_virtio/virtio_rxtx.c
 delete mode 100644 lib/librte_pmd_virtio/virtqueue.c
 delete mode 100644 lib/librte_pmd_virtio/virtqueue.h

diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 267d386..48dd328 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -42,7 +42,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_NULL) += null
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += pcap
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += ring
-#DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += librte_pmd_virtio
+DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 #DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += librte_pmd_vmxnet3
 #DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt
 
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
new file mode 100644
index 0000000..21ff7e5
--- /dev/null
+++ b/drivers/net/virtio/Makefile
@@ -0,0 +1,60 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_virtio.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+EXPORT_MAP := rte_pmd_virtio_version.map
+
+LIBABIVER := 1
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
+
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_net lib/librte_malloc
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/virtio/rte_pmd_virtio_version.map b/drivers/net/virtio/rte_pmd_virtio_version.map
new file mode 100644
index 0000000..ef35398
--- /dev/null
+++ b/drivers/net/virtio/rte_pmd_virtio_version.map
@@ -0,0 +1,4 @@
+DPDK_2.0 {
+
+	local: *;
+};
diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
new file mode 100644
index 0000000..e63dbfb
--- /dev/null
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -0,0 +1,1504 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#ifdef RTE_EXEC_ENV_LINUXAPP
+#include <dirent.h>
+#include <fcntl.h>
+#endif
+
+#include <rte_ethdev.h>
+#include <rte_memcpy.h>
+#include <rte_string_fns.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_pci.h>
+#include <rte_ether.h>
+#include <rte_common.h>
+
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_dev.h>
+
+#include "virtio_ethdev.h"
+#include "virtio_pci.h"
+#include "virtio_logs.h"
+#include "virtqueue.h"
+
+
+static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
+static int  virtio_dev_configure(struct rte_eth_dev *dev);
+static int  virtio_dev_start(struct rte_eth_dev *dev);
+static void virtio_dev_stop(struct rte_eth_dev *dev);
+static void virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
+static void virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
+static void virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
+static void virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
+static void virtio_dev_info_get(struct rte_eth_dev *dev,
+				struct rte_eth_dev_info *dev_info);
+static int virtio_dev_link_update(struct rte_eth_dev *dev,
+	__rte_unused int wait_to_complete);
+
+static void virtio_set_hwaddr(struct virtio_hw *hw);
+static void virtio_get_hwaddr(struct virtio_hw *hw);
+
+static void virtio_dev_rx_queue_release(__rte_unused void *rxq);
+static void virtio_dev_tx_queue_release(__rte_unused void *txq);
+
+static void virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
+static void virtio_dev_stats_reset(struct rte_eth_dev *dev);
+static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
+static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
+				uint16_t vlan_id, int on);
+static void virtio_mac_addr_add(struct rte_eth_dev *dev,
+				struct ether_addr *mac_addr,
+				uint32_t index, uint32_t vmdq __rte_unused);
+static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
+static void virtio_mac_addr_set(struct rte_eth_dev *dev,
+				struct ether_addr *mac_addr);
+
+static int virtio_dev_queue_stats_mapping_set(
+	__rte_unused struct rte_eth_dev *eth_dev,
+	__rte_unused uint16_t queue_id,
+	__rte_unused uint8_t stat_idx,
+	__rte_unused uint8_t is_rx);
+
+/*
+ * The set of PCI devices this driver supports
+ */
+static const struct rte_pci_id pci_id_virtio_map[] = {
+
+#define RTE_PCI_DEV_ID_DECL_VIRTIO(vend, dev) {RTE_PCI_DEVICE(vend, dev)},
+#include "rte_pci_dev_ids.h"
+
+{ .vendor_id = 0, /* sentinel */ },
+};
+
+static int
+virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
+		int *dlen, int pkt_num)
+{
+	uint16_t head = vq->vq_desc_head_idx, i;
+	int k, sum = 0;
+	virtio_net_ctrl_ack status = ~0;
+	struct virtio_pmd_ctrl result;
+
+	ctrl->status = status;
+
+	if (!vq->hw->cvq) {
+		PMD_INIT_LOG(ERR,
+			     "%s(): Control queue is not supported.",
+			     __func__);
+		return -1;
+	}
+
+	PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
+		"vq->hw->cvq = %p vq = %p",
+		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
+
+	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
+		return -1;
+
+	memcpy(vq->virtio_net_hdr_mz->addr, ctrl,
+		sizeof(struct virtio_pmd_ctrl));
+
+	/*
+	 * Format is enforced in qemu code:
+	 * One TX packet for header;
+	 * At least one TX packet per argument;
+	 * One RX packet for ACK.
+	 */
+	vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT;
+	vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr;
+	vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
+	vq->vq_free_cnt--;
+	i = vq->vq_ring.desc[head].next;
+
+	for (k = 0; k < pkt_num; k++) {
+		vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT;
+		vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr
+			+ sizeof(struct virtio_net_ctrl_hdr)
+			+ sizeof(ctrl->status) + sizeof(uint8_t)*sum;
+		vq->vq_ring.desc[i].len = dlen[k];
+		sum += dlen[k];
+		vq->vq_free_cnt--;
+		i = vq->vq_ring.desc[i].next;
+	}
+
+	vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
+	vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr
+			+ sizeof(struct virtio_net_ctrl_hdr);
+	vq->vq_ring.desc[i].len = sizeof(ctrl->status);
+	vq->vq_free_cnt--;
+
+	vq->vq_desc_head_idx = vq->vq_ring.desc[i].next;
+
+	vq_update_avail_ring(vq, head);
+	vq_update_avail_idx(vq);
+
+	PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
+
+	virtqueue_notify(vq);
+
+	rte_rmb();
+	while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) {
+		rte_rmb();
+		usleep(100);
+	}
+
+	while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) {
+		uint32_t idx, desc_idx, used_idx;
+		struct vring_used_elem *uep;
+
+		used_idx = (uint32_t)(vq->vq_used_cons_idx
+				& (vq->vq_nentries - 1));
+		uep = &vq->vq_ring.used->ring[used_idx];
+		idx = (uint32_t) uep->id;
+		desc_idx = idx;
+
+		while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) {
+			desc_idx = vq->vq_ring.desc[desc_idx].next;
+			vq->vq_free_cnt++;
+		}
+
+		vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx;
+		vq->vq_desc_head_idx = idx;
+
+		vq->vq_used_cons_idx++;
+		vq->vq_free_cnt++;
+	}
+
+	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
+			vq->vq_free_cnt, vq->vq_desc_head_idx);
+
+	memcpy(&result, vq->virtio_net_hdr_mz->addr,
+			sizeof(struct virtio_pmd_ctrl));
+
+	return result.status;
+}
+
+/*
+ * Send VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET with nb_queues as its argument on
+ * the control queue (hw->cvq).
+ * Returns 0 on success, -EINVAL when the command is reported as failed.
+ */
+static int
+virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl cmd;
+	int arg_len[1] = { sizeof(uint16_t) };
+
+	cmd.hdr.class = VIRTIO_NET_CTRL_MQ;
+	cmd.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
+	memcpy(cmd.data, &nb_queues, sizeof(uint16_t));
+
+	if (virtio_send_command(hw->cvq, &cmd, arg_len, 1) == 0)
+		return 0;
+
+	PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
+		  "failed, this is too late now...");
+	return -EINVAL;
+}
+
+/*
+ * Allocate and initialise one virtqueue (RX, TX or control) and hand its
+ * ring address to the device.
+ *
+ * @param dev             Ethernet device owning the queue.
+ * @param queue_type      VTNET_RQ, VTNET_TQ or VTNET_CQ.
+ * @param queue_idx       Queue index stored in the virtqueue and used in
+ *                        the allocation names.
+ * @param vtpci_queue_idx Queue index written to VIRTIO_PCI_QUEUE_SEL.
+ * @param nb_desc         Requested ring size; 0 means "use the device size".
+ *                        Any other value that differs from the device size
+ *                        is overridden by the device size.
+ * @param socket_id       Socket passed to the memzone reservations.
+ * @param pvq             Output: the newly created virtqueue.
+ *
+ * @return 0 on success, -EINVAL when the device reports an unusable queue
+ *         size, -ENOMEM when an allocation fails, the queue type is not one
+ *         of the three above, or the ring lies above the addressable range.
+ */
+int virtio_dev_queue_setup(struct rte_eth_dev *dev,
+			int queue_type,
+			uint16_t queue_idx,
+			uint16_t  vtpci_queue_idx,
+			uint16_t nb_desc,
+			unsigned int socket_id,
+			struct virtqueue **pvq)
+{
+	char vq_name[VIRTQUEUE_MAX_NAME_SZ];
+	const struct rte_memzone *mz;
+	uint16_t vq_size;
+	int size;
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtqueue  *vq = NULL;
+
+	/* Write the virtqueue index to the Queue Select Field */
+	VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, vtpci_queue_idx);
+	PMD_INIT_LOG(DEBUG, "selecting queue: %d", vtpci_queue_idx);
+
+	/*
+	 * Read the virtqueue size from the Queue Size field
+	 * Always power of 2 and if 0 virtqueue does not exist
+	 */
+	vq_size = VIRTIO_READ_REG_2(hw, VIRTIO_PCI_QUEUE_NUM);
+	PMD_INIT_LOG(DEBUG, "vq_size: %d nb_desc:%d", vq_size, nb_desc);
+	if (nb_desc == 0)
+		nb_desc = vq_size;
+	if (vq_size == 0) {
+		PMD_INIT_LOG(ERR, "%s: virtqueue does not exist", __func__);
+		return -EINVAL;
+	} else if (!rte_is_power_of_2(vq_size)) {
+		PMD_INIT_LOG(ERR, "%s: virtqueue size is not powerof 2", __func__);
+		return -EINVAL;
+	} else if (nb_desc != vq_size) {
+		PMD_INIT_LOG(ERR, "Warning: nb_desc(%d) is not equal to vq size (%d), fall to vq size",
+			nb_desc, vq_size);
+		nb_desc = vq_size;
+	}
+
+	if (queue_type == VTNET_RQ) {
+		snprintf(vq_name, sizeof(vq_name), "port%d_rvq%d",
+			dev->data->port_id, queue_idx);
+		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
+			vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
+	} else if (queue_type == VTNET_TQ) {
+		snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d",
+			dev->data->port_id, queue_idx);
+		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
+			vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
+	} else if (queue_type == VTNET_CQ) {
+		snprintf(vq_name, sizeof(vq_name), "port%d_cvq",
+			dev->data->port_id);
+		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
+			vq_size * sizeof(struct vq_desc_extra),
+			RTE_CACHE_LINE_SIZE);
+	}
+	/* vq is also still NULL here when queue_type matched none of the above */
+	if (vq == NULL) {
+		PMD_INIT_LOG(ERR, "%s: Can not allocate virtqueue", __func__);
+		return (-ENOMEM);
+	}
+
+	vq->hw = hw;
+	vq->port_id = dev->data->port_id;
+	vq->queue_id = queue_idx;
+	vq->vq_queue_index = vtpci_queue_idx;
+	vq->vq_nentries = vq_size;
+	vq->vq_free_cnt = vq_size;
+
+	/*
+	 * Reserve a memzone for vring elements
+	 */
+	size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
+	vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
+	PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d", size, vq->vq_ring_size);
+
+	mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
+		socket_id, 0, VIRTIO_PCI_VRING_ALIGN);
+	if (mz == NULL) {
+		rte_free(vq);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
+	 * and only accepts 32 bit page frame number.
+	 * Check if the allocated physical memory exceeds 16TB.
+	 */
+	if ((mz->phys_addr + vq->vq_ring_size - 1) >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
+		PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
+		rte_free(vq);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Clear the whole ring. The length is the memzone length itself, not
+	 * the size of the field that stores it.
+	 */
+	memset(mz->addr, 0, mz->len);
+	vq->mz = mz;
+	vq->vq_ring_mem = mz->phys_addr;
+	vq->vq_ring_virt_mem = mz->addr;
+	PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem:      0x%"PRIx64, (uint64_t)mz->phys_addr);
+	PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%"PRIx64, (uint64_t)mz->addr);
+	vq->virtio_net_hdr_mz  = NULL;
+	vq->virtio_net_hdr_mem = 0;
+
+	if (queue_type == VTNET_TQ) {
+		/*
+		 * For each xmit packet, allocate a virtio_net_hdr
+		 */
+		snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone",
+			dev->data->port_id, queue_idx);
+		vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
+			vq_size * hw->vtnet_hdr_size,
+			socket_id, 0, RTE_CACHE_LINE_SIZE);
+		if (vq->virtio_net_hdr_mz == NULL) {
+			rte_free(vq);
+			return -ENOMEM;
+		}
+		vq->virtio_net_hdr_mem =
+			vq->virtio_net_hdr_mz->phys_addr;
+		memset(vq->virtio_net_hdr_mz->addr, 0,
+			vq_size * hw->vtnet_hdr_size);
+	} else if (queue_type == VTNET_CQ) {
+		/* Allocate a page for control vq command, data and status */
+		snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone",
+			dev->data->port_id);
+		vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
+			PAGE_SIZE, socket_id, 0, RTE_CACHE_LINE_SIZE);
+		if (vq->virtio_net_hdr_mz == NULL) {
+			rte_free(vq);
+			return -ENOMEM;
+		}
+		vq->virtio_net_hdr_mem =
+			vq->virtio_net_hdr_mz->phys_addr;
+		memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);
+	}
+
+	/*
+	 * Set guest physical address of the virtqueue
+	 * in VIRTIO_PCI_QUEUE_PFN config register of device
+	 */
+	VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_QUEUE_PFN,
+			mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+	*pvq = vq;
+	return 0;
+}
+
+/*
+ * Create the control virtqueue and remember it in hw->cvq.
+ * nb_desc is passed as 0, which virtio_dev_queue_setup() replaces with the
+ * size reported by the device.
+ * Returns 0 on success or the negative value from virtio_dev_queue_setup().
+ */
+static int
+virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx,
+		uint32_t socket_id)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtqueue *cvq;
+	int rc;
+
+	PMD_INIT_FUNC_TRACE();
+	rc = virtio_dev_queue_setup(dev, VTNET_CQ, VTNET_SQ_CQ_QUEUE_IDX,
+			vtpci_queue_idx, 0, socket_id, &cvq);
+	if (rc >= 0) {
+		hw->cvq = cvq;
+		return 0;
+	}
+
+	PMD_INIT_LOG(ERR, "control vq initialization failed");
+	return rc;
+}
+
+/*
+ * Close the port: when the driver flags carry RTE_PCI_DRV_INTR_LSC the
+ * config interrupt vector is set to VIRTIO_MSI_NO_VECTOR, then the device
+ * is reset, hw->started is cleared and the mbufs still held by the queues
+ * are released through virtio_dev_free_mbufs().
+ */
+static void
+virtio_dev_close(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct rte_pci_device *pci_dev = dev->pci_dev;
+
+	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
+
+	/* reset the NIC */
+	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+		vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
+	vtpci_reset(hw);
+	hw->started = 0;
+	virtio_dev_free_mbufs(dev);
+}
+
+/*
+ * Send VIRTIO_NET_CTRL_RX_PROMISC with a data byte of 1 on the control
+ * queue; a failure is only logged.
+ */
+static void
+virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl cmd;
+	int arg_len[1] = { 1 };	/* one data byte: the on/off flag */
+
+	cmd.hdr.class = VIRTIO_NET_CTRL_RX;
+	cmd.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
+	cmd.data[0] = 1;
+
+	if (virtio_send_command(hw->cvq, &cmd, arg_len, 1) != 0)
+		PMD_INIT_LOG(ERR, "Failed to enable promisc");
+}
+
+/*
+ * Send VIRTIO_NET_CTRL_RX_PROMISC with a data byte of 0 on the control
+ * queue; a failure is only logged.
+ */
+static void
+virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl cmd;
+	int arg_len[1] = { 1 };	/* one data byte: the on/off flag */
+
+	cmd.hdr.class = VIRTIO_NET_CTRL_RX;
+	cmd.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
+	cmd.data[0] = 0;
+
+	if (virtio_send_command(hw->cvq, &cmd, arg_len, 1) != 0)
+		PMD_INIT_LOG(ERR, "Failed to disable promisc");
+}
+
+/*
+ * Send VIRTIO_NET_CTRL_RX_ALLMULTI with a data byte of 1 on the control
+ * queue; a failure is only logged.
+ */
+static void
+virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl cmd;
+	int arg_len[1] = { 1 };	/* one data byte: the on/off flag */
+
+	cmd.hdr.class = VIRTIO_NET_CTRL_RX;
+	cmd.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
+	cmd.data[0] = 1;
+
+	if (virtio_send_command(hw->cvq, &cmd, arg_len, 1) != 0)
+		PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
+}
+
+/*
+ * Send VIRTIO_NET_CTRL_RX_ALLMULTI with a data byte of 0 on the control
+ * queue; a failure is only logged.
+ */
+static void
+virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl cmd;
+	int arg_len[1] = { 1 };	/* one data byte: the on/off flag */
+
+	cmd.hdr.class = VIRTIO_NET_CTRL_RX;
+	cmd.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
+	cmd.data[0] = 0;
+
+	if (virtio_send_command(hw->cvq, &cmd, arg_len, 1) != 0)
+		PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
+}
+
+/*
+ * dev_ops for virtio, bare necessities for basic operation.
+ * This table is installed as eth_dev->dev_ops by eth_virtio_dev_init().
+ */
+static const struct eth_dev_ops virtio_eth_dev_ops = {
+	.dev_configure           = virtio_dev_configure,
+	.dev_start               = virtio_dev_start,
+	.dev_stop                = virtio_dev_stop,
+	.dev_close               = virtio_dev_close,
+	.promiscuous_enable      = virtio_dev_promiscuous_enable,
+	.promiscuous_disable     = virtio_dev_promiscuous_disable,
+	.allmulticast_enable     = virtio_dev_allmulticast_enable,
+	.allmulticast_disable    = virtio_dev_allmulticast_disable,
+
+	.dev_infos_get           = virtio_dev_info_get,
+	.stats_get               = virtio_dev_stats_get,
+	.stats_reset             = virtio_dev_stats_reset,
+	.link_update             = virtio_dev_link_update,
+	.rx_queue_setup          = virtio_dev_rx_queue_setup,
+	/* meaningful only with multiple queues */
+	.rx_queue_release        = virtio_dev_rx_queue_release,
+	.tx_queue_setup          = virtio_dev_tx_queue_setup,
+	/* meaningful only with multiple queues */
+	.tx_queue_release        = virtio_dev_tx_queue_release,
+	/* collect stats per queue */
+	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
+	.vlan_filter_set         = virtio_vlan_filter_set,
+	.mac_addr_add            = virtio_mac_addr_add,
+	.mac_addr_remove         = virtio_mac_addr_remove,
+	.mac_addr_set            = virtio_mac_addr_set,
+};
+
+/**
+ * Atomically reads the link status information from global
+ * structure rte_eth_dev.
+ *
+ * The copy is done as a single 64-bit compare-and-set from
+ * dev->data->dev_link into *link.
+ *
+ * @param dev
+ *   - Pointer to the structure rte_eth_dev to read from.
+ * @param link
+ *   - Pointer to the buffer to be saved with the link status.
+ *
+ * @return
+ *   - On success, zero.
+ *   - On failure, negative value.
+ */
+static inline int
+virtio_dev_atomic_read_link_status(struct rte_eth_dev *dev,
+				struct rte_eth_link *link)
+{
+	struct rte_eth_link *dst = link;
+	struct rte_eth_link *src = &(dev->data->dev_link);
+
+	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
+			*(uint64_t *)src) == 0)
+		return -1;
+
+	return 0;
+}
+
+/**
+ * Atomically writes the link status information into global
+ * structure rte_eth_dev.
+ *
+ * The copy is done as a single 64-bit compare-and-set from *link into
+ * dev->data->dev_link.
+ *
+ * @param dev
+ *   - Pointer to the structure rte_eth_dev to write to.
+ * @param link
+ *   - Pointer to the buffer holding the link status to store.
+ *
+ * @return
+ *   - On success, zero.
+ *   - On failure, negative value.
+ */
+static inline int
+virtio_dev_atomic_write_link_status(struct rte_eth_dev *dev,
+		struct rte_eth_link *link)
+{
+	struct rte_eth_link *dst = &(dev->data->dev_link);
+	struct rte_eth_link *src = link;
+
+	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
+					*(uint64_t *)src) == 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Add the counters kept in every TX and RX virtqueue to *stats.
+ * Totals are accumulated on top of whatever *stats already holds; the
+ * per-queue slots are only filled for queue indexes below
+ * RTE_ETHDEV_QUEUE_STAT_CNTRS. Queues that were never set up are skipped.
+ */
+static void
+virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+	unsigned q;
+
+	for (q = 0; q < dev->data->nb_tx_queues; q++) {
+		const struct virtqueue *vq = dev->data->tx_queues[q];
+
+		if (vq != NULL) {
+			stats->opackets += vq->packets;
+			stats->obytes += vq->bytes;
+			stats->oerrors += vq->errors;
+
+			if (q < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+				stats->q_opackets[q] = vq->packets;
+				stats->q_obytes[q] = vq->bytes;
+			}
+		}
+	}
+
+	for (q = 0; q < dev->data->nb_rx_queues; q++) {
+		const struct virtqueue *vq = dev->data->rx_queues[q];
+
+		if (vq != NULL) {
+			stats->ipackets += vq->packets;
+			stats->ibytes += vq->bytes;
+			stats->ierrors += vq->errors;
+
+			if (q < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+				stats->q_ipackets[q] = vq->packets;
+				stats->q_ibytes[q] = vq->bytes;
+			}
+		}
+	}
+
+	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
+}
+
+/*
+ * Zero the packet, byte and error counters of every configured TX and RX
+ * virtqueue, plus the port-wide mbuf allocation failure counter.
+ */
+static void
+virtio_dev_stats_reset(struct rte_eth_dev *dev)
+{
+	unsigned int q;
+
+	for (q = 0; q < dev->data->nb_tx_queues; q++) {
+		struct virtqueue *vq = dev->data->tx_queues[q];
+
+		if (vq != NULL) {
+			vq->packets = 0;
+			vq->bytes = 0;
+			vq->errors = 0;
+		}
+	}
+
+	for (q = 0; q < dev->data->nb_rx_queues; q++) {
+		struct virtqueue *vq = dev->data->rx_queues[q];
+
+		if (vq != NULL) {
+			vq->packets = 0;
+			vq->bytes = 0;
+			vq->errors = 0;
+		}
+	}
+
+	dev->data->rx_mbuf_alloc_failed = 0;
+}
+
+/* Write hw->mac_addr into the mac field of the device configuration. */
+static void
+virtio_set_hwaddr(struct virtio_hw *hw)
+{
+	const size_t mac_offset = offsetof(struct virtio_net_config, mac);
+
+	vtpci_write_dev_config(hw, mac_offset, &hw->mac_addr, ETHER_ADDR_LEN);
+}
+
+/*
+ * Fill hw->mac_addr: read it from the device configuration when the
+ * VIRTIO_NET_F_MAC feature is present, otherwise generate a random address
+ * and write it to the device.
+ */
+static void
+virtio_get_hwaddr(struct virtio_hw *hw)
+{
+	if (!vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) {
+		eth_random_addr(&hw->mac_addr[0]);
+		virtio_set_hwaddr(hw);
+		return;
+	}
+
+	vtpci_read_dev_config(hw,
+		offsetof(struct virtio_net_config, mac),
+		&hw->mac_addr, ETHER_ADDR_LEN);
+}
+
+/*
+ * Send VIRTIO_NET_CTRL_MAC_TABLE_SET with two arguments: the unicast table
+ * followed by the multicast table, each copied as its entry count plus
+ * entries * ETHER_ADDR_LEN bytes of addresses.
+ * Returns the value of virtio_send_command(); non-zero results are logged.
+ */
+static int
+virtio_mac_table_set(struct virtio_hw *hw,
+		     const struct virtio_net_ctrl_mac *uc,
+		     const struct virtio_net_ctrl_mac *mc)
+{
+	struct virtio_pmd_ctrl cmd;
+	int seg_len[2];
+	int rc;
+
+	seg_len[0] = uc->entries * ETHER_ADDR_LEN + sizeof(uc->entries);
+	seg_len[1] = mc->entries * ETHER_ADDR_LEN + sizeof(mc->entries);
+
+	cmd.hdr.class = VIRTIO_NET_CTRL_MAC;
+	cmd.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
+
+	/* the multicast table is placed right behind the unicast one */
+	memcpy(cmd.data, uc, seg_len[0]);
+	memcpy(cmd.data + seg_len[0], mc, seg_len[1]);
+
+	rc = virtio_send_command(hw->cvq, &cmd, seg_len, 2);
+	if (rc != 0)
+		PMD_DRV_LOG(NOTICE, "mac table set failed: %d", rc);
+
+	return rc;
+}
+
+/*
+ * Install mac_addr at position index and resend the complete MAC filter
+ * tables to the device.
+ *
+ * The tables are rebuilt from dev->data->mac_addrs, with slot index taken
+ * from mac_addr instead; multicast addresses go to one table and all other
+ * addresses to the other. An out-of-range index is logged and ignored.
+ * The vmdq argument is unused.
+ */
+static void
+virtio_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
+		    uint32_t index, uint32_t vmdq __rte_unused)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	const struct ether_addr *addrs = dev->data->mac_addrs;
+	unsigned int i;
+	struct virtio_net_ctrl_mac *uc, *mc;
+
+	if (index >= VIRTIO_MAX_MAC_ADDRS) {
+		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
+		return;
+	}
+
+	/* stack buffers sized for a full table plus the entry counter */
+	uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
+	uc->entries = 0;
+	mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
+	mc->entries = 0;
+
+	/*
+	 * NOTE(review): every slot is copied, including all-zero ones, whereas
+	 * virtio_mac_addr_remove() skips zero addresses - confirm this is
+	 * intended. The loop also reads VIRTIO_MAX_MAC_ADDRS entries from
+	 * dev->data->mac_addrs, so that array must be at least that large.
+	 */
+	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
+		const struct ether_addr *addr
+			= (i == index) ? mac_addr : addrs + i;
+		struct virtio_net_ctrl_mac *tbl
+			= is_multicast_ether_addr(addr) ? mc : uc;
+
+		memcpy(&tbl->macs[tbl->entries++], addr, ETHER_ADDR_LEN);
+	}
+
+	virtio_mac_table_set(hw, uc, mc);
+}
+
+/*
+ * Drop the address at position index and resend the MAC filter tables.
+ * The tables are rebuilt from dev->data->mac_addrs, leaving out slot index
+ * and every all-zero address; multicast addresses go to one table and the
+ * remaining ones to the other. An out-of-range index is logged and ignored.
+ */
+static void
+virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct ether_addr *table = dev->data->mac_addrs;
+	struct virtio_net_ctrl_mac *uc, *mc;
+	unsigned int slot;
+
+	if (index >= VIRTIO_MAX_MAC_ADDRS) {
+		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
+		return;
+	}
+
+	uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
+	uc->entries = 0;
+	mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
+	mc->entries = 0;
+
+	for (slot = 0; slot < VIRTIO_MAX_MAC_ADDRS; slot++) {
+		struct ether_addr *cur = table + slot;
+		struct virtio_net_ctrl_mac *dst;
+
+		if (slot != index && !is_zero_ether_addr(cur)) {
+			if (is_multicast_ether_addr(cur))
+				dst = mc;
+			else
+				dst = uc;
+			memcpy(&dst->macs[dst->entries++], cur, ETHER_ADDR_LEN);
+		}
+	}
+
+	virtio_mac_table_set(hw, uc, mc);
+}
+
+/*
+ * Store mac_addr in hw->mac_addr and push it to the device: through the
+ * VIRTIO_NET_CTRL_MAC_ADDR_SET control command when
+ * VIRTIO_NET_F_CTRL_MAC_ADDR is present, otherwise through the device
+ * configuration when VIRTIO_NET_F_MAC is present.
+ */
+static void
+virtio_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl cmd;
+	int arg_len = ETHER_ADDR_LEN;
+
+	memcpy(hw->mac_addr, mac_addr, ETHER_ADDR_LEN);
+
+	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
+		/* no atomic update available: write the config field */
+		if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC))
+			virtio_set_hwaddr(hw);
+		return;
+	}
+
+	/* Use atomic update if available */
+	cmd.hdr.class = VIRTIO_NET_CTRL_MAC;
+	cmd.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
+	memcpy(cmd.data, mac_addr, ETHER_ADDR_LEN);
+	virtio_send_command(hw->cvq, &cmd, &arg_len, 1);
+}
+
+/*
+ * Add (on != 0) or delete (on == 0) vlan_id in the device VLAN filter by
+ * sending the matching VIRTIO_NET_CTRL_VLAN command.
+ * Returns -ENOTSUP without VIRTIO_NET_F_CTRL_VLAN, otherwise the value of
+ * virtio_send_command().
+ */
+static int
+virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl cmd;
+	int arg_len = sizeof(vlan_id);
+
+	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
+		return -ENOTSUP;
+
+	cmd.hdr.class = VIRTIO_NET_CTRL_VLAN;
+	if (on)
+		cmd.hdr.cmd = VIRTIO_NET_CTRL_VLAN_ADD;
+	else
+		cmd.hdr.cmd = VIRTIO_NET_CTRL_VLAN_DEL;
+	memcpy(cmd.data, &vlan_id, sizeof(vlan_id));
+
+	return virtio_send_command(hw->cvq, &cmd, &arg_len, 1);
+}
+
+/*
+ * Build the feature set the driver is willing to use (VTNET_FEATURES minus
+ * the features listed below), read the host feature bits and store the
+ * result of vtpci_negotiate_features() in hw->guest_features.
+ */
+static void
+virtio_negotiate_features(struct virtio_hw *hw)
+{
+	uint32_t host_features;
+	uint32_t unwanted;
+
+	/* checksum offload not implemented */
+	unwanted = VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
+
+	/* TSO and LRO are only available when their corresponding
+	 * checksum offload feature is also negotiated.
+	 */
+	unwanted |= VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 |
+		VIRTIO_NET_F_HOST_ECN;
+	unwanted |= VIRTIO_NET_F_GUEST_TSO4 | VIRTIO_NET_F_GUEST_TSO6 |
+		VIRTIO_NET_F_GUEST_ECN;
+	unwanted |= VTNET_LRO_FEATURES;
+
+	/* not negotiating INDIRECT descriptor table support */
+	unwanted |= VIRTIO_RING_F_INDIRECT_DESC;
+
+	/* Prepare guest_features: feature that driver wants to support */
+	hw->guest_features = VTNET_FEATURES & ~unwanted;
+	PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %x",
+		hw->guest_features);
+
+	/* Read device(host) feature bits */
+	host_features = VIRTIO_READ_REG_4(hw, VIRTIO_PCI_HOST_FEATURES);
+	PMD_INIT_LOG(DEBUG, "host_features before negotiate = %x",
+		host_features);
+
+	/*
+	 * Negotiate features: Subset of device feature bits are written back
+	 * guest feature bits.
+	 */
+	hw->guest_features = vtpci_negotiate_features(hw, host_features);
+	PMD_INIT_LOG(DEBUG, "features after negotiate = %x",
+		hw->guest_features);
+}
+
+#ifdef RTE_EXEC_ENV_LINUXAPP
+/*
+ * Read one unsigned integer from the first line of a sysfs file.
+ *
+ * The number may be decimal, octal (leading 0) or hexadecimal (leading 0x)
+ * and must be followed directly by a newline.
+ *
+ * @param filename  Path of the file to read.
+ * @param val       Output: the parsed value; only written on success.
+ *
+ * @return 0 on success, -1 when the file cannot be opened or read, or when
+ *         the line holds no digits, trailing characters or a value that does
+ *         not fit in an unsigned long.
+ */
+static int
+parse_sysfs_value(const char *filename, unsigned long *val)
+{
+	FILE *f;
+	char buf[BUFSIZ];
+	char *end = NULL;
+	char *line;
+	unsigned long parsed;
+
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		PMD_INIT_LOG(ERR, "%s(): cannot open sysfs value %s",
+			     __func__, filename);
+		return -1;
+	}
+
+	/* only the first line is needed, so the stream can go right away */
+	line = fgets(buf, sizeof(buf), f);
+	fclose(f);
+	if (line == NULL) {
+		PMD_INIT_LOG(ERR, "%s(): cannot read sysfs value %s",
+			     __func__, filename);
+		return -1;
+	}
+
+	errno = 0;
+	parsed = strtoul(buf, &end, 0);
+	/*
+	 * end == buf means no digit was consumed (for instance an empty
+	 * line), which would otherwise be reported as the value 0.
+	 */
+	if ((end == buf) || (*end != '\n') || (errno == ERANGE)) {
+		PMD_INIT_LOG(ERR, "%s(): cannot parse sysfs value %s",
+			     __func__, filename);
+		return -1;
+	}
+
+	*val = parsed;
+	return 0;
+}
+
+/*
+ * Find the uio entry that belongs to a PCI device in sysfs.
+ *
+ * @param loc      PCI address of the device.
+ * @param buf      Output: path of the matching uio entry.
+ * @param buflen   Size of buf in bytes.
+ * @param uio_num  Output: number parsed from the entry name.
+ *
+ * @return 0 when an entry was found, -1 when no directory could be opened
+ *         or no matching entry exists.
+ */
+static int get_uio_dev(struct rte_pci_addr *loc, char *buf, unsigned int buflen,
+			unsigned int *uio_num)
+{
+	struct dirent *e;
+	DIR *dir;
+	char dirname[PATH_MAX];
+
+	/* depending on kernel version, uio can be located in uio/uioX
+	 * or uio:uioX */
+	snprintf(dirname, sizeof(dirname),
+		     SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio",
+		     loc->domain, loc->bus, loc->devid, loc->function);
+	dir = opendir(dirname);
+	if (dir == NULL) {
+		/* retry with the parent directory */
+		snprintf(dirname, sizeof(dirname),
+			     SYSFS_PCI_DEVICES "/" PCI_PRI_FMT,
+			     loc->domain, loc->bus, loc->devid, loc->function);
+		dir = opendir(dirname);
+
+		if (dir == NULL) {
+			PMD_INIT_LOG(ERR, "Cannot opendir %s", dirname);
+			return -1;
+		}
+	}
+
+	/* take the first file starting with "uio" */
+	while ((e = readdir(dir)) != NULL) {
+		/* format could be uio%d ...*/
+		int shortprefix_len = sizeof("uio") - 1;
+		/* ... or uio:uio%d */
+		int longprefix_len = sizeof("uio:uio") - 1;
+		char *endptr;
+
+		if (strncmp(e->d_name, "uio", 3) != 0)
+			continue;
+
+		/* first try uio%d */
+		errno = 0;
+		*uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10);
+		if (errno == 0 && endptr != (e->d_name + shortprefix_len)) {
+			snprintf(buf, buflen, "%s/uio%u", dirname, *uio_num);
+			break;
+		}
+
+		/*
+		 * then try uio:uio%d
+		 * NOTE(review): for a name shorter than "uio:uio" this offset
+		 * lies beyond the terminating NUL of d_name - confirm that
+		 * stale bytes there cannot be parsed as a number.
+		 */
+		errno = 0;
+		*uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10);
+		if (errno == 0 && endptr != (e->d_name + longprefix_len)) {
+			snprintf(buf, buflen, "%s/uio:uio%u", dirname,
+				     *uio_num);
+			break;
+		}
+	}
+	closedir(dir);
+
+	/* No uio resource found: the loop ran to the end without a break */
+	if (e == NULL) {
+		PMD_INIT_LOG(ERR, "Could not find uio resource");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Report whether the sysfs directory "msi_irqs" exists for the PCI device
+ * at loc. Returns 1 when it can be opened, 0 otherwise.
+ */
+static int
+virtio_has_msix(const struct rte_pci_addr *loc)
+{
+	char path[PATH_MAX];
+	DIR *msi_dir;
+
+	snprintf(path, sizeof(path),
+		     SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/msi_irqs",
+		     loc->domain, loc->bus, loc->devid, loc->function);
+
+	msi_dir = opendir(path);
+	if (msi_dir == NULL)
+		return 0;
+
+	closedir(msi_dir);
+	return 1;
+}
+
+/*
+ * Extract I/O port numbers from sysfs.
+ *
+ * Reads portio/port0/size and portio/port0/start below the device's uio
+ * entry, stores them in pci_dev->mem_resource[0], opens /dev/uioN as the
+ * interrupt handle fd and sets RTE_PCI_DRV_INTR_LSC in the driver flags.
+ * Returns 0 on success, -1 on any failure.
+ */
+static int virtio_resource_init_by_uio(struct rte_pci_device *pci_dev)
+{
+	char dirname[PATH_MAX];
+	char filename[PATH_MAX];
+	unsigned long start, size;
+	unsigned int uio_num;
+
+	if (get_uio_dev(&pci_dev->addr, dirname, sizeof(dirname), &uio_num) < 0)
+		return -1;
+
+	/* get portio size */
+	snprintf(filename, sizeof(filename),
+		     "%s/portio/port0/size", dirname);
+	if (parse_sysfs_value(filename, &size) < 0) {
+		PMD_INIT_LOG(ERR, "%s(): cannot parse size",
+			     __func__);
+		return -1;
+	}
+
+	/* get portio start */
+	snprintf(filename, sizeof(filename),
+		 "%s/portio/port0/start", dirname);
+	if (parse_sysfs_value(filename, &start) < 0) {
+		PMD_INIT_LOG(ERR, "%s(): cannot parse portio start",
+			     __func__);
+		return -1;
+	}
+	pci_dev->mem_resource[0].addr = (void *)(uintptr_t)start;
+	pci_dev->mem_resource[0].len =  (uint64_t)size;
+	PMD_INIT_LOG(DEBUG,
+		     "PCI Port IO found start=0x%lx with size=0x%lx",
+		     start, size);
+
+	/* save fd; dirname is reused to hold the /dev/uioN path */
+	memset(dirname, 0, sizeof(dirname));
+	snprintf(dirname, sizeof(dirname), "/dev/uio%u", uio_num);
+	pci_dev->intr_handle.fd = open(dirname, O_RDWR);
+	if (pci_dev->intr_handle.fd < 0) {
+		PMD_INIT_LOG(ERR, "Cannot open %s: %s\n",
+			dirname, strerror(errno));
+		return -1;
+	}
+
+	pci_dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
+	pci_dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
+
+	return 0;
+}
+
+/*
+ * Extract port I/O numbers from /proc/ioports.
+ *
+ * Scans for the first line of the form "<start>-<end> : <pci address>..."
+ * whose owner begins with this device's PCI address and stores the range in
+ * pci_dev->mem_resource[0]. Because no uio device is involved, the
+ * RTE_PCI_DRV_INTR_LSC driver flag is cleared.
+ *
+ * Lines without a ':' separator, or whose range cannot be parsed, are
+ * skipped.
+ *
+ * @return 0 when a range was found, -1 when the file cannot be opened or
+ *         holds no usable entry for the device.
+ */
+static int virtio_resource_init_by_ioports(struct rte_pci_device *pci_dev)
+{
+	uint16_t start = 0, end = 0;
+	int size = 0;
+	FILE *fp;
+	char *line = NULL;
+	char pci_id[16];
+	int found = 0;
+	size_t linesz = 0;
+
+	snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
+		 pci_dev->addr.domain,
+		 pci_dev->addr.bus,
+		 pci_dev->addr.devid,
+		 pci_dev->addr.function);
+
+	fp = fopen("/proc/ioports", "r");
+	if (fp == NULL) {
+		PMD_INIT_LOG(ERR, "%s(): can't open ioports", __func__);
+		return -1;
+	}
+
+	while (getdelim(&line, &linesz, '\n', fp) > 0) {
+		char *ptr = line;
+		char *left;
+		int n;
+
+		n = strcspn(ptr, ":");
+		/*
+		 * Without a ':' there is no owner field, and ptr[n + 1] would
+		 * lie beyond the string terminator.
+		 */
+		if (ptr[n] != ':')
+			continue;
+		ptr[n] = 0;
+		left = &ptr[n+1];
+
+		/* ctype functions need an unsigned char value */
+		while (*left && isspace((unsigned char)*left))
+			left++;
+
+		if (!strncmp(left, pci_id, strlen(pci_id))) {
+			while (*ptr && isspace((unsigned char)*ptr))
+				ptr++;
+
+			/* accept the entry only when both bounds were read */
+			if (sscanf(ptr, "%04hx-%04hx", &start, &end) != 2 ||
+			    end < start)
+				continue;
+
+			size = end - start + 1;
+			found = 1;
+
+			break;
+		}
+	}
+
+	free(line);
+	fclose(fp);
+
+	if (!found)
+		return -1;
+
+	pci_dev->mem_resource[0].addr = (void *)(uintptr_t)(uint32_t)start;
+	pci_dev->mem_resource[0].len =  (uint64_t)size;
+	PMD_INIT_LOG(DEBUG,
+		"PCI Port IO found start=0x%x with size=0x%x",
+		start, size);
+
+	/* can't support lsc interrupt without uio */
+	pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
+
+	return 0;
+}
+
+/*
+ * Locate the device's I/O port range: try the uio entries in sysfs first
+ * and fall back to /proc/ioports when that fails.
+ */
+static int virtio_resource_init(struct rte_pci_device *pci_dev)
+{
+	if (virtio_resource_init_by_uio(pci_dev) != 0)
+		return virtio_resource_init_by_ioports(pci_dev);
+
+	return 0;
+}
+
+#else
+/* Variant used when RTE_EXEC_ENV_LINUXAPP is not defined: always 0. */
+static int
+virtio_has_msix(const struct rte_pci_addr *loc __rte_unused)
+{
+	/* nic_uio does not enable interrupts, return 0 (false). */
+	return 0;
+}
+
+/* Variant used when RTE_EXEC_ENV_LINUXAPP is not defined: nothing to do. */
+static int virtio_resource_init(struct rte_pci_device *pci_dev __rte_unused)
+{
+	/* no setup required */
+	return 0;
+}
+#endif
+
+/*
+ * Process Virtio Config changed interrupt and call the callback
+ * if link state changed.
+ *
+ * handle is unused; param is the struct rte_eth_dev that was passed when
+ * the handler was registered.
+ */
+static void
+virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
+			 void *param)
+{
+	struct rte_eth_dev *dev = param;
+	struct virtio_hw *hw = dev->data->dev_private;
+	uint8_t isr;
+
+	/* Read interrupt status which clears interrupt */
+	isr = vtpci_isr(hw);
+	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
+
+	/* re-arm the interrupt before the event is processed */
+	if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
+		PMD_DRV_LOG(ERR, "interrupt enable failed");
+
+	if (isr & VIRTIO_PCI_ISR_CONFIG) {
+		/* virtio_dev_link_update() returns 0 only when the state changed */
+		if (virtio_dev_link_update(dev, 0) == 0)
+			_rte_eth_dev_callback_process(dev,
+						      RTE_ETH_EVENT_INTR_LSC);
+	}
+
+}
+
+/*
+ * Select the RX burst function: the mergeable-buffer variant when
+ * VIRTIO_NET_F_MRG_RXBUF is present, the plain one otherwise.
+ */
+static void
+rx_func_get(struct rte_eth_dev *eth_dev)
+{
+	struct virtio_hw *hw = eth_dev->data->dev_private;
+
+	if (!vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
+		eth_dev->rx_pkt_burst = &virtio_recv_pkts;
+		return;
+	}
+
+	eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
+}
+
+/*
+ * This function is based on probe() function in virtio_pci.c
+ * It returns 0 on success.
+ *
+ * Per-port initialisation: installs the dev_ops and burst functions,
+ * allocates the MAC address table, locates the I/O port range, resets the
+ * device and negotiates features, reads the MAC address, sets up the
+ * control queue when VIRTIO_NET_F_CTRL_VQ is present and registers the
+ * interrupt callback when the driver flags carry RTE_PCI_DRV_INTR_LSC.
+ *
+ * In a secondary process only the function pointers are set.
+ *
+ * @return 0 on success, -ENOMEM when the MAC table cannot be allocated,
+ *         -1 when the I/O port range cannot be determined.
+ */
+static int
+eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
+{
+	struct virtio_hw *hw = eth_dev->data->dev_private;
+	struct virtio_net_config *config;
+	struct virtio_net_config local_config;
+	uint32_t offset_conf = sizeof(config->mac);
+	struct rte_pci_device *pci_dev;
+
+	RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr));
+
+	eth_dev->dev_ops = &virtio_eth_dev_ops;
+	eth_dev->tx_pkt_burst = &virtio_xmit_pkts;
+
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+		rx_func_get(eth_dev);
+		return 0;
+	}
+
+	/*
+	 * Allocate memory for storing MAC addresses. The table must hold
+	 * VIRTIO_MAX_MAC_ADDRS entries: that is the limit reported as
+	 * max_mac_addrs and the number of slots the mac_addr_add and
+	 * mac_addr_remove callbacks walk.
+	 */
+	eth_dev->data->mac_addrs = rte_zmalloc("virtio",
+			VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0);
+	if (eth_dev->data->mac_addrs == NULL) {
+		PMD_INIT_LOG(ERR,
+			"Failed to allocate %d bytes needed to store MAC addresses",
+			VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN);
+		return -ENOMEM;
+	}
+
+	pci_dev = eth_dev->pci_dev;
+	if (virtio_resource_init(pci_dev) < 0) {
+		/* do not leak the MAC table when the port cannot be used */
+		rte_free(eth_dev->data->mac_addrs);
+		eth_dev->data->mac_addrs = NULL;
+		return -1;
+	}
+
+	hw->use_msix = virtio_has_msix(&pci_dev->addr);
+	hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr;
+
+	/* Reset the device although not necessary at startup */
+	vtpci_reset(hw);
+
+	/* Tell the host we've noticed this device. */
+	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
+
+	/* Tell the host we've known how to drive the device. */
+	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
+	virtio_negotiate_features(hw);
+
+	rx_func_get(eth_dev);
+
+	/* Setting up rx_header size for the device */
+	if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF))
+		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	else
+		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
+
+	/* Copy the permanent MAC address to: virtio_hw */
+	virtio_get_hwaddr(hw);
+	ether_addr_copy((struct ether_addr *) hw->mac_addr,
+			&eth_dev->data->mac_addrs[0]);
+	PMD_INIT_LOG(DEBUG,
+		     "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
+		     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
+		     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
+
+	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
+		config = &local_config;
+
+		/*
+		 * offset_conf grows to cover each optional field the device
+		 * provides; fields it does not provide get a default here and
+		 * are not overwritten by the config read below.
+		 */
+		if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
+			offset_conf += sizeof(config->status);
+		} else {
+			PMD_INIT_LOG(DEBUG,
+				     "VIRTIO_NET_F_STATUS is not supported");
+			config->status = 0;
+		}
+
+		if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
+			offset_conf += sizeof(config->max_virtqueue_pairs);
+		} else {
+			PMD_INIT_LOG(DEBUG,
+				     "VIRTIO_NET_F_MQ is not supported");
+			config->max_virtqueue_pairs = 1;
+		}
+
+		vtpci_read_dev_config(hw, 0, (uint8_t *)config, offset_conf);
+
+		hw->max_rx_queues =
+			(VIRTIO_MAX_RX_QUEUES < config->max_virtqueue_pairs) ?
+			VIRTIO_MAX_RX_QUEUES : config->max_virtqueue_pairs;
+		hw->max_tx_queues =
+			(VIRTIO_MAX_TX_QUEUES < config->max_virtqueue_pairs) ?
+			VIRTIO_MAX_TX_QUEUES : config->max_virtqueue_pairs;
+
+		/*
+		 * NOTE(review): the return value is ignored, so a failed
+		 * control queue setup is not reported to the caller - confirm
+		 * whether init should fail in that case.
+		 */
+		virtio_dev_cq_queue_setup(eth_dev,
+					config->max_virtqueue_pairs * 2,
+					SOCKET_ID_ANY);
+
+		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
+				config->max_virtqueue_pairs);
+		PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
+		PMD_INIT_LOG(DEBUG,
+				"PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
+				config->mac[0], config->mac[1],
+				config->mac[2], config->mac[3],
+				config->mac[4], config->mac[5]);
+	} else {
+		hw->max_rx_queues = 1;
+		hw->max_tx_queues = 1;
+	}
+
+	eth_dev->data->nb_rx_queues = hw->max_rx_queues;
+	eth_dev->data->nb_tx_queues = hw->max_tx_queues;
+
+	PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d   hw->max_tx_queues=%d",
+			hw->max_rx_queues, hw->max_tx_queues);
+	PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
+			eth_dev->data->port_id, pci_dev->id.vendor_id,
+			pci_dev->id.device_id);
+
+	/* Setup interrupt callback  */
+	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+		rte_intr_callback_register(&pci_dev->intr_handle,
+				   virtio_interrupt_handler, eth_dev);
+
+	virtio_dev_cq_start(eth_dev);
+
+	return 0;
+}
+
+/*
+ * Ethernet driver descriptor handed to rte_eth_driver_register(): PCI
+ * driver name and id table, the per-port init function and the size of
+ * the private data area (struct virtio_hw).
+ */
+static struct eth_driver rte_virtio_pmd = {
+	{
+		.name = "rte_virtio_pmd",
+		.id_table = pci_id_virtio_map,
+	},
+	.eth_dev_init = eth_virtio_dev_init,
+	.dev_private_size = sizeof(struct virtio_hw),
+};
+
+/*
+ * Driver initialization routine.
+ * Invoked once at EAL init time.
+ * Registers rte_virtio_pmd as the [Poll Mode] Driver of PCI virtio devices
+ * once rte_eal_iopl_init() has succeeded.
+ * Returns 0 on success, -1 when the IOPL call fails.
+ */
+static int
+rte_virtio_pmd_init(const char *name __rte_unused,
+		    const char *param __rte_unused)
+{
+	if (rte_eal_iopl_init() == 0) {
+		rte_eth_driver_register(&rte_virtio_pmd);
+		return 0;
+	}
+
+	PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD");
+	return -1;
+}
+
+/*
+ * RX queue release callback: intentionally empty, the queue is left as is.
+ * NOTE(review): the original remark here said only 1 queue is supported,
+ * while other code in this file handles several queues - confirm whether
+ * a real release is needed.
+ */
+static void
+virtio_dev_rx_queue_release(__rte_unused void *rxq)
+{
+}
+
+/* TX queue release callback: intentionally empty, the queue is left as is. */
+static void
+virtio_dev_tx_queue_release(__rte_unused void *txq)
+{
+}
+
+/*
+ * Configure virtio device
+ * It returns 0 on success, -EINVAL when HW IP checksum is requested,
+ * -ENOTSUP when VLAN filtering is requested without
+ * VIRTIO_NET_F_CTRL_VLAN, and -EBUSY when the config interrupt vector
+ * cannot be set.
+ */
+static int
+virtio_dev_configure(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct rte_pci_device *pci_dev = dev->pci_dev;
+	const struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
+
+	PMD_INIT_LOG(DEBUG, "configure");
+
+	if (rx_conf->hw_ip_checksum) {
+		PMD_DRV_LOG(ERR, "HW IP checksum not supported");
+		return -EINVAL;
+	}
+
+	hw->vlan_strip = rx_conf->hw_vlan_strip;
+
+	if (rx_conf->hw_vlan_filter &&
+	    !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
+		PMD_DRV_LOG(NOTICE,
+			    "vlan filtering not available on this host");
+		return -ENOTSUP;
+	}
+
+	/* the vector is only programmed when LSC interrupts are in use */
+	if ((pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) &&
+	    vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) {
+		PMD_DRV_LOG(ERR, "failed to set config vector");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Start the port.
+ *
+ * Enables the interrupt when LSC is both requested in the device
+ * configuration and set in the driver flags, refreshes the link state and,
+ * on the first start only, starts the rx/tx engine, sends the multiqueue
+ * command when more than one RX queue exists and notifies the backend on
+ * every RX queue.
+ *
+ * Returns 0 on success, -ENOTSUP when LSC is requested but
+ * VIRTIO_NET_F_STATUS is missing, -EIO when the interrupt cannot be
+ * enabled, -EINVAL when the multiqueue command fails.
+ */
+static int
+virtio_dev_start(struct rte_eth_dev *dev)
+{
+	uint16_t nb_queues, i;
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct rte_pci_device *pci_dev = dev->pci_dev;
+
+	/* check if lsc interrupt feature is enabled */
+	if ((dev->data->dev_conf.intr_conf.lsc) &&
+		(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
+		if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
+			PMD_DRV_LOG(ERR, "link status not supported by host");
+			return -ENOTSUP;
+		}
+
+		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+			return -EIO;
+		}
+	}
+
+	/* Initialize Link state */
+	virtio_dev_link_update(dev, 0);
+
+	/* On restart after stop do not touch queues */
+	if (hw->started)
+		return 0;
+
+	/* Do final configuration before rx/tx engine starts */
+	virtio_dev_rxtx_start(dev);
+	vtpci_reinit_complete(hw);
+
+	hw->started = 1;
+
+	/*
+	 * Notify the backend.
+	 * Otherwise the tap backend might already have stopped its queue due
+	 * to fullness, and the vhost backend would have no chance to be woken
+	 * up.
+	 */
+	nb_queues = dev->data->nb_rx_queues;
+	if (nb_queues > 1) {
+		if (virtio_set_multiple_queues(dev, nb_queues) != 0)
+			return -EINVAL;
+	}
+
+	PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
+
+	for (i = 0; i < nb_queues; i++)
+		virtqueue_notify(dev->data->rx_queues[i]);
+
+	PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++)
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+
+	return 0;
+}
+
+/*
+ * Detach and free every mbuf still held by the RX and TX virtqueues of the
+ * port. The number of mbufs freed is logged per queue, and each queue is
+ * dumped before and after.
+ */
+static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
+{
+	struct rte_mbuf *buf;
+	int i, mbuf_num = 0;
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		PMD_INIT_LOG(DEBUG,
+			     "Before freeing rxq[%d] used and unused buf", i);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+
+		/* restart the count for each queue, as the TX loop does */
+		mbuf_num = 0;
+		while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(
+					dev->data->rx_queues[i])) != NULL) {
+			rte_pktmbuf_free(buf);
+			mbuf_num++;
+		}
+
+		PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num);
+		PMD_INIT_LOG(DEBUG,
+			     "After freeing rxq[%d] used and unused buf", i);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+	}
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		PMD_INIT_LOG(DEBUG,
+			     "Before freeing txq[%d] used and unused bufs",
+			     i);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+
+		mbuf_num = 0;
+		while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(
+					dev->data->tx_queues[i])) != NULL) {
+			rte_pktmbuf_free(buf);
+
+			mbuf_num++;
+		}
+
+		PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num);
+		PMD_INIT_LOG(DEBUG,
+			     "After freeing txq[%d] used and unused buf", i);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+	}
+}
+
+/*
+ * Stop device: disable the interrupt when LSC was requested in the device
+ * configuration, then store an all-zero link state (link down).
+ */
+static void
+virtio_dev_stop(struct rte_eth_dev *dev)
+{
+	struct rte_eth_link down;
+
+	PMD_INIT_LOG(DEBUG, "stop");
+
+	if (dev->data->dev_conf.intr_conf.lsc)
+		rte_intr_disable(&dev->pci_dev->intr_handle);
+
+	memset(&down, 0, sizeof(down));
+	virtio_dev_atomic_write_link_status(dev, &down);
+}
+
+/*
+ * Refresh the link state stored in dev->data->dev_link.
+ * Speed and duplex are always set to SPEED_10G / FULL_DUPLEX; the up/down
+ * bit comes from the status field of the device configuration when
+ * VIRTIO_NET_F_STATUS is present and is forced to "up" otherwise.
+ * Returns 0 when the up/down state changed, -1 when it did not.
+ */
+static int
+virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct rte_eth_link cur, prev;
+	uint16_t dev_status;
+
+	memset(&cur, 0, sizeof(cur));
+	virtio_dev_atomic_read_link_status(dev, &cur);
+	prev = cur;
+
+	cur.link_duplex = FULL_DUPLEX;
+	cur.link_speed  = SPEED_10G;
+
+	if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
+		cur.link_status = 1;   /* Link up */
+	} else {
+		PMD_INIT_LOG(DEBUG, "Get link status from hw");
+		vtpci_read_dev_config(hw,
+				offsetof(struct virtio_net_config, status),
+				&dev_status, sizeof(dev_status));
+		if (dev_status & VIRTIO_NET_S_LINK_UP) {
+			cur.link_status = 1;
+			PMD_INIT_LOG(DEBUG, "Port %d is up",
+				     dev->data->port_id);
+		} else {
+			cur.link_status = 0;
+			PMD_INIT_LOG(DEBUG, "Port %d is down",
+				     dev->data->port_id);
+		}
+	}
+	virtio_dev_atomic_write_link_status(dev, &cur);
+
+	if (prev.link_status == cur.link_status)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Fill *dev_info with the driver name, the queue limits stored in the
+ * virtio_hw private data and the fixed VIRTIO_* buffer, packet length and
+ * MAC address limits.
+ */
+static void
+virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	dev_info->driver_name = dev->driver->pci_drv.name;
+	dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
+	dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
+	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
+	dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
+	dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
+	/* default TX configuration: only txq_flags is set explicitly */
+	dev_info->default_txconf = (struct rte_eth_txconf) {
+		.txq_flags = ETH_TXQ_FLAGS_NOOFFLOADS
+	};
+}
+
+/*
+ * It enables testpmd to collect per queue stats.
+ * All arguments are ignored and 0 is always returned.
+ */
+static int
+virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
+__rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
+__rte_unused uint8_t is_rx)
+{
+	return 0;
+}
+
+/*
+ * Driver descriptor of type PMD_PDEV whose init hook is
+ * rte_virtio_pmd_init(); registered through PMD_REGISTER_DRIVER below.
+ */
+static struct rte_driver rte_virtio_driver = {
+	.type = PMD_PDEV,
+	.init = rte_virtio_pmd_init,
+};
+
+PMD_REGISTER_DRIVER(rte_virtio_driver);
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
new file mode 100644
index 0000000..e6d4533
--- /dev/null
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -0,0 +1,124 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_ETHDEV_H_
+#define _VIRTIO_ETHDEV_H_
+
+#include <stdint.h>
+
+#include "virtio_pci.h"
+
+#define SPEED_10	10
+#define SPEED_100	100
+#define SPEED_1000	1000
+#define SPEED_10G	10000
+#define HALF_DUPLEX	1
+#define FULL_DUPLEX	2
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#define VIRTIO_MAX_RX_QUEUES 128
+#define VIRTIO_MAX_TX_QUEUES 128
+#define VIRTIO_MAX_MAC_ADDRS 64
+#define VIRTIO_MIN_RX_BUFSIZE 64
+#define VIRTIO_MAX_RX_PKTLEN  9728
+
+/* Features desired/implemented by this driver. */
+#define VTNET_FEATURES \
+	(VIRTIO_NET_F_MAC       | \
+	VIRTIO_NET_F_STATUS     | \
+	VIRTIO_NET_F_MQ         | \
+	VIRTIO_NET_F_CTRL_MAC_ADDR | \
+	VIRTIO_NET_F_CTRL_VQ    | \
+	VIRTIO_NET_F_CTRL_RX    | \
+	VIRTIO_NET_F_CTRL_VLAN  | \
+	VIRTIO_NET_F_CSUM       | \
+	VIRTIO_NET_F_HOST_TSO4  | \
+	VIRTIO_NET_F_HOST_TSO6  | \
+	VIRTIO_NET_F_HOST_ECN   | \
+	VIRTIO_NET_F_GUEST_CSUM | \
+	VIRTIO_NET_F_GUEST_TSO4 | \
+	VIRTIO_NET_F_GUEST_TSO6 | \
+	VIRTIO_NET_F_GUEST_ECN  | \
+	VIRTIO_NET_F_MRG_RXBUF  | \
+	VIRTIO_RING_F_INDIRECT_DESC)
+
+/*
+ * CQ function prototype
+ */
+void virtio_dev_cq_start(struct rte_eth_dev *dev);
+
+/*
+ * RX/TX function prototypes
+ */
+void virtio_dev_rxtx_start(struct rte_eth_dev *dev);
+
+int virtio_dev_queue_setup(struct rte_eth_dev *dev,
+			int queue_type,
+			uint16_t queue_idx,
+			uint16_t  vtpci_queue_idx,
+			uint16_t nb_desc,
+			unsigned int socket_id,
+			struct virtqueue **pvq);
+
+int  virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+		uint16_t nb_rx_desc, unsigned int socket_id,
+		const struct rte_eth_rxconf *rx_conf,
+		struct rte_mempool *mb_pool);
+
+int  virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+		uint16_t nb_tx_desc, unsigned int socket_id,
+		const struct rte_eth_txconf *tx_conf);
+
+uint16_t virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts);
+
+uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts);
+
+uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+		uint16_t nb_pkts);
+
+
+/*
+ * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
+ * frames larger than 1514 bytes. We do not yet support software LRO
+ * via tcp_lro_rx().
+ */
+#define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \
+			    VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN)
+
+
+#endif /* _VIRTIO_ETHDEV_H_ */
diff --git a/drivers/net/virtio/virtio_logs.h b/drivers/net/virtio/virtio_logs.h
new file mode 100644
index 0000000..d6c33f7
--- /dev/null
+++ b/drivers/net/virtio/virtio_logs.h
@@ -0,0 +1,70 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_LOGS_H_
+#define _VIRTIO_LOGS_H_
+
+#include <rte_log.h>
+
+#ifdef RTE_LIBRTE_VIRTIO_DEBUG_INIT
+#define PMD_INIT_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
+#else
+#define PMD_INIT_LOG(level, fmt, args...) do { } while(0)
+#define PMD_INIT_FUNC_TRACE() do { } while(0)
+#endif
+
+#ifdef RTE_LIBRTE_VIRTIO_DEBUG_RX
+#define PMD_RX_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s() rx: " fmt , __func__, ## args)
+#else
+#define PMD_RX_LOG(level, fmt, args...) do { } while(0)
+#endif
+
+#ifdef RTE_LIBRTE_VIRTIO_DEBUG_TX
+#define PMD_TX_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s() tx: " fmt , __func__, ## args)
+#else
+#define PMD_TX_LOG(level, fmt, args...) do { } while(0)
+#endif
+
+
+#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DRIVER
+#define PMD_DRV_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s(): " fmt , __func__, ## args)
+#else
+#define PMD_DRV_LOG(level, fmt, args...) do { } while(0)
+#endif
+
+#endif /* _VIRTIO_LOGS_H_ */
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
new file mode 100644
index 0000000..2245bec
--- /dev/null
+++ b/drivers/net/virtio/virtio_pci.c
@@ -0,0 +1,147 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+
+#include "virtio_pci.h"
+#include "virtio_logs.h"
+
+static uint8_t vtpci_get_status(struct virtio_hw *);
+
+void
+vtpci_read_dev_config(struct virtio_hw *hw, uint64_t offset,
+		void *dst, int length)
+{
+	uint64_t off;
+	uint8_t *d;
+	int size;
+
+	off = VIRTIO_PCI_CONFIG(hw) + offset;
+	for (d = dst; length > 0; d += size, off += size, length -= size) {
+		if (length >= 4) {
+			size = 4;
+			*(uint32_t *)d = VIRTIO_READ_REG_4(hw, off);
+		} else if (length >= 2) {
+			size = 2;
+			*(uint16_t *)d = VIRTIO_READ_REG_2(hw, off);
+		} else {
+			size = 1;
+			*d = VIRTIO_READ_REG_1(hw, off);
+		}
+	}
+}
+
+void
+vtpci_write_dev_config(struct virtio_hw *hw, uint64_t offset,
+		void *src, int length)
+{
+	uint64_t off;
+	uint8_t *s;
+	int size;
+
+	off = VIRTIO_PCI_CONFIG(hw) + offset;
+	for (s = src; length > 0; s += size, off += size, length -= size) {
+		if (length >= 4) {
+			size = 4;
+			VIRTIO_WRITE_REG_4(hw, off, *(uint32_t *)s);
+		} else if (length >= 2) {
+			size = 2;
+			VIRTIO_WRITE_REG_2(hw, off, *(uint16_t *)s);
+		} else {
+			size = 1;
+			VIRTIO_WRITE_REG_1(hw, off, *s);
+		}
+	}
+}
+
+uint32_t
+vtpci_negotiate_features(struct virtio_hw *hw, uint32_t host_features)
+{
+	uint32_t features;
+	/*
+	 * Limit negotiated features to what the driver, virtqueue, and
+	 * host all support.
+	 */
+	features = host_features & hw->guest_features;
+
+	VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_GUEST_FEATURES, features);
+	return features;
+}
+
+
+void
+vtpci_reset(struct virtio_hw *hw)
+{
+	/*
+	 * Setting the status to RESET sets the host device to
+	 * the original, uninitialized state.
+	 */
+	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
+	vtpci_get_status(hw);
+}
+
+void
+vtpci_reinit_complete(struct virtio_hw *hw)
+{
+	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
+}
+
+static uint8_t
+vtpci_get_status(struct virtio_hw *hw)
+{
+	return VIRTIO_READ_REG_1(hw, VIRTIO_PCI_STATUS);
+}
+
+void
+vtpci_set_status(struct virtio_hw *hw, uint8_t status)
+{
+	if (status != VIRTIO_CONFIG_STATUS_RESET)
+		status = (uint8_t)(status | vtpci_get_status(hw));
+
+	VIRTIO_WRITE_REG_1(hw, VIRTIO_PCI_STATUS, status);
+}
+
+uint8_t
+vtpci_isr(struct virtio_hw *hw)
+{
+
+	return VIRTIO_READ_REG_1(hw, VIRTIO_PCI_ISR);
+}
+
+
+/* Enable one vector (0) for Link State Interrupt */
+uint16_t
+vtpci_irq_config(struct virtio_hw *hw, uint16_t vec)
+{
+	VIRTIO_WRITE_REG_2(hw, VIRTIO_MSI_CONFIG_VECTOR, vec);
+	return VIRTIO_READ_REG_2(hw, VIRTIO_MSI_CONFIG_VECTOR);
+}
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
new file mode 100644
index 0000000..64d9c34
--- /dev/null
+++ b/drivers/net/virtio/virtio_pci.h
@@ -0,0 +1,270 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_PCI_H_
+#define _VIRTIO_PCI_H_
+
+#include <stdint.h>
+
+#ifdef __FreeBSD__
+#include <sys/types.h>
+#include <machine/cpufunc.h>
+#else
+#include <sys/io.h>
+#endif
+
+#include <rte_ethdev.h>
+
+struct virtqueue;
+
+/* VirtIO PCI vendor/device ID. */
+#define VIRTIO_PCI_VENDORID     0x1AF4
+#define VIRTIO_PCI_DEVICEID_MIN 0x1000
+#define VIRTIO_PCI_DEVICEID_MAX 0x103F
+
+/* VirtIO ABI version, this must match exactly. */
+#define VIRTIO_PCI_ABI_VERSION 0
+
+/*
+ * VirtIO Header, located in BAR 0.
+ */
+#define VIRTIO_PCI_HOST_FEATURES  0  /* host's supported features (32bit, RO)*/
+#define VIRTIO_PCI_GUEST_FEATURES 4  /* guest's supported features (32, RW) */
+#define VIRTIO_PCI_QUEUE_PFN      8  /* physical address of VQ (32, RW) */
+#define VIRTIO_PCI_QUEUE_NUM      12 /* number of ring entries (16, RO) */
+#define VIRTIO_PCI_QUEUE_SEL      14 /* current VQ selection (16, RW) */
+#define VIRTIO_PCI_QUEUE_NOTIFY   16 /* notify host regarding VQ (16, RW) */
+#define VIRTIO_PCI_STATUS         18 /* device status register (8, RW) */
+#define VIRTIO_PCI_ISR		  19 /* interrupt status register, reading
+				      * also clears the register (8, RO) */
+/* Only if MSIX is enabled: */
+#define VIRTIO_MSI_CONFIG_VECTOR  20 /* configuration change vector (16, RW) */
+#define VIRTIO_MSI_QUEUE_VECTOR	  22 /* vector for selected VQ notifications
+				      (16, RW) */
+
+/* The bit of the ISR which indicates a device has an interrupt. */
+#define VIRTIO_PCI_ISR_INTR   0x1
+/* The bit of the ISR which indicates a device configuration change. */
+#define VIRTIO_PCI_ISR_CONFIG 0x2
+/* Vector value used to disable MSI for queue. */
+#define VIRTIO_MSI_NO_VECTOR 0xFFFF
+
+/* VirtIO device IDs. */
+#define VIRTIO_ID_NETWORK  0x01
+#define VIRTIO_ID_BLOCK    0x02
+#define VIRTIO_ID_CONSOLE  0x03
+#define VIRTIO_ID_ENTROPY  0x04
+#define VIRTIO_ID_BALLOON  0x05
+#define VIRTIO_ID_IOMEMORY 0x06
+#define VIRTIO_ID_9P       0x09
+
+/* Status byte for guest to report progress. */
+#define VIRTIO_CONFIG_STATUS_RESET     0x00
+#define VIRTIO_CONFIG_STATUS_ACK       0x01
+#define VIRTIO_CONFIG_STATUS_DRIVER    0x02
+#define VIRTIO_CONFIG_STATUS_DRIVER_OK 0x04
+#define VIRTIO_CONFIG_STATUS_FAILED    0x80
+
+/*
+ * Generate interrupt when the virtqueue ring is
+ * completely used, even if we've suppressed them.
+ */
+#define VIRTIO_F_NOTIFY_ON_EMPTY (1 << 24)
+
+/*
+ * The guest should never negotiate this feature; it
+ * is used to detect faulty drivers.
+ */
+#define VIRTIO_F_BAD_FEATURE (1 << 30)
+
+/*
+ * Some VirtIO feature bits (currently bits 28 through 31) are
+ * reserved for the transport being used (eg. virtio_ring), the
+ * rest are per-device feature bits.
+ */
+#define VIRTIO_TRANSPORT_F_START 28
+#define VIRTIO_TRANSPORT_F_END   32
+
+/*
+ * Each virtqueue indirect descriptor list must be physically contiguous.
+ * To allow us to malloc(9) each list individually, limit the number
+ * supported to what will fit in one page. With 4KB pages, this is a limit
+ * of 256 descriptors. If there is ever a need for more, we can switch to
+ * contigmalloc(9) for the larger allocations, similar to what
+ * bus_dmamem_alloc(9) does.
+ *
+ * Note the sizeof(struct vring_desc) is 16 bytes.
+ */
+#define VIRTIO_MAX_INDIRECT ((int) (PAGE_SIZE / 16))
+
+/* The feature bitmap for virtio net */
+#define VIRTIO_NET_F_CSUM       0x00001 /* Host handles pkts w/ partial csum */
+#define VIRTIO_NET_F_GUEST_CSUM 0x00002 /* Guest handles pkts w/ partial csum*/
+#define VIRTIO_NET_F_MAC        0x00020 /* Host has given MAC address. */
+#define VIRTIO_NET_F_GSO        0x00040 /* Host handles pkts w/ any GSO type */
+#define VIRTIO_NET_F_GUEST_TSO4 0x00080 /* Guest can handle TSOv4 in. */
+#define VIRTIO_NET_F_GUEST_TSO6 0x00100 /* Guest can handle TSOv6 in. */
+#define VIRTIO_NET_F_GUEST_ECN  0x00200 /* Guest can handle TSO[6] w/ ECN in.*/
+#define VIRTIO_NET_F_GUEST_UFO  0x00400 /* Guest can handle UFO in. */
+#define VIRTIO_NET_F_HOST_TSO4  0x00800 /* Host can handle TSOv4 in. */
+#define VIRTIO_NET_F_HOST_TSO6  0x01000 /* Host can handle TSOv6 in. */
+#define VIRTIO_NET_F_HOST_ECN   0x02000 /* Host can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_HOST_UFO   0x04000 /* Host can handle UFO in. */
+#define VIRTIO_NET_F_MRG_RXBUF  0x08000 /* Host can merge receive buffers. */
+#define VIRTIO_NET_F_STATUS     0x10000 /* virtio_net_config.status available*/
+#define VIRTIO_NET_F_CTRL_VQ    0x20000 /* Control channel available */
+#define VIRTIO_NET_F_CTRL_RX    0x40000 /* Control channel RX mode support */
+#define VIRTIO_NET_F_CTRL_VLAN  0x80000 /* Control channel VLAN filtering */
+#define VIRTIO_NET_F_CTRL_RX_EXTRA  0x100000 /* Extra RX mode control support */
+#define VIRTIO_RING_F_INDIRECT_DESC 0x10000000 /* Support for indirect buffer descriptors. */
+/* The guest publishes the used index for which it expects an interrupt
+ * at the end of the avail ring. Host should ignore the avail->flags field.
+ * The host publishes the avail index for which it expects a kick
+ * at the end of the used ring. Guest should ignore the used->flags field.
+ */
+#define VIRTIO_RING_F_EVENT_IDX 0x20000000
+
+#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */
+
+/*
+ * Maximum number of virtqueues per device.
+ */
+#define VIRTIO_MAX_VIRTQUEUES 8
+
+struct virtio_hw {
+	struct virtqueue *cvq;
+	uint32_t    io_base;
+	uint32_t    guest_features;
+	uint32_t    max_tx_queues;
+	uint32_t    max_rx_queues;
+	uint16_t    vtnet_hdr_size;
+	uint8_t	    vlan_strip;
+	uint8_t	    use_msix;
+	uint8_t     started;
+	uint8_t     mac_addr[ETHER_ADDR_LEN];
+};
+
+/*
+ * This structure is just a reference to read
+ * net device specific config space; it is just a shadow structure
+ *
+ */
+struct virtio_net_config {
+	/* The config defining mac address (if VIRTIO_NET_F_MAC) */
+	uint8_t    mac[ETHER_ADDR_LEN];
+	/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
+	uint16_t   status;
+	uint16_t   max_virtqueue_pairs;
+} __attribute__((packed));
+
+/*
+ * The remaining space is defined by each driver as the per-driver
+ * configuration space.
+ */
+#define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20)
+
+/*
+ * How many bits to shift physical queue address written to QUEUE_PFN.
+ * 12 is historical, and due to x86 page size.
+ */
+#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12
+
+/* The alignment to use between consumer and producer parts of vring. */
+#define VIRTIO_PCI_VRING_ALIGN 4096
+
+#ifdef __FreeBSD__
+
+static inline void
+outb_p(unsigned char data, unsigned int port)
+{
+
+	outb(port, (u_char)data);
+}
+
+static inline void
+outw_p(unsigned short data, unsigned int port)
+{
+	outw(port, (u_short)data);
+}
+
+static inline void
+outl_p(unsigned int data, unsigned int port)
+{
+	outl(port, (u_int)data);
+}
+#endif
+
+#define VIRTIO_PCI_REG_ADDR(hw, reg) \
+	(unsigned short)((hw)->io_base + (reg))
+
+#define VIRTIO_READ_REG_1(hw, reg) \
+	inb((VIRTIO_PCI_REG_ADDR((hw), (reg))))
+#define VIRTIO_WRITE_REG_1(hw, reg, value) \
+	outb_p((unsigned char)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
+
+#define VIRTIO_READ_REG_2(hw, reg) \
+	inw((VIRTIO_PCI_REG_ADDR((hw), (reg))))
+#define VIRTIO_WRITE_REG_2(hw, reg, value) \
+	outw_p((unsigned short)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
+
+#define VIRTIO_READ_REG_4(hw, reg) \
+	inl((VIRTIO_PCI_REG_ADDR((hw), (reg))))
+#define VIRTIO_WRITE_REG_4(hw, reg, value) \
+	outl_p((unsigned int)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
+
+static inline int
+vtpci_with_feature(struct virtio_hw *hw, uint32_t feature)
+{
+	return (hw->guest_features & feature) != 0;
+}
+
+/*
+ * Function declaration from virtio_pci.c
+ */
+void vtpci_reset(struct virtio_hw *);
+
+void vtpci_reinit_complete(struct virtio_hw *);
+
+void vtpci_set_status(struct virtio_hw *, uint8_t);
+
+uint32_t vtpci_negotiate_features(struct virtio_hw *, uint32_t);
+
+void vtpci_write_dev_config(struct virtio_hw *, uint64_t, void *, int);
+
+void vtpci_read_dev_config(struct virtio_hw *, uint64_t, void *, int);
+
+uint8_t vtpci_isr(struct virtio_hw *);
+
+uint16_t vtpci_irq_config(struct virtio_hw *, uint16_t);
+
+#endif /* _VIRTIO_PCI_H_ */
diff --git a/drivers/net/virtio/virtio_ring.h b/drivers/net/virtio/virtio_ring.h
new file mode 100644
index 0000000..a16c499
--- /dev/null
+++ b/drivers/net/virtio/virtio_ring.h
@@ -0,0 +1,163 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_RING_H_
+#define _VIRTIO_RING_H_
+
+#include <stdint.h>
+
+#include <rte_common.h>
+
+/* This marks a buffer as continuing via the next field. */
+#define VRING_DESC_F_NEXT       1
+/* This marks a buffer as write-only (otherwise read-only). */
+#define VRING_DESC_F_WRITE      2
+/* This means the buffer contains a list of buffer descriptors. */
+#define VRING_DESC_F_INDIRECT   4
+
+/* The Host uses this in used->flags to advise the Guest: don't kick me
+ * when you add a buffer.  It's unreliable, so it's simply an
+ * optimization.  Guest will still kick if it's out of buffers. */
+#define VRING_USED_F_NO_NOTIFY  1
+/* The Guest uses this in avail->flags to advise the Host: don't
+ * interrupt me when you consume a buffer.  It's unreliable, so it's
+ * simply an optimization.  */
+#define VRING_AVAIL_F_NO_INTERRUPT  1
+
+/* VirtIO ring descriptors: 16 bytes.
+ * These can chain together via "next". */
+struct vring_desc {
+	uint64_t addr;  /*  Address (guest-physical). */
+	uint32_t len;   /* Length. */
+	uint16_t flags; /* The flags as indicated above. */
+	uint16_t next;  /* We chain unused descriptors via this. */
+};
+
+struct vring_avail {
+	uint16_t flags;
+	uint16_t idx;
+	uint16_t ring[0];
+};
+
+/* id is a 16bit index. uint32_t is used here for ids for padding reasons. */
+struct vring_used_elem {
+	/* Index of start of used descriptor chain. */
+	uint32_t id;
+	/* Total length of the descriptor chain which was written to. */
+	uint32_t len;
+};
+
+struct vring_used {
+	uint16_t flags;
+	uint16_t idx;
+	struct vring_used_elem ring[0];
+};
+
+struct vring {
+	unsigned int num;
+	struct vring_desc  *desc;
+	struct vring_avail *avail;
+	struct vring_used  *used;
+};
+
+/* The standard layout for the ring is a continuous chunk of memory which
+ * looks like this.  We assume num is a power of 2.
+ *
+ * struct vring {
+ *      // The actual descriptors (16 bytes each)
+ *      struct vring_desc desc[num];
+ *
+ *      // A ring of available descriptor heads with free-running index.
+ *      __u16 avail_flags;
+ *      __u16 avail_idx;
+ *      __u16 available[num];
+ *      __u16 used_event_idx;
+ *
+ *      // Padding to the next align boundary.
+ *      char pad[];
+ *
+ *      // A ring of used descriptor heads with free-running index.
+ *      __u16 used_flags;
+ *      __u16 used_idx;
+ *      struct vring_used_elem used[num];
+ *      __u16 avail_event_idx;
+ * };
+ *
+ * NOTE: for VirtIO PCI, align is 4096.
+ */
+
+/*
+ * We publish the used event index at the end of the available ring, and vice
+ * versa. They are at the end for backwards compatibility.
+ */
+#define vring_used_event(vr)  ((vr)->avail->ring[(vr)->num])
+#define vring_avail_event(vr) (*(uint16_t *)&(vr)->used->ring[(vr)->num])
+
+static inline int
+vring_size(unsigned int num, unsigned long align)
+{
+	int size;
+
+	size = num * sizeof(struct vring_desc);
+	size += sizeof(struct vring_avail) + (num * sizeof(uint16_t));
+	size = RTE_ALIGN_CEIL(size, align);
+	size += sizeof(struct vring_used) +
+		(num * sizeof(struct vring_used_elem));
+	return size;
+}
+
+static inline void
+vring_init(struct vring *vr, unsigned int num, uint8_t *p,
+	unsigned long align)
+{
+	vr->num = num;
+	vr->desc = (struct vring_desc *) p;
+	vr->avail = (struct vring_avail *) (p +
+		num * sizeof(struct vring_desc));
+	vr->used = (void *)
+		RTE_ALIGN_CEIL((uintptr_t)(&vr->avail->ring[num]), align);
+}
+
+/*
+ * The following is used with VIRTIO_RING_F_EVENT_IDX.
+ * Assuming a given event_idx value from the other side, if we have
+ * just incremented index from old to new_idx, should we trigger an
+ * event?
+ */
+static inline int
+vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
+{
+	return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
+}
+
+#endif /* _VIRTIO_RING_H_ */
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
new file mode 100644
index 0000000..3ff275c
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -0,0 +1,815 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <rte_cycles.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_branch_prediction.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_prefetch.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
+#include <rte_byteorder.h>
+
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "virtqueue.h"
+
+#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
+#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
+#else
+#define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
+#endif
+
+static void
+vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
+{
+	struct vring_desc *dp, *dp_tail;
+	struct vq_desc_extra *dxp;
+	uint16_t desc_idx_last = desc_idx;
+
+	dp  = &vq->vq_ring.desc[desc_idx];
+	dxp = &vq->vq_descx[desc_idx];
+	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
+	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
+		while (dp->flags & VRING_DESC_F_NEXT) {
+			desc_idx_last = dp->next;
+			dp = &vq->vq_ring.desc[dp->next];
+		}
+	}
+	dxp->ndescs = 0;
+
+	/*
+	 * Link the newly freed chain onto the existing free chain, if any.
+	 * A tail index of VQ_RING_DESC_CHAIN_END marks an empty free chain,
+	 * in which case the freed chain becomes the new head.
+	 */
+	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
+		vq->vq_desc_head_idx = desc_idx;
+	} else {
+		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
+		dp_tail->next = desc_idx;
+	}
+
+	vq->vq_desc_tail_idx = desc_idx_last;
+	dp->next = VQ_RING_DESC_CHAIN_END;
+}
+
+static uint16_t
+virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
+			   uint32_t *len, uint16_t num)
+{
+	struct vring_used_elem *uep;
+	struct rte_mbuf *cookie;
+	uint16_t used_idx, desc_idx;
+	uint16_t i;
+
+	/*  Caller does the check */
+	for (i = 0; i < num ; i++) {
+		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+		uep = &vq->vq_ring.used->ring[used_idx];
+		desc_idx = (uint16_t) uep->id;
+		len[i] = uep->len;
+		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
+
+		if (unlikely(cookie == NULL)) {
+			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n",
+				vq->vq_used_cons_idx);
+			break;
+		}
+
+		rte_prefetch0(cookie);
+		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
+		rx_pkts[i]  = cookie;
+		vq->vq_used_cons_idx++;
+		vq_ring_free_chain(vq, desc_idx);
+		vq->vq_descx[desc_idx].cookie = NULL;
+	}
+
+	return i;
+}
+
+#ifndef DEFAULT_TX_FREE_THRESH
+#define DEFAULT_TX_FREE_THRESH 32
+#endif
+
+/* Cleanup from completed transmits. */
+static void
+virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
+{
+	uint16_t i, used_idx, desc_idx;
+	for (i = 0; i < num; i++) {
+		struct vring_used_elem *uep;
+		struct vq_desc_extra *dxp;
+
+		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+		uep = &vq->vq_ring.used->ring[used_idx];
+
+		desc_idx = (uint16_t) uep->id;
+		dxp = &vq->vq_descx[desc_idx];
+		vq->vq_used_cons_idx++;
+		vq_ring_free_chain(vq, desc_idx);
+
+		if (dxp->cookie != NULL) {
+			rte_pktmbuf_free(dxp->cookie);
+			dxp->cookie = NULL;
+		}
+	}
+}
+
+
+static inline int
+virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
+{
+	struct vq_desc_extra *dxp;
+	struct virtio_hw *hw = vq->hw;
+	struct vring_desc *start_dp;
+	uint16_t needed = 1;
+	uint16_t head_idx, idx;
+
+	if (unlikely(vq->vq_free_cnt == 0))
+		return -ENOSPC;
+	if (unlikely(vq->vq_free_cnt < needed))
+		return -EMSGSIZE;
+
+	head_idx = vq->vq_desc_head_idx;
+	if (unlikely(head_idx >= vq->vq_nentries))
+		return -EFAULT;
+
+	idx = head_idx;
+	dxp = &vq->vq_descx[idx];
+	dxp->cookie = (void *)cookie;
+	dxp->ndescs = needed;
+
+	start_dp = vq->vq_ring.desc;
+	start_dp[idx].addr =
+		(uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM
+		- hw->vtnet_hdr_size);
+	start_dp[idx].len =
+		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
+	start_dp[idx].flags =  VRING_DESC_F_WRITE;
+	idx = start_dp[idx].next;
+	vq->vq_desc_head_idx = idx;
+	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
+		vq->vq_desc_tail_idx = idx;
+	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
+	vq_update_avail_ring(vq, head_idx);
+
+	return 0;
+}
+
+static int
+virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
+{
+	struct vq_desc_extra *dxp;
+	struct vring_desc *start_dp;
+	uint16_t seg_num = cookie->nb_segs;
+	uint16_t needed = 1 + seg_num;
+	uint16_t head_idx, idx;
+	uint16_t head_size = txvq->hw->vtnet_hdr_size;
+
+	if (unlikely(txvq->vq_free_cnt == 0))
+		return -ENOSPC;
+	if (unlikely(txvq->vq_free_cnt < needed))
+		return -EMSGSIZE;
+	head_idx = txvq->vq_desc_head_idx;
+	if (unlikely(head_idx >= txvq->vq_nentries))
+		return -EFAULT;
+
+	idx = head_idx;
+	dxp = &txvq->vq_descx[idx];
+	dxp->cookie = (void *)cookie;
+	dxp->ndescs = needed;
+
+	start_dp = txvq->vq_ring.desc;
+	start_dp[idx].addr =
+		txvq->virtio_net_hdr_mem + idx * head_size;
+	start_dp[idx].len = (uint32_t)head_size;
+	start_dp[idx].flags = VRING_DESC_F_NEXT;
+
+	for (; ((seg_num > 0) && (cookie != NULL)); seg_num--) {
+		idx = start_dp[idx].next;
+		start_dp[idx].addr  = RTE_MBUF_DATA_DMA_ADDR(cookie);
+		start_dp[idx].len   = cookie->data_len;
+		start_dp[idx].flags = VRING_DESC_F_NEXT;
+		cookie = cookie->next;
+	}
+
+	start_dp[idx].flags &= ~VRING_DESC_F_NEXT;
+	idx = start_dp[idx].next;
+	txvq->vq_desc_head_idx = idx;
+	if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
+		txvq->vq_desc_tail_idx = idx;
+	txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed);
+	vq_update_avail_ring(txvq, head_idx);
+
+	return 0;
+}
+
+static inline struct rte_mbuf *
+rte_rxmbuf_alloc(struct rte_mempool *mp)
+{
+	struct rte_mbuf *m;
+
+	m = __rte_mbuf_raw_alloc(mp);
+	__rte_mbuf_sanity_check_raw(m, 0);
+
+	return m;
+}
+
+static void
+virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
+{
+	struct rte_mbuf *m;
+	int i, nbufs, error, size = vq->vq_nentries;
+	struct vring *vr = &vq->vq_ring;
+	uint8_t *ring_mem = vq->vq_ring_virt_mem;
+
+	PMD_INIT_FUNC_TRACE();
+
+	/*
+	 * Reinitialise since virtio port might have been stopped and restarted
+	 */
+	memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
+	vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
+	vq->vq_used_cons_idx = 0;
+	vq->vq_desc_head_idx = 0;
+	vq->vq_avail_idx = 0;
+	vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
+	vq->vq_free_cnt = vq->vq_nentries;
+	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
+
+	/* Chain all the descriptors in the ring with an END */
+	for (i = 0; i < size - 1; i++)
+		vr->desc[i].next = (uint16_t)(i + 1);
+	vr->desc[i].next = VQ_RING_DESC_CHAIN_END;
+
+	/*
+	 * Disable device(host) interrupting guest
+	 */
+	virtqueue_disable_intr(vq);
+
+	/* Only rx virtqueue needs mbufs to be allocated at initialization */
+	if (queue_type == VTNET_RQ) {
+		if (vq->mpool == NULL)
+			rte_exit(EXIT_FAILURE,
+			"Cannot allocate initial mbufs for rx virtqueue");
+
+		/* Allocate blank mbufs for the each rx descriptor */
+		nbufs = 0;
+		error = ENOSPC;
+		while (!virtqueue_full(vq)) {
+			m = rte_rxmbuf_alloc(vq->mpool);
+			if (m == NULL)
+				break;
+
+			/******************************************
+			*         Enqueue allocated buffers        *
+			*******************************************/
+			error = virtqueue_enqueue_recv_refill(vq, m);
+
+			if (error) {
+				rte_pktmbuf_free(m);
+				break;
+			}
+			nbufs++;
+		}
+
+		vq_update_avail_idx(vq);
+
+		PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
+
+		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
+			vq->vq_queue_index);
+		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
+			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+	} else if (queue_type == VTNET_TQ) {
+		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
+			vq->vq_queue_index);
+		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
+			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+	} else {
+		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
+			vq->vq_queue_index);
+		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
+			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+	}
+}
+
+void
+virtio_dev_cq_start(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	if (hw->cvq) {
+		virtio_dev_vring_start(hw->cvq, VTNET_CQ);
+		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq);
+	}
+}
+
+void
+virtio_dev_rxtx_start(struct rte_eth_dev *dev)
+{
+	/*
+	 * Start receive and transmit vrings
+	 * -	Setup vring structure for all queues
+	 * -	Initialize descriptor for the rx vring
+	 * -	Allocate blank mbufs for the each rx descriptor
+	 *
+	 */
+	int i;
+
+	PMD_INIT_FUNC_TRACE();
+
+	/* Start rx vring. */
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		virtio_dev_vring_start(dev->data->rx_queues[i], VTNET_RQ);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+	}
+
+	/* Start tx vring. */
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		virtio_dev_vring_start(dev->data->tx_queues[i], VTNET_TQ);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+	}
+}
+
+int
+virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
+			uint16_t queue_idx,
+			uint16_t nb_desc,
+			unsigned int socket_id,
+			__rte_unused const struct rte_eth_rxconf *rx_conf,
+			struct rte_mempool *mp)
+{
+	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
+	struct virtqueue *vq;
+	int ret;
+
+	PMD_INIT_FUNC_TRACE();
+	ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx,
+			nb_desc, socket_id, &vq);
+	if (ret < 0) {
+		PMD_INIT_LOG(ERR, "rvq initialization failed");
+		return ret;
+	}
+
+	/* Record the caller-supplied mempool used for rx mbuf allocation */
+	vq->mpool = mp;
+
+	dev->data->rx_queues[queue_idx] = vq;
+	return 0;
+}
+
+/*
+ * struct rte_eth_dev *dev: Used to update dev
+ * uint16_t nb_desc: Defaults to values read from config space
+ * unsigned int socket_id: Used to allocate memzone
+ * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
+ * uint16_t queue_idx: Just used as an index in dev txq list
+ */
+int
+virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
+			uint16_t queue_idx,
+			uint16_t nb_desc,
+			unsigned int socket_id,
+			const struct rte_eth_txconf *tx_conf)
+{
+	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
+	struct virtqueue *vq;
+	uint16_t tx_free_thresh;
+	int ret;
+
+	PMD_INIT_FUNC_TRACE();
+
+	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS)
+	    != ETH_TXQ_FLAGS_NOXSUMS) {
+		PMD_INIT_LOG(ERR, "TX checksum offload not supported\n");
+		return -EINVAL;
+	}
+
+	ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
+			nb_desc, socket_id, &vq);
+	if (ret < 0) {
+		PMD_INIT_LOG(ERR, "tvq initialization failed");
+		return ret;
+	}
+
+	tx_free_thresh = tx_conf->tx_free_thresh;
+	if (tx_free_thresh == 0)
+		tx_free_thresh =
+			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
+
+	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
+		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
+			"number of TX entries minus 3 (%u)."
+			" (tx_free_thresh=%u port=%u queue=%u)\n",
+			vq->vq_nentries - 3,
+			tx_free_thresh, dev->data->port_id, queue_idx);
+		return -EINVAL;
+	}
+
+	vq->vq_free_thresh = tx_free_thresh;
+
+	dev->data->tx_queues[queue_idx] = vq;
+	return 0;
+}
+
+static void
+virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
+{
+	int error;
+	/*
+	 * Requeue the discarded mbuf. This should always be
+	 * successful since it was just dequeued.
+	 */
+	error = virtqueue_enqueue_recv_refill(vq, m);
+	if (unlikely(error)) {
+		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
+		rte_pktmbuf_free(m);
+	}
+}
+
+#define VIRTIO_MBUF_BURST_SZ 64
+#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
+uint16_t
+virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	struct virtqueue *rxvq = rx_queue;
+	struct virtio_hw *hw;
+	struct rte_mbuf *rxm, *new_mbuf;
+	uint16_t nb_used, num, nb_rx;
+	uint32_t len[VIRTIO_MBUF_BURST_SZ];
+	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
+	int error;
+	uint32_t i, nb_enqueued;
+	const uint32_t hdr_size = sizeof(struct virtio_net_hdr);
+
+	nb_used = VIRTQUEUE_NUSED(rxvq);
+
+	virtio_rmb();
+
+	num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
+	num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
+	if (likely(num > DESC_PER_CACHELINE))
+		num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
+
+	if (num == 0)
+		return 0;
+
+	num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num);
+	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
+
+	hw = rxvq->hw;
+	nb_rx = 0;
+	nb_enqueued = 0;
+
+	for (i = 0; i < num ; i++) {
+		rxm = rcv_pkts[i];
+
+		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
+
+		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
+			PMD_RX_LOG(ERR, "Packet drop");
+			nb_enqueued++;
+			virtio_discard_rxbuf(rxvq, rxm);
+			rxvq->errors++;
+			continue;
+		}
+
+		rxm->port = rxvq->port_id;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+
+		rxm->nb_segs = 1;
+		rxm->next = NULL;
+		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
+		rxm->data_len = (uint16_t)(len[i] - hdr_size);
+
+		if (hw->vlan_strip)
+			rte_vlan_strip(rxm);
+
+		VIRTIO_DUMP_PACKET(rxm, rxm->data_len);
+
+		rx_pkts[nb_rx++] = rxm;
+		rxvq->bytes += rx_pkts[nb_rx - 1]->pkt_len;
+	}
+
+	rxvq->packets += nb_rx;
+
+	/* Allocate new mbuf for the used descriptor */
+	error = ENOSPC;
+	while (likely(!virtqueue_full(rxvq))) {
+		new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
+		if (unlikely(new_mbuf == NULL)) {
+			struct rte_eth_dev *dev
+				= &rte_eth_devices[rxvq->port_id];
+			dev->data->rx_mbuf_alloc_failed++;
+			break;
+		}
+		error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
+		if (unlikely(error)) {
+			rte_pktmbuf_free(new_mbuf);
+			break;
+		}
+		nb_enqueued++;
+	}
+
+	if (likely(nb_enqueued)) {
+		vq_update_avail_idx(rxvq);
+
+		if (unlikely(virtqueue_kick_prepare(rxvq))) {
+			virtqueue_notify(rxvq);
+			PMD_RX_LOG(DEBUG, "Notified\n");
+		}
+	}
+
+	return nb_rx;
+}
+
+uint16_t
+virtio_recv_mergeable_pkts(void *rx_queue,
+			struct rte_mbuf **rx_pkts,
+			uint16_t nb_pkts)
+{
+	struct virtqueue *rxvq = rx_queue;
+	struct virtio_hw *hw;
+	struct rte_mbuf *rxm, *new_mbuf;
+	uint16_t nb_used, num, nb_rx;
+	uint32_t len[VIRTIO_MBUF_BURST_SZ];
+	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
+	struct rte_mbuf *prev;
+	int error;
+	uint32_t i, nb_enqueued;
+	uint32_t seg_num;
+	uint16_t extra_idx;
+	uint32_t seg_res;
+	const uint32_t hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+
+	nb_used = VIRTQUEUE_NUSED(rxvq);
+
+	virtio_rmb();
+
+	if (nb_used == 0)
+		return 0;
+
+	PMD_RX_LOG(DEBUG, "used:%d\n", nb_used);
+
+	hw = rxvq->hw;
+	nb_rx = 0;
+	i = 0;
+	nb_enqueued = 0;
+	seg_num = 0;
+	extra_idx = 0;
+	seg_res = 0;
+
+	while (i < nb_used) {
+		struct virtio_net_hdr_mrg_rxbuf *header;
+
+		if (nb_rx == nb_pkts)
+			break;
+
+		num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, 1);
+		if (num != 1)
+			break; /* nothing was consumed; retrying would spin */
+
+		i++;
+
+		PMD_RX_LOG(DEBUG, "dequeue:%d\n", num);
+		PMD_RX_LOG(DEBUG, "packet len:%d\n", len[0]);
+
+		rxm = rcv_pkts[0];
+
+		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
+			PMD_RX_LOG(ERR, "Packet drop\n");
+			nb_enqueued++;
+			virtio_discard_rxbuf(rxvq, rxm);
+			rxvq->errors++;
+			continue;
+		}
+
+		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
+			RTE_PKTMBUF_HEADROOM - hdr_size);
+		seg_num = header->num_buffers;
+
+		if (seg_num == 0)
+			seg_num = 1;
+
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		rxm->nb_segs = seg_num;
+		rxm->next = NULL;
+		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
+		rxm->data_len = (uint16_t)(len[0] - hdr_size);
+
+		rxm->port = rxvq->port_id;
+		rx_pkts[nb_rx] = rxm;
+		prev = rxm;
+
+		seg_res = seg_num - 1;
+
+		while (seg_res != 0) {
+			/*
+			 * Get extra segments for current uncompleted packet.
+			 */
+			uint16_t  rcv_cnt =
+				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
+			if (likely(VIRTQUEUE_NUSED(rxvq) >= rcv_cnt)) {
+				uint32_t rx_num =
+					virtqueue_dequeue_burst_rx(rxvq,
+					rcv_pkts, len, rcv_cnt);
+				i += rx_num;
+				rcv_cnt = rx_num;
+			} else {
+				PMD_RX_LOG(ERR,
+					"No enough segments for packet.\n");
+				nb_enqueued++;
+				virtio_discard_rxbuf(rxvq, rxm);
+				rxvq->errors++;
+				break;
+			}
+
+			extra_idx = 0;
+
+			while (extra_idx < rcv_cnt) {
+				rxm = rcv_pkts[extra_idx];
+
+				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
+				rxm->next = NULL;
+				rxm->pkt_len = (uint32_t)(len[extra_idx]);
+				rxm->data_len = (uint16_t)(len[extra_idx]);
+
+				if (prev)
+					prev->next = rxm;
+
+				prev = rxm;
+				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
+				extra_idx++;
+			}
+			seg_res -= rcv_cnt;
+		}
+
+		if (hw->vlan_strip)
+			rte_vlan_strip(rx_pkts[nb_rx]);
+
+		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
+			rx_pkts[nb_rx]->data_len);
+
+		rxvq->bytes += rx_pkts[nb_rx]->pkt_len;
+		nb_rx++;
+	}
+
+	rxvq->packets += nb_rx;
+
+	/* Allocate new mbuf for the used descriptor */
+	error = ENOSPC;
+	while (likely(!virtqueue_full(rxvq))) {
+		new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
+		if (unlikely(new_mbuf == NULL)) {
+			struct rte_eth_dev *dev
+				= &rte_eth_devices[rxvq->port_id];
+			dev->data->rx_mbuf_alloc_failed++;
+			break;
+		}
+		error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
+		if (unlikely(error)) {
+			rte_pktmbuf_free(new_mbuf);
+			break;
+		}
+		nb_enqueued++;
+	}
+
+	if (likely(nb_enqueued)) {
+		vq_update_avail_idx(rxvq);
+
+		if (unlikely(virtqueue_kick_prepare(rxvq))) {
+			virtqueue_notify(rxvq);
+			PMD_RX_LOG(DEBUG, "Notified");
+		}
+	}
+
+	return nb_rx;
+}
+
+uint16_t
+virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct virtqueue *txvq = tx_queue;
+	struct rte_mbuf *txm;
+	uint16_t nb_used, nb_tx;
+	int error;
+
+	if (unlikely(nb_pkts < 1))
+		return nb_pkts;
+
+	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
+	nb_used = VIRTQUEUE_NUSED(txvq);
+
+	virtio_rmb();
+	if (likely(nb_used > txvq->vq_free_thresh))
+		virtio_xmit_cleanup(txvq, nb_used);
+
+	nb_tx = 0;
+
+	while (nb_tx < nb_pkts) {
+		/* Need one more descriptor for virtio header. */
+		int need = tx_pkts[nb_tx]->nb_segs - txvq->vq_free_cnt + 1;
+
+		/* A positive value means vring descriptors must be freed */
+		if (unlikely(need > 0)) {
+			nb_used = VIRTQUEUE_NUSED(txvq);
+			virtio_rmb();
+			need = RTE_MIN(need, (int)nb_used);
+
+			virtio_xmit_cleanup(txvq, need);
+			need = (int)tx_pkts[nb_tx]->nb_segs -
+				txvq->vq_free_cnt + 1;
+		}
+
+		/*
+		 * Zero or negative value indicates it has enough free
+		 * descriptors to use for transmitting.
+		 */
+		if (likely(need <= 0)) {
+			txm = tx_pkts[nb_tx];
+
+			/* Do VLAN tag insertion */
+			if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
+				error = rte_vlan_insert(&txm);
+				if (unlikely(error)) {
+					rte_pktmbuf_free(txm);
+					++nb_tx;
+					continue;
+				}
+			}
+
+			/* Enqueue packet buffers; failures are negative errno */
+			error = virtqueue_enqueue_xmit(txvq, txm);
+			if (unlikely(error)) {
+				if (error == -ENOSPC)
+					PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0");
+				else if (error == -EMSGSIZE)
+					PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1");
+				else
+					PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d", error);
+				break;
+			}
+			nb_tx++;
+			txvq->bytes += txm->pkt_len;
+		} else {
+			PMD_TX_LOG(ERR, "No free tx descriptors to transmit");
+			break;
+		}
+	}
+
+	txvq->packets += nb_tx;
+
+	if (likely(nb_tx)) {
+		vq_update_avail_idx(txvq);
+
+		if (unlikely(virtqueue_kick_prepare(txvq))) {
+			virtqueue_notify(txvq);
+			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
+		}
+	}
+
+	return nb_tx;
+}
diff --git a/drivers/net/virtio/virtqueue.c b/drivers/net/virtio/virtqueue.c
new file mode 100644
index 0000000..8a3005f
--- /dev/null
+++ b/drivers/net/virtio/virtqueue.c
@@ -0,0 +1,70 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+
+#include "virtqueue.h"
+#include "virtio_logs.h"
+#include "virtio_pci.h"
+
+void
+virtqueue_disable_intr(struct virtqueue *vq)
+{
+	/*
+	 * Set VRING_AVAIL_F_NO_INTERRUPT to hint host
+	 * not to interrupt when it consumes packets
+	 * Note: this is only considered a hint to the host
+	 */
+	vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
+}
+
+/*
+ * Two types of mbuf to be cleaned:
+ * 1) mbuf that has been consumed by backend but not used by virtio.
+ * 2) mbuf that hasn't been consumed by backend.
+ */
+struct rte_mbuf *
+virtqueue_detatch_unused(struct virtqueue *vq)
+{
+	struct rte_mbuf *cookie;
+	int idx;
+
+	for (idx = 0; idx < vq->vq_nentries; idx++) {
+		if ((cookie = vq->vq_descx[idx].cookie) != NULL) {
+			vq->vq_descx[idx].cookie = NULL;
+			return cookie;
+		}
+	}
+	return NULL;
+}
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
new file mode 100644
index 0000000..9d6079e
--- /dev/null
+++ b/drivers/net/virtio/virtqueue.h
@@ -0,0 +1,325 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTQUEUE_H_
+#define _VIRTQUEUE_H_
+
+#include <stdint.h>
+
+#include <rte_atomic.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_mempool.h>
+
+#include "virtio_pci.h"
+#include "virtio_ring.h"
+#include "virtio_logs.h"
+
+struct rte_mbuf;
+
+/*
+ * Per virtio_config.h in Linux.
+ *     For virtio_pci on SMP, we don't need to order with respect to MMIO
+ *     accesses through relaxed memory I/O windows, so smp_mb() et al are
+ *     sufficient.
+ *
+ * This driver is for virtio_pci on SMP and therefore can assume
+ * weaker (compiler barriers)
+ */
+#define virtio_mb()	rte_mb()
+#define virtio_rmb()	rte_compiler_barrier()
+#define virtio_wmb()	rte_compiler_barrier()
+
+#ifdef RTE_PMD_PACKET_PREFETCH
+#define rte_packet_prefetch(p)  rte_prefetch1(p)
+#else
+#define rte_packet_prefetch(p)  do {} while(0)
+#endif
+
+#define VIRTQUEUE_MAX_NAME_SZ 32
+
+#define RTE_MBUF_DATA_DMA_ADDR(mb) \
+	(uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
+
+#define VTNET_SQ_RQ_QUEUE_IDX 0
+#define VTNET_SQ_TQ_QUEUE_IDX 1
+#define VTNET_SQ_CQ_QUEUE_IDX 2
+
+enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 };
+/**
+ * The maximum virtqueue size is 2^15. Use that value as the end of
+ * descriptor chain terminator since it will never be a valid index
+ * in the descriptor table. This is used to verify we are correctly
+ * handling vq_free_cnt.
+ */
+#define VQ_RING_DESC_CHAIN_END 32768
+
+/**
+ * Control the RX mode, ie. promiscuous, allmulti, etc...
+ * All commands require an "out" sg entry containing a 1 byte
+ * state value, zero = disable, non-zero = enable.  Commands
+ * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
+ * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
+ */
+#define VIRTIO_NET_CTRL_RX              0
+#define VIRTIO_NET_CTRL_RX_PROMISC      0
+#define VIRTIO_NET_CTRL_RX_ALLMULTI     1
+#define VIRTIO_NET_CTRL_RX_ALLUNI       2
+#define VIRTIO_NET_CTRL_RX_NOMULTI      3
+#define VIRTIO_NET_CTRL_RX_NOUNI        4
+#define VIRTIO_NET_CTRL_RX_NOBCAST      5
+
+/**
+ * Control the MAC
+ *
+ * The MAC filter table is managed by the hypervisor, the guest should
+ * assume the size is infinite.  Filtering should be considered
+ * non-perfect, ie. based on hypervisor resources, the guest may
+ * receive packets from sources not specified in the filter list.
+ *
+ * In addition to the class/cmd header, the TABLE_SET command requires
+ * two out scatterlists.  Each contains a 4 byte count of entries followed
+ * by a concatenated byte stream of the ETH_ALEN MAC addresses.  The
+ * first sg list contains unicast addresses, the second is for multicast.
+ * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
+ * is available.
+ *
+ * The ADDR_SET command requests one out scatterlist, it contains a
+ * 6 bytes MAC address. This functionality is present if the
+ * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
+ */
+struct virtio_net_ctrl_mac {
+	uint32_t entries;
+	uint8_t macs[][ETHER_ADDR_LEN];
+} __attribute__((__packed__));
+
+#define VIRTIO_NET_CTRL_MAC    1
+ #define VIRTIO_NET_CTRL_MAC_TABLE_SET        0
+ #define VIRTIO_NET_CTRL_MAC_ADDR_SET         1
+
+/**
+ * Control VLAN filtering
+ *
+ * The VLAN filter table is controlled via a simple ADD/DEL interface.
+ * VLAN IDs not added may be filtered by the hypervisor.  Del is the
+ * opposite of add.  Both commands expect an out entry containing a 2
+ * byte VLAN ID.  VLAN filtering is available with the
+ * VIRTIO_NET_F_CTRL_VLAN feature bit.
+ */
+#define VIRTIO_NET_CTRL_VLAN     2
+#define VIRTIO_NET_CTRL_VLAN_ADD 0
+#define VIRTIO_NET_CTRL_VLAN_DEL 1
+
+struct virtio_net_ctrl_hdr {
+	uint8_t class;
+	uint8_t cmd;
+} __attribute__((packed));
+
+typedef uint8_t virtio_net_ctrl_ack;
+
+#define VIRTIO_NET_OK     0
+#define VIRTIO_NET_ERR    1
+
+#define VIRTIO_MAX_CTRL_DATA 2048
+
+struct virtio_pmd_ctrl {
+	struct virtio_net_ctrl_hdr hdr;
+	virtio_net_ctrl_ack status;
+	uint8_t data[VIRTIO_MAX_CTRL_DATA];
+};
+
+struct virtqueue {
+	struct virtio_hw         *hw;     /**< virtio_hw structure pointer. */
+	const struct rte_memzone *mz;     /**< mem zone to populate RX ring. */
+	const struct rte_memzone *virtio_net_hdr_mz; /**< memzone to populate hdr. */
+	struct rte_mempool       *mpool;  /**< mempool for mbuf allocation */
+	uint16_t    queue_id;             /**< DPDK queue index. */
+	uint8_t     port_id;              /**< Device port identifier. */
+	uint16_t    vq_queue_index;       /**< PCI queue index */
+
+	void        *vq_ring_virt_mem;    /**< linear address of vring*/
+	unsigned int vq_ring_size;
+	phys_addr_t vq_ring_mem;          /**< physical address of vring */
+
+	struct vring vq_ring;    /**< vring keeping desc, used and avail */
+	uint16_t    vq_free_cnt; /**< num of desc available */
+	uint16_t    vq_nentries; /**< vring desc numbers */
+	uint16_t    vq_free_thresh; /**< free threshold */
+	/**
+	 * Head of the free chain in the descriptor table. If
+	 * there are no free descriptors, this will be set to
+	 * VQ_RING_DESC_CHAIN_END.
+	 */
+	uint16_t  vq_desc_head_idx;
+	uint16_t  vq_desc_tail_idx;
+	/**
+	 * Last consumed descriptor in the used table,
+	 * trails vq_ring.used->idx.
+	 */
+	uint16_t vq_used_cons_idx;
+	uint16_t vq_avail_idx;
+	phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
+
+	/* Statistics */
+	uint64_t	packets;
+	uint64_t	bytes;
+	uint64_t	errors;
+
+	struct vq_desc_extra {
+		void              *cookie;
+		uint16_t          ndescs;
+	} vq_descx[0];
+};
+
+/* If multiqueue is provided by host, then we support it. */
+#ifndef VIRTIO_NET_F_MQ
+/* Device supports Receive Flow Steering */
+#define VIRTIO_NET_F_MQ 0x400000
+#define VIRTIO_NET_CTRL_MQ   4
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
+#endif
+#ifndef VIRTIO_NET_F_CTRL_MAC_ADDR
+#define VIRTIO_NET_F_CTRL_MAC_ADDR 0x800000
+#define VIRTIO_NET_CTRL_MAC_ADDR_SET         1
+#endif
+
+/**
+ * This is the first element of the scatter-gather list.  If you don't
+ * specify GSO or CSUM features, you can simply ignore the header.
+ */
+struct virtio_net_hdr {
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1    /**< Use csum_start,csum_offset*/
+	uint8_t flags;
+#define VIRTIO_NET_HDR_GSO_NONE     0    /**< Not a GSO frame */
+#define VIRTIO_NET_HDR_GSO_TCPV4    1    /**< GSO frame, IPv4 TCP (TSO) */
+#define VIRTIO_NET_HDR_GSO_UDP      3    /**< GSO frame, IPv4 UDP (UFO) */
+#define VIRTIO_NET_HDR_GSO_TCPV6    4    /**< GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_ECN      0x80 /**< TCP has ECN set */
+	uint8_t gso_type;
+	uint16_t hdr_len;     /**< Ethernet + IP + tcp/udp hdrs */
+	uint16_t gso_size;    /**< Bytes to append to hdr_len per frame */
+	uint16_t csum_start;  /**< Position to start checksumming from */
+	uint16_t csum_offset; /**< Offset after that to place checksum */
+};
+
+/**
+ * This is the version of the header to use when the MRG_RXBUF
+ * feature has been negotiated.
+ */
+struct virtio_net_hdr_mrg_rxbuf {
+	struct   virtio_net_hdr hdr;
+	uint16_t num_buffers; /**< Number of merged rx buffers */
+};
+
+/**
+ * Tell the backend not to interrupt us.
+ */
+void virtqueue_disable_intr(struct virtqueue *vq);
+/**
+ *  Dump virtqueue internal structures, for debug purpose only.
+ */
+void virtqueue_dump(struct virtqueue *vq);
+/**
+ *  Get all mbufs to be freed.
+ */
+struct rte_mbuf *virtqueue_detatch_unused(struct virtqueue *vq);
+
+static inline int
+virtqueue_full(const struct virtqueue *vq)
+{
+	return vq->vq_free_cnt == 0;
+}
+
+#define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_ring.used->idx - (vq)->vq_used_cons_idx))
+
+static inline void
+vq_update_avail_idx(struct virtqueue *vq)
+{
+	virtio_wmb();
+	vq->vq_ring.avail->idx = vq->vq_avail_idx;
+}
+
+static inline void
+vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx)
+{
+	uint16_t avail_idx;
+	/*
+	 * Place the head of the descriptor chain into the next slot and make
+	 * it usable to the host. The chain is made available now rather than
+	 * deferring to virtqueue_notify() in the hopes that if the host is
+	 * currently running on another CPU, we can keep it processing the new
+	 * descriptor.
+	 */
+	avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1));
+	vq->vq_ring.avail->ring[avail_idx] = desc_idx;
+	vq->vq_avail_idx++;
+}
+
+static inline int
+virtqueue_kick_prepare(struct virtqueue *vq)
+{
+	return !(vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY);
+}
+
+static inline void
+virtqueue_notify(struct virtqueue *vq)
+{
+	/*
+	 * Ensure updated avail->idx is visible to host.
+	 * For virtio on IA, the notification is through io port operation
+	 * which is a serialization instruction itself.
+	 */
+	VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_NOTIFY, vq->vq_queue_index);
+}
+
+#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
+#define VIRTQUEUE_DUMP(vq) do { \
+	uint16_t used_idx, nused; \
+	used_idx = (vq)->vq_ring.used->idx; \
+	nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \
+	PMD_INIT_LOG(DEBUG, \
+	  "VQ: - size=%d; free=%d; used=%d; desc_head_idx=%d;" \
+	  " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \
+	  " avail.flags=0x%x; used.flags=0x%x", \
+	  (vq)->vq_nentries, (vq)->vq_free_cnt, nused, \
+	  (vq)->vq_desc_head_idx, (vq)->vq_ring.avail->idx, \
+	  (vq)->vq_used_cons_idx, (vq)->vq_ring.used->idx, \
+	  (vq)->vq_ring.avail->flags, (vq)->vq_ring.used->flags); \
+} while (0)
+#else
+#define VIRTQUEUE_DUMP(vq) do { } while (0)
+#endif
+
+#endif /* _VIRTQUEUE_H_ */
diff --git a/lib/Makefile b/lib/Makefile
index 0bb87ba..3e5cab0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -41,7 +41,6 @@ DIRS-$(CONFIG_RTE_LIBRTE_TIMER) += librte_timer
 DIRS-$(CONFIG_RTE_LIBRTE_CFGFILE) += librte_cfgfile
 DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += librte_cmdline
 DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether
-DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += librte_pmd_virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += librte_pmd_vmxnet3
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt
 DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
diff --git a/lib/librte_pmd_virtio/Makefile b/lib/librte_pmd_virtio/Makefile
deleted file mode 100644
index 21ff7e5..0000000
--- a/lib/librte_pmd_virtio/Makefile
+++ /dev/null
@@ -1,60 +0,0 @@
-#   BSD LICENSE
-#
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
-#   All rights reserved.
-#
-#   Redistribution and use in source and binary forms, with or without
-#   modification, are permitted provided that the following conditions
-#   are met:
-#
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in
-#       the documentation and/or other materials provided with the
-#       distribution.
-#     * Neither the name of Intel Corporation nor the names of its
-#       contributors may be used to endorse or promote products derived
-#       from this software without specific prior written permission.
-#
-#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-include $(RTE_SDK)/mk/rte.vars.mk
-
-#
-# library name
-#
-LIB = librte_pmd_virtio.a
-
-CFLAGS += -O3
-CFLAGS += $(WERROR_FLAGS)
-
-EXPORT_MAP := rte_pmd_virtio_version.map
-
-LIBABIVER := 1
-
-#
-# all source are stored in SRCS-y
-#
-SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c
-SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c
-SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
-SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
-
-
-# this lib depends upon:
-DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
-DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
-DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_net lib/librte_malloc
-
-include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_pmd_virtio/rte_pmd_virtio_version.map b/lib/librte_pmd_virtio/rte_pmd_virtio_version.map
deleted file mode 100644
index ef35398..0000000
--- a/lib/librte_pmd_virtio/rte_pmd_virtio_version.map
+++ /dev/null
@@ -1,4 +0,0 @@
-DPDK_2.0 {
-
-	local: *;
-};
diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c b/lib/librte_pmd_virtio/virtio_ethdev.c
deleted file mode 100644
index e63dbfb..0000000
--- a/lib/librte_pmd_virtio/virtio_ethdev.c
+++ /dev/null
@@ -1,1504 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdint.h>
-#include <string.h>
-#include <stdio.h>
-#include <errno.h>
-#include <unistd.h>
-#ifdef RTE_EXEC_ENV_LINUXAPP
-#include <dirent.h>
-#include <fcntl.h>
-#endif
-
-#include <rte_ethdev.h>
-#include <rte_memcpy.h>
-#include <rte_string_fns.h>
-#include <rte_memzone.h>
-#include <rte_malloc.h>
-#include <rte_atomic.h>
-#include <rte_branch_prediction.h>
-#include <rte_pci.h>
-#include <rte_ether.h>
-#include <rte_common.h>
-
-#include <rte_memory.h>
-#include <rte_eal.h>
-#include <rte_dev.h>
-
-#include "virtio_ethdev.h"
-#include "virtio_pci.h"
-#include "virtio_logs.h"
-#include "virtqueue.h"
-
-
-static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
-static int  virtio_dev_configure(struct rte_eth_dev *dev);
-static int  virtio_dev_start(struct rte_eth_dev *dev);
-static void virtio_dev_stop(struct rte_eth_dev *dev);
-static void virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
-static void virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
-static void virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
-static void virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
-static void virtio_dev_info_get(struct rte_eth_dev *dev,
-				struct rte_eth_dev_info *dev_info);
-static int virtio_dev_link_update(struct rte_eth_dev *dev,
-	__rte_unused int wait_to_complete);
-
-static void virtio_set_hwaddr(struct virtio_hw *hw);
-static void virtio_get_hwaddr(struct virtio_hw *hw);
-
-static void virtio_dev_rx_queue_release(__rte_unused void *rxq);
-static void virtio_dev_tx_queue_release(__rte_unused void *txq);
-
-static void virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
-static void virtio_dev_stats_reset(struct rte_eth_dev *dev);
-static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
-static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
-				uint16_t vlan_id, int on);
-static void virtio_mac_addr_add(struct rte_eth_dev *dev,
-				struct ether_addr *mac_addr,
-				uint32_t index, uint32_t vmdq __rte_unused);
-static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
-static void virtio_mac_addr_set(struct rte_eth_dev *dev,
-				struct ether_addr *mac_addr);
-
-static int virtio_dev_queue_stats_mapping_set(
-	__rte_unused struct rte_eth_dev *eth_dev,
-	__rte_unused uint16_t queue_id,
-	__rte_unused uint8_t stat_idx,
-	__rte_unused uint8_t is_rx);
-
-/*
- * The set of PCI devices this driver supports
- */
-static const struct rte_pci_id pci_id_virtio_map[] = {
-
-#define RTE_PCI_DEV_ID_DECL_VIRTIO(vend, dev) {RTE_PCI_DEVICE(vend, dev)},
-#include "rte_pci_dev_ids.h"
-
-{ .vendor_id = 0, /* sentinel */ },
-};
-
-static int
-virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
-		int *dlen, int pkt_num)
-{
-	uint16_t head = vq->vq_desc_head_idx, i;
-	int k, sum = 0;
-	virtio_net_ctrl_ack status = ~0;
-	struct virtio_pmd_ctrl result;
-
-	ctrl->status = status;
-
-	if (!vq->hw->cvq) {
-		PMD_INIT_LOG(ERR,
-			     "%s(): Control queue is not supported.",
-			     __func__);
-		return -1;
-	}
-
-	PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
-		"vq->hw->cvq = %p vq = %p",
-		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
-
-	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
-		return -1;
-
-	memcpy(vq->virtio_net_hdr_mz->addr, ctrl,
-		sizeof(struct virtio_pmd_ctrl));
-
-	/*
-	 * Format is enforced in qemu code:
-	 * One TX packet for header;
-	 * At least one TX packet per argument;
-	 * One RX packet for ACK.
-	 */
-	vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT;
-	vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr;
-	vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
-	vq->vq_free_cnt--;
-	i = vq->vq_ring.desc[head].next;
-
-	for (k = 0; k < pkt_num; k++) {
-		vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT;
-		vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr
-			+ sizeof(struct virtio_net_ctrl_hdr)
-			+ sizeof(ctrl->status) + sizeof(uint8_t)*sum;
-		vq->vq_ring.desc[i].len = dlen[k];
-		sum += dlen[k];
-		vq->vq_free_cnt--;
-		i = vq->vq_ring.desc[i].next;
-	}
-
-	vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
-	vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr
-			+ sizeof(struct virtio_net_ctrl_hdr);
-	vq->vq_ring.desc[i].len = sizeof(ctrl->status);
-	vq->vq_free_cnt--;
-
-	vq->vq_desc_head_idx = vq->vq_ring.desc[i].next;
-
-	vq_update_avail_ring(vq, head);
-	vq_update_avail_idx(vq);
-
-	PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
-
-	virtqueue_notify(vq);
-
-	rte_rmb();
-	while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) {
-		rte_rmb();
-		usleep(100);
-	}
-
-	while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) {
-		uint32_t idx, desc_idx, used_idx;
-		struct vring_used_elem *uep;
-
-		used_idx = (uint32_t)(vq->vq_used_cons_idx
-				& (vq->vq_nentries - 1));
-		uep = &vq->vq_ring.used->ring[used_idx];
-		idx = (uint32_t) uep->id;
-		desc_idx = idx;
-
-		while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) {
-			desc_idx = vq->vq_ring.desc[desc_idx].next;
-			vq->vq_free_cnt++;
-		}
-
-		vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx;
-		vq->vq_desc_head_idx = idx;
-
-		vq->vq_used_cons_idx++;
-		vq->vq_free_cnt++;
-	}
-
-	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
-			vq->vq_free_cnt, vq->vq_desc_head_idx);
-
-	memcpy(&result, vq->virtio_net_hdr_mz->addr,
-			sizeof(struct virtio_pmd_ctrl));
-
-	return result.status;
-}
-
-static int
-virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct virtio_pmd_ctrl ctrl;
-	int dlen[1];
-	int ret;
-
-	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
-	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
-	memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
-
-	dlen[0] = sizeof(uint16_t);
-
-	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
-
-	if (ret) {
-		PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
-			  "failed, this is too late now...");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-int virtio_dev_queue_setup(struct rte_eth_dev *dev,
-			int queue_type,
-			uint16_t queue_idx,
-			uint16_t  vtpci_queue_idx,
-			uint16_t nb_desc,
-			unsigned int socket_id,
-			struct virtqueue **pvq)
-{
-	char vq_name[VIRTQUEUE_MAX_NAME_SZ];
-	const struct rte_memzone *mz;
-	uint16_t vq_size;
-	int size;
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct virtqueue  *vq = NULL;
-
-	/* Write the virtqueue index to the Queue Select Field */
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, vtpci_queue_idx);
-	PMD_INIT_LOG(DEBUG, "selecting queue: %d", vtpci_queue_idx);
-
-	/*
-	 * Read the virtqueue size from the Queue Size field
-	 * Always power of 2 and if 0 virtqueue does not exist
-	 */
-	vq_size = VIRTIO_READ_REG_2(hw, VIRTIO_PCI_QUEUE_NUM);
-	PMD_INIT_LOG(DEBUG, "vq_size: %d nb_desc:%d", vq_size, nb_desc);
-	if (nb_desc == 0)
-		nb_desc = vq_size;
-	if (vq_size == 0) {
-		PMD_INIT_LOG(ERR, "%s: virtqueue does not exist", __func__);
-		return -EINVAL;
-	} else if (!rte_is_power_of_2(vq_size)) {
-		PMD_INIT_LOG(ERR, "%s: virtqueue size is not powerof 2", __func__);
-		return -EINVAL;
-	} else if (nb_desc != vq_size) {
-		PMD_INIT_LOG(ERR, "Warning: nb_desc(%d) is not equal to vq size (%d), fall to vq size",
-			nb_desc, vq_size);
-		nb_desc = vq_size;
-	}
-
-	if (queue_type == VTNET_RQ) {
-		snprintf(vq_name, sizeof(vq_name), "port%d_rvq%d",
-			dev->data->port_id, queue_idx);
-		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
-			vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
-	} else if (queue_type == VTNET_TQ) {
-		snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d",
-			dev->data->port_id, queue_idx);
-		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
-			vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
-	} else if (queue_type == VTNET_CQ) {
-		snprintf(vq_name, sizeof(vq_name), "port%d_cvq",
-			dev->data->port_id);
-		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
-			vq_size * sizeof(struct vq_desc_extra),
-			RTE_CACHE_LINE_SIZE);
-	}
-	if (vq == NULL) {
-		PMD_INIT_LOG(ERR, "%s: Can not allocate virtqueue", __func__);
-		return (-ENOMEM);
-	}
-
-	vq->hw = hw;
-	vq->port_id = dev->data->port_id;
-	vq->queue_id = queue_idx;
-	vq->vq_queue_index = vtpci_queue_idx;
-	vq->vq_nentries = vq_size;
-	vq->vq_free_cnt = vq_size;
-
-	/*
-	 * Reserve a memzone for vring elements
-	 */
-	size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
-	vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
-	PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d", size, vq->vq_ring_size);
-
-	mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
-		socket_id, 0, VIRTIO_PCI_VRING_ALIGN);
-	if (mz == NULL) {
-		rte_free(vq);
-		return -ENOMEM;
-	}
-
-	/*
-	 * Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
-	 * and only accepts 32 bit page frame number.
-	 * Check if the allocated physical memory exceeds 16TB.
-	 */
-	if ((mz->phys_addr + vq->vq_ring_size - 1) >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
-		PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
-		rte_free(vq);
-		return -ENOMEM;
-	}
-
-	memset(mz->addr, 0, sizeof(mz->len));
-	vq->mz = mz;
-	vq->vq_ring_mem = mz->phys_addr;
-	vq->vq_ring_virt_mem = mz->addr;
-	PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem:      0x%"PRIx64, (uint64_t)mz->phys_addr);
-	PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%"PRIx64, (uint64_t)mz->addr);
-	vq->virtio_net_hdr_mz  = NULL;
-	vq->virtio_net_hdr_mem = 0;
-
-	if (queue_type == VTNET_TQ) {
-		/*
-		 * For each xmit packet, allocate a virtio_net_hdr
-		 */
-		snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone",
-			dev->data->port_id, queue_idx);
-		vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
-			vq_size * hw->vtnet_hdr_size,
-			socket_id, 0, RTE_CACHE_LINE_SIZE);
-		if (vq->virtio_net_hdr_mz == NULL) {
-			rte_free(vq);
-			return -ENOMEM;
-		}
-		vq->virtio_net_hdr_mem =
-			vq->virtio_net_hdr_mz->phys_addr;
-		memset(vq->virtio_net_hdr_mz->addr, 0,
-			vq_size * hw->vtnet_hdr_size);
-	} else if (queue_type == VTNET_CQ) {
-		/* Allocate a page for control vq command, data and status */
-		snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone",
-			dev->data->port_id);
-		vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
-			PAGE_SIZE, socket_id, 0, RTE_CACHE_LINE_SIZE);
-		if (vq->virtio_net_hdr_mz == NULL) {
-			rte_free(vq);
-			return -ENOMEM;
-		}
-		vq->virtio_net_hdr_mem =
-			vq->virtio_net_hdr_mz->phys_addr;
-		memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);
-	}
-
-	/*
-	 * Set guest physical address of the virtqueue
-	 * in VIRTIO_PCI_QUEUE_PFN config register of device
-	 */
-	VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_QUEUE_PFN,
-			mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
-	*pvq = vq;
-	return 0;
-}
-
-static int
-virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx,
-		uint32_t socket_id)
-{
-	struct virtqueue *vq;
-	uint16_t nb_desc = 0;
-	int ret;
-	struct virtio_hw *hw = dev->data->dev_private;
-
-	PMD_INIT_FUNC_TRACE();
-	ret = virtio_dev_queue_setup(dev, VTNET_CQ, VTNET_SQ_CQ_QUEUE_IDX,
-			vtpci_queue_idx, nb_desc, socket_id, &vq);
-
-	if (ret < 0) {
-		PMD_INIT_LOG(ERR, "control vq initialization failed");
-		return ret;
-	}
-
-	hw->cvq = vq;
-	return 0;
-}
-
-static void
-virtio_dev_close(struct rte_eth_dev *dev)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct rte_pci_device *pci_dev = dev->pci_dev;
-
-	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
-
-	/* reset the NIC */
-	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
-		vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
-	vtpci_reset(hw);
-	hw->started = 0;
-	virtio_dev_free_mbufs(dev);
-}
-
-static void
-virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct virtio_pmd_ctrl ctrl;
-	int dlen[1];
-	int ret;
-
-	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
-	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
-	ctrl.data[0] = 1;
-	dlen[0] = 1;
-
-	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
-
-	if (ret)
-		PMD_INIT_LOG(ERR, "Failed to enable promisc");
-}
-
-static void
-virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct virtio_pmd_ctrl ctrl;
-	int dlen[1];
-	int ret;
-
-	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
-	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
-	ctrl.data[0] = 0;
-	dlen[0] = 1;
-
-	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
-
-	if (ret)
-		PMD_INIT_LOG(ERR, "Failed to disable promisc");
-}
-
-static void
-virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct virtio_pmd_ctrl ctrl;
-	int dlen[1];
-	int ret;
-
-	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
-	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
-	ctrl.data[0] = 1;
-	dlen[0] = 1;
-
-	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
-
-	if (ret)
-		PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
-}
-
-static void
-virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct virtio_pmd_ctrl ctrl;
-	int dlen[1];
-	int ret;
-
-	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
-	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
-	ctrl.data[0] = 0;
-	dlen[0] = 1;
-
-	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
-
-	if (ret)
-		PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
-}
-
-/*
- * dev_ops for virtio, bare necessities for basic operation
- */
-static const struct eth_dev_ops virtio_eth_dev_ops = {
-	.dev_configure           = virtio_dev_configure,
-	.dev_start               = virtio_dev_start,
-	.dev_stop                = virtio_dev_stop,
-	.dev_close               = virtio_dev_close,
-	.promiscuous_enable      = virtio_dev_promiscuous_enable,
-	.promiscuous_disable     = virtio_dev_promiscuous_disable,
-	.allmulticast_enable     = virtio_dev_allmulticast_enable,
-	.allmulticast_disable    = virtio_dev_allmulticast_disable,
-
-	.dev_infos_get           = virtio_dev_info_get,
-	.stats_get               = virtio_dev_stats_get,
-	.stats_reset             = virtio_dev_stats_reset,
-	.link_update             = virtio_dev_link_update,
-	.rx_queue_setup          = virtio_dev_rx_queue_setup,
-	/* meaningfull only to multiple queue */
-	.rx_queue_release        = virtio_dev_rx_queue_release,
-	.tx_queue_setup          = virtio_dev_tx_queue_setup,
-	/* meaningfull only to multiple queue */
-	.tx_queue_release        = virtio_dev_tx_queue_release,
-	/* collect stats per queue */
-	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
-	.vlan_filter_set         = virtio_vlan_filter_set,
-	.mac_addr_add            = virtio_mac_addr_add,
-	.mac_addr_remove         = virtio_mac_addr_remove,
-	.mac_addr_set            = virtio_mac_addr_set,
-};
-
-static inline int
-virtio_dev_atomic_read_link_status(struct rte_eth_dev *dev,
-				struct rte_eth_link *link)
-{
-	struct rte_eth_link *dst = link;
-	struct rte_eth_link *src = &(dev->data->dev_link);
-
-	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-			*(uint64_t *)src) == 0)
-		return -1;
-
-	return 0;
-}
-
-/**
- * Atomically writes the link status information into global
- * structure rte_eth_dev.
- *
- * @param dev
- *   - Pointer to the structure rte_eth_dev to read from.
- *   - Pointer to the buffer to be saved with the link status.
- *
- * @return
- *   - On success, zero.
- *   - On failure, negative value.
- */
-static inline int
-virtio_dev_atomic_write_link_status(struct rte_eth_dev *dev,
-		struct rte_eth_link *link)
-{
-	struct rte_eth_link *dst = &(dev->data->dev_link);
-	struct rte_eth_link *src = link;
-
-	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-					*(uint64_t *)src) == 0)
-		return -1;
-
-	return 0;
-}
-
-static void
-virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
-{
-	unsigned i;
-
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		const struct virtqueue *txvq = dev->data->tx_queues[i];
-		if (txvq == NULL)
-			continue;
-
-		stats->opackets += txvq->packets;
-		stats->obytes += txvq->bytes;
-		stats->oerrors += txvq->errors;
-
-		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
-			stats->q_opackets[i] = txvq->packets;
-			stats->q_obytes[i] = txvq->bytes;
-		}
-	}
-
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		const struct virtqueue *rxvq = dev->data->rx_queues[i];
-		if (rxvq == NULL)
-			continue;
-
-		stats->ipackets += rxvq->packets;
-		stats->ibytes += rxvq->bytes;
-		stats->ierrors += rxvq->errors;
-
-		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
-			stats->q_ipackets[i] = rxvq->packets;
-			stats->q_ibytes[i] = rxvq->bytes;
-		}
-	}
-
-	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
-}
-
-static void
-virtio_dev_stats_reset(struct rte_eth_dev *dev)
-{
-	unsigned int i;
-
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		struct virtqueue *txvq = dev->data->tx_queues[i];
-		if (txvq == NULL)
-			continue;
-
-		txvq->packets = 0;
-		txvq->bytes = 0;
-		txvq->errors = 0;
-	}
-
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct virtqueue *rxvq = dev->data->rx_queues[i];
-		if (rxvq == NULL)
-			continue;
-
-		rxvq->packets = 0;
-		rxvq->bytes = 0;
-		rxvq->errors = 0;
-	}
-
-	dev->data->rx_mbuf_alloc_failed = 0;
-}
-
-static void
-virtio_set_hwaddr(struct virtio_hw *hw)
-{
-	vtpci_write_dev_config(hw,
-			offsetof(struct virtio_net_config, mac),
-			&hw->mac_addr, ETHER_ADDR_LEN);
-}
-
-static void
-virtio_get_hwaddr(struct virtio_hw *hw)
-{
-	if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) {
-		vtpci_read_dev_config(hw,
-			offsetof(struct virtio_net_config, mac),
-			&hw->mac_addr, ETHER_ADDR_LEN);
-	} else {
-		eth_random_addr(&hw->mac_addr[0]);
-		virtio_set_hwaddr(hw);
-	}
-}
-
-static int
-virtio_mac_table_set(struct virtio_hw *hw,
-		     const struct virtio_net_ctrl_mac *uc,
-		     const struct virtio_net_ctrl_mac *mc)
-{
-	struct virtio_pmd_ctrl ctrl;
-	int err, len[2];
-
-	ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
-	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
-
-	len[0] = uc->entries * ETHER_ADDR_LEN + sizeof(uc->entries);
-	memcpy(ctrl.data, uc, len[0]);
-
-	len[1] = mc->entries * ETHER_ADDR_LEN + sizeof(mc->entries);
-	memcpy(ctrl.data + len[0], mc, len[1]);
-
-	err = virtio_send_command(hw->cvq, &ctrl, len, 2);
-	if (err != 0)
-		PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
-
-	return err;
-}
-
-static void
-virtio_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
-		    uint32_t index, uint32_t vmdq __rte_unused)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	const struct ether_addr *addrs = dev->data->mac_addrs;
-	unsigned int i;
-	struct virtio_net_ctrl_mac *uc, *mc;
-
-	if (index >= VIRTIO_MAX_MAC_ADDRS) {
-		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
-		return;
-	}
-
-	uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
-	uc->entries = 0;
-	mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
-	mc->entries = 0;
-
-	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
-		const struct ether_addr *addr
-			= (i == index) ? mac_addr : addrs + i;
-		struct virtio_net_ctrl_mac *tbl
-			= is_multicast_ether_addr(addr) ? mc : uc;
-
-		memcpy(&tbl->macs[tbl->entries++], addr, ETHER_ADDR_LEN);
-	}
-
-	virtio_mac_table_set(hw, uc, mc);
-}
-
-static void
-virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct ether_addr *addrs = dev->data->mac_addrs;
-	struct virtio_net_ctrl_mac *uc, *mc;
-	unsigned int i;
-
-	if (index >= VIRTIO_MAX_MAC_ADDRS) {
-		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
-		return;
-	}
-
-	uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
-	uc->entries = 0;
-	mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
-	mc->entries = 0;
-
-	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
-		struct virtio_net_ctrl_mac *tbl;
-
-		if (i == index || is_zero_ether_addr(addrs + i))
-			continue;
-
-		tbl = is_multicast_ether_addr(addrs + i) ? mc : uc;
-		memcpy(&tbl->macs[tbl->entries++], addrs + i, ETHER_ADDR_LEN);
-	}
-
-	virtio_mac_table_set(hw, uc, mc);
-}
-
-static void
-virtio_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-
-	memcpy(hw->mac_addr, mac_addr, ETHER_ADDR_LEN);
-
-	/* Use atomic update if available */
-	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
-		struct virtio_pmd_ctrl ctrl;
-		int len = ETHER_ADDR_LEN;
-
-		ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
-		ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
-
-		memcpy(ctrl.data, mac_addr, ETHER_ADDR_LEN);
-		virtio_send_command(hw->cvq, &ctrl, &len, 1);
-	} else if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC))
-		virtio_set_hwaddr(hw);
-}
-
-static int
-virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct virtio_pmd_ctrl ctrl;
-	int len;
-
-	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
-		return -ENOTSUP;
-
-	ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
-	ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
-	memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
-	len = sizeof(vlan_id);
-
-	return virtio_send_command(hw->cvq, &ctrl, &len, 1);
-}
-
-static void
-virtio_negotiate_features(struct virtio_hw *hw)
-{
-	uint32_t host_features, mask;
-
-	/* checksum offload not implemented */
-	mask = VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
-
-	/* TSO and LRO are only available when their corresponding
-	 * checksum offload feature is also negotiated.
-	 */
-	mask |= VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_ECN;
-	mask |= VIRTIO_NET_F_GUEST_TSO4 | VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN;
-	mask |= VTNET_LRO_FEATURES;
-
-	/* not negotiating INDIRECT descriptor table support */
-	mask |= VIRTIO_RING_F_INDIRECT_DESC;
-
-	/* Prepare guest_features: feature that driver wants to support */
-	hw->guest_features = VTNET_FEATURES & ~mask;
-	PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %x",
-		hw->guest_features);
-
-	/* Read device(host) feature bits */
-	host_features = VIRTIO_READ_REG_4(hw, VIRTIO_PCI_HOST_FEATURES);
-	PMD_INIT_LOG(DEBUG, "host_features before negotiate = %x",
-		host_features);
-
-	/*
-	 * Negotiate features: Subset of device feature bits are written back
-	 * guest feature bits.
-	 */
-	hw->guest_features = vtpci_negotiate_features(hw, host_features);
-	PMD_INIT_LOG(DEBUG, "features after negotiate = %x",
-		hw->guest_features);
-}
-
-#ifdef RTE_EXEC_ENV_LINUXAPP
-static int
-parse_sysfs_value(const char *filename, unsigned long *val)
-{
-	FILE *f;
-	char buf[BUFSIZ];
-	char *end = NULL;
-
-	f = fopen(filename, "r");
-	if (f == NULL) {
-		PMD_INIT_LOG(ERR, "%s(): cannot open sysfs value %s",
-			     __func__, filename);
-		return -1;
-	}
-
-	if (fgets(buf, sizeof(buf), f) == NULL) {
-		PMD_INIT_LOG(ERR, "%s(): cannot read sysfs value %s",
-			     __func__, filename);
-		fclose(f);
-		return -1;
-	}
-	*val = strtoul(buf, &end, 0);
-	if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
-		PMD_INIT_LOG(ERR, "%s(): cannot parse sysfs value %s",
-			     __func__, filename);
-		fclose(f);
-		return -1;
-	}
-	fclose(f);
-	return 0;
-}
-
-static int get_uio_dev(struct rte_pci_addr *loc, char *buf, unsigned int buflen,
-			unsigned int *uio_num)
-{
-	struct dirent *e;
-	DIR *dir;
-	char dirname[PATH_MAX];
-
-	/* depending on kernel version, uio can be located in uio/uioX
-	 * or uio:uioX */
-	snprintf(dirname, sizeof(dirname),
-		     SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio",
-		     loc->domain, loc->bus, loc->devid, loc->function);
-	dir = opendir(dirname);
-	if (dir == NULL) {
-		/* retry with the parent directory */
-		snprintf(dirname, sizeof(dirname),
-			     SYSFS_PCI_DEVICES "/" PCI_PRI_FMT,
-			     loc->domain, loc->bus, loc->devid, loc->function);
-		dir = opendir(dirname);
-
-		if (dir == NULL) {
-			PMD_INIT_LOG(ERR, "Cannot opendir %s", dirname);
-			return -1;
-		}
-	}
-
-	/* take the first file starting with "uio" */
-	while ((e = readdir(dir)) != NULL) {
-		/* format could be uio%d ...*/
-		int shortprefix_len = sizeof("uio") - 1;
-		/* ... or uio:uio%d */
-		int longprefix_len = sizeof("uio:uio") - 1;
-		char *endptr;
-
-		if (strncmp(e->d_name, "uio", 3) != 0)
-			continue;
-
-		/* first try uio%d */
-		errno = 0;
-		*uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10);
-		if (errno == 0 && endptr != (e->d_name + shortprefix_len)) {
-			snprintf(buf, buflen, "%s/uio%u", dirname, *uio_num);
-			break;
-		}
-
-		/* then try uio:uio%d */
-		errno = 0;
-		*uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10);
-		if (errno == 0 && endptr != (e->d_name + longprefix_len)) {
-			snprintf(buf, buflen, "%s/uio:uio%u", dirname,
-				     *uio_num);
-			break;
-		}
-	}
-	closedir(dir);
-
-	/* No uio resource found */
-	if (e == NULL) {
-		PMD_INIT_LOG(ERR, "Could not find uio resource");
-		return -1;
-	}
-
-	return 0;
-}
-
-static int
-virtio_has_msix(const struct rte_pci_addr *loc)
-{
-	DIR *d;
-	char dirname[PATH_MAX];
-
-	snprintf(dirname, sizeof(dirname),
-		     SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/msi_irqs",
-		     loc->domain, loc->bus, loc->devid, loc->function);
-
-	d = opendir(dirname);
-	if (d)
-		closedir(d);
-
-	return (d != NULL);
-}
-
-/* Extract I/O port numbers from sysfs */
-static int virtio_resource_init_by_uio(struct rte_pci_device *pci_dev)
-{
-	char dirname[PATH_MAX];
-	char filename[PATH_MAX];
-	unsigned long start, size;
-	unsigned int uio_num;
-
-	if (get_uio_dev(&pci_dev->addr, dirname, sizeof(dirname), &uio_num) < 0)
-		return -1;
-
-	/* get portio size */
-	snprintf(filename, sizeof(filename),
-		     "%s/portio/port0/size", dirname);
-	if (parse_sysfs_value(filename, &size) < 0) {
-		PMD_INIT_LOG(ERR, "%s(): cannot parse size",
-			     __func__);
-		return -1;
-	}
-
-	/* get portio start */
-	snprintf(filename, sizeof(filename),
-		 "%s/portio/port0/start", dirname);
-	if (parse_sysfs_value(filename, &start) < 0) {
-		PMD_INIT_LOG(ERR, "%s(): cannot parse portio start",
-			     __func__);
-		return -1;
-	}
-	pci_dev->mem_resource[0].addr = (void *)(uintptr_t)start;
-	pci_dev->mem_resource[0].len =  (uint64_t)size;
-	PMD_INIT_LOG(DEBUG,
-		     "PCI Port IO found start=0x%lx with size=0x%lx",
-		     start, size);
-
-	/* save fd */
-	memset(dirname, 0, sizeof(dirname));
-	snprintf(dirname, sizeof(dirname), "/dev/uio%u", uio_num);
-	pci_dev->intr_handle.fd = open(dirname, O_RDWR);
-	if (pci_dev->intr_handle.fd < 0) {
-		PMD_INIT_LOG(ERR, "Cannot open %s: %s\n",
-			dirname, strerror(errno));
-		return -1;
-	}
-
-	pci_dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
-	pci_dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
-
-	return 0;
-}
-
-/* Extract port I/O numbers from proc/ioports */
-static int virtio_resource_init_by_ioports(struct rte_pci_device *pci_dev)
-{
-	uint16_t start, end;
-	int size;
-	FILE *fp;
-	char *line = NULL;
-	char pci_id[16];
-	int found = 0;
-	size_t linesz;
-
-	snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
-		 pci_dev->addr.domain,
-		 pci_dev->addr.bus,
-		 pci_dev->addr.devid,
-		 pci_dev->addr.function);
-
-	fp = fopen("/proc/ioports", "r");
-	if (fp == NULL) {
-		PMD_INIT_LOG(ERR, "%s(): can't open ioports", __func__);
-		return -1;
-	}
-
-	while (getdelim(&line, &linesz, '\n', fp) > 0) {
-		char *ptr = line;
-		char *left;
-		int n;
-
-		n = strcspn(ptr, ":");
-		ptr[n] = 0;
-		left = &ptr[n+1];
-
-		while (*left && isspace(*left))
-			left++;
-
-		if (!strncmp(left, pci_id, strlen(pci_id))) {
-			found = 1;
-
-			while (*ptr && isspace(*ptr))
-				ptr++;
-
-			sscanf(ptr, "%04hx-%04hx", &start, &end);
-			size = end - start + 1;
-
-			break;
-		}
-	}
-
-	free(line);
-	fclose(fp);
-
-	if (!found)
-		return -1;
-
-	pci_dev->mem_resource[0].addr = (void *)(uintptr_t)(uint32_t)start;
-	pci_dev->mem_resource[0].len =  (uint64_t)size;
-	PMD_INIT_LOG(DEBUG,
-		"PCI Port IO found start=0x%x with size=0x%x",
-		start, size);
-
-	/* can't support lsc interrupt without uio */
-	pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
-
-	return 0;
-}
-
-/* Extract I/O port numbers from sysfs */
-static int virtio_resource_init(struct rte_pci_device *pci_dev)
-{
-	if (virtio_resource_init_by_uio(pci_dev) == 0)
-		return 0;
-	else
-		return virtio_resource_init_by_ioports(pci_dev);
-}
-
-#else
-static int
-virtio_has_msix(const struct rte_pci_addr *loc __rte_unused)
-{
-	/* nic_uio does not enable interrupts, return 0 (false). */
-	return 0;
-}
-
-static int virtio_resource_init(struct rte_pci_device *pci_dev __rte_unused)
-{
-	/* no setup required */
-	return 0;
-}
-#endif
-
-/*
- * Process Virtio Config changed interrupt and call the callback
- * if link state changed.
- */
-static void
-virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
-			 void *param)
-{
-	struct rte_eth_dev *dev = param;
-	struct virtio_hw *hw = dev->data->dev_private;
-	uint8_t isr;
-
-	/* Read interrupt status which clears interrupt */
-	isr = vtpci_isr(hw);
-	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
-
-	if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
-		PMD_DRV_LOG(ERR, "interrupt enable failed");
-
-	if (isr & VIRTIO_PCI_ISR_CONFIG) {
-		if (virtio_dev_link_update(dev, 0) == 0)
-			_rte_eth_dev_callback_process(dev,
-						      RTE_ETH_EVENT_INTR_LSC);
-	}
-
-}
-
-static void
-rx_func_get(struct rte_eth_dev *eth_dev)
-{
-	struct virtio_hw *hw = eth_dev->data->dev_private;
-	if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF))
-		eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
-	else
-		eth_dev->rx_pkt_burst = &virtio_recv_pkts;
-}
-
-/*
- * This function is based on probe() function in virtio_pci.c
- * It returns 0 on success.
- */
-static int
-eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
-{
-	struct virtio_hw *hw = eth_dev->data->dev_private;
-	struct virtio_net_config *config;
-	struct virtio_net_config local_config;
-	uint32_t offset_conf = sizeof(config->mac);
-	struct rte_pci_device *pci_dev;
-
-	RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr));
-
-	eth_dev->dev_ops = &virtio_eth_dev_ops;
-	eth_dev->tx_pkt_burst = &virtio_xmit_pkts;
-
-	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
-		rx_func_get(eth_dev);
-		return 0;
-	}
-
-	/* Allocate memory for storing MAC addresses */
-	eth_dev->data->mac_addrs = rte_zmalloc("virtio", ETHER_ADDR_LEN, 0);
-	if (eth_dev->data->mac_addrs == NULL) {
-		PMD_INIT_LOG(ERR,
-			"Failed to allocate %d bytes needed to store MAC addresses",
-			ETHER_ADDR_LEN);
-		return -ENOMEM;
-	}
-
-	pci_dev = eth_dev->pci_dev;
-	if (virtio_resource_init(pci_dev) < 0)
-		return -1;
-
-	hw->use_msix = virtio_has_msix(&pci_dev->addr);
-	hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr;
-
-	/* Reset the device although not necessary at startup */
-	vtpci_reset(hw);
-
-	/* Tell the host we've noticed this device. */
-	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
-
-	/* Tell the host we've known how to drive the device. */
-	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
-	virtio_negotiate_features(hw);
-
-	rx_func_get(eth_dev);
-
-	/* Setting up rx_header size for the device */
-	if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF))
-		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
-	else
-		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
-
-	/* Copy the permanent MAC address to: virtio_hw */
-	virtio_get_hwaddr(hw);
-	ether_addr_copy((struct ether_addr *) hw->mac_addr,
-			&eth_dev->data->mac_addrs[0]);
-	PMD_INIT_LOG(DEBUG,
-		     "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
-		     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
-		     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
-
-	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
-		config = &local_config;
-
-		if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
-			offset_conf += sizeof(config->status);
-		} else {
-			PMD_INIT_LOG(DEBUG,
-				     "VIRTIO_NET_F_STATUS is not supported");
-			config->status = 0;
-		}
-
-		if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
-			offset_conf += sizeof(config->max_virtqueue_pairs);
-		} else {
-			PMD_INIT_LOG(DEBUG,
-				     "VIRTIO_NET_F_MQ is not supported");
-			config->max_virtqueue_pairs = 1;
-		}
-
-		vtpci_read_dev_config(hw, 0, (uint8_t *)config, offset_conf);
-
-		hw->max_rx_queues =
-			(VIRTIO_MAX_RX_QUEUES < config->max_virtqueue_pairs) ?
-			VIRTIO_MAX_RX_QUEUES : config->max_virtqueue_pairs;
-		hw->max_tx_queues =
-			(VIRTIO_MAX_TX_QUEUES < config->max_virtqueue_pairs) ?
-			VIRTIO_MAX_TX_QUEUES : config->max_virtqueue_pairs;
-
-		virtio_dev_cq_queue_setup(eth_dev,
-					config->max_virtqueue_pairs * 2,
-					SOCKET_ID_ANY);
-
-		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
-				config->max_virtqueue_pairs);
-		PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
-		PMD_INIT_LOG(DEBUG,
-				"PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
-				config->mac[0], config->mac[1],
-				config->mac[2], config->mac[3],
-				config->mac[4], config->mac[5]);
-	} else {
-		hw->max_rx_queues = 1;
-		hw->max_tx_queues = 1;
-	}
-
-	eth_dev->data->nb_rx_queues = hw->max_rx_queues;
-	eth_dev->data->nb_tx_queues = hw->max_tx_queues;
-
-	PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d   hw->max_tx_queues=%d",
-			hw->max_rx_queues, hw->max_tx_queues);
-	PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
-			eth_dev->data->port_id, pci_dev->id.vendor_id,
-			pci_dev->id.device_id);
-
-	/* Setup interrupt callback  */
-	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
-		rte_intr_callback_register(&pci_dev->intr_handle,
-				   virtio_interrupt_handler, eth_dev);
-
-	virtio_dev_cq_start(eth_dev);
-
-	return 0;
-}
-
-static struct eth_driver rte_virtio_pmd = {
-	{
-		.name = "rte_virtio_pmd",
-		.id_table = pci_id_virtio_map,
-	},
-	.eth_dev_init = eth_virtio_dev_init,
-	.dev_private_size = sizeof(struct virtio_hw),
-};
-
-/*
- * Driver initialization routine.
- * Invoked once at EAL init time.
- * Register itself as the [Poll Mode] Driver of PCI virtio devices.
- * Returns 0 on success.
- */
-static int
-rte_virtio_pmd_init(const char *name __rte_unused,
-		    const char *param __rte_unused)
-{
-	if (rte_eal_iopl_init() != 0) {
-		PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD");
-		return -1;
-	}
-
-	rte_eth_driver_register(&rte_virtio_pmd);
-	return 0;
-}
-
-/*
- * Only 1 queue is supported, no queue release related operation
- */
-static void
-virtio_dev_rx_queue_release(__rte_unused void *rxq)
-{
-}
-
-static void
-virtio_dev_tx_queue_release(__rte_unused void *txq)
-{
-}
-
-/*
- * Configure virtio device
- * It returns 0 on success.
- */
-static int
-virtio_dev_configure(struct rte_eth_dev *dev)
-{
-	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct rte_pci_device *pci_dev = dev->pci_dev;
-
-	PMD_INIT_LOG(DEBUG, "configure");
-
-	if (rxmode->hw_ip_checksum) {
-		PMD_DRV_LOG(ERR, "HW IP checksum not supported");
-		return (-EINVAL);
-	}
-
-	hw->vlan_strip = rxmode->hw_vlan_strip;
-
-	if (rxmode->hw_vlan_filter
-	    && !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
-		PMD_DRV_LOG(NOTICE,
-			    "vlan filtering not available on this host");
-		return -ENOTSUP;
-	}
-
-	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
-		if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) {
-			PMD_DRV_LOG(ERR, "failed to set config vector");
-			return -EBUSY;
-		}
-
-	return 0;
-}
-
-
-static int
-virtio_dev_start(struct rte_eth_dev *dev)
-{
-	uint16_t nb_queues, i;
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct rte_pci_device *pci_dev = dev->pci_dev;
-
-	/* check if lsc interrupt feature is enabled */
-	if ((dev->data->dev_conf.intr_conf.lsc) &&
-		(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
-		if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
-			PMD_DRV_LOG(ERR, "link status not supported by host");
-			return -ENOTSUP;
-		}
-
-		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
-			PMD_DRV_LOG(ERR, "interrupt enable failed");
-			return -EIO;
-		}
-	}
-
-	/* Initialize Link state */
-	virtio_dev_link_update(dev, 0);
-
-	/* On restart after stop do not touch queues */
-	if (hw->started)
-		return 0;
-
-	/* Do final configuration before rx/tx engine starts */
-	virtio_dev_rxtx_start(dev);
-	vtpci_reinit_complete(hw);
-
-	hw->started = 1;
-
-	/*Notify the backend
-	 *Otherwise the tap backend might already stop its queue due to fullness.
-	 *vhost backend will have no chance to be waked up
-	 */
-	nb_queues = dev->data->nb_rx_queues;
-	if (nb_queues > 1) {
-		if (virtio_set_multiple_queues(dev, nb_queues) != 0)
-			return -EINVAL;
-	}
-
-	PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
-
-	for (i = 0; i < nb_queues; i++)
-		virtqueue_notify(dev->data->rx_queues[i]);
-
-	PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
-
-	for (i = 0; i < dev->data->nb_rx_queues; i++)
-		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
-
-	for (i = 0; i < dev->data->nb_tx_queues; i++)
-		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
-
-	return 0;
-}
-
-static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
-{
-	struct rte_mbuf *buf;
-	int i, mbuf_num = 0;
-
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		PMD_INIT_LOG(DEBUG,
-			     "Before freeing rxq[%d] used and unused buf", i);
-		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
-
-		while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(
-					dev->data->rx_queues[i])) != NULL) {
-			rte_pktmbuf_free(buf);
-			mbuf_num++;
-		}
-
-		PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num);
-		PMD_INIT_LOG(DEBUG,
-			     "After freeing rxq[%d] used and unused buf", i);
-		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
-	}
-
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		PMD_INIT_LOG(DEBUG,
-			     "Before freeing txq[%d] used and unused bufs",
-			     i);
-		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
-
-		mbuf_num = 0;
-		while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(
-					dev->data->tx_queues[i])) != NULL) {
-			rte_pktmbuf_free(buf);
-
-			mbuf_num++;
-		}
-
-		PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num);
-		PMD_INIT_LOG(DEBUG,
-			     "After freeing txq[%d] used and unused buf", i);
-		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
-	}
-}
-
-/*
- * Stop device: disable interrupt and mark link down
- */
-static void
-virtio_dev_stop(struct rte_eth_dev *dev)
-{
-	struct rte_eth_link link;
-
-	PMD_INIT_LOG(DEBUG, "stop");
-
-	if (dev->data->dev_conf.intr_conf.lsc)
-		rte_intr_disable(&dev->pci_dev->intr_handle);
-
-	memset(&link, 0, sizeof(link));
-	virtio_dev_atomic_write_link_status(dev, &link);
-}
-
-static int
-virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
-{
-	struct rte_eth_link link, old;
-	uint16_t status;
-	struct virtio_hw *hw = dev->data->dev_private;
-	memset(&link, 0, sizeof(link));
-	virtio_dev_atomic_read_link_status(dev, &link);
-	old = link;
-	link.link_duplex = FULL_DUPLEX;
-	link.link_speed  = SPEED_10G;
-
-	if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
-		PMD_INIT_LOG(DEBUG, "Get link status from hw");
-		vtpci_read_dev_config(hw,
-				offsetof(struct virtio_net_config, status),
-				&status, sizeof(status));
-		if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
-			link.link_status = 0;
-			PMD_INIT_LOG(DEBUG, "Port %d is down",
-				     dev->data->port_id);
-		} else {
-			link.link_status = 1;
-			PMD_INIT_LOG(DEBUG, "Port %d is up",
-				     dev->data->port_id);
-		}
-	} else {
-		link.link_status = 1;   /* Link up */
-	}
-	virtio_dev_atomic_write_link_status(dev, &link);
-
-	return (old.link_status == link.link_status) ? -1 : 0;
-}
-
-static void
-virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-
-	dev_info->driver_name = dev->driver->pci_drv.name;
-	dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
-	dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
-	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
-	dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
-	dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
-	dev_info->default_txconf = (struct rte_eth_txconf) {
-		.txq_flags = ETH_TXQ_FLAGS_NOOFFLOADS
-	};
-}
-
-/*
- * It enables testpmd to collect per queue stats.
- */
-static int
-virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
-__rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
-__rte_unused uint8_t is_rx)
-{
-	return 0;
-}
-
-static struct rte_driver rte_virtio_driver = {
-	.type = PMD_PDEV,
-	.init = rte_virtio_pmd_init,
-};
-
-PMD_REGISTER_DRIVER(rte_virtio_driver);
diff --git a/lib/librte_pmd_virtio/virtio_ethdev.h b/lib/librte_pmd_virtio/virtio_ethdev.h
deleted file mode 100644
index e6d4533..0000000
--- a/lib/librte_pmd_virtio/virtio_ethdev.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VIRTIO_ETHDEV_H_
-#define _VIRTIO_ETHDEV_H_
-
-#include <stdint.h>
-
-#include "virtio_pci.h"
-
-#define SPEED_10	10
-#define SPEED_100	100
-#define SPEED_1000	1000
-#define SPEED_10G	10000
-#define HALF_DUPLEX	1
-#define FULL_DUPLEX	2
-
-#ifndef PAGE_SIZE
-#define PAGE_SIZE 4096
-#endif
-
-#define VIRTIO_MAX_RX_QUEUES 128
-#define VIRTIO_MAX_TX_QUEUES 128
-#define VIRTIO_MAX_MAC_ADDRS 64
-#define VIRTIO_MIN_RX_BUFSIZE 64
-#define VIRTIO_MAX_RX_PKTLEN  9728
-
-/* Features desired/implemented by this driver. */
-#define VTNET_FEATURES \
-	(VIRTIO_NET_F_MAC       | \
-	VIRTIO_NET_F_STATUS     | \
-	VIRTIO_NET_F_MQ         | \
-	VIRTIO_NET_F_CTRL_MAC_ADDR | \
-	VIRTIO_NET_F_CTRL_VQ    | \
-	VIRTIO_NET_F_CTRL_RX    | \
-	VIRTIO_NET_F_CTRL_VLAN  | \
-	VIRTIO_NET_F_CSUM       | \
-	VIRTIO_NET_F_HOST_TSO4  | \
-	VIRTIO_NET_F_HOST_TSO6  | \
-	VIRTIO_NET_F_HOST_ECN   | \
-	VIRTIO_NET_F_GUEST_CSUM | \
-	VIRTIO_NET_F_GUEST_TSO4 | \
-	VIRTIO_NET_F_GUEST_TSO6 | \
-	VIRTIO_NET_F_GUEST_ECN  | \
-	VIRTIO_NET_F_MRG_RXBUF  | \
-	VIRTIO_RING_F_INDIRECT_DESC)
-
-/*
- * CQ function prototype
- */
-void virtio_dev_cq_start(struct rte_eth_dev *dev);
-
-/*
- * RX/TX function prototypes
- */
-void virtio_dev_rxtx_start(struct rte_eth_dev *dev);
-
-int virtio_dev_queue_setup(struct rte_eth_dev *dev,
-			int queue_type,
-			uint16_t queue_idx,
-			uint16_t  vtpci_queue_idx,
-			uint16_t nb_desc,
-			unsigned int socket_id,
-			struct virtqueue **pvq);
-
-int  virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
-		uint16_t nb_rx_desc, unsigned int socket_id,
-		const struct rte_eth_rxconf *rx_conf,
-		struct rte_mempool *mb_pool);
-
-int  virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
-		uint16_t nb_tx_desc, unsigned int socket_id,
-		const struct rte_eth_txconf *tx_conf);
-
-uint16_t virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
-		uint16_t nb_pkts);
-
-uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
-		uint16_t nb_pkts);
-
-uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
-		uint16_t nb_pkts);
-
-
-/*
- * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
- * frames larger than 1514 bytes. We do not yet support software LRO
- * via tcp_lro_rx().
- */
-#define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \
-			    VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN)
-
-
-#endif /* _VIRTIO_ETHDEV_H_ */
diff --git a/lib/librte_pmd_virtio/virtio_logs.h b/lib/librte_pmd_virtio/virtio_logs.h
deleted file mode 100644
index d6c33f7..0000000
--- a/lib/librte_pmd_virtio/virtio_logs.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VIRTIO_LOGS_H_
-#define _VIRTIO_LOGS_H_
-
-#include <rte_log.h>
-
-#ifdef RTE_LIBRTE_VIRTIO_DEBUG_INIT
-#define PMD_INIT_LOG(level, fmt, args...) \
-	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
-#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
-#else
-#define PMD_INIT_LOG(level, fmt, args...) do { } while(0)
-#define PMD_INIT_FUNC_TRACE() do { } while(0)
-#endif
-
-#ifdef RTE_LIBRTE_VIRTIO_DEBUG_RX
-#define PMD_RX_LOG(level, fmt, args...) \
-	RTE_LOG(level, PMD, "%s() rx: " fmt , __func__, ## args)
-#else
-#define PMD_RX_LOG(level, fmt, args...) do { } while(0)
-#endif
-
-#ifdef RTE_LIBRTE_VIRTIO_DEBUG_TX
-#define PMD_TX_LOG(level, fmt, args...) \
-	RTE_LOG(level, PMD, "%s() tx: " fmt , __func__, ## args)
-#else
-#define PMD_TX_LOG(level, fmt, args...) do { } while(0)
-#endif
-
-
-#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DRIVER
-#define PMD_DRV_LOG(level, fmt, args...) \
-	RTE_LOG(level, PMD, "%s(): " fmt , __func__, ## args)
-#else
-#define PMD_DRV_LOG(level, fmt, args...) do { } while(0)
-#endif
-
-#endif /* _VIRTIO_LOGS_H_ */
diff --git a/lib/librte_pmd_virtio/virtio_pci.c b/lib/librte_pmd_virtio/virtio_pci.c
deleted file mode 100644
index 2245bec..0000000
--- a/lib/librte_pmd_virtio/virtio_pci.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include <stdint.h>
-
-#include "virtio_pci.h"
-#include "virtio_logs.h"
-
-static uint8_t vtpci_get_status(struct virtio_hw *);
-
-void
-vtpci_read_dev_config(struct virtio_hw *hw, uint64_t offset,
-		void *dst, int length)
-{
-	uint64_t off;
-	uint8_t *d;
-	int size;
-
-	off = VIRTIO_PCI_CONFIG(hw) + offset;
-	for (d = dst; length > 0; d += size, off += size, length -= size) {
-		if (length >= 4) {
-			size = 4;
-			*(uint32_t *)d = VIRTIO_READ_REG_4(hw, off);
-		} else if (length >= 2) {
-			size = 2;
-			*(uint16_t *)d = VIRTIO_READ_REG_2(hw, off);
-		} else {
-			size = 1;
-			*d = VIRTIO_READ_REG_1(hw, off);
-		}
-	}
-}
-
-void
-vtpci_write_dev_config(struct virtio_hw *hw, uint64_t offset,
-		void *src, int length)
-{
-	uint64_t off;
-	uint8_t *s;
-	int size;
-
-	off = VIRTIO_PCI_CONFIG(hw) + offset;
-	for (s = src; length > 0; s += size, off += size, length -= size) {
-		if (length >= 4) {
-			size = 4;
-			VIRTIO_WRITE_REG_4(hw, off, *(uint32_t *)s);
-		} else if (length >= 2) {
-			size = 2;
-			VIRTIO_WRITE_REG_2(hw, off, *(uint16_t *)s);
-		} else {
-			size = 1;
-			VIRTIO_WRITE_REG_1(hw, off, *s);
-		}
-	}
-}
-
-uint32_t
-vtpci_negotiate_features(struct virtio_hw *hw, uint32_t host_features)
-{
-	uint32_t features;
-	/*
-	 * Limit negotiated features to what the driver, virtqueue, and
-	 * host all support.
-	 */
-	features = host_features & hw->guest_features;
-
-	VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_GUEST_FEATURES, features);
-	return features;
-}
-
-
-void
-vtpci_reset(struct virtio_hw *hw)
-{
-	/*
-	 * Setting the status to RESET sets the host device to
-	 * the original, uninitialized state.
-	 */
-	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
-	vtpci_get_status(hw);
-}
-
-void
-vtpci_reinit_complete(struct virtio_hw *hw)
-{
-	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
-}
-
-static uint8_t
-vtpci_get_status(struct virtio_hw *hw)
-{
-	return VIRTIO_READ_REG_1(hw, VIRTIO_PCI_STATUS);
-}
-
-void
-vtpci_set_status(struct virtio_hw *hw, uint8_t status)
-{
-	if (status != VIRTIO_CONFIG_STATUS_RESET)
-		status = (uint8_t)(status | vtpci_get_status(hw));
-
-	VIRTIO_WRITE_REG_1(hw, VIRTIO_PCI_STATUS, status);
-}
-
-uint8_t
-vtpci_isr(struct virtio_hw *hw)
-{
-
-	return VIRTIO_READ_REG_1(hw, VIRTIO_PCI_ISR);
-}
-
-
-/* Enable one vector (0) for Link State Intrerrupt */
-uint16_t
-vtpci_irq_config(struct virtio_hw *hw, uint16_t vec)
-{
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_MSI_CONFIG_VECTOR, vec);
-	return VIRTIO_READ_REG_2(hw, VIRTIO_MSI_CONFIG_VECTOR);
-}
diff --git a/lib/librte_pmd_virtio/virtio_pci.h b/lib/librte_pmd_virtio/virtio_pci.h
deleted file mode 100644
index 64d9c34..0000000
--- a/lib/librte_pmd_virtio/virtio_pci.h
+++ /dev/null
@@ -1,270 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VIRTIO_PCI_H_
-#define _VIRTIO_PCI_H_
-
-#include <stdint.h>
-
-#ifdef __FreeBSD__
-#include <sys/types.h>
-#include <machine/cpufunc.h>
-#else
-#include <sys/io.h>
-#endif
-
-#include <rte_ethdev.h>
-
-struct virtqueue;
-
-/* VirtIO PCI vendor/device ID. */
-#define VIRTIO_PCI_VENDORID     0x1AF4
-#define VIRTIO_PCI_DEVICEID_MIN 0x1000
-#define VIRTIO_PCI_DEVICEID_MAX 0x103F
-
-/* VirtIO ABI version, this must match exactly. */
-#define VIRTIO_PCI_ABI_VERSION 0
-
-/*
- * VirtIO Header, located in BAR 0.
- */
-#define VIRTIO_PCI_HOST_FEATURES  0  /* host's supported features (32bit, RO)*/
-#define VIRTIO_PCI_GUEST_FEATURES 4  /* guest's supported features (32, RW) */
-#define VIRTIO_PCI_QUEUE_PFN      8  /* physical address of VQ (32, RW) */
-#define VIRTIO_PCI_QUEUE_NUM      12 /* number of ring entries (16, RO) */
-#define VIRTIO_PCI_QUEUE_SEL      14 /* current VQ selection (16, RW) */
-#define VIRTIO_PCI_QUEUE_NOTIFY   16 /* notify host regarding VQ (16, RW) */
-#define VIRTIO_PCI_STATUS         18 /* device status register (8, RW) */
-#define VIRTIO_PCI_ISR		  19 /* interrupt status register, reading
-				      * also clears the register (8, RO) */
-/* Only if MSIX is enabled: */
-#define VIRTIO_MSI_CONFIG_VECTOR  20 /* configuration change vector (16, RW) */
-#define VIRTIO_MSI_QUEUE_VECTOR	  22 /* vector for selected VQ notifications
-				      (16, RW) */
-
-/* The bit of the ISR which indicates a device has an interrupt. */
-#define VIRTIO_PCI_ISR_INTR   0x1
-/* The bit of the ISR which indicates a device configuration change. */
-#define VIRTIO_PCI_ISR_CONFIG 0x2
-/* Vector value used to disable MSI for queue. */
-#define VIRTIO_MSI_NO_VECTOR 0xFFFF
-
-/* VirtIO device IDs. */
-#define VIRTIO_ID_NETWORK  0x01
-#define VIRTIO_ID_BLOCK    0x02
-#define VIRTIO_ID_CONSOLE  0x03
-#define VIRTIO_ID_ENTROPY  0x04
-#define VIRTIO_ID_BALLOON  0x05
-#define VIRTIO_ID_IOMEMORY 0x06
-#define VIRTIO_ID_9P       0x09
-
-/* Status byte for guest to report progress. */
-#define VIRTIO_CONFIG_STATUS_RESET     0x00
-#define VIRTIO_CONFIG_STATUS_ACK       0x01
-#define VIRTIO_CONFIG_STATUS_DRIVER    0x02
-#define VIRTIO_CONFIG_STATUS_DRIVER_OK 0x04
-#define VIRTIO_CONFIG_STATUS_FAILED    0x80
-
-/*
- * Generate interrupt when the virtqueue ring is
- * completely used, even if we've suppressed them.
- */
-#define VIRTIO_F_NOTIFY_ON_EMPTY (1 << 24)
-
-/*
- * The guest should never negotiate this feature; it
- * is used to detect faulty drivers.
- */
-#define VIRTIO_F_BAD_FEATURE (1 << 30)
-
-/*
- * Some VirtIO feature bits (currently bits 28 through 31) are
- * reserved for the transport being used (eg. virtio_ring), the
- * rest are per-device feature bits.
- */
-#define VIRTIO_TRANSPORT_F_START 28
-#define VIRTIO_TRANSPORT_F_END   32
-
-/*
- * Each virtqueue indirect descriptor list must be physically contiguous.
- * To allow us to malloc(9) each list individually, limit the number
- * supported to what will fit in one page. With 4KB pages, this is a limit
- * of 256 descriptors. If there is ever a need for more, we can switch to
- * contigmalloc(9) for the larger allocations, similar to what
- * bus_dmamem_alloc(9) does.
- *
- * Note the sizeof(struct vring_desc) is 16 bytes.
- */
-#define VIRTIO_MAX_INDIRECT ((int) (PAGE_SIZE / 16))
-
-/* The feature bitmap for virtio net */
-#define VIRTIO_NET_F_CSUM       0x00001 /* Host handles pkts w/ partial csum */
-#define VIRTIO_NET_F_GUEST_CSUM 0x00002 /* Guest handles pkts w/ partial csum*/
-#define VIRTIO_NET_F_MAC        0x00020 /* Host has given MAC address. */
-#define VIRTIO_NET_F_GSO        0x00040 /* Host handles pkts w/ any GSO type */
-#define VIRTIO_NET_F_GUEST_TSO4 0x00080 /* Guest can handle TSOv4 in. */
-#define VIRTIO_NET_F_GUEST_TSO6 0x00100 /* Guest can handle TSOv6 in. */
-#define VIRTIO_NET_F_GUEST_ECN  0x00200 /* Guest can handle TSO[6] w/ ECN in.*/
-#define VIRTIO_NET_F_GUEST_UFO  0x00400 /* Guest can handle UFO in. */
-#define VIRTIO_NET_F_HOST_TSO4  0x00800 /* Host can handle TSOv4 in. */
-#define VIRTIO_NET_F_HOST_TSO6  0x01000 /* Host can handle TSOv6 in. */
-#define VIRTIO_NET_F_HOST_ECN   0x02000 /* Host can handle TSO[6] w/ ECN in. */
-#define VIRTIO_NET_F_HOST_UFO   0x04000 /* Host can handle UFO in. */
-#define VIRTIO_NET_F_MRG_RXBUF  0x08000 /* Host can merge receive buffers. */
-#define VIRTIO_NET_F_STATUS     0x10000 /* virtio_net_config.status available*/
-#define VIRTIO_NET_F_CTRL_VQ    0x20000 /* Control channel available */
-#define VIRTIO_NET_F_CTRL_RX    0x40000 /* Control channel RX mode support */
-#define VIRTIO_NET_F_CTRL_VLAN  0x80000 /* Control channel VLAN filtering */
-#define VIRTIO_NET_F_CTRL_RX_EXTRA  0x100000 /* Extra RX mode control support */
-#define VIRTIO_RING_F_INDIRECT_DESC 0x10000000 /* Support for indirect buffer descriptors. */
-/* The guest publishes the used index for which it expects an interrupt
- * at the end of the avail ring. Host should ignore the avail->flags field.
- * The host publishes the avail index for which it expects a kick
- * at the end of the used ring. Guest should ignore the used->flags field.
- */
-#define VIRTIO_RING_F_EVENT_IDX 0x20000000
-
-#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */
-
-/*
- * Maximum number of virtqueues per device.
- */
-#define VIRTIO_MAX_VIRTQUEUES 8
-
-struct virtio_hw {
-	struct virtqueue *cvq;
-	uint32_t    io_base;
-	uint32_t    guest_features;
-	uint32_t    max_tx_queues;
-	uint32_t    max_rx_queues;
-	uint16_t    vtnet_hdr_size;
-	uint8_t	    vlan_strip;
-	uint8_t	    use_msix;
-	uint8_t     started;
-	uint8_t     mac_addr[ETHER_ADDR_LEN];
-};
-
-/*
- * This structure is just a reference to read
- * net device specific config space; it just a chodu structure
- *
- */
-struct virtio_net_config {
-	/* The config defining mac address (if VIRTIO_NET_F_MAC) */
-	uint8_t    mac[ETHER_ADDR_LEN];
-	/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
-	uint16_t   status;
-	uint16_t   max_virtqueue_pairs;
-} __attribute__((packed));
-
-/*
- * The remaining space is defined by each driver as the per-driver
- * configuration space.
- */
-#define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20)
-
-/*
- * How many bits to shift physical queue address written to QUEUE_PFN.
- * 12 is historical, and due to x86 page size.
- */
-#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12
-
-/* The alignment to use between consumer and producer parts of vring. */
-#define VIRTIO_PCI_VRING_ALIGN 4096
-
-#ifdef __FreeBSD__
-
-static inline void
-outb_p(unsigned char data, unsigned int port)
-{
-
-	outb(port, (u_char)data);
-}
-
-static inline void
-outw_p(unsigned short data, unsigned int port)
-{
-	outw(port, (u_short)data);
-}
-
-static inline void
-outl_p(unsigned int data, unsigned int port)
-{
-	outl(port, (u_int)data);
-}
-#endif
-
-#define VIRTIO_PCI_REG_ADDR(hw, reg) \
-	(unsigned short)((hw)->io_base + (reg))
-
-#define VIRTIO_READ_REG_1(hw, reg) \
-	inb((VIRTIO_PCI_REG_ADDR((hw), (reg))))
-#define VIRTIO_WRITE_REG_1(hw, reg, value) \
-	outb_p((unsigned char)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
-
-#define VIRTIO_READ_REG_2(hw, reg) \
-	inw((VIRTIO_PCI_REG_ADDR((hw), (reg))))
-#define VIRTIO_WRITE_REG_2(hw, reg, value) \
-	outw_p((unsigned short)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
-
-#define VIRTIO_READ_REG_4(hw, reg) \
-	inl((VIRTIO_PCI_REG_ADDR((hw), (reg))))
-#define VIRTIO_WRITE_REG_4(hw, reg, value) \
-	outl_p((unsigned int)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
-
-static inline int
-vtpci_with_feature(struct virtio_hw *hw, uint32_t feature)
-{
-	return (hw->guest_features & feature) != 0;
-}
-
-/*
- * Function declaration from virtio_pci.c
- */
-void vtpci_reset(struct virtio_hw *);
-
-void vtpci_reinit_complete(struct virtio_hw *);
-
-void vtpci_set_status(struct virtio_hw *, uint8_t);
-
-uint32_t vtpci_negotiate_features(struct virtio_hw *, uint32_t);
-
-void vtpci_write_dev_config(struct virtio_hw *, uint64_t, void *, int);
-
-void vtpci_read_dev_config(struct virtio_hw *, uint64_t, void *, int);
-
-uint8_t vtpci_isr(struct virtio_hw *);
-
-uint16_t vtpci_irq_config(struct virtio_hw *, uint16_t);
-
-#endif /* _VIRTIO_PCI_H_ */
diff --git a/lib/librte_pmd_virtio/virtio_ring.h b/lib/librte_pmd_virtio/virtio_ring.h
deleted file mode 100644
index a16c499..0000000
--- a/lib/librte_pmd_virtio/virtio_ring.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VIRTIO_RING_H_
-#define _VIRTIO_RING_H_
-
-#include <stdint.h>
-
-#include <rte_common.h>
-
-/* This marks a buffer as continuing via the next field. */
-#define VRING_DESC_F_NEXT       1
-/* This marks a buffer as write-only (otherwise read-only). */
-#define VRING_DESC_F_WRITE      2
-/* This means the buffer contains a list of buffer descriptors. */
-#define VRING_DESC_F_INDIRECT   4
-
-/* The Host uses this in used->flags to advise the Guest: don't kick me
- * when you add a buffer.  It's unreliable, so it's simply an
- * optimization.  Guest will still kick if it's out of buffers. */
-#define VRING_USED_F_NO_NOTIFY  1
-/* The Guest uses this in avail->flags to advise the Host: don't
- * interrupt me when you consume a buffer.  It's unreliable, so it's
- * simply an optimization.  */
-#define VRING_AVAIL_F_NO_INTERRUPT  1
-
-/* VirtIO ring descriptors: 16 bytes.
- * These can chain together via "next". */
-struct vring_desc {
-	uint64_t addr;  /*  Address (guest-physical). */
-	uint32_t len;   /* Length. */
-	uint16_t flags; /* The flags as indicated above. */
-	uint16_t next;  /* We chain unused descriptors via this. */
-};
-
-struct vring_avail {
-	uint16_t flags;
-	uint16_t idx;
-	uint16_t ring[0];
-};
-
-/* id is a 16bit index. uint32_t is used here for ids for padding reasons. */
-struct vring_used_elem {
-	/* Index of start of used descriptor chain. */
-	uint32_t id;
-	/* Total length of the descriptor chain which was written to. */
-	uint32_t len;
-};
-
-struct vring_used {
-	uint16_t flags;
-	uint16_t idx;
-	struct vring_used_elem ring[0];
-};
-
-struct vring {
-	unsigned int num;
-	struct vring_desc  *desc;
-	struct vring_avail *avail;
-	struct vring_used  *used;
-};
-
-/* The standard layout for the ring is a continuous chunk of memory which
- * looks like this.  We assume num is a power of 2.
- *
- * struct vring {
- *      // The actual descriptors (16 bytes each)
- *      struct vring_desc desc[num];
- *
- *      // A ring of available descriptor heads with free-running index.
- *      __u16 avail_flags;
- *      __u16 avail_idx;
- *      __u16 available[num];
- *      __u16 used_event_idx;
- *
- *      // Padding to the next align boundary.
- *      char pad[];
- *
- *      // A ring of used descriptor heads with free-running index.
- *      __u16 used_flags;
- *      __u16 used_idx;
- *      struct vring_used_elem used[num];
- *      __u16 avail_event_idx;
- * };
- *
- * NOTE: for VirtIO PCI, align is 4096.
- */
-
-/*
- * We publish the used event index at the end of the available ring, and vice
- * versa. They are at the end for backwards compatibility.
- */
-#define vring_used_event(vr)  ((vr)->avail->ring[(vr)->num])
-#define vring_avail_event(vr) (*(uint16_t *)&(vr)->used->ring[(vr)->num])
-
-static inline int
-vring_size(unsigned int num, unsigned long align)
-{
-	int size;
-
-	size = num * sizeof(struct vring_desc);
-	size += sizeof(struct vring_avail) + (num * sizeof(uint16_t));
-	size = RTE_ALIGN_CEIL(size, align);
-	size += sizeof(struct vring_used) +
-		(num * sizeof(struct vring_used_elem));
-	return size;
-}
-
-static inline void
-vring_init(struct vring *vr, unsigned int num, uint8_t *p,
-	unsigned long align)
-{
-	vr->num = num;
-	vr->desc = (struct vring_desc *) p;
-	vr->avail = (struct vring_avail *) (p +
-		num * sizeof(struct vring_desc));
-	vr->used = (void *)
-		RTE_ALIGN_CEIL((uintptr_t)(&vr->avail->ring[num]), align);
-}
-
-/*
- * The following is used with VIRTIO_RING_F_EVENT_IDX.
- * Assuming a given event_idx value from the other size, if we have
- * just incremented index from old to new_idx, should we trigger an
- * event?
- */
-static inline int
-vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
-{
-	return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
-}
-
-#endif /* _VIRTIO_RING_H_ */
diff --git a/lib/librte_pmd_virtio/virtio_rxtx.c b/lib/librte_pmd_virtio/virtio_rxtx.c
deleted file mode 100644
index 3ff275c..0000000
--- a/lib/librte_pmd_virtio/virtio_rxtx.c
+++ /dev/null
@@ -1,815 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-
-#include <rte_cycles.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_branch_prediction.h>
-#include <rte_mempool.h>
-#include <rte_malloc.h>
-#include <rte_mbuf.h>
-#include <rte_ether.h>
-#include <rte_ethdev.h>
-#include <rte_prefetch.h>
-#include <rte_string_fns.h>
-#include <rte_errno.h>
-#include <rte_byteorder.h>
-
-#include "virtio_logs.h"
-#include "virtio_ethdev.h"
-#include "virtqueue.h"
-
-#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
-#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
-#else
-#define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
-#endif
-
-static void
-vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
-{
-	struct vring_desc *dp, *dp_tail;
-	struct vq_desc_extra *dxp;
-	uint16_t desc_idx_last = desc_idx;
-
-	dp  = &vq->vq_ring.desc[desc_idx];
-	dxp = &vq->vq_descx[desc_idx];
-	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
-	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
-		while (dp->flags & VRING_DESC_F_NEXT) {
-			desc_idx_last = dp->next;
-			dp = &vq->vq_ring.desc[dp->next];
-		}
-	}
-	dxp->ndescs = 0;
-
-	/*
-	 * We must append the existing free chain, if any, to the end of
-	 * newly freed chain. If the virtqueue was completely used, then
-	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
-	 */
-	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
-		vq->vq_desc_head_idx = desc_idx;
-	} else {
-		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
-		dp_tail->next = desc_idx;
-	}
-
-	vq->vq_desc_tail_idx = desc_idx_last;
-	dp->next = VQ_RING_DESC_CHAIN_END;
-}
-
-static uint16_t
-virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
-			   uint32_t *len, uint16_t num)
-{
-	struct vring_used_elem *uep;
-	struct rte_mbuf *cookie;
-	uint16_t used_idx, desc_idx;
-	uint16_t i;
-
-	/*  Caller does the check */
-	for (i = 0; i < num ; i++) {
-		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
-		uep = &vq->vq_ring.used->ring[used_idx];
-		desc_idx = (uint16_t) uep->id;
-		len[i] = uep->len;
-		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
-
-		if (unlikely(cookie == NULL)) {
-			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n",
-				vq->vq_used_cons_idx);
-			break;
-		}
-
-		rte_prefetch0(cookie);
-		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
-		rx_pkts[i]  = cookie;
-		vq->vq_used_cons_idx++;
-		vq_ring_free_chain(vq, desc_idx);
-		vq->vq_descx[desc_idx].cookie = NULL;
-	}
-
-	return i;
-}
-
-#ifndef DEFAULT_TX_FREE_THRESH
-#define DEFAULT_TX_FREE_THRESH 32
-#endif
-
-/* Cleanup from completed transmits. */
-static void
-virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
-{
-	uint16_t i, used_idx, desc_idx;
-	for (i = 0; i < num; i++) {
-		struct vring_used_elem *uep;
-		struct vq_desc_extra *dxp;
-
-		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
-		uep = &vq->vq_ring.used->ring[used_idx];
-
-		desc_idx = (uint16_t) uep->id;
-		dxp = &vq->vq_descx[desc_idx];
-		vq->vq_used_cons_idx++;
-		vq_ring_free_chain(vq, desc_idx);
-
-		if (dxp->cookie != NULL) {
-			rte_pktmbuf_free(dxp->cookie);
-			dxp->cookie = NULL;
-		}
-	}
-}
-
-
-static inline int
-virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
-{
-	struct vq_desc_extra *dxp;
-	struct virtio_hw *hw = vq->hw;
-	struct vring_desc *start_dp;
-	uint16_t needed = 1;
-	uint16_t head_idx, idx;
-
-	if (unlikely(vq->vq_free_cnt == 0))
-		return -ENOSPC;
-	if (unlikely(vq->vq_free_cnt < needed))
-		return -EMSGSIZE;
-
-	head_idx = vq->vq_desc_head_idx;
-	if (unlikely(head_idx >= vq->vq_nentries))
-		return -EFAULT;
-
-	idx = head_idx;
-	dxp = &vq->vq_descx[idx];
-	dxp->cookie = (void *)cookie;
-	dxp->ndescs = needed;
-
-	start_dp = vq->vq_ring.desc;
-	start_dp[idx].addr =
-		(uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM
-		- hw->vtnet_hdr_size);
-	start_dp[idx].len =
-		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
-	start_dp[idx].flags =  VRING_DESC_F_WRITE;
-	idx = start_dp[idx].next;
-	vq->vq_desc_head_idx = idx;
-	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
-		vq->vq_desc_tail_idx = idx;
-	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
-	vq_update_avail_ring(vq, head_idx);
-
-	return 0;
-}
-
-static int
-virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
-{
-	struct vq_desc_extra *dxp;
-	struct vring_desc *start_dp;
-	uint16_t seg_num = cookie->nb_segs;
-	uint16_t needed = 1 + seg_num;
-	uint16_t head_idx, idx;
-	uint16_t head_size = txvq->hw->vtnet_hdr_size;
-
-	if (unlikely(txvq->vq_free_cnt == 0))
-		return -ENOSPC;
-	if (unlikely(txvq->vq_free_cnt < needed))
-		return -EMSGSIZE;
-	head_idx = txvq->vq_desc_head_idx;
-	if (unlikely(head_idx >= txvq->vq_nentries))
-		return -EFAULT;
-
-	idx = head_idx;
-	dxp = &txvq->vq_descx[idx];
-	dxp->cookie = (void *)cookie;
-	dxp->ndescs = needed;
-
-	start_dp = txvq->vq_ring.desc;
-	start_dp[idx].addr =
-		txvq->virtio_net_hdr_mem + idx * head_size;
-	start_dp[idx].len = (uint32_t)head_size;
-	start_dp[idx].flags = VRING_DESC_F_NEXT;
-
-	for (; ((seg_num > 0) && (cookie != NULL)); seg_num--) {
-		idx = start_dp[idx].next;
-		start_dp[idx].addr  = RTE_MBUF_DATA_DMA_ADDR(cookie);
-		start_dp[idx].len   = cookie->data_len;
-		start_dp[idx].flags = VRING_DESC_F_NEXT;
-		cookie = cookie->next;
-	}
-
-	start_dp[idx].flags &= ~VRING_DESC_F_NEXT;
-	idx = start_dp[idx].next;
-	txvq->vq_desc_head_idx = idx;
-	if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
-		txvq->vq_desc_tail_idx = idx;
-	txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed);
-	vq_update_avail_ring(txvq, head_idx);
-
-	return 0;
-}
-
-static inline struct rte_mbuf *
-rte_rxmbuf_alloc(struct rte_mempool *mp)
-{
-	struct rte_mbuf *m;
-
-	m = __rte_mbuf_raw_alloc(mp);
-	__rte_mbuf_sanity_check_raw(m, 0);
-
-	return m;
-}
-
-static void
-virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
-{
-	struct rte_mbuf *m;
-	int i, nbufs, error, size = vq->vq_nentries;
-	struct vring *vr = &vq->vq_ring;
-	uint8_t *ring_mem = vq->vq_ring_virt_mem;
-
-	PMD_INIT_FUNC_TRACE();
-
-	/*
-	 * Reinitialise since virtio port might have been stopped and restarted
-	 */
-	memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
-	vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
-	vq->vq_used_cons_idx = 0;
-	vq->vq_desc_head_idx = 0;
-	vq->vq_avail_idx = 0;
-	vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
-	vq->vq_free_cnt = vq->vq_nentries;
-	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
-
-	/* Chain all the descriptors in the ring with an END */
-	for (i = 0; i < size - 1; i++)
-		vr->desc[i].next = (uint16_t)(i + 1);
-	vr->desc[i].next = VQ_RING_DESC_CHAIN_END;
-
-	/*
-	 * Disable device(host) interrupting guest
-	 */
-	virtqueue_disable_intr(vq);
-
-	/* Only rx virtqueue needs mbufs to be allocated at initialization */
-	if (queue_type == VTNET_RQ) {
-		if (vq->mpool == NULL)
-			rte_exit(EXIT_FAILURE,
-			"Cannot allocate initial mbufs for rx virtqueue");
-
-		/* Allocate blank mbufs for the each rx descriptor */
-		nbufs = 0;
-		error = ENOSPC;
-		while (!virtqueue_full(vq)) {
-			m = rte_rxmbuf_alloc(vq->mpool);
-			if (m == NULL)
-				break;
-
-			/******************************************
-			*         Enqueue allocated buffers        *
-			*******************************************/
-			error = virtqueue_enqueue_recv_refill(vq, m);
-
-			if (error) {
-				rte_pktmbuf_free(m);
-				break;
-			}
-			nbufs++;
-		}
-
-		vq_update_avail_idx(vq);
-
-		PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
-
-		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
-			vq->vq_queue_index);
-		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
-			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
-	} else if (queue_type == VTNET_TQ) {
-		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
-			vq->vq_queue_index);
-		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
-			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
-	} else {
-		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
-			vq->vq_queue_index);
-		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
-			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
-	}
-}
-
-void
-virtio_dev_cq_start(struct rte_eth_dev *dev)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-
-	if (hw->cvq) {
-		virtio_dev_vring_start(hw->cvq, VTNET_CQ);
-		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq);
-	}
-}
-
-void
-virtio_dev_rxtx_start(struct rte_eth_dev *dev)
-{
-	/*
-	 * Start receive and transmit vrings
-	 * -	Setup vring structure for all queues
-	 * -	Initialize descriptor for the rx vring
-	 * -	Allocate blank mbufs for the each rx descriptor
-	 *
-	 */
-	int i;
-
-	PMD_INIT_FUNC_TRACE();
-
-	/* Start rx vring. */
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		virtio_dev_vring_start(dev->data->rx_queues[i], VTNET_RQ);
-		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
-	}
-
-	/* Start tx vring. */
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		virtio_dev_vring_start(dev->data->tx_queues[i], VTNET_TQ);
-		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
-	}
-}
-
-int
-virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
-			uint16_t queue_idx,
-			uint16_t nb_desc,
-			unsigned int socket_id,
-			__rte_unused const struct rte_eth_rxconf *rx_conf,
-			struct rte_mempool *mp)
-{
-	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
-	struct virtqueue *vq;
-	int ret;
-
-	PMD_INIT_FUNC_TRACE();
-	ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx,
-			nb_desc, socket_id, &vq);
-	if (ret < 0) {
-		PMD_INIT_LOG(ERR, "tvq initialization failed");
-		return ret;
-	}
-
-	/* Create mempool for rx mbuf allocation */
-	vq->mpool = mp;
-
-	dev->data->rx_queues[queue_idx] = vq;
-	return 0;
-}
-
-/*
- * struct rte_eth_dev *dev: Used to update dev
- * uint16_t nb_desc: Defaults to values read from config space
- * unsigned int socket_id: Used to allocate memzone
- * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
- * uint16_t queue_idx: Just used as an index in dev txq list
- */
-int
-virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
-			uint16_t queue_idx,
-			uint16_t nb_desc,
-			unsigned int socket_id,
-			const struct rte_eth_txconf *tx_conf)
-{
-	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
-	struct virtqueue *vq;
-	uint16_t tx_free_thresh;
-	int ret;
-
-	PMD_INIT_FUNC_TRACE();
-
-	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS)
-	    != ETH_TXQ_FLAGS_NOXSUMS) {
-		PMD_INIT_LOG(ERR, "TX checksum offload not supported\n");
-		return -EINVAL;
-	}
-
-	ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
-			nb_desc, socket_id, &vq);
-	if (ret < 0) {
-		PMD_INIT_LOG(ERR, "rvq initialization failed");
-		return ret;
-	}
-
-	tx_free_thresh = tx_conf->tx_free_thresh;
-	if (tx_free_thresh == 0)
-		tx_free_thresh =
-			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
-
-	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
-		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
-			"number of TX entries minus 3 (%u)."
-			" (tx_free_thresh=%u port=%u queue=%u)\n",
-			vq->vq_nentries - 3,
-			tx_free_thresh, dev->data->port_id, queue_idx);
-		return -EINVAL;
-	}
-
-	vq->vq_free_thresh = tx_free_thresh;
-
-	dev->data->tx_queues[queue_idx] = vq;
-	return 0;
-}
-
-static void
-virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
-{
-	int error;
-	/*
-	 * Requeue the discarded mbuf. This should always be
-	 * successful since it was just dequeued.
-	 */
-	error = virtqueue_enqueue_recv_refill(vq, m);
-	if (unlikely(error)) {
-		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
-		rte_pktmbuf_free(m);
-	}
-}
-
-#define VIRTIO_MBUF_BURST_SZ 64
-#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
-uint16_t
-virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
-{
-	struct virtqueue *rxvq = rx_queue;
-	struct virtio_hw *hw;
-	struct rte_mbuf *rxm, *new_mbuf;
-	uint16_t nb_used, num, nb_rx;
-	uint32_t len[VIRTIO_MBUF_BURST_SZ];
-	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
-	int error;
-	uint32_t i, nb_enqueued;
-	const uint32_t hdr_size = sizeof(struct virtio_net_hdr);
-
-	nb_used = VIRTQUEUE_NUSED(rxvq);
-
-	virtio_rmb();
-
-	num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
-	num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
-	if (likely(num > DESC_PER_CACHELINE))
-		num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
-
-	if (num == 0)
-		return 0;
-
-	num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num);
-	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
-
-	hw = rxvq->hw;
-	nb_rx = 0;
-	nb_enqueued = 0;
-
-	for (i = 0; i < num ; i++) {
-		rxm = rcv_pkts[i];
-
-		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
-
-		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
-			PMD_RX_LOG(ERR, "Packet drop");
-			nb_enqueued++;
-			virtio_discard_rxbuf(rxvq, rxm);
-			rxvq->errors++;
-			continue;
-		}
-
-		rxm->port = rxvq->port_id;
-		rxm->data_off = RTE_PKTMBUF_HEADROOM;
-
-		rxm->nb_segs = 1;
-		rxm->next = NULL;
-		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
-		rxm->data_len = (uint16_t)(len[i] - hdr_size);
-
-		if (hw->vlan_strip)
-			rte_vlan_strip(rxm);
-
-		VIRTIO_DUMP_PACKET(rxm, rxm->data_len);
-
-		rx_pkts[nb_rx++] = rxm;
-		rxvq->bytes += rx_pkts[nb_rx - 1]->pkt_len;
-	}
-
-	rxvq->packets += nb_rx;
-
-	/* Allocate new mbuf for the used descriptor */
-	error = ENOSPC;
-	while (likely(!virtqueue_full(rxvq))) {
-		new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
-		if (unlikely(new_mbuf == NULL)) {
-			struct rte_eth_dev *dev
-				= &rte_eth_devices[rxvq->port_id];
-			dev->data->rx_mbuf_alloc_failed++;
-			break;
-		}
-		error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
-		if (unlikely(error)) {
-			rte_pktmbuf_free(new_mbuf);
-			break;
-		}
-		nb_enqueued++;
-	}
-
-	if (likely(nb_enqueued)) {
-		vq_update_avail_idx(rxvq);
-
-		if (unlikely(virtqueue_kick_prepare(rxvq))) {
-			virtqueue_notify(rxvq);
-			PMD_RX_LOG(DEBUG, "Notified\n");
-		}
-	}
-
-	return nb_rx;
-}
-
-uint16_t
-virtio_recv_mergeable_pkts(void *rx_queue,
-			struct rte_mbuf **rx_pkts,
-			uint16_t nb_pkts)
-{
-	struct virtqueue *rxvq = rx_queue;
-	struct virtio_hw *hw;
-	struct rte_mbuf *rxm, *new_mbuf;
-	uint16_t nb_used, num, nb_rx;
-	uint32_t len[VIRTIO_MBUF_BURST_SZ];
-	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
-	struct rte_mbuf *prev;
-	int error;
-	uint32_t i, nb_enqueued;
-	uint32_t seg_num;
-	uint16_t extra_idx;
-	uint32_t seg_res;
-	const uint32_t hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
-
-	nb_used = VIRTQUEUE_NUSED(rxvq);
-
-	virtio_rmb();
-
-	if (nb_used == 0)
-		return 0;
-
-	PMD_RX_LOG(DEBUG, "used:%d\n", nb_used);
-
-	hw = rxvq->hw;
-	nb_rx = 0;
-	i = 0;
-	nb_enqueued = 0;
-	seg_num = 0;
-	extra_idx = 0;
-	seg_res = 0;
-
-	while (i < nb_used) {
-		struct virtio_net_hdr_mrg_rxbuf *header;
-
-		if (nb_rx == nb_pkts)
-			break;
-
-		num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, 1);
-		if (num != 1)
-			continue;
-
-		i++;
-
-		PMD_RX_LOG(DEBUG, "dequeue:%d\n", num);
-		PMD_RX_LOG(DEBUG, "packet len:%d\n", len[0]);
-
-		rxm = rcv_pkts[0];
-
-		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
-			PMD_RX_LOG(ERR, "Packet drop\n");
-			nb_enqueued++;
-			virtio_discard_rxbuf(rxvq, rxm);
-			rxvq->errors++;
-			continue;
-		}
-
-		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
-			RTE_PKTMBUF_HEADROOM - hdr_size);
-		seg_num = header->num_buffers;
-
-		if (seg_num == 0)
-			seg_num = 1;
-
-		rxm->data_off = RTE_PKTMBUF_HEADROOM;
-		rxm->nb_segs = seg_num;
-		rxm->next = NULL;
-		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
-		rxm->data_len = (uint16_t)(len[0] - hdr_size);
-
-		rxm->port = rxvq->port_id;
-		rx_pkts[nb_rx] = rxm;
-		prev = rxm;
-
-		seg_res = seg_num - 1;
-
-		while (seg_res != 0) {
-			/*
-			 * Get extra segments for current uncompleted packet.
-			 */
-			uint16_t  rcv_cnt =
-				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
-			if (likely(VIRTQUEUE_NUSED(rxvq) >= rcv_cnt)) {
-				uint32_t rx_num =
-					virtqueue_dequeue_burst_rx(rxvq,
-					rcv_pkts, len, rcv_cnt);
-				i += rx_num;
-				rcv_cnt = rx_num;
-			} else {
-				PMD_RX_LOG(ERR,
-					"No enough segments for packet.\n");
-				nb_enqueued++;
-				virtio_discard_rxbuf(rxvq, rxm);
-				rxvq->errors++;
-				break;
-			}
-
-			extra_idx = 0;
-
-			while (extra_idx < rcv_cnt) {
-				rxm = rcv_pkts[extra_idx];
-
-				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
-				rxm->next = NULL;
-				rxm->pkt_len = (uint32_t)(len[extra_idx]);
-				rxm->data_len = (uint16_t)(len[extra_idx]);
-
-				if (prev)
-					prev->next = rxm;
-
-				prev = rxm;
-				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
-				extra_idx++;
-			};
-			seg_res -= rcv_cnt;
-		}
-
-		if (hw->vlan_strip)
-			rte_vlan_strip(rx_pkts[nb_rx]);
-
-		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
-			rx_pkts[nb_rx]->data_len);
-
-		rxvq->bytes += rx_pkts[nb_rx]->pkt_len;
-		nb_rx++;
-	}
-
-	rxvq->packets += nb_rx;
-
-	/* Allocate new mbuf for the used descriptor */
-	error = ENOSPC;
-	while (likely(!virtqueue_full(rxvq))) {
-		new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
-		if (unlikely(new_mbuf == NULL)) {
-			struct rte_eth_dev *dev
-				= &rte_eth_devices[rxvq->port_id];
-			dev->data->rx_mbuf_alloc_failed++;
-			break;
-		}
-		error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
-		if (unlikely(error)) {
-			rte_pktmbuf_free(new_mbuf);
-			break;
-		}
-		nb_enqueued++;
-	}
-
-	if (likely(nb_enqueued)) {
-		vq_update_avail_idx(rxvq);
-
-		if (unlikely(virtqueue_kick_prepare(rxvq))) {
-			virtqueue_notify(rxvq);
-			PMD_RX_LOG(DEBUG, "Notified");
-		}
-	}
-
-	return nb_rx;
-}
-
-uint16_t
-virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
-{
-	struct virtqueue *txvq = tx_queue;
-	struct rte_mbuf *txm;
-	uint16_t nb_used, nb_tx;
-	int error;
-
-	if (unlikely(nb_pkts < 1))
-		return nb_pkts;
-
-	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
-	nb_used = VIRTQUEUE_NUSED(txvq);
-
-	virtio_rmb();
-	if (likely(nb_used > txvq->vq_free_thresh))
-		virtio_xmit_cleanup(txvq, nb_used);
-
-	nb_tx = 0;
-
-	while (nb_tx < nb_pkts) {
-		/* Need one more descriptor for virtio header. */
-		int need = tx_pkts[nb_tx]->nb_segs - txvq->vq_free_cnt + 1;
-
-		/*Positive value indicates it need free vring descriptors */
-		if (unlikely(need > 0)) {
-			nb_used = VIRTQUEUE_NUSED(txvq);
-			virtio_rmb();
-			need = RTE_MIN(need, (int)nb_used);
-
-			virtio_xmit_cleanup(txvq, need);
-			need = (int)tx_pkts[nb_tx]->nb_segs -
-				txvq->vq_free_cnt + 1;
-		}
-
-		/*
-		 * Zero or negative value indicates it has enough free
-		 * descriptors to use for transmitting.
-		 */
-		if (likely(need <= 0)) {
-			txm = tx_pkts[nb_tx];
-
-			/* Do VLAN tag insertion */
-			if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
-				error = rte_vlan_insert(&txm);
-				if (unlikely(error)) {
-					rte_pktmbuf_free(txm);
-					++nb_tx;
-					continue;
-				}
-			}
-
-			/* Enqueue Packet buffers */
-			error = virtqueue_enqueue_xmit(txvq, txm);
-			if (unlikely(error)) {
-				if (error == ENOSPC)
-					PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0");
-				else if (error == EMSGSIZE)
-					PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1");
-				else
-					PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d", error);
-				break;
-			}
-			nb_tx++;
-			txvq->bytes += txm->pkt_len;
-		} else {
-			PMD_TX_LOG(ERR, "No free tx descriptors to transmit");
-			break;
-		}
-	}
-
-	txvq->packets += nb_tx;
-
-	if (likely(nb_tx)) {
-		vq_update_avail_idx(txvq);
-
-		if (unlikely(virtqueue_kick_prepare(txvq))) {
-			virtqueue_notify(txvq);
-			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
-		}
-	}
-
-	return nb_tx;
-}
diff --git a/lib/librte_pmd_virtio/virtqueue.c b/lib/librte_pmd_virtio/virtqueue.c
deleted file mode 100644
index 8a3005f..0000000
--- a/lib/librte_pmd_virtio/virtqueue.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include <stdint.h>
-
-#include <rte_mbuf.h>
-
-#include "virtqueue.h"
-#include "virtio_logs.h"
-#include "virtio_pci.h"
-
-void
-virtqueue_disable_intr(struct virtqueue *vq)
-{
-	/*
-	 * Set VRING_AVAIL_F_NO_INTERRUPT to hint host
-	 * not to interrupt when it consumes packets
-	 * Note: this is only considered a hint to the host
-	 */
-	vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
-}
-
-/*
- * Two types of mbuf to be cleaned:
- * 1) mbuf that has been consumed by backend but not used by virtio.
- * 2) mbuf that hasn't been consued by backend.
- */
-struct rte_mbuf *
-virtqueue_detatch_unused(struct virtqueue *vq)
-{
-	struct rte_mbuf *cookie;
-	int idx;
-
-	for (idx = 0; idx < vq->vq_nentries; idx++) {
-		if ((cookie = vq->vq_descx[idx].cookie) != NULL) {
-			vq->vq_descx[idx].cookie = NULL;
-			return cookie;
-		}
-	}
-	return NULL;
-}
diff --git a/lib/librte_pmd_virtio/virtqueue.h b/lib/librte_pmd_virtio/virtqueue.h
deleted file mode 100644
index 9d6079e..0000000
--- a/lib/librte_pmd_virtio/virtqueue.h
+++ /dev/null
@@ -1,325 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VIRTQUEUE_H_
-#define _VIRTQUEUE_H_
-
-#include <stdint.h>
-
-#include <rte_atomic.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_mempool.h>
-
-#include "virtio_pci.h"
-#include "virtio_ring.h"
-#include "virtio_logs.h"
-
-struct rte_mbuf;
-
-/*
- * Per virtio_config.h in Linux.
- *     For virtio_pci on SMP, we don't need to order with respect to MMIO
- *     accesses through relaxed memory I/O windows, so smp_mb() et al are
- *     sufficient.
- *
- * This driver is for virtio_pci on SMP and therefore can assume
- * weaker (compiler barriers)
- */
-#define virtio_mb()	rte_mb()
-#define virtio_rmb()	rte_compiler_barrier()
-#define virtio_wmb()	rte_compiler_barrier()
-
-#ifdef RTE_PMD_PACKET_PREFETCH
-#define rte_packet_prefetch(p)  rte_prefetch1(p)
-#else
-#define rte_packet_prefetch(p)  do {} while(0)
-#endif
-
-#define VIRTQUEUE_MAX_NAME_SZ 32
-
-#define RTE_MBUF_DATA_DMA_ADDR(mb) \
-	(uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
-
-#define VTNET_SQ_RQ_QUEUE_IDX 0
-#define VTNET_SQ_TQ_QUEUE_IDX 1
-#define VTNET_SQ_CQ_QUEUE_IDX 2
-
-enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 };
-/**
- * The maximum virtqueue size is 2^15. Use that value as the end of
- * descriptor chain terminator since it will never be a valid index
- * in the descriptor table. This is used to verify we are correctly
- * handling vq_free_cnt.
- */
-#define VQ_RING_DESC_CHAIN_END 32768
-
-/**
- * Control the RX mode, ie. promiscuous, allmulti, etc...
- * All commands require an "out" sg entry containing a 1 byte
- * state value, zero = disable, non-zero = enable.  Commands
- * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
- * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
- */
-#define VIRTIO_NET_CTRL_RX              0
-#define VIRTIO_NET_CTRL_RX_PROMISC      0
-#define VIRTIO_NET_CTRL_RX_ALLMULTI     1
-#define VIRTIO_NET_CTRL_RX_ALLUNI       2
-#define VIRTIO_NET_CTRL_RX_NOMULTI      3
-#define VIRTIO_NET_CTRL_RX_NOUNI        4
-#define VIRTIO_NET_CTRL_RX_NOBCAST      5
-
-/**
- * Control the MAC
- *
- * The MAC filter table is managed by the hypervisor, the guest should
- * assume the size is infinite.  Filtering should be considered
- * non-perfect, ie. based on hypervisor resources, the guest may
- * received packets from sources not specified in the filter list.
- *
- * In addition to the class/cmd header, the TABLE_SET command requires
- * two out scatterlists.  Each contains a 4 byte count of entries followed
- * by a concatenated byte stream of the ETH_ALEN MAC addresses.  The
- * first sg list contains unicast addresses, the second is for multicast.
- * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
- * is available.
- *
- * The ADDR_SET command requests one out scatterlist, it contains a
- * 6 bytes MAC address. This functionality is present if the
- * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
- */
-struct virtio_net_ctrl_mac {
-	uint32_t entries;
-	uint8_t macs[][ETHER_ADDR_LEN];
-} __attribute__((__packed__));
-
-#define VIRTIO_NET_CTRL_MAC    1
- #define VIRTIO_NET_CTRL_MAC_TABLE_SET        0
- #define VIRTIO_NET_CTRL_MAC_ADDR_SET         1
-
-/**
- * Control VLAN filtering
- *
- * The VLAN filter table is controlled via a simple ADD/DEL interface.
- * VLAN IDs not added may be filtered by the hypervisor.  Del is the
- * opposite of add.  Both commands expect an out entry containing a 2
- * byte VLAN ID.  VLAN filtering is available with the
- * VIRTIO_NET_F_CTRL_VLAN feature bit.
- */
-#define VIRTIO_NET_CTRL_VLAN     2
-#define VIRTIO_NET_CTRL_VLAN_ADD 0
-#define VIRTIO_NET_CTRL_VLAN_DEL 1
-
-struct virtio_net_ctrl_hdr {
-	uint8_t class;
-	uint8_t cmd;
-} __attribute__((packed));
-
-typedef uint8_t virtio_net_ctrl_ack;
-
-#define VIRTIO_NET_OK     0
-#define VIRTIO_NET_ERR    1
-
-#define VIRTIO_MAX_CTRL_DATA 2048
-
-struct virtio_pmd_ctrl {
-	struct virtio_net_ctrl_hdr hdr;
-	virtio_net_ctrl_ack status;
-	uint8_t data[VIRTIO_MAX_CTRL_DATA];
-};
-
-struct virtqueue {
-	struct virtio_hw         *hw;     /**< virtio_hw structure pointer. */
-	const struct rte_memzone *mz;     /**< mem zone to populate RX ring. */
-	const struct rte_memzone *virtio_net_hdr_mz; /**< memzone to populate hdr. */
-	struct rte_mempool       *mpool;  /**< mempool for mbuf allocation */
-	uint16_t    queue_id;             /**< DPDK queue index. */
-	uint8_t     port_id;              /**< Device port identifier. */
-	uint16_t    vq_queue_index;       /**< PCI queue index */
-
-	void        *vq_ring_virt_mem;    /**< linear address of vring*/
-	unsigned int vq_ring_size;
-	phys_addr_t vq_ring_mem;          /**< physical address of vring */
-
-	struct vring vq_ring;    /**< vring keeping desc, used and avail */
-	uint16_t    vq_free_cnt; /**< num of desc available */
-	uint16_t    vq_nentries; /**< vring desc numbers */
-	uint16_t    vq_free_thresh; /**< free threshold */
-	/**
-	 * Head of the free chain in the descriptor table. If
-	 * there are no free descriptors, this will be set to
-	 * VQ_RING_DESC_CHAIN_END.
-	 */
-	uint16_t  vq_desc_head_idx;
-	uint16_t  vq_desc_tail_idx;
-	/**
-	 * Last consumed descriptor in the used table,
-	 * trails vq_ring.used->idx.
-	 */
-	uint16_t vq_used_cons_idx;
-	uint16_t vq_avail_idx;
-	phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
-
-	/* Statistics */
-	uint64_t	packets;
-	uint64_t	bytes;
-	uint64_t	errors;
-
-	struct vq_desc_extra {
-		void              *cookie;
-		uint16_t          ndescs;
-	} vq_descx[0];
-};
-
-/* If multiqueue is provided by host, then we suppport it. */
-#ifndef VIRTIO_NET_F_MQ
-/* Device supports Receive Flow Steering */
-#define VIRTIO_NET_F_MQ 0x400000
-#define VIRTIO_NET_CTRL_MQ   4
-#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
-#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
-#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
-#endif
-#ifndef VIRTIO_NET_F_CTRL_MAC_ADDR
-#define VIRTIO_NET_F_CTRL_MAC_ADDR 0x800000
-#define VIRTIO_NET_CTRL_MAC_ADDR_SET         1
-#endif
-
-/**
- * This is the first element of the scatter-gather list.  If you don't
- * specify GSO or CSUM features, you can simply ignore the header.
- */
-struct virtio_net_hdr {
-#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1    /**< Use csum_start,csum_offset*/
-	uint8_t flags;
-#define VIRTIO_NET_HDR_GSO_NONE     0    /**< Not a GSO frame */
-#define VIRTIO_NET_HDR_GSO_TCPV4    1    /**< GSO frame, IPv4 TCP (TSO) */
-#define VIRTIO_NET_HDR_GSO_UDP      3    /**< GSO frame, IPv4 UDP (UFO) */
-#define VIRTIO_NET_HDR_GSO_TCPV6    4    /**< GSO frame, IPv6 TCP */
-#define VIRTIO_NET_HDR_GSO_ECN      0x80 /**< TCP has ECN set */
-	uint8_t gso_type;
-	uint16_t hdr_len;     /**< Ethernet + IP + tcp/udp hdrs */
-	uint16_t gso_size;    /**< Bytes to append to hdr_len per frame */
-	uint16_t csum_start;  /**< Position to start checksumming from */
-	uint16_t csum_offset; /**< Offset after that to place checksum */
-};
-
-/**
- * This is the version of the header to use when the MRG_RXBUF
- * feature has been negotiated.
- */
-struct virtio_net_hdr_mrg_rxbuf {
-	struct   virtio_net_hdr hdr;
-	uint16_t num_buffers; /**< Number of merged rx buffers */
-};
-
-/**
- * Tell the backend not to interrupt us.
- */
-void virtqueue_disable_intr(struct virtqueue *vq);
-/**
- *  Dump virtqueue internal structures, for debug purpose only.
- */
-void virtqueue_dump(struct virtqueue *vq);
-/**
- *  Get all mbufs to be freed.
- */
-struct rte_mbuf *virtqueue_detatch_unused(struct virtqueue *vq);
-
-static inline int
-virtqueue_full(const struct virtqueue *vq)
-{
-	return vq->vq_free_cnt == 0;
-}
-
-#define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_ring.used->idx - (vq)->vq_used_cons_idx))
-
-static inline void
-vq_update_avail_idx(struct virtqueue *vq)
-{
-	virtio_wmb();
-	vq->vq_ring.avail->idx = vq->vq_avail_idx;
-}
-
-static inline void
-vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx)
-{
-	uint16_t avail_idx;
-	/*
-	 * Place the head of the descriptor chain into the next slot and make
-	 * it usable to the host. The chain is made available now rather than
-	 * deferring to virtqueue_notify() in the hopes that if the host is
-	 * currently running on another CPU, we can keep it processing the new
-	 * descriptor.
-	 */
-	avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1));
-	vq->vq_ring.avail->ring[avail_idx] = desc_idx;
-	vq->vq_avail_idx++;
-}
-
-static inline int
-virtqueue_kick_prepare(struct virtqueue *vq)
-{
-	return !(vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY);
-}
-
-static inline void
-virtqueue_notify(struct virtqueue *vq)
-{
-	/*
-	 * Ensure updated avail->idx is visible to host.
-	 * For virtio on IA, the notificaiton is through io port operation
-	 * which is a serialization instruction itself.
-	 */
-	VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_NOTIFY, vq->vq_queue_index);
-}
-
-#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
-#define VIRTQUEUE_DUMP(vq) do { \
-	uint16_t used_idx, nused; \
-	used_idx = (vq)->vq_ring.used->idx; \
-	nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \
-	PMD_INIT_LOG(DEBUG, \
-	  "VQ: - size=%d; free=%d; used=%d; desc_head_idx=%d;" \
-	  " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \
-	  " avail.flags=0x%x; used.flags=0x%x", \
-	  (vq)->vq_nentries, (vq)->vq_free_cnt, nused, \
-	  (vq)->vq_desc_head_idx, (vq)->vq_ring.avail->idx, \
-	  (vq)->vq_used_cons_idx, (vq)->vq_ring.used->idx, \
-	  (vq)->vq_ring.avail->flags, (vq)->vq_ring.used->flags); \
-} while (0)
-#else
-#define VIRTQUEUE_DUMP(vq) do { } while (0)
-#endif
-
-#endif /* _VIRTQUEUE_H_ */
-- 
2.1.0

^ permalink raw reply	[relevance 1%]

* Re: [dpdk-dev] [PATCH v4 0/6] update jhash function
  2015-05-13 13:52  0%     ` De Lara Guarch, Pablo
@ 2015-05-13 14:20  0%       ` Neil Horman
  0 siblings, 0 replies; 200+ results
From: Neil Horman @ 2015-05-13 14:20 UTC (permalink / raw)
  To: De Lara Guarch, Pablo; +Cc: dev

On Wed, May 13, 2015 at 01:52:33PM +0000, De Lara Guarch, Pablo wrote:
> Hi Neil,
> 
> > -----Original Message-----
> > From: Neil Horman [mailto:nhorman@tuxdriver.com]
> > Sent: Tuesday, May 12, 2015 4:33 PM
> > To: De Lara Guarch, Pablo
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v4 0/6] update jhash function
> > 
> > On Tue, May 12, 2015 at 12:02:32PM +0100, Pablo de Lara wrote:
> > > Jenkins hash function was developed originally in 1996,
> > > and was integrated in first versions of DPDK.
> > > The function has been improved in 2006,
> > > achieving up to 60% better performance, compared to the original one.
> > >
> > > This patchset updates the current jhash in DPDK,
> > > including two new functions that generate two hashes from a single key.
> > >
> > > It also separates the existing hash function performance tests to
> > > another file, to make it quicker to run.
> > >
> > > changes in v4:
> > > - Simplify key alignment checks
> > > - Include missing x86 arch check
> > >
> > > changes in v3:
> > >
> > > - Update rte_jhash_1word, rte_jhash_2words and rte_jhash_3words
> > >   functions
> > >
> > > changes in v2:
> > >
> > > - Split single commit in three commits, one that updates the existing
> > functions
> > >   and another that adds two new functions and use one of those functions
> > >   as a base to be called by the other ones.
> > > - Remove some unnecessary ifdefs in the code.
> > > - Add new macros to help on the reutilization of constants
> > > - Separate hash function performance tests to another file
> > >   and improve cycle measurements.
> > > - Rename existing function rte_jhash2 to rte_jhash_32b
> > >   (something more meaninful) and mark rte_jhash2 as
> > >   deprecated
> > >
> > > Pablo de Lara (6):
> > >   test/hash: move hash function perf tests to separate file
> > >   test/hash: improve accuracy on cycle measurements
> > >   hash: update jhash function with the latest available
> > >   hash: add two new functions to jhash library
> > >   hash: remove duplicated code
> > >   hash: rename rte_jhash2 to rte_jhash_32b
> > >
> > >  app/test/Makefile               |    1 +
> > >  app/test/test_func_reentrancy.c |    2 +-
> > >  app/test/test_hash.c            |    4 +-
> > >  app/test/test_hash_func_perf.c  |  145 +++++++++++++++++
> > >  app/test/test_hash_perf.c       |   71 +--------
> > >  lib/librte_hash/rte_jhash.h     |  338 +++++++++++++++++++++++++++++-
> > ---------
> > >  6 files changed, 402 insertions(+), 159 deletions(-)
> > >  create mode 100644 app/test/test_hash_func_perf.c
> > >
> > > --
> > > 1.7.4.1
> > >
> > >
> > did you run this through the ABI checker?  I see you're removing several
> > symbols
> > that will likely need to go through the ABI deprecation process.
> > 
> > Neil
> 
> I had not run it, but I just did. I see no problems on librte_hash
> (but I see some on rte_ethdev.h, due to another commit).
> 
> Anyway, I renamed two functions to be more meaningful, but those functions are "static inline", 
> so I am not sure exactly what the deprecation process is for those.
> What I did was leaving the original function that calls the same function as the new renamed one,
> but adds a line warning that the functions is deprecated.
> 
> Is that OK or should I do it differently?
> 
As long as they're all static inline and binaries that are already compiled can
continue to access the data structures they reference at the member offsets
encoded into them at compile time, you should be ok.

Thanks!
Neil

> Thanks!
> Pablo
> 

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v4 0/6] update jhash function
  2015-05-12 15:33  4%   ` Neil Horman
@ 2015-05-13 13:52  0%     ` De Lara Guarch, Pablo
  2015-05-13 14:20  0%       ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: De Lara Guarch, Pablo @ 2015-05-13 13:52 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

Hi Neil,

> -----Original Message-----
> From: Neil Horman [mailto:nhorman@tuxdriver.com]
> Sent: Tuesday, May 12, 2015 4:33 PM
> To: De Lara Guarch, Pablo
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 0/6] update jhash function
> 
> On Tue, May 12, 2015 at 12:02:32PM +0100, Pablo de Lara wrote:
> > Jenkins hash function was developed originally in 1996,
> > and was integrated in first versions of DPDK.
> > The function has been improved in 2006,
> > achieving up to 60% better performance, compared to the original one.
> >
> > This patchset updates the current jhash in DPDK,
> > including two new functions that generate two hashes from a single key.
> >
> > It also separates the existing hash function performance tests to
> > another file, to make it quicker to run.
> >
> > changes in v4:
> > - Simplify key alignment checks
> > - Include missing x86 arch check
> >
> > changes in v3:
> >
> > - Update rte_jhash_1word, rte_jhash_2words and rte_jhash_3words
> >   functions
> >
> > changes in v2:
> >
> > - Split single commit in three commits, one that updates the existing
> functions
> >   and another that adds two new functions and use one of those functions
> >   as a base to be called by the other ones.
> > - Remove some unnecessary ifdefs in the code.
> > - Add new macros to help on the reutilization of constants
> > - Separate hash function performance tests to another file
> >   and improve cycle measurements.
> > - Rename existing function rte_jhash2 to rte_jhash_32b
> >   (something more meaninful) and mark rte_jhash2 as
> >   deprecated
> >
> > Pablo de Lara (6):
> >   test/hash: move hash function perf tests to separate file
> >   test/hash: improve accuracy on cycle measurements
> >   hash: update jhash function with the latest available
> >   hash: add two new functions to jhash library
> >   hash: remove duplicated code
> >   hash: rename rte_jhash2 to rte_jhash_32b
> >
> >  app/test/Makefile               |    1 +
> >  app/test/test_func_reentrancy.c |    2 +-
> >  app/test/test_hash.c            |    4 +-
> >  app/test/test_hash_func_perf.c  |  145 +++++++++++++++++
> >  app/test/test_hash_perf.c       |   71 +--------
> >  lib/librte_hash/rte_jhash.h     |  338 +++++++++++++++++++++++++++++-
> ---------
> >  6 files changed, 402 insertions(+), 159 deletions(-)
> >  create mode 100644 app/test/test_hash_func_perf.c
> >
> > --
> > 1.7.4.1
> >
> >
> did you run this through the ABI checker?  I see you're removing several
> symbols
> that will likely need to go through the ABI deprecation process.
> 
> Neil

I had not run it, but I just did. I see no problems on librte_hash
(but I see some on rte_ethdev.h, due to another commit).

Anyway, I renamed two functions to be more meaningful, but those functions are "static inline", 
so I am not sure exactly what the deprecation process is for those.
What I did was leave the original function in place, calling the same function as the new renamed one,
but with an added line warning that the function is deprecated.

Is that OK or should I do it differently?

Thanks!
Pablo

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH 0/3] eal: uio irq fixes and enhancements
  2015-05-13  8:57  3%   ` Bruce Richardson
@ 2015-05-13  9:32  0%     ` Thomas Monjalon
  0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2015-05-13  9:32 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: dev

2015-05-13 09:57, Bruce Richardson:
> On Tue, May 12, 2015 at 10:02:20PM +0200, Thomas Monjalon wrote:
> > 2015-04-28 09:36, Stephen Hemminger:
> > > This set of patches starts out with fixing a regression where
> > > uio_pci_generic broke link state interrupt, then adds better
> > > management of PCI config space.
> > > 
> > > Will leave up to document writers to update various release
> > > notes and API manuals as they see fit.
> > > 
> > > Also, needs what ever shared library map file updates which
> > > maybe required when using dynamic libraries. But that should
> > > not stop acceptance of this patch set.
> > 
> > No, an incomplete patch cannot be accepted.
> > There are several solutions:
> > - Siobhan and Neil accept to work on doc and .map file
> > - You provide a good v2
> > - Someone else finish this patchset
> > - The bug remains (not a solution)
> 
> Merge patch one on it's own to fix the issue? I don't think patch 1 requires
> any further doc or ABI map file changes, does it?

Yes you're right.
First patch is now applied.

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH 0/3] eal: uio irq fixes and enhancements
  @ 2015-05-13  8:57  3%   ` Bruce Richardson
  2015-05-13  9:32  0%     ` Thomas Monjalon
  0 siblings, 1 reply; 200+ results
From: Bruce Richardson @ 2015-05-13  8:57 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

On Tue, May 12, 2015 at 10:02:20PM +0200, Thomas Monjalon wrote:
> 2015-04-28 09:36, Stephen Hemminger:
> > This set of patches starts out with fixing a regression where
> > uio_pci_generic broke link state interrupt, then adds better
> > management of PCI config space.
> > 
> > Will leave up to document writers to update various release
> > notes and API manuals as they see fit.
> > 
> > Also, needs what ever shared library map file updates which
> > maybe required when using dynamic libraries. But that should
> > not stop acceptance of this patch set.
> 
> No, an incomplete patch cannot be accepted.
> There are several solutions:
> - Siobhan and Neil accept to work on doc and .map file
> - You provide a good v2
> - Someone else finish this patchset
> - The bug remains (not a solution)

Merge patch one on its own to fix the issue? I don't think patch 1 requires
any further doc or ABI map file changes, does it?

/Bruce

^ permalink raw reply	[relevance 3%]

* [dpdk-dev] [PATCH 14/19] virtio: move virtio PMD to drivers directory
  @ 2015-05-12 17:05  1%   ` Bruce Richardson
    1 sibling, 0 replies; 200+ results
From: Bruce Richardson @ 2015-05-12 17:05 UTC (permalink / raw)
  To: dev

Move virtio PMD to drivers directory

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 drivers/Makefile                                 |    2 +-
 drivers/virtio/Makefile                          |   60 +
 drivers/virtio/rte_pmd_virtio_version.map        |    4 +
 drivers/virtio/virtio_ethdev.c                   | 1504 ++++++++++++++++++++++
 drivers/virtio/virtio_ethdev.h                   |  124 ++
 drivers/virtio/virtio_logs.h                     |   70 +
 drivers/virtio/virtio_pci.c                      |  147 +++
 drivers/virtio/virtio_pci.h                      |  270 ++++
 drivers/virtio/virtio_ring.h                     |  163 +++
 drivers/virtio/virtio_rxtx.c                     |  815 ++++++++++++
 drivers/virtio/virtqueue.c                       |   70 +
 drivers/virtio/virtqueue.h                       |  325 +++++
 lib/Makefile                                     |    1 -
 lib/librte_pmd_virtio/Makefile                   |   60 -
 lib/librte_pmd_virtio/rte_pmd_virtio_version.map |    4 -
 lib/librte_pmd_virtio/virtio_ethdev.c            | 1504 ----------------------
 lib/librte_pmd_virtio/virtio_ethdev.h            |  124 --
 lib/librte_pmd_virtio/virtio_logs.h              |   70 -
 lib/librte_pmd_virtio/virtio_pci.c               |  147 ---
 lib/librte_pmd_virtio/virtio_pci.h               |  270 ----
 lib/librte_pmd_virtio/virtio_ring.h              |  163 ---
 lib/librte_pmd_virtio/virtio_rxtx.c              |  815 ------------
 lib/librte_pmd_virtio/virtqueue.c                |   70 -
 lib/librte_pmd_virtio/virtqueue.h                |  325 -----
 24 files changed, 3553 insertions(+), 3554 deletions(-)
 create mode 100644 drivers/virtio/Makefile
 create mode 100644 drivers/virtio/rte_pmd_virtio_version.map
 create mode 100644 drivers/virtio/virtio_ethdev.c
 create mode 100644 drivers/virtio/virtio_ethdev.h
 create mode 100644 drivers/virtio/virtio_logs.h
 create mode 100644 drivers/virtio/virtio_pci.c
 create mode 100644 drivers/virtio/virtio_pci.h
 create mode 100644 drivers/virtio/virtio_ring.h
 create mode 100644 drivers/virtio/virtio_rxtx.c
 create mode 100644 drivers/virtio/virtqueue.c
 create mode 100644 drivers/virtio/virtqueue.h
 delete mode 100644 lib/librte_pmd_virtio/Makefile
 delete mode 100644 lib/librte_pmd_virtio/rte_pmd_virtio_version.map
 delete mode 100644 lib/librte_pmd_virtio/virtio_ethdev.c
 delete mode 100644 lib/librte_pmd_virtio/virtio_ethdev.h
 delete mode 100644 lib/librte_pmd_virtio/virtio_logs.h
 delete mode 100644 lib/librte_pmd_virtio/virtio_pci.c
 delete mode 100644 lib/librte_pmd_virtio/virtio_pci.h
 delete mode 100644 lib/librte_pmd_virtio/virtio_ring.h
 delete mode 100644 lib/librte_pmd_virtio/virtio_rxtx.c
 delete mode 100644 lib/librte_pmd_virtio/virtqueue.c
 delete mode 100644 lib/librte_pmd_virtio/virtqueue.h

diff --git a/drivers/Makefile b/drivers/Makefile
index 567d77f..7d848e1 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -42,7 +42,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_NULL) += null
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += pcap
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += ring
-#DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += librte_pmd_virtio
+DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 #DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += librte_pmd_vmxnet3
 #DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt
 
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
new file mode 100644
index 0000000..21ff7e5
--- /dev/null
+++ b/drivers/virtio/Makefile
@@ -0,0 +1,60 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_virtio.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+EXPORT_MAP := rte_pmd_virtio_version.map
+
+LIBABIVER := 1
+
+#
+# all sources are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
+
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_net lib/librte_malloc
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/virtio/rte_pmd_virtio_version.map b/drivers/virtio/rte_pmd_virtio_version.map
new file mode 100644
index 0000000..ef35398
--- /dev/null
+++ b/drivers/virtio/rte_pmd_virtio_version.map
@@ -0,0 +1,4 @@
+DPDK_2.0 {
+
+	local: *;
+};
diff --git a/drivers/virtio/virtio_ethdev.c b/drivers/virtio/virtio_ethdev.c
new file mode 100644
index 0000000..e63dbfb
--- /dev/null
+++ b/drivers/virtio/virtio_ethdev.c
@@ -0,0 +1,1504 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#ifdef RTE_EXEC_ENV_LINUXAPP
+#include <dirent.h>
+#include <fcntl.h>
+#endif
+
+#include <rte_ethdev.h>
+#include <rte_memcpy.h>
+#include <rte_string_fns.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_pci.h>
+#include <rte_ether.h>
+#include <rte_common.h>
+
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_dev.h>
+
+#include "virtio_ethdev.h"
+#include "virtio_pci.h"
+#include "virtio_logs.h"
+#include "virtqueue.h"
+
+
+static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
+static int  virtio_dev_configure(struct rte_eth_dev *dev);
+static int  virtio_dev_start(struct rte_eth_dev *dev);
+static void virtio_dev_stop(struct rte_eth_dev *dev);
+static void virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
+static void virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
+static void virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
+static void virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
+static void virtio_dev_info_get(struct rte_eth_dev *dev,
+				struct rte_eth_dev_info *dev_info);
+static int virtio_dev_link_update(struct rte_eth_dev *dev,
+	__rte_unused int wait_to_complete);
+
+static void virtio_set_hwaddr(struct virtio_hw *hw);
+static void virtio_get_hwaddr(struct virtio_hw *hw);
+
+static void virtio_dev_rx_queue_release(__rte_unused void *rxq);
+static void virtio_dev_tx_queue_release(__rte_unused void *txq);
+
+static void virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
+static void virtio_dev_stats_reset(struct rte_eth_dev *dev);
+static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
+static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
+				uint16_t vlan_id, int on);
+static void virtio_mac_addr_add(struct rte_eth_dev *dev,
+				struct ether_addr *mac_addr,
+				uint32_t index, uint32_t vmdq __rte_unused);
+static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
+static void virtio_mac_addr_set(struct rte_eth_dev *dev,
+				struct ether_addr *mac_addr);
+
+static int virtio_dev_queue_stats_mapping_set(
+	__rte_unused struct rte_eth_dev *eth_dev,
+	__rte_unused uint16_t queue_id,
+	__rte_unused uint8_t stat_idx,
+	__rte_unused uint8_t is_rx);
+
+/*
+ * The set of PCI devices this driver supports.
+ *
+ * The entries are generated by including rte_pci_dev_ids.h with
+ * RTE_PCI_DEV_ID_DECL_VIRTIO defined to expand to one table entry per
+ * declaration; the table is closed by an entry whose vendor_id is 0.
+ */
+static const struct rte_pci_id pci_id_virtio_map[] = {
+
+#define RTE_PCI_DEV_ID_DECL_VIRTIO(vend, dev) {RTE_PCI_DEVICE(vend, dev)},
+#include "rte_pci_dev_ids.h"
+
+{ .vendor_id = 0, /* sentinel */ },
+};
+
+/*
+ * Send one command on the control virtqueue and wait for the device's ack.
+ *
+ * The command in *ctrl is copied into the queue's header memzone and posted
+ * as a descriptor chain: one descriptor for the control header, pkt_num
+ * descriptors for the data segments (dlen[k] bytes each, laid out back to
+ * back after the header and the status byte), and one device-writable
+ * descriptor for the status byte.
+ *
+ * Returns -1 when there is no control queue, when pkt_num < 1 or when the
+ * ring has fewer than pkt_num + 2 free descriptors; otherwise returns the
+ * status byte read back from the header memzone.
+ */
+static int
+virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
+		int *dlen, int pkt_num)
+{
+	uint16_t head, i;
+	int k, sum = 0;
+	virtio_net_ctrl_ack status = ~0;
+	struct virtio_pmd_ctrl result;
+
+	ctrl->status = status;
+
+	/*
+	 * Callers pass hw->cvq directly, so test the pointer itself before
+	 * touching any field of the queue: a missing control queue must be
+	 * reported, not dereferenced.
+	 */
+	if (vq == NULL || !vq->hw->cvq) {
+		PMD_INIT_LOG(ERR,
+			     "%s(): Control queue is not supported.",
+			     __func__);
+		return -1;
+	}
+
+	head = vq->vq_desc_head_idx;
+
+	PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
+		"vq->hw->cvq = %p vq = %p",
+		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
+
+	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
+		return -1;
+
+	memcpy(vq->virtio_net_hdr_mz->addr, ctrl,
+		sizeof(struct virtio_pmd_ctrl));
+
+	/*
+	 * Format is enforced in qemu code:
+	 * One TX packet for header;
+	 * At least one TX packet per argument;
+	 * One RX packet for ACK.
+	 */
+	vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT;
+	vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr;
+	vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
+	vq->vq_free_cnt--;
+	i = vq->vq_ring.desc[head].next;
+
+	/* one descriptor per data segment; sum is the running data offset */
+	for (k = 0; k < pkt_num; k++) {
+		vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT;
+		vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr
+			+ sizeof(struct virtio_net_ctrl_hdr)
+			+ sizeof(ctrl->status) + sizeof(uint8_t)*sum;
+		vq->vq_ring.desc[i].len = dlen[k];
+		sum += dlen[k];
+		vq->vq_free_cnt--;
+		i = vq->vq_ring.desc[i].next;
+	}
+
+	/* last descriptor: the status byte, writable by the device */
+	vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
+	vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr
+			+ sizeof(struct virtio_net_ctrl_hdr);
+	vq->vq_ring.desc[i].len = sizeof(ctrl->status);
+	vq->vq_free_cnt--;
+
+	vq->vq_desc_head_idx = vq->vq_ring.desc[i].next;
+
+	vq_update_avail_ring(vq, head);
+	vq_update_avail_idx(vq);
+
+	PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
+
+	virtqueue_notify(vq);
+
+	/* poll until the used index moves past what we have consumed */
+	rte_rmb();
+	while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) {
+		rte_rmb();
+		usleep(100);
+	}
+
+	/* put every completed chain back at the head of the free list */
+	while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) {
+		uint32_t idx, desc_idx, used_idx;
+		struct vring_used_elem *uep;
+
+		used_idx = (uint32_t)(vq->vq_used_cons_idx
+				& (vq->vq_nentries - 1));
+		uep = &vq->vq_ring.used->ring[used_idx];
+		idx = (uint32_t) uep->id;
+		desc_idx = idx;
+
+		while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) {
+			desc_idx = vq->vq_ring.desc[desc_idx].next;
+			vq->vq_free_cnt++;
+		}
+
+		vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx;
+		vq->vq_desc_head_idx = idx;
+
+		vq->vq_used_cons_idx++;
+		vq->vq_free_cnt++;
+	}
+
+	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
+			vq->vq_free_cnt, vq->vq_desc_head_idx);
+
+	memcpy(&result, vq->virtio_net_hdr_mz->addr,
+			sizeof(struct virtio_pmd_ctrl));
+
+	return result.status;
+}
+
+/*
+ * Send VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET with nb_queues as its 16-bit
+ * argument on the control queue.
+ *
+ * Returns 0 when the command completes with a zero status and -EINVAL
+ * otherwise.
+ */
+static int
+virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+	int arg_len = sizeof(uint16_t);
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
+	memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
+
+	if (virtio_send_command(hw->cvq, &ctrl, &arg_len, 1) == 0)
+		return 0;
+
+	PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
+		  "failed, this is too late now...");
+	return -EINVAL;
+}
+
+/*
+ * Allocate one virtqueue (RX, TX or control) and register it with the device.
+ *
+ * Selects queue vtpci_queue_idx, reads its size from the device, allocates
+ * the virtqueue structure and a memzone for the vring, allocates the header
+ * memzone used by TX and control queues, and finally writes the ring's page
+ * frame number to the device.
+ *
+ * nb_desc is only advisory: 0, or any value different from the device's
+ * queue size, is replaced by the device's queue size.
+ *
+ * On success stores the new queue in *pvq and returns 0. Returns -EINVAL
+ * when the queue does not exist or its size is not a power of two, and
+ * -ENOMEM when an allocation fails, queue_type is not one of the three
+ * known types, or the ring lies above the address range the device accepts.
+ */
+int virtio_dev_queue_setup(struct rte_eth_dev *dev,
+			int queue_type,
+			uint16_t queue_idx,
+			uint16_t  vtpci_queue_idx,
+			uint16_t nb_desc,
+			unsigned int socket_id,
+			struct virtqueue **pvq)
+{
+	char vq_name[VIRTQUEUE_MAX_NAME_SZ];
+	const struct rte_memzone *mz;
+	uint16_t vq_size;
+	int size;
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtqueue  *vq = NULL;
+
+	/* Write the virtqueue index to the Queue Select Field */
+	VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, vtpci_queue_idx);
+	PMD_INIT_LOG(DEBUG, "selecting queue: %d", vtpci_queue_idx);
+
+	/*
+	 * Read the virtqueue size from the Queue Size field
+	 * Always power of 2 and if 0 virtqueue does not exist
+	 */
+	vq_size = VIRTIO_READ_REG_2(hw, VIRTIO_PCI_QUEUE_NUM);
+	PMD_INIT_LOG(DEBUG, "vq_size: %d nb_desc:%d", vq_size, nb_desc);
+	if (nb_desc == 0)
+		nb_desc = vq_size;
+	if (vq_size == 0) {
+		PMD_INIT_LOG(ERR, "%s: virtqueue does not exist", __func__);
+		return -EINVAL;
+	} else if (!rte_is_power_of_2(vq_size)) {
+		PMD_INIT_LOG(ERR, "%s: virtqueue size is not powerof 2", __func__);
+		return -EINVAL;
+	} else if (nb_desc != vq_size) {
+		PMD_INIT_LOG(ERR, "Warning: nb_desc(%d) is not equal to vq size (%d), fall to vq size",
+			nb_desc, vq_size);
+		nb_desc = vq_size;
+	}
+
+	/* the virtqueue is followed by one vq_desc_extra per ring entry */
+	if (queue_type == VTNET_RQ) {
+		snprintf(vq_name, sizeof(vq_name), "port%d_rvq%d",
+			dev->data->port_id, queue_idx);
+		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
+			vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
+	} else if (queue_type == VTNET_TQ) {
+		snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d",
+			dev->data->port_id, queue_idx);
+		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
+			vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
+	} else if (queue_type == VTNET_CQ) {
+		snprintf(vq_name, sizeof(vq_name), "port%d_cvq",
+			dev->data->port_id);
+		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
+			vq_size * sizeof(struct vq_desc_extra),
+			RTE_CACHE_LINE_SIZE);
+	}
+	if (vq == NULL) {
+		PMD_INIT_LOG(ERR, "%s: Can not allocate virtqueue", __func__);
+		return (-ENOMEM);
+	}
+
+	vq->hw = hw;
+	vq->port_id = dev->data->port_id;
+	vq->queue_id = queue_idx;
+	vq->vq_queue_index = vtpci_queue_idx;
+	vq->vq_nentries = vq_size;
+	vq->vq_free_cnt = vq_size;
+
+	/*
+	 * Reserve a memzone for vring elements
+	 */
+	size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
+	vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
+	PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d", size, vq->vq_ring_size);
+
+	mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
+		socket_id, 0, VIRTIO_PCI_VRING_ALIGN);
+	if (mz == NULL) {
+		rte_free(vq);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
+	 * and only accepts 32 bit page frame number.
+	 * Check if the allocated physical memory exceeds 16TB.
+	 */
+	if ((mz->phys_addr + vq->vq_ring_size - 1) >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
+		PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
+		/* NOTE(review): only vq is released here; mz stays reserved */
+		rte_free(vq);
+		return -ENOMEM;
+	}
+
+	/* clear the whole ring; sizeof(mz->len) would only cover a few bytes */
+	memset(mz->addr, 0, mz->len);
+	vq->mz = mz;
+	vq->vq_ring_mem = mz->phys_addr;
+	vq->vq_ring_virt_mem = mz->addr;
+	PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem:      0x%"PRIx64, (uint64_t)mz->phys_addr);
+	PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%"PRIx64, (uint64_t)mz->addr);
+	vq->virtio_net_hdr_mz  = NULL;
+	vq->virtio_net_hdr_mem = 0;
+
+	if (queue_type == VTNET_TQ) {
+		/*
+		 * For each xmit packet, allocate a virtio_net_hdr
+		 */
+		snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone",
+			dev->data->port_id, queue_idx);
+		vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
+			vq_size * hw->vtnet_hdr_size,
+			socket_id, 0, RTE_CACHE_LINE_SIZE);
+		if (vq->virtio_net_hdr_mz == NULL) {
+			rte_free(vq);
+			return -ENOMEM;
+		}
+		vq->virtio_net_hdr_mem =
+			vq->virtio_net_hdr_mz->phys_addr;
+		memset(vq->virtio_net_hdr_mz->addr, 0,
+			vq_size * hw->vtnet_hdr_size);
+	} else if (queue_type == VTNET_CQ) {
+		/* Allocate a page for control vq command, data and status */
+		snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone",
+			dev->data->port_id);
+		vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
+			PAGE_SIZE, socket_id, 0, RTE_CACHE_LINE_SIZE);
+		if (vq->virtio_net_hdr_mz == NULL) {
+			rte_free(vq);
+			return -ENOMEM;
+		}
+		vq->virtio_net_hdr_mem =
+			vq->virtio_net_hdr_mz->phys_addr;
+		memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);
+	}
+
+	/*
+	 * Set guest physical address of the virtqueue
+	 * in VIRTIO_PCI_QUEUE_PFN config register of device
+	 */
+	VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_QUEUE_PFN,
+			mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+	*pvq = vq;
+	return 0;
+}
+
+/*
+ * Create the control virtqueue through virtio_dev_queue_setup() and record
+ * it in hw->cvq.
+ *
+ * Returns 0 on success, or the negative value returned by the generic
+ * queue setup.
+ */
+static int
+virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx,
+		uint32_t socket_id)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtqueue *cvq;
+	int rc;
+
+	PMD_INIT_FUNC_TRACE();
+
+	/* a descriptor count of 0 lets the generic setup use the device's size */
+	rc = virtio_dev_queue_setup(dev, VTNET_CQ, VTNET_SQ_CQ_QUEUE_IDX,
+			vtpci_queue_idx, 0, socket_id, &cvq);
+	if (rc < 0) {
+		PMD_INIT_LOG(ERR, "control vq initialization failed");
+		return rc;
+	}
+
+	hw->cvq = cvq;
+	return 0;
+}
+
+/*
+ * Close the port: detach the config interrupt vector when the driver has
+ * the LSC flag set, reset the device, mark it as not started and release
+ * the queues' mbufs.
+ */
+static void
+virtio_dev_close(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	const int lsc_enabled =
+		(dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) != 0;
+
+	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
+
+	if (lsc_enabled)
+		vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
+
+	/* reset the NIC */
+	vtpci_reset(hw);
+	hw->started = 0;
+
+	virtio_dev_free_mbufs(dev);
+}
+
+/*
+ * Send VIRTIO_NET_CTRL_RX_PROMISC with a one-byte argument of 1 on the
+ * control queue. A non-zero command result is only logged.
+ */
+static void
+virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+	int arg_len = 1;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
+	ctrl.data[0] = 1;
+
+	if (virtio_send_command(hw->cvq, &ctrl, &arg_len, 1))
+		PMD_INIT_LOG(ERR, "Failed to enable promisc");
+}
+
+/*
+ * Send VIRTIO_NET_CTRL_RX_PROMISC with a one-byte argument of 0 on the
+ * control queue. A non-zero command result is only logged.
+ */
+static void
+virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+	int arg_len = 1;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
+	ctrl.data[0] = 0;
+
+	if (virtio_send_command(hw->cvq, &ctrl, &arg_len, 1))
+		PMD_INIT_LOG(ERR, "Failed to disable promisc");
+}
+
+/*
+ * Send VIRTIO_NET_CTRL_RX_ALLMULTI with a one-byte argument of 1 on the
+ * control queue. A non-zero command result is only logged.
+ */
+static void
+virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+	int arg_len = 1;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
+	ctrl.data[0] = 1;
+
+	if (virtio_send_command(hw->cvq, &ctrl, &arg_len, 1))
+		PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
+}
+
+/*
+ * Send VIRTIO_NET_CTRL_RX_ALLMULTI with a one-byte argument of 0 on the
+ * control queue. A non-zero command result is only logged.
+ */
+static void
+virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+	int arg_len = 1;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
+	ctrl.data[0] = 0;
+
+	if (virtio_send_command(hw->cvq, &ctrl, &arg_len, 1))
+		PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
+}
+
+/*
+ * dev_ops for virtio, bare necessities for basic operation
+ */
+static const struct eth_dev_ops virtio_eth_dev_ops = {
+	.dev_configure           = virtio_dev_configure,
+	.dev_start               = virtio_dev_start,
+	.dev_stop                = virtio_dev_stop,
+	.dev_close               = virtio_dev_close,
+	.promiscuous_enable      = virtio_dev_promiscuous_enable,
+	.promiscuous_disable     = virtio_dev_promiscuous_disable,
+	.allmulticast_enable     = virtio_dev_allmulticast_enable,
+	.allmulticast_disable    = virtio_dev_allmulticast_disable,
+
+	.dev_infos_get           = virtio_dev_info_get,
+	.stats_get               = virtio_dev_stats_get,
+	.stats_reset             = virtio_dev_stats_reset,
+	.link_update             = virtio_dev_link_update,
+	.rx_queue_setup          = virtio_dev_rx_queue_setup,
+	/* meaningful only for multiple queues */
+	.rx_queue_release        = virtio_dev_rx_queue_release,
+	.tx_queue_setup          = virtio_dev_tx_queue_setup,
+	/* meaningful only for multiple queues */
+	.tx_queue_release        = virtio_dev_tx_queue_release,
+	/* collect stats per queue */
+	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
+	.vlan_filter_set         = virtio_vlan_filter_set,
+	.mac_addr_add            = virtio_mac_addr_add,
+	.mac_addr_remove         = virtio_mac_addr_remove,
+	.mac_addr_set            = virtio_mac_addr_set,
+};
+
+/*
+ * Copy the device's link status (dev->data->dev_link) into *link with a
+ * single 64-bit compare-and-set.
+ *
+ * Returns 0 when the compare-and-set succeeds, -1 otherwise.
+ */
+static inline int
+virtio_dev_atomic_read_link_status(struct rte_eth_dev *dev,
+				struct rte_eth_link *link)
+{
+	uint64_t *to = (uint64_t *)link;
+	uint64_t *from = (uint64_t *)&(dev->data->dev_link);
+
+	return rte_atomic64_cmpset(to, *to, *from) ? 0 : -1;
+}
+
+/**
+ * Atomically writes the link status information into global
+ * structure rte_eth_dev.
+ *
+ * @param dev
+ *   - Pointer to the structure rte_eth_dev whose dev_link is overwritten.
+ * @param link
+ *   - Pointer to the link status to store.
+ *
+ * @return
+ *   - On success, zero.
+ *   - On failure, negative value.
+ */
+static inline int
+virtio_dev_atomic_write_link_status(struct rte_eth_dev *dev,
+		struct rte_eth_link *link)
+{
+	uint64_t *to = (uint64_t *)&(dev->data->dev_link);
+	uint64_t *from = (uint64_t *)link;
+
+	return rte_atomic64_cmpset(to, *to, *from) ? 0 : -1;
+}
+
+/*
+ * Add the packet, byte and error counters of every configured TX and RX
+ * queue to the totals in *stats, fill the per-queue packet/byte slots for
+ * the first RTE_ETHDEV_QUEUE_STAT_CNTRS queues, and report the port's mbuf
+ * allocation failures in rx_nombuf. Queue slots that are NULL are skipped.
+ */
+static void
+virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+	unsigned q;
+
+	for (q = 0; q < dev->data->nb_tx_queues; q++) {
+		const struct virtqueue *vq = dev->data->tx_queues[q];
+
+		if (vq != NULL) {
+			stats->opackets += vq->packets;
+			stats->obytes += vq->bytes;
+			stats->oerrors += vq->errors;
+
+			if (q < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+				stats->q_opackets[q] = vq->packets;
+				stats->q_obytes[q] = vq->bytes;
+			}
+		}
+	}
+
+	for (q = 0; q < dev->data->nb_rx_queues; q++) {
+		const struct virtqueue *vq = dev->data->rx_queues[q];
+
+		if (vq != NULL) {
+			stats->ipackets += vq->packets;
+			stats->ibytes += vq->bytes;
+			stats->ierrors += vq->errors;
+
+			if (q < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+				stats->q_ipackets[q] = vq->packets;
+				stats->q_ibytes[q] = vq->bytes;
+			}
+		}
+	}
+
+	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
+}
+
+/*
+ * Zero the packet, byte and error counters of every configured TX and RX
+ * queue, and the port's mbuf allocation failure counter. Queue slots that
+ * are NULL are skipped.
+ */
+static void
+virtio_dev_stats_reset(struct rte_eth_dev *dev)
+{
+	unsigned int q;
+
+	for (q = 0; q < dev->data->nb_tx_queues; q++) {
+		struct virtqueue *vq = dev->data->tx_queues[q];
+
+		if (vq != NULL) {
+			vq->packets = 0;
+			vq->bytes = 0;
+			vq->errors = 0;
+		}
+	}
+
+	for (q = 0; q < dev->data->nb_rx_queues; q++) {
+		struct virtqueue *vq = dev->data->rx_queues[q];
+
+		if (vq != NULL) {
+			vq->packets = 0;
+			vq->bytes = 0;
+			vq->errors = 0;
+		}
+	}
+
+	dev->data->rx_mbuf_alloc_failed = 0;
+}
+
+/*
+ * Write hw->mac_addr into the mac field of the device's virtio_net_config.
+ */
+static void
+virtio_set_hwaddr(struct virtio_hw *hw)
+{
+	const size_t mac_off = offsetof(struct virtio_net_config, mac);
+
+	vtpci_write_dev_config(hw, mac_off, &hw->mac_addr, ETHER_ADDR_LEN);
+}
+
+/*
+ * Fill hw->mac_addr: read it from the device config when VIRTIO_NET_F_MAC
+ * was negotiated, otherwise generate a random address and write it to the
+ * device.
+ */
+static void
+virtio_get_hwaddr(struct virtio_hw *hw)
+{
+	if (!vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) {
+		eth_random_addr(&hw->mac_addr[0]);
+		virtio_set_hwaddr(hw);
+		return;
+	}
+
+	vtpci_read_dev_config(hw,
+		offsetof(struct virtio_net_config, mac),
+		&hw->mac_addr, ETHER_ADDR_LEN);
+}
+
+/*
+ * Send VIRTIO_NET_CTRL_MAC_TABLE_SET with two data segments: the unicast
+ * table uc followed by the multicast table mc. Each segment is the entry
+ * count plus entries * ETHER_ADDR_LEN bytes of addresses.
+ *
+ * Returns the result of virtio_send_command(); a non-zero result is logged.
+ */
+static int
+virtio_mac_table_set(struct virtio_hw *hw,
+		     const struct virtio_net_ctrl_mac *uc,
+		     const struct virtio_net_ctrl_mac *mc)
+{
+	struct virtio_pmd_ctrl ctrl;
+	int seg_len[2];
+	int rc;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
+
+	seg_len[0] = uc->entries * ETHER_ADDR_LEN + sizeof(uc->entries);
+	seg_len[1] = mc->entries * ETHER_ADDR_LEN + sizeof(mc->entries);
+
+	/* the multicast table starts right after the unicast one */
+	memcpy(ctrl.data, uc, seg_len[0]);
+	memcpy(ctrl.data + seg_len[0], mc, seg_len[1]);
+
+	rc = virtio_send_command(hw->cvq, &ctrl, seg_len, 2);
+	if (rc != 0)
+		PMD_DRV_LOG(NOTICE, "mac table set failed: %d", rc);
+
+	return rc;
+}
+
+/*
+ * Rebuild the unicast and multicast MAC tables from the port's address
+ * array, using mac_addr in place of slot index, and send them to the device.
+ * An index of VIRTIO_MAX_MAC_ADDRS or more is rejected with an error log.
+ */
+static void
+virtio_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
+		    uint32_t index, uint32_t vmdq __rte_unused)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	const struct ether_addr *addrs = dev->data->mac_addrs;
+	struct virtio_net_ctrl_mac *uc, *mc;
+	unsigned int slot;
+
+	if (index >= VIRTIO_MAX_MAC_ADDRS) {
+		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
+		return;
+	}
+
+	/* each table is sized for the worst case of every slot landing in it */
+	uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
+	mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
+	uc->entries = 0;
+	mc->entries = 0;
+
+	for (slot = 0; slot < VIRTIO_MAX_MAC_ADDRS; slot++) {
+		const struct ether_addr *addr = addrs + slot;
+		struct virtio_net_ctrl_mac *tbl = uc;
+
+		if (slot == index)
+			addr = mac_addr;
+		if (is_multicast_ether_addr(addr))
+			tbl = mc;
+
+		memcpy(&tbl->macs[tbl->entries], addr, ETHER_ADDR_LEN);
+		tbl->entries++;
+	}
+
+	virtio_mac_table_set(hw, uc, mc);
+}
+
+/*
+ * Rebuild the unicast and multicast MAC tables from the port's address
+ * array, leaving out slot index and all-zero addresses, and send them to
+ * the device. An index of VIRTIO_MAX_MAC_ADDRS or more is rejected with an
+ * error log.
+ */
+static void
+virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct ether_addr *addrs = dev->data->mac_addrs;
+	struct virtio_net_ctrl_mac *uc, *mc;
+	unsigned int slot;
+
+	if (index >= VIRTIO_MAX_MAC_ADDRS) {
+		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
+		return;
+	}
+
+	/* each table is sized for the worst case of every slot landing in it */
+	uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
+	mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
+	uc->entries = 0;
+	mc->entries = 0;
+
+	for (slot = 0; slot < VIRTIO_MAX_MAC_ADDRS; slot++) {
+		struct ether_addr *addr = addrs + slot;
+		struct virtio_net_ctrl_mac *tbl;
+
+		if (slot != index && !is_zero_ether_addr(addr)) {
+			tbl = is_multicast_ether_addr(addr) ? mc : uc;
+			memcpy(&tbl->macs[tbl->entries], addr, ETHER_ADDR_LEN);
+			tbl->entries++;
+		}
+	}
+
+	virtio_mac_table_set(hw, uc, mc);
+}
+
+/*
+ * Store mac_addr in hw->mac_addr and push it to the device: through the
+ * VIRTIO_NET_CTRL_MAC_ADDR_SET control command when
+ * VIRTIO_NET_F_CTRL_MAC_ADDR was negotiated, otherwise through the device
+ * config when VIRTIO_NET_F_MAC was negotiated. With neither feature only
+ * the local copy changes.
+ */
+static void
+virtio_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+	int arg_len = ETHER_ADDR_LEN;
+
+	memcpy(hw->mac_addr, mac_addr, ETHER_ADDR_LEN);
+
+	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
+		if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC))
+			virtio_set_hwaddr(hw);
+		return;
+	}
+
+	/* Use atomic update if available */
+	ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
+	memcpy(ctrl.data, mac_addr, ETHER_ADDR_LEN);
+
+	virtio_send_command(hw->cvq, &ctrl, &arg_len, 1);
+}
+
+/*
+ * Add (on != 0) or delete (on == 0) vlan_id in the device's VLAN filter
+ * through the control queue.
+ *
+ * Returns -ENOTSUP when VIRTIO_NET_F_CTRL_VLAN was not negotiated,
+ * otherwise the result of virtio_send_command().
+ */
+static int
+virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+	int arg_len = sizeof(vlan_id);
+
+	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
+		return -ENOTSUP;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
+	if (on)
+		ctrl.hdr.cmd = VIRTIO_NET_CTRL_VLAN_ADD;
+	else
+		ctrl.hdr.cmd = VIRTIO_NET_CTRL_VLAN_DEL;
+	memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
+
+	return virtio_send_command(hw->cvq, &ctrl, &arg_len, 1);
+}
+
+/*
+ * Negotiate feature bits with the device.
+ *
+ * The driver's wish list is VTNET_FEATURES minus the features it does not
+ * implement; that list and the host's feature bits go through
+ * vtpci_negotiate_features(), whose result is stored in hw->guest_features.
+ */
+static void
+virtio_negotiate_features(struct virtio_hw *hw)
+{
+	uint32_t host_features;
+	uint32_t mask =
+		/* checksum offload not implemented */
+		VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM |
+		/* TSO and LRO are only available when their corresponding
+		 * checksum offload feature is also negotiated.
+		 */
+		VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 |
+		VIRTIO_NET_F_HOST_ECN |
+		VIRTIO_NET_F_GUEST_TSO4 | VIRTIO_NET_F_GUEST_TSO6 |
+		VIRTIO_NET_F_GUEST_ECN |
+		VTNET_LRO_FEATURES |
+		/* not negotiating INDIRECT descriptor table support */
+		VIRTIO_RING_F_INDIRECT_DESC;
+
+	/* Prepare guest_features: feature that driver wants to support */
+	hw->guest_features = VTNET_FEATURES & ~mask;
+	PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %x",
+		hw->guest_features);
+
+	/* Read device(host) feature bits */
+	host_features = VIRTIO_READ_REG_4(hw, VIRTIO_PCI_HOST_FEATURES);
+	PMD_INIT_LOG(DEBUG, "host_features before negotiate = %x",
+		host_features);
+
+	/*
+	 * Negotiate features: Subset of device feature bits are written back
+	 * guest feature bits.
+	 */
+	hw->guest_features = vtpci_negotiate_features(hw, host_features);
+	PMD_INIT_LOG(DEBUG, "features after negotiate = %x",
+		hw->guest_features);
+}
+
+#ifdef RTE_EXEC_ENV_LINUXAPP
+/*
+ * Read the first line of a sysfs file and parse it as an unsigned long
+ * (base auto-detected by strtoul) into *val.
+ *
+ * Returns 0 on success, -1 when the file cannot be opened or read, or when
+ * the number is not immediately followed by a newline.
+ */
+static int
+parse_sysfs_value(const char *filename, unsigned long *val)
+{
+	char buf[BUFSIZ];
+	char *end = NULL;
+	int rc = -1;
+	FILE *f = fopen(filename, "r");
+
+	if (f == NULL) {
+		PMD_INIT_LOG(ERR, "%s(): cannot open sysfs value %s",
+			     __func__, filename);
+		return -1;
+	}
+
+	if (fgets(buf, sizeof(buf), f) == NULL) {
+		PMD_INIT_LOG(ERR, "%s(): cannot read sysfs value %s",
+			     __func__, filename);
+		goto out;
+	}
+
+	*val = strtoul(buf, &end, 0);
+	if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
+		PMD_INIT_LOG(ERR, "%s(): cannot parse sysfs value %s",
+			     __func__, filename);
+		goto out;
+	}
+	rc = 0;
+
+out:
+	fclose(f);
+	return rc;
+}
+
+/*
+ * Locate the uio entry of the PCI device at loc in sysfs.
+ *
+ * Scans <device>/uio, or the device directory itself when that cannot be
+ * opened, for the first entry named "uio<N>" or "uio:uio<N>". On success
+ * writes the entry's full path to buf (at most buflen bytes), stores N in
+ * *uio_num and returns 0. Returns -1 when no directory can be opened or no
+ * matching entry exists.
+ */
+static int get_uio_dev(struct rte_pci_addr *loc, char *buf, unsigned int buflen,
+			unsigned int *uio_num)
+{
+	struct dirent *e;
+	DIR *dir;
+	char dirname[PATH_MAX];
+
+	/* depending on kernel version, uio can be located in uio/uioX
+	 * or uio:uioX */
+	snprintf(dirname, sizeof(dirname),
+		     SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio",
+		     loc->domain, loc->bus, loc->devid, loc->function);
+	dir = opendir(dirname);
+	if (dir == NULL) {
+		/* retry with the parent directory */
+		snprintf(dirname, sizeof(dirname),
+			     SYSFS_PCI_DEVICES "/" PCI_PRI_FMT,
+			     loc->domain, loc->bus, loc->devid, loc->function);
+		dir = opendir(dirname);
+
+		if (dir == NULL) {
+			PMD_INIT_LOG(ERR, "Cannot opendir %s", dirname);
+			return -1;
+		}
+	}
+
+	/* take the first file starting with "uio" */
+	while ((e = readdir(dir)) != NULL) {
+		/* format could be uio%d ...*/
+		int shortprefix_len = sizeof("uio") - 1;
+		/* ... or uio:uio%d */
+		int longprefix_len = sizeof("uio:uio") - 1;
+		char *endptr;
+
+		if (strncmp(e->d_name, "uio", 3) != 0)
+			continue;
+
+		/* first try uio%d */
+		errno = 0;
+		*uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10);
+		if (errno == 0 && endptr != (e->d_name + shortprefix_len)) {
+			snprintf(buf, buflen, "%s/uio%u", dirname, *uio_num);
+			break;
+		}
+
+		/*
+		 * Only look past the long prefix when the name really has
+		 * it; otherwise d_name + longprefix_len may lie beyond the
+		 * string's terminator.
+		 */
+		if (strncmp(e->d_name, "uio:uio", longprefix_len) != 0)
+			continue;
+
+		/* then try uio:uio%d */
+		errno = 0;
+		*uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10);
+		if (errno == 0 && endptr != (e->d_name + longprefix_len)) {
+			snprintf(buf, buflen, "%s/uio:uio%u", dirname,
+				     *uio_num);
+			break;
+		}
+	}
+	closedir(dir);
+
+	/* No uio resource found */
+	if (e == NULL) {
+		PMD_INIT_LOG(ERR, "Could not find uio resource");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Report whether the PCI device at loc has an msi_irqs directory in sysfs.
+ * Returns 1 when the directory can be opened, 0 otherwise.
+ */
+static int
+virtio_has_msix(const struct rte_pci_addr *loc)
+{
+	char path[PATH_MAX];
+	DIR *msi_dir;
+
+	snprintf(path, sizeof(path),
+		     SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/msi_irqs",
+		     loc->domain, loc->bus, loc->devid, loc->function);
+
+	msi_dir = opendir(path);
+	if (msi_dir == NULL)
+		return 0;
+
+	closedir(msi_dir);
+	return 1;
+}
+
+/*
+ * Extract I/O port numbers from sysfs.
+ *
+ * Reads portio/port0/size and portio/port0/start below the device's uio
+ * entry into mem_resource[0], opens /dev/uio<N> as the interrupt handle fd
+ * and sets RTE_PCI_DRV_INTR_LSC on the driver. Returns 0 on success and -1
+ * on any failure.
+ */
+static int virtio_resource_init_by_uio(struct rte_pci_device *pci_dev)
+{
+	char uio_path[PATH_MAX];
+	char attr_path[PATH_MAX];
+	unsigned long port_start, port_len;
+	unsigned int uio_idx;
+
+	if (get_uio_dev(&pci_dev->addr, uio_path, sizeof(uio_path), &uio_idx) < 0)
+		return -1;
+
+	/* get portio size */
+	snprintf(attr_path, sizeof(attr_path),
+		     "%s/portio/port0/size", uio_path);
+	if (parse_sysfs_value(attr_path, &port_len) < 0) {
+		PMD_INIT_LOG(ERR, "%s(): cannot parse size",
+			     __func__);
+		return -1;
+	}
+
+	/* get portio start */
+	snprintf(attr_path, sizeof(attr_path),
+		 "%s/portio/port0/start", uio_path);
+	if (parse_sysfs_value(attr_path, &port_start) < 0) {
+		PMD_INIT_LOG(ERR, "%s(): cannot parse portio start",
+			     __func__);
+		return -1;
+	}
+
+	pci_dev->mem_resource[0].addr = (void *)(uintptr_t)port_start;
+	pci_dev->mem_resource[0].len =  (uint64_t)port_len;
+	PMD_INIT_LOG(DEBUG,
+		     "PCI Port IO found start=0x%lx with size=0x%lx",
+		     port_start, port_len);
+
+	/* save fd; the sysfs path buffer is reused for the device node name */
+	memset(uio_path, 0, sizeof(uio_path));
+	snprintf(uio_path, sizeof(uio_path), "/dev/uio%u", uio_idx);
+	pci_dev->intr_handle.fd = open(uio_path, O_RDWR);
+	if (pci_dev->intr_handle.fd < 0) {
+		PMD_INIT_LOG(ERR, "Cannot open %s: %s\n",
+			uio_path, strerror(errno));
+		return -1;
+	}
+
+	pci_dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
+	pci_dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
+
+	return 0;
+}
+
+/*
+ * Extract port I/O numbers from proc/ioports.
+ *
+ * Scans /proc/ioports for the first line whose description starts with
+ * the device's PCI address and parses the "start-end" range in front of
+ * the ':' separator. The range is stored in mem_resource[0] and the LSC
+ * interrupt flag is cleared on the driver.
+ * Returns 0 on success, -1 if the file cannot be opened or no usable
+ * entry is found.
+ */
+static int virtio_resource_init_by_ioports(struct rte_pci_device *pci_dev)
+{
+	uint16_t start = 0, end = 0;
+	int size = 0;
+	FILE *fp;
+	char *line = NULL;
+	char pci_id[16];
+	int found = 0;
+	size_t linesz = 0;
+
+	snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
+		 pci_dev->addr.domain,
+		 pci_dev->addr.bus,
+		 pci_dev->addr.devid,
+		 pci_dev->addr.function);
+
+	fp = fopen("/proc/ioports", "r");
+	if (fp == NULL) {
+		PMD_INIT_LOG(ERR, "%s(): can't open ioports", __func__);
+		return -1;
+	}
+
+	while (getdelim(&line, &linesz, '\n', fp) > 0) {
+		char *ptr = line;
+		char *left;
+		int n;
+
+		n = strcspn(ptr, ":");
+		/* no ':' on this line: there is nothing after the range,
+		 * and ptr[n+1] would be past the terminator */
+		if (ptr[n] == '\0')
+			continue;
+		ptr[n] = 0;
+		left = &ptr[n+1];
+
+		/* ctype functions require an unsigned char value */
+		while (*left && isspace((unsigned char)*left))
+			left++;
+
+		if (!strncmp(left, pci_id, strlen(pci_id))) {
+			while (*ptr && isspace((unsigned char)*ptr))
+				ptr++;
+
+			/* skip entries whose range does not parse cleanly
+			 * instead of using uninitialised bounds */
+			if (sscanf(ptr, "%04hx-%04hx", &start, &end) != 2 ||
+			    end < start)
+				continue;
+
+			found = 1;
+			size = end - start + 1;
+
+			break;
+		}
+	}
+
+	free(line);
+	fclose(fp);
+
+	if (!found)
+		return -1;
+
+	pci_dev->mem_resource[0].addr = (void *)(uintptr_t)(uint32_t)start;
+	pci_dev->mem_resource[0].len =  (uint64_t)size;
+	PMD_INIT_LOG(DEBUG,
+		"PCI Port IO found start=0x%x with size=0x%x",
+		start, size);
+
+	/* can't support lsc interrupt without uio */
+	pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
+
+	return 0;
+}
+
+/*
+ * Discover the device's I/O port resource: try the UIO sysfs path
+ * first and fall back to parsing /proc/ioports when that fails.
+ */
+static int virtio_resource_init(struct rte_pci_device *pci_dev)
+{
+	if (virtio_resource_init_by_uio(pci_dev) != 0)
+		return virtio_resource_init_by_ioports(pci_dev);
+
+	return 0;
+}
+
+#else
+/*
+ * Variant compiled in the #else branch of the surrounding conditional:
+ * always reports that MSI-X is not in use (returns 0).
+ */
+static int
+virtio_has_msix(const struct rte_pci_addr *loc __rte_unused)
+{
+	/* nic_uio does not enable interrupts, return 0 (false). */
+	return 0;
+}
+
+/*
+ * Variant compiled in the #else branch of the surrounding conditional:
+ * performs no resource discovery and always returns 0.
+ */
+static int virtio_resource_init(struct rte_pci_device *pci_dev __rte_unused)
+{
+	/* no setup required */
+	return 0;
+}
+#endif
+
+/*
+ * Interrupt callback for the virtio device.
+ * Reads (and thereby clears) the ISR, re-enables the interrupt, and on a
+ * config-change interrupt refreshes the link state; the LSC callbacks are
+ * invoked only when the link state actually changed.
+ */
+static void
+virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
+			 void *param)
+{
+	struct rte_eth_dev *eth_dev = param;
+	struct virtio_hw *hw = eth_dev->data->dev_private;
+	/* reading the interrupt status register also clears it */
+	const uint8_t isr_bits = vtpci_isr(hw);
+
+	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr_bits);
+
+	if (rte_intr_enable(&eth_dev->pci_dev->intr_handle) < 0)
+		PMD_DRV_LOG(ERR, "interrupt enable failed");
+
+	/* link update returns 0 only when the status changed */
+	if ((isr_bits & VIRTIO_PCI_ISR_CONFIG) &&
+	    virtio_dev_link_update(eth_dev, 0) == 0)
+		_rte_eth_dev_callback_process(eth_dev,
+					      RTE_ETH_EVENT_INTR_LSC);
+}
+
+/*
+ * Select the receive burst function: the mergeable-buffer variant when
+ * VIRTIO_NET_F_MRG_RXBUF was negotiated, the plain one otherwise.
+ */
+static void
+rx_func_get(struct rte_eth_dev *eth_dev)
+{
+	struct virtio_hw *hw = eth_dev->data->dev_private;
+
+	eth_dev->rx_pkt_burst =
+		vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ?
+		&virtio_recv_mergeable_pkts : &virtio_recv_pkts;
+}
+
+/*
+ * This function is based on probe() function in virtio_pci.c
+ * It returns 0 on success.
+ *
+ * In a secondary process only the burst functions are set up. In the
+ * primary process the MAC table is allocated, the I/O port resource is
+ * discovered, the device is reset and features are negotiated, the
+ * queue limits are read from the device config (when a control queue is
+ * available), the LSC interrupt callback is registered if supported and
+ * the control queue is started.
+ * Returns -ENOMEM if the MAC table cannot be allocated and -1 if the
+ * I/O port resource cannot be found.
+ */
+static int
+eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
+{
+	struct virtio_hw *hw = eth_dev->data->dev_private;
+	struct virtio_net_config *config;
+	struct virtio_net_config local_config;
+	uint32_t offset_conf = sizeof(config->mac);
+	struct rte_pci_device *pci_dev;
+
+	RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr));
+
+	eth_dev->dev_ops = &virtio_eth_dev_ops;
+	eth_dev->tx_pkt_burst = &virtio_xmit_pkts;
+
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+		rx_func_get(eth_dev);
+		return 0;
+	}
+
+	/* Allocate memory for storing MAC addresses */
+	eth_dev->data->mac_addrs = rte_zmalloc("virtio", ETHER_ADDR_LEN, 0);
+	if (eth_dev->data->mac_addrs == NULL) {
+		PMD_INIT_LOG(ERR,
+			"Failed to allocate %d bytes needed to store MAC addresses",
+			ETHER_ADDR_LEN);
+		return -ENOMEM;
+	}
+
+	pci_dev = eth_dev->pci_dev;
+	if (virtio_resource_init(pci_dev) < 0) {
+		/* do not leak the MAC table allocated above */
+		rte_free(eth_dev->data->mac_addrs);
+		eth_dev->data->mac_addrs = NULL;
+		return -1;
+	}
+
+	hw->use_msix = virtio_has_msix(&pci_dev->addr);
+	hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr;
+
+	/* Reset the device although not necessary at startup */
+	vtpci_reset(hw);
+
+	/* Tell the host we've noticed this device. */
+	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
+
+	/* Tell the host we've known how to drive the device. */
+	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
+	virtio_negotiate_features(hw);
+
+	rx_func_get(eth_dev);
+
+	/* Setting up rx_header size for the device */
+	if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF))
+		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	else
+		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
+
+	/* Copy the permanent MAC address to: virtio_hw */
+	virtio_get_hwaddr(hw);
+	ether_addr_copy((struct ether_addr *) hw->mac_addr,
+			&eth_dev->data->mac_addrs[0]);
+	PMD_INIT_LOG(DEBUG,
+		     "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
+		     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
+		     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
+
+	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
+		config = &local_config;
+
+		/*
+		 * offset_conf grows to cover each optional config field the
+		 * host provides; fields that are not provided get a default
+		 * here and are left untouched by the read below.
+		 */
+		if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
+			offset_conf += sizeof(config->status);
+		} else {
+			PMD_INIT_LOG(DEBUG,
+				     "VIRTIO_NET_F_STATUS is not supported");
+			config->status = 0;
+		}
+
+		if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
+			offset_conf += sizeof(config->max_virtqueue_pairs);
+		} else {
+			PMD_INIT_LOG(DEBUG,
+				     "VIRTIO_NET_F_MQ is not supported");
+			config->max_virtqueue_pairs = 1;
+		}
+
+		vtpci_read_dev_config(hw, 0, (uint8_t *)config, offset_conf);
+
+		/* clamp the queue counts to what the driver supports */
+		hw->max_rx_queues =
+			(VIRTIO_MAX_RX_QUEUES < config->max_virtqueue_pairs) ?
+			VIRTIO_MAX_RX_QUEUES : config->max_virtqueue_pairs;
+		hw->max_tx_queues =
+			(VIRTIO_MAX_TX_QUEUES < config->max_virtqueue_pairs) ?
+			VIRTIO_MAX_TX_QUEUES : config->max_virtqueue_pairs;
+
+		virtio_dev_cq_queue_setup(eth_dev,
+					config->max_virtqueue_pairs * 2,
+					SOCKET_ID_ANY);
+
+		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
+				config->max_virtqueue_pairs);
+		PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
+		PMD_INIT_LOG(DEBUG,
+				"PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
+				config->mac[0], config->mac[1],
+				config->mac[2], config->mac[3],
+				config->mac[4], config->mac[5]);
+	} else {
+		hw->max_rx_queues = 1;
+		hw->max_tx_queues = 1;
+	}
+
+	eth_dev->data->nb_rx_queues = hw->max_rx_queues;
+	eth_dev->data->nb_tx_queues = hw->max_tx_queues;
+
+	PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d   hw->max_tx_queues=%d",
+			hw->max_rx_queues, hw->max_tx_queues);
+	PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
+			eth_dev->data->port_id, pci_dev->id.vendor_id,
+			pci_dev->id.device_id);
+
+	/* Setup interrupt callback  */
+	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+		rte_intr_callback_register(&pci_dev->intr_handle,
+				   virtio_interrupt_handler, eth_dev);
+
+	virtio_dev_cq_start(eth_dev);
+
+	return 0;
+}
+
+/*
+ * Ethernet driver descriptor registered by rte_virtio_pmd_init():
+ * names the driver, points at the virtio PCI id table, and supplies the
+ * per-device init hook and the size of the private data area.
+ */
+static struct eth_driver rte_virtio_pmd = {
+	{
+		.name = "rte_virtio_pmd",
+		.id_table = pci_id_virtio_map,
+	},
+	.eth_dev_init = eth_virtio_dev_init,
+	.dev_private_size = sizeof(struct virtio_hw),
+};
+
+/*
+ * Driver initialization routine.
+ * Invoked once at EAL init time.
+ * Register itself as the [Poll Mode] Driver of PCI virtio devices,
+ * provided rte_eal_iopl_init() succeeded.
+ * Returns 0 on success, -1 when the IOPL call failed.
+ */
+static int
+rte_virtio_pmd_init(const char *name __rte_unused,
+		    const char *param __rte_unused)
+{
+	if (rte_eal_iopl_init() == 0) {
+		rte_eth_driver_register(&rte_virtio_pmd);
+		return 0;
+	}
+
+	PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD");
+	return -1;
+}
+
+/*
+ * Only 1 queue is supported, no queue release related operation
+ *
+ * RX queue release hook: intentionally empty, the argument is ignored.
+ */
+static void
+virtio_dev_rx_queue_release(__rte_unused void *rxq)
+{
+}
+
+/* TX queue release hook: intentionally empty, the argument is ignored. */
+static void
+virtio_dev_tx_queue_release(__rte_unused void *txq)
+{
+}
+
+/*
+ * Configure virtio device
+ * It returns 0 on success.
+ *
+ * Rejects HW IP checksum (-EINVAL) and VLAN filtering the host cannot
+ * provide (-ENOTSUP), records the VLAN strip setting, and, when the
+ * driver is LSC-interrupt capable, programs config vector 0 (-EBUSY if
+ * the device refuses it).
+ */
+static int
+virtio_dev_configure(struct rte_eth_dev *dev)
+{
+	struct rte_pci_device *pci_dev = dev->pci_dev;
+	struct virtio_hw *hw = dev->data->dev_private;
+	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
+
+	PMD_INIT_LOG(DEBUG, "configure");
+
+	if (rxmode->hw_ip_checksum) {
+		PMD_DRV_LOG(ERR, "HW IP checksum not supported");
+		return -EINVAL;
+	}
+
+	hw->vlan_strip = rxmode->hw_vlan_strip;
+
+	if (rxmode->hw_vlan_filter &&
+	    !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
+		PMD_DRV_LOG(NOTICE,
+			    "vlan filtering not available on this host");
+		return -ENOTSUP;
+	}
+
+	/* the vector is only programmed when LSC interrupts are usable */
+	if ((pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) &&
+	    vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) {
+		PMD_DRV_LOG(ERR, "failed to set config vector");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Start the virtio device.
+ *
+ * Enables the LSC interrupt when it is both requested in the device
+ * configuration and supported by the driver, refreshes the link state,
+ * and - on the first start only - starts the rx/tx engine, marks the
+ * device as DRIVER_OK, configures multiple queues if more than one rx
+ * queue is in use, and notifies the backend on every rx queue.
+ * Returns 0 on success, -ENOTSUP if LSC is requested but the host lacks
+ * VIRTIO_NET_F_STATUS, -EIO if the interrupt cannot be enabled, and
+ * -EINVAL if multiqueue setup fails.
+ */
+static int
+virtio_dev_start(struct rte_eth_dev *dev)
+{
+	uint16_t nb_queues, i;
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct rte_pci_device *pci_dev = dev->pci_dev;
+
+	/* check if lsc interrupt feature is enabled */
+	if ((dev->data->dev_conf.intr_conf.lsc) &&
+		(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
+		if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
+			PMD_DRV_LOG(ERR, "link status not supported by host");
+			return -ENOTSUP;
+		}
+
+		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+			return -EIO;
+		}
+	}
+
+	/* Initialize Link state */
+	virtio_dev_link_update(dev, 0);
+
+	/* On restart after stop do not touch queues */
+	if (hw->started)
+		return 0;
+
+	/* Do final configuration before rx/tx engine starts */
+	virtio_dev_rxtx_start(dev);
+	vtpci_reinit_complete(hw);
+
+	hw->started = 1;
+
+	/*
+	 * Notify the backend.
+	 * Otherwise the tap backend might already have stopped its queue
+	 * because it was full, and the vhost backend would have no chance
+	 * to be woken up.
+	 */
+	nb_queues = dev->data->nb_rx_queues;
+	if (nb_queues > 1) {
+		if (virtio_set_multiple_queues(dev, nb_queues) != 0)
+			return -EINVAL;
+	}
+
+	PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
+
+	for (i = 0; i < nb_queues; i++)
+		virtqueue_notify(dev->data->rx_queues[i]);
+
+	PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++)
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+
+	return 0;
+}
+
+/*
+ * Detach every mbuf still held by the device's rx and tx virtqueues and
+ * free it, logging the number of mbufs released for each queue.
+ */
+static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
+{
+	struct rte_mbuf *buf;
+	int i, mbuf_num;
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		PMD_INIT_LOG(DEBUG,
+			     "Before freeing rxq[%d] used and unused buf", i);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+
+		/* count per queue, same as the tx loop below */
+		mbuf_num = 0;
+		while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(
+					dev->data->rx_queues[i])) != NULL) {
+			rte_pktmbuf_free(buf);
+			mbuf_num++;
+		}
+
+		PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num);
+		PMD_INIT_LOG(DEBUG,
+			     "After freeing rxq[%d] used and unused buf", i);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+	}
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		PMD_INIT_LOG(DEBUG,
+			     "Before freeing txq[%d] used and unused bufs",
+			     i);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+
+		mbuf_num = 0;
+		while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(
+					dev->data->tx_queues[i])) != NULL) {
+			rte_pktmbuf_free(buf);
+
+			mbuf_num++;
+		}
+
+		PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num);
+		PMD_INIT_LOG(DEBUG,
+			     "After freeing txq[%d] used and unused buf", i);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+	}
+}
+
+/*
+ * Stop device: disable interrupt and mark link down
+ */
+static void
+virtio_dev_stop(struct rte_eth_dev *dev)
+{
+	struct rte_eth_link down_link;
+
+	/* an all-zero link record is what gets published as "down" */
+	memset(&down_link, 0, sizeof(down_link));
+
+	PMD_INIT_LOG(DEBUG, "stop");
+
+	if (dev->data->dev_conf.intr_conf.lsc)
+		rte_intr_disable(&dev->pci_dev->intr_handle);
+
+	virtio_dev_atomic_write_link_status(dev, &down_link);
+}
+
+/*
+ * Refresh the published link state.
+ * Speed and duplex are fixed at 10G full duplex. The link status comes
+ * from the device config when VIRTIO_NET_F_STATUS was negotiated and is
+ * reported as up otherwise.
+ * Returns 0 when the link status changed, -1 when it did not.
+ */
+static int
+virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct rte_eth_link cur, prev;
+	uint16_t status;
+
+	memset(&cur, 0, sizeof(cur));
+	virtio_dev_atomic_read_link_status(dev, &cur);
+	prev = cur;
+
+	cur.link_duplex = FULL_DUPLEX;
+	cur.link_speed  = SPEED_10G;
+	cur.link_status = 1;   /* Link up unless the device reports down */
+
+	if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
+		PMD_INIT_LOG(DEBUG, "Get link status from hw");
+		vtpci_read_dev_config(hw,
+				offsetof(struct virtio_net_config, status),
+				&status, sizeof(status));
+		if (status & VIRTIO_NET_S_LINK_UP) {
+			PMD_INIT_LOG(DEBUG, "Port %d is up",
+				     dev->data->port_id);
+		} else {
+			cur.link_status = 0;
+			PMD_INIT_LOG(DEBUG, "Port %d is down",
+				     dev->data->port_id);
+		}
+	}
+	virtio_dev_atomic_write_link_status(dev, &cur);
+
+	if (prev.link_status == cur.link_status)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Fill 'dev_info' with the driver name, the queue limits recorded in
+ * the private data, the fixed buffer/packet/MAC limits of this driver
+ * and a default TX configuration with offloads disabled.
+ */
+static void
+virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	const struct virtio_hw *hw = dev->data->dev_private;
+
+	dev_info->driver_name = dev->driver->pci_drv.name;
+
+	/* limits negotiated at init time */
+	dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
+	dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
+
+	/* compile-time limits of the driver */
+	dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
+	dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
+	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
+
+	dev_info->default_txconf = (struct rte_eth_txconf) {
+		.txq_flags = ETH_TXQ_FLAGS_NOOFFLOADS
+	};
+}
+
+/*
+ * It enables testpmd to collect per queue stats.
+ *
+ * All arguments are ignored; the function only reports success (0).
+ */
+static int
+virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
+__rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
+__rte_unused uint8_t is_rx)
+{
+	return 0;
+}
+
+/*
+ * Driver descriptor of type PMD_PDEV whose init hook is
+ * rte_virtio_pmd_init(); handed to PMD_REGISTER_DRIVER below.
+ */
+static struct rte_driver rte_virtio_driver = {
+	.type = PMD_PDEV,
+	.init = rte_virtio_pmd_init,
+};
+
+PMD_REGISTER_DRIVER(rte_virtio_driver);
diff --git a/drivers/virtio/virtio_ethdev.h b/drivers/virtio/virtio_ethdev.h
new file mode 100644
index 0000000..e6d4533
--- /dev/null
+++ b/drivers/virtio/virtio_ethdev.h
@@ -0,0 +1,124 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_ETHDEV_H_
+#define _VIRTIO_ETHDEV_H_
+
+#include <stdint.h>
+
+#include "virtio_pci.h"
+
+#define SPEED_10	10
+#define SPEED_100	100
+#define SPEED_1000	1000
+#define SPEED_10G	10000
+#define HALF_DUPLEX	1
+#define FULL_DUPLEX	2
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#define VIRTIO_MAX_RX_QUEUES 128
+#define VIRTIO_MAX_TX_QUEUES 128
+#define VIRTIO_MAX_MAC_ADDRS 64
+#define VIRTIO_MIN_RX_BUFSIZE 64
+#define VIRTIO_MAX_RX_PKTLEN  9728
+
+/* Features desired/implemented by this driver. */
+#define VTNET_FEATURES \
+	(VIRTIO_NET_F_MAC       | \
+	VIRTIO_NET_F_STATUS     | \
+	VIRTIO_NET_F_MQ         | \
+	VIRTIO_NET_F_CTRL_MAC_ADDR | \
+	VIRTIO_NET_F_CTRL_VQ    | \
+	VIRTIO_NET_F_CTRL_RX    | \
+	VIRTIO_NET_F_CTRL_VLAN  | \
+	VIRTIO_NET_F_CSUM       | \
+	VIRTIO_NET_F_HOST_TSO4  | \
+	VIRTIO_NET_F_HOST_TSO6  | \
+	VIRTIO_NET_F_HOST_ECN   | \
+	VIRTIO_NET_F_GUEST_CSUM | \
+	VIRTIO_NET_F_GUEST_TSO4 | \
+	VIRTIO_NET_F_GUEST_TSO6 | \
+	VIRTIO_NET_F_GUEST_ECN  | \
+	VIRTIO_NET_F_MRG_RXBUF  | \
+	VIRTIO_RING_F_INDIRECT_DESC)
+
+/*
+ * CQ function prototype
+ */
+void virtio_dev_cq_start(struct rte_eth_dev *dev);
+
+/*
+ * RX/TX function prototypes
+ */
+void virtio_dev_rxtx_start(struct rte_eth_dev *dev);
+
+int virtio_dev_queue_setup(struct rte_eth_dev *dev,
+			int queue_type,
+			uint16_t queue_idx,
+			uint16_t  vtpci_queue_idx,
+			uint16_t nb_desc,
+			unsigned int socket_id,
+			struct virtqueue **pvq);
+
+int  virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+		uint16_t nb_rx_desc, unsigned int socket_id,
+		const struct rte_eth_rxconf *rx_conf,
+		struct rte_mempool *mb_pool);
+
+int  virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+		uint16_t nb_tx_desc, unsigned int socket_id,
+		const struct rte_eth_txconf *tx_conf);
+
+uint16_t virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts);
+
+uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts);
+
+uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+		uint16_t nb_pkts);
+
+
+/*
+ * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
+ * frames larger than 1514 bytes. We do not yet support software LRO
+ * via tcp_lro_rx().
+ */
+#define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \
+			    VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN)
+
+
+#endif /* _VIRTIO_ETHDEV_H_ */
diff --git a/drivers/virtio/virtio_logs.h b/drivers/virtio/virtio_logs.h
new file mode 100644
index 0000000..d6c33f7
--- /dev/null
+++ b/drivers/virtio/virtio_logs.h
@@ -0,0 +1,70 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_LOGS_H_
+#define _VIRTIO_LOGS_H_
+
+#include <rte_log.h>
+
+#ifdef RTE_LIBRTE_VIRTIO_DEBUG_INIT
+#define PMD_INIT_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
+#else
+#define PMD_INIT_LOG(level, fmt, args...) do { } while(0)
+#define PMD_INIT_FUNC_TRACE() do { } while(0)
+#endif
+
+#ifdef RTE_LIBRTE_VIRTIO_DEBUG_RX
+#define PMD_RX_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s() rx: " fmt , __func__, ## args)
+#else
+#define PMD_RX_LOG(level, fmt, args...) do { } while(0)
+#endif
+
+#ifdef RTE_LIBRTE_VIRTIO_DEBUG_TX
+#define PMD_TX_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s() tx: " fmt , __func__, ## args)
+#else
+#define PMD_TX_LOG(level, fmt, args...) do { } while(0)
+#endif
+
+
+#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DRIVER
+#define PMD_DRV_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s(): " fmt , __func__, ## args)
+#else
+#define PMD_DRV_LOG(level, fmt, args...) do { } while(0)
+#endif
+
+#endif /* _VIRTIO_LOGS_H_ */
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
new file mode 100644
index 0000000..2245bec
--- /dev/null
+++ b/drivers/virtio/virtio_pci.c
@@ -0,0 +1,147 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+
+#include "virtio_pci.h"
+#include "virtio_logs.h"
+
+static uint8_t vtpci_get_status(struct virtio_hw *);
+
+/*
+ * Copy 'length' bytes of the device-specific configuration area,
+ * starting at 'offset' within that area, into 'dst'.
+ * The area begins at VIRTIO_PCI_CONFIG(hw); the copy uses the widest
+ * register read (4, 2 or 1 bytes) that still fits the remaining length.
+ */
+void
+vtpci_read_dev_config(struct virtio_hw *hw, uint64_t offset,
+		void *dst, int length)
+{
+	uint64_t off;
+	uint8_t *d;
+	int size;
+
+	off = VIRTIO_PCI_CONFIG(hw) + offset;
+	/* 'size' is set inside the body before the loop increment uses it */
+	for (d = dst; length > 0; d += size, off += size, length -= size) {
+		if (length >= 4) {
+			size = 4;
+			*(uint32_t *)d = VIRTIO_READ_REG_4(hw, off);
+		} else if (length >= 2) {
+			size = 2;
+			*(uint16_t *)d = VIRTIO_READ_REG_2(hw, off);
+		} else {
+			size = 1;
+			*d = VIRTIO_READ_REG_1(hw, off);
+		}
+	}
+}
+
+/*
+ * Copy 'length' bytes from 'src' into the device-specific configuration
+ * area, starting at 'offset' within that area.
+ * The area begins at VIRTIO_PCI_CONFIG(hw); the copy uses the widest
+ * register write (4, 2 or 1 bytes) that still fits the remaining length.
+ */
+void
+vtpci_write_dev_config(struct virtio_hw *hw, uint64_t offset,
+		void *src, int length)
+{
+	uint64_t off;
+	uint8_t *s;
+	int size;
+
+	off = VIRTIO_PCI_CONFIG(hw) + offset;
+	/* 'size' is set inside the body before the loop increment uses it */
+	for (s = src; length > 0; s += size, off += size, length -= size) {
+		if (length >= 4) {
+			size = 4;
+			VIRTIO_WRITE_REG_4(hw, off, *(uint32_t *)s);
+		} else if (length >= 2) {
+			size = 2;
+			VIRTIO_WRITE_REG_2(hw, off, *(uint16_t *)s);
+		} else {
+			size = 1;
+			VIRTIO_WRITE_REG_1(hw, off, *s);
+		}
+	}
+}
+
+/*
+ * Write the intersection of the host's feature bits and the guest's
+ * wanted feature bits to the GUEST_FEATURES register and return it.
+ */
+uint32_t
+vtpci_negotiate_features(struct virtio_hw *hw, uint32_t host_features)
+{
+	/* only bits supported on both sides survive the negotiation */
+	const uint32_t negotiated = hw->guest_features & host_features;
+
+	VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_GUEST_FEATURES, negotiated);
+
+	return negotiated;
+}
+
+
+/*
+ * Reset the device by writing VIRTIO_CONFIG_STATUS_RESET to the status
+ * register, then read the status register back (the value is discarded).
+ */
+void
+vtpci_reset(struct virtio_hw *hw)
+{
+	/*
+	 * Setting the status to RESET sets the host device to
+	 * the original, uninitialized state.
+	 */
+	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
+	vtpci_get_status(hw);
+}
+
+/* Add VIRTIO_CONFIG_STATUS_DRIVER_OK to the device status register. */
+void
+vtpci_reinit_complete(struct virtio_hw *hw)
+{
+	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
+}
+
+/* Return the current value of the 8-bit device status register. */
+static uint8_t
+vtpci_get_status(struct virtio_hw *hw)
+{
+	return VIRTIO_READ_REG_1(hw, VIRTIO_PCI_STATUS);
+}
+
+/*
+ * Update the device status register.
+ * VIRTIO_CONFIG_STATUS_RESET is written as-is; any other value is OR-ed
+ * into the status currently held by the device before being written.
+ */
+void
+vtpci_set_status(struct virtio_hw *hw, uint8_t status)
+{
+	uint8_t new_status = status;
+
+	if (status != VIRTIO_CONFIG_STATUS_RESET)
+		new_status = (uint8_t)(status | vtpci_get_status(hw));
+
+	VIRTIO_WRITE_REG_1(hw, VIRTIO_PCI_STATUS, new_status);
+}
+
+/*
+ * Return the interrupt status register; per the register description in
+ * virtio_pci.h, reading it also clears it.
+ */
+uint8_t
+vtpci_isr(struct virtio_hw *hw)
+{
+
+	return VIRTIO_READ_REG_1(hw, VIRTIO_PCI_ISR);
+}
+
+
+/*
+ * Enable one vector (0) for Link State Interrupt.
+ * Writes 'vec' to the configuration-change vector register and returns
+ * the value read back from it; callers compare the result against
+ * VIRTIO_MSI_NO_VECTOR to detect failure.
+ */
+uint16_t
+vtpci_irq_config(struct virtio_hw *hw, uint16_t vec)
+{
+	VIRTIO_WRITE_REG_2(hw, VIRTIO_MSI_CONFIG_VECTOR, vec);
+	return VIRTIO_READ_REG_2(hw, VIRTIO_MSI_CONFIG_VECTOR);
+}
diff --git a/drivers/virtio/virtio_pci.h b/drivers/virtio/virtio_pci.h
new file mode 100644
index 0000000..64d9c34
--- /dev/null
+++ b/drivers/virtio/virtio_pci.h
@@ -0,0 +1,270 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_PCI_H_
+#define _VIRTIO_PCI_H_
+
+#include <stdint.h>
+
+#ifdef __FreeBSD__
+#include <sys/types.h>
+#include <machine/cpufunc.h>
+#else
+#include <sys/io.h>
+#endif
+
+#include <rte_ethdev.h>
+
+struct virtqueue;
+
+/* VirtIO PCI vendor/device ID. */
+#define VIRTIO_PCI_VENDORID     0x1AF4
+#define VIRTIO_PCI_DEVICEID_MIN 0x1000
+#define VIRTIO_PCI_DEVICEID_MAX 0x103F
+
+/* VirtIO ABI version, this must match exactly. */
+#define VIRTIO_PCI_ABI_VERSION 0
+
+/*
+ * VirtIO Header, located in BAR 0.
+ */
+#define VIRTIO_PCI_HOST_FEATURES  0  /* host's supported features (32bit, RO)*/
+#define VIRTIO_PCI_GUEST_FEATURES 4  /* guest's supported features (32, RW) */
+#define VIRTIO_PCI_QUEUE_PFN      8  /* physical address of VQ (32, RW) */
+#define VIRTIO_PCI_QUEUE_NUM      12 /* number of ring entries (16, RO) */
+#define VIRTIO_PCI_QUEUE_SEL      14 /* current VQ selection (16, RW) */
+#define VIRTIO_PCI_QUEUE_NOTIFY   16 /* notify host regarding VQ (16, RW) */
+#define VIRTIO_PCI_STATUS         18 /* device status register (8, RW) */
+#define VIRTIO_PCI_ISR		  19 /* interrupt status register, reading
+				      * also clears the register (8, RO) */
+/* Only if MSIX is enabled: */
+#define VIRTIO_MSI_CONFIG_VECTOR  20 /* configuration change vector (16, RW) */
+#define VIRTIO_MSI_QUEUE_VECTOR	  22 /* vector for selected VQ notifications
+				      (16, RW) */
+
+/* The bit of the ISR which indicates a device has an interrupt. */
+#define VIRTIO_PCI_ISR_INTR   0x1
+/* The bit of the ISR which indicates a device configuration change. */
+#define VIRTIO_PCI_ISR_CONFIG 0x2
+/* Vector value used to disable MSI for queue. */
+#define VIRTIO_MSI_NO_VECTOR 0xFFFF
+
+/* VirtIO device IDs. */
+#define VIRTIO_ID_NETWORK  0x01
+#define VIRTIO_ID_BLOCK    0x02
+#define VIRTIO_ID_CONSOLE  0x03
+#define VIRTIO_ID_ENTROPY  0x04
+#define VIRTIO_ID_BALLOON  0x05
+#define VIRTIO_ID_IOMEMORY 0x06
+#define VIRTIO_ID_9P       0x09
+
+/* Status byte for guest to report progress. */
+#define VIRTIO_CONFIG_STATUS_RESET     0x00
+#define VIRTIO_CONFIG_STATUS_ACK       0x01
+#define VIRTIO_CONFIG_STATUS_DRIVER    0x02
+#define VIRTIO_CONFIG_STATUS_DRIVER_OK 0x04
+#define VIRTIO_CONFIG_STATUS_FAILED    0x80
+
+/*
+ * Generate interrupt when the virtqueue ring is
+ * completely used, even if we've suppressed them.
+ */
+#define VIRTIO_F_NOTIFY_ON_EMPTY (1 << 24)
+
+/*
+ * The guest should never negotiate this feature; it
+ * is used to detect faulty drivers.
+ */
+#define VIRTIO_F_BAD_FEATURE (1 << 30)
+
+/*
+ * Some VirtIO feature bits (currently bits 28 through 31) are
+ * reserved for the transport being used (eg. virtio_ring), the
+ * rest are per-device feature bits.
+ */
+#define VIRTIO_TRANSPORT_F_START 28
+#define VIRTIO_TRANSPORT_F_END   32
+
+/*
+ * Each virtqueue indirect descriptor list must be physically contiguous.
+ * To allow us to malloc(9) each list individually, limit the number
+ * supported to what will fit in one page. With 4KB pages, this is a limit
+ * of 256 descriptors. If there is ever a need for more, we can switch to
+ * contigmalloc(9) for the larger allocations, similar to what
+ * bus_dmamem_alloc(9) does.
+ *
+ * Note the sizeof(struct vring_desc) is 16 bytes.
+ */
+#define VIRTIO_MAX_INDIRECT ((int) (PAGE_SIZE / 16))
+
+/* The feature bitmap for virtio net */
+#define VIRTIO_NET_F_CSUM       0x00001 /* Host handles pkts w/ partial csum */
+#define VIRTIO_NET_F_GUEST_CSUM 0x00002 /* Guest handles pkts w/ partial csum*/
+#define VIRTIO_NET_F_MAC        0x00020 /* Host has given MAC address. */
+#define VIRTIO_NET_F_GSO        0x00040 /* Host handles pkts w/ any GSO type */
+#define VIRTIO_NET_F_GUEST_TSO4 0x00080 /* Guest can handle TSOv4 in. */
+#define VIRTIO_NET_F_GUEST_TSO6 0x00100 /* Guest can handle TSOv6 in. */
+#define VIRTIO_NET_F_GUEST_ECN  0x00200 /* Guest can handle TSO[6] w/ ECN in.*/
+#define VIRTIO_NET_F_GUEST_UFO  0x00400 /* Guest can handle UFO in. */
+#define VIRTIO_NET_F_HOST_TSO4  0x00800 /* Host can handle TSOv4 in. */
+#define VIRTIO_NET_F_HOST_TSO6  0x01000 /* Host can handle TSOv6 in. */
+#define VIRTIO_NET_F_HOST_ECN   0x02000 /* Host can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_HOST_UFO   0x04000 /* Host can handle UFO in. */
+#define VIRTIO_NET_F_MRG_RXBUF  0x08000 /* Host can merge receive buffers. */
+#define VIRTIO_NET_F_STATUS     0x10000 /* virtio_net_config.status available*/
+#define VIRTIO_NET_F_CTRL_VQ    0x20000 /* Control channel available */
+#define VIRTIO_NET_F_CTRL_RX    0x40000 /* Control channel RX mode support */
+#define VIRTIO_NET_F_CTRL_VLAN  0x80000 /* Control channel VLAN filtering */
+#define VIRTIO_NET_F_CTRL_RX_EXTRA  0x100000 /* Extra RX mode control support */
+#define VIRTIO_RING_F_INDIRECT_DESC 0x10000000 /* Support for indirect buffer descriptors. */
+/* The guest publishes the used index for which it expects an interrupt
+ * at the end of the avail ring. Host should ignore the avail->flags field.
+ * The host publishes the avail index for which it expects a kick
+ * at the end of the used ring. Guest should ignore the used->flags field.
+ */
+#define VIRTIO_RING_F_EVENT_IDX 0x20000000
+
+#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */
+
+/*
+ * Maximum number of virtqueues per device.
+ */
+#define VIRTIO_MAX_VIRTQUEUES 8
+
+struct virtio_hw {
+	struct virtqueue *cvq;
+	uint32_t    io_base;
+	uint32_t    guest_features;
+	uint32_t    max_tx_queues;
+	uint32_t    max_rx_queues;
+	uint16_t    vtnet_hdr_size;
+	uint8_t	    vlan_strip;
+	uint8_t	    use_msix;
+	uint8_t     started;
+	uint8_t     mac_addr[ETHER_ADDR_LEN];
+};
+
+/*
+ * This structure is only a reference for reading the
+ * net-device-specific config space; it merely describes its layout.
+ *
+ */
+struct virtio_net_config {
+	/* The config defining mac address (if VIRTIO_NET_F_MAC) */
+	uint8_t    mac[ETHER_ADDR_LEN];
+	/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
+	uint16_t   status;
+	uint16_t   max_virtqueue_pairs;
+} __attribute__((packed));
+
+/*
+ * The remaining space is defined by each driver as the per-driver
+ * configuration space.
+ */
+#define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20)
+
+/*
+ * How many bits to shift physical queue address written to QUEUE_PFN.
+ * 12 is historical, and due to x86 page size.
+ */
+#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12
+
+/* The alignment to use between consumer and producer parts of vring. */
+#define VIRTIO_PCI_VRING_ALIGN 4096
+
+#ifdef __FreeBSD__
+
+static inline void
+outb_p(unsigned char data, unsigned int port)
+{
+
+	outb(port, (u_char)data);
+}
+
+static inline void
+outw_p(unsigned short data, unsigned int port)
+{
+	outw(port, (u_short)data);
+}
+
+static inline void
+outl_p(unsigned int data, unsigned int port)
+{
+	outl(port, (u_int)data);
+}
+#endif
+
+#define VIRTIO_PCI_REG_ADDR(hw, reg) \
+	(unsigned short)((hw)->io_base + (reg))
+
+#define VIRTIO_READ_REG_1(hw, reg) \
+	inb((VIRTIO_PCI_REG_ADDR((hw), (reg))))
+#define VIRTIO_WRITE_REG_1(hw, reg, value) \
+	outb_p((unsigned char)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
+
+#define VIRTIO_READ_REG_2(hw, reg) \
+	inw((VIRTIO_PCI_REG_ADDR((hw), (reg))))
+#define VIRTIO_WRITE_REG_2(hw, reg, value) \
+	outw_p((unsigned short)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
+
+#define VIRTIO_READ_REG_4(hw, reg) \
+	inl((VIRTIO_PCI_REG_ADDR((hw), (reg))))
+#define VIRTIO_WRITE_REG_4(hw, reg, value) \
+	outl_p((unsigned int)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
+
+static inline int
+vtpci_with_feature(struct virtio_hw *hw, uint32_t feature)
+{
+	return (hw->guest_features & feature) != 0;
+}
+
+/*
+ * Function declaration from virtio_pci.c
+ */
+void vtpci_reset(struct virtio_hw *);
+
+void vtpci_reinit_complete(struct virtio_hw *);
+
+void vtpci_set_status(struct virtio_hw *, uint8_t);
+
+uint32_t vtpci_negotiate_features(struct virtio_hw *, uint32_t);
+
+void vtpci_write_dev_config(struct virtio_hw *, uint64_t, void *, int);
+
+void vtpci_read_dev_config(struct virtio_hw *, uint64_t, void *, int);
+
+uint8_t vtpci_isr(struct virtio_hw *);
+
+uint16_t vtpci_irq_config(struct virtio_hw *, uint16_t);
+
+#endif /* _VIRTIO_PCI_H_ */
diff --git a/drivers/virtio/virtio_ring.h b/drivers/virtio/virtio_ring.h
new file mode 100644
index 0000000..a16c499
--- /dev/null
+++ b/drivers/virtio/virtio_ring.h
@@ -0,0 +1,163 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_RING_H_
+#define _VIRTIO_RING_H_
+
+#include <stdint.h>
+
+#include <rte_common.h>
+
+/* This marks a buffer as continuing via the next field. */
+#define VRING_DESC_F_NEXT       1
+/* This marks a buffer as write-only (otherwise read-only). */
+#define VRING_DESC_F_WRITE      2
+/* This means the buffer contains a list of buffer descriptors. */
+#define VRING_DESC_F_INDIRECT   4
+
+/* The Host uses this in used->flags to advise the Guest: don't kick me
+ * when you add a buffer.  It's unreliable, so it's simply an
+ * optimization.  Guest will still kick if it's out of buffers. */
+#define VRING_USED_F_NO_NOTIFY  1
+/* The Guest uses this in avail->flags to advise the Host: don't
+ * interrupt me when you consume a buffer.  It's unreliable, so it's
+ * simply an optimization.  */
+#define VRING_AVAIL_F_NO_INTERRUPT  1
+
+/* VirtIO ring descriptors: 16 bytes.
+ * These can chain together via "next". */
+struct vring_desc {
+	uint64_t addr;  /*  Address (guest-physical). */
+	uint32_t len;   /* Length. */
+	uint16_t flags; /* The flags as indicated above. */
+	uint16_t next;  /* We chain unused descriptors via this. */
+};
+
+struct vring_avail {
+	uint16_t flags;
+	uint16_t idx;
+	uint16_t ring[0];
+};
+
+/* id is a 16bit index. uint32_t is used here for ids for padding reasons. */
+struct vring_used_elem {
+	/* Index of start of used descriptor chain. */
+	uint32_t id;
+	/* Total length of the descriptor chain which was written to. */
+	uint32_t len;
+};
+
+struct vring_used {
+	uint16_t flags;
+	uint16_t idx;
+	struct vring_used_elem ring[0];
+};
+
+struct vring {
+	unsigned int num;
+	struct vring_desc  *desc;
+	struct vring_avail *avail;
+	struct vring_used  *used;
+};
+
+/* The standard layout for the ring is a continuous chunk of memory which
+ * looks like this.  We assume num is a power of 2.
+ *
+ * struct vring {
+ *      // The actual descriptors (16 bytes each)
+ *      struct vring_desc desc[num];
+ *
+ *      // A ring of available descriptor heads with free-running index.
+ *      __u16 avail_flags;
+ *      __u16 avail_idx;
+ *      __u16 available[num];
+ *      __u16 used_event_idx;
+ *
+ *      // Padding to the next align boundary.
+ *      char pad[];
+ *
+ *      // A ring of used descriptor heads with free-running index.
+ *      __u16 used_flags;
+ *      __u16 used_idx;
+ *      struct vring_used_elem used[num];
+ *      __u16 avail_event_idx;
+ * };
+ *
+ * NOTE: for VirtIO PCI, align is 4096.
+ */
+
+/*
+ * We publish the used event index at the end of the available ring, and vice
+ * versa. They are at the end for backwards compatibility.
+ */
+#define vring_used_event(vr)  ((vr)->avail->ring[(vr)->num])
+#define vring_avail_event(vr) (*(uint16_t *)&(vr)->used->ring[(vr)->num])
+
+static inline int
+vring_size(unsigned int num, unsigned long align)
+{
+	int size;
+
+	size = num * sizeof(struct vring_desc);
+	size += sizeof(struct vring_avail) + (num * sizeof(uint16_t));
+	size = RTE_ALIGN_CEIL(size, align);
+	size += sizeof(struct vring_used) +
+		(num * sizeof(struct vring_used_elem));
+	return size;
+}
+
+static inline void
+vring_init(struct vring *vr, unsigned int num, uint8_t *p,
+	unsigned long align)
+{
+	vr->num = num;
+	vr->desc = (struct vring_desc *) p;
+	vr->avail = (struct vring_avail *) (p +
+		num * sizeof(struct vring_desc));
+	vr->used = (void *)
+		RTE_ALIGN_CEIL((uintptr_t)(&vr->avail->ring[num]), align);
+}
+
+/*
+ * The following is used with VIRTIO_RING_F_EVENT_IDX.
+ * Assuming a given event_idx value from the other side, if we have
+ * just incremented index from old to new_idx, should we trigger an
+ * event?
+ */
+static inline int
+vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
+{
+	return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old); /* both sides truncated to 16 bits, so index wraparound is handled */
+}
+
+#endif /* _VIRTIO_RING_H_ */
diff --git a/drivers/virtio/virtio_rxtx.c b/drivers/virtio/virtio_rxtx.c
new file mode 100644
index 0000000..3ff275c
--- /dev/null
+++ b/drivers/virtio/virtio_rxtx.c
@@ -0,0 +1,815 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <rte_cycles.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_branch_prediction.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_prefetch.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
+#include <rte_byteorder.h>
+
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "virtqueue.h"
+
+#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
+#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
+#else
+#define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
+#endif
+
+static void
+vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
+{
+	struct vring_desc *dp, *dp_tail;
+	struct vq_desc_extra *dxp;
+	uint16_t desc_idx_last = desc_idx;
+
+	dp  = &vq->vq_ring.desc[desc_idx];
+	dxp = &vq->vq_descx[desc_idx];
+	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
+	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
+		while (dp->flags & VRING_DESC_F_NEXT) { /* walk to the last descriptor of the chain */
+			desc_idx_last = dp->next;
+			dp = &vq->vq_ring.desc[dp->next];
+		}
+	}
+	dxp->ndescs = 0;
+
+	/*
+	 * Link the newly freed chain onto the end of the existing free
+	 * chain, if any. If the free list is empty (tail index equals
+	 * VQ_RING_DESC_CHAIN_END), the freed chain becomes the new head.
+	 */
+	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
+		vq->vq_desc_head_idx = desc_idx;
+	} else {
+		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
+		dp_tail->next = desc_idx;
+	}
+
+	vq->vq_desc_tail_idx = desc_idx_last; /* last descriptor of the freed chain is the new tail */
+	dp->next = VQ_RING_DESC_CHAIN_END;
+}
+
+static uint16_t
+virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
+			   uint32_t *len, uint16_t num)
+{
+	struct vring_used_elem *uep;
+	struct rte_mbuf *cookie;
+	uint16_t used_idx, desc_idx;
+	uint16_t i;
+
+	/*  Caller does the check */
+	for (i = 0; i < num ; i++) {
+		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+		uep = &vq->vq_ring.used->ring[used_idx];
+		desc_idx = (uint16_t) uep->id;
+		len[i] = uep->len;
+		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
+
+		if (unlikely(cookie == NULL)) {
+			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n",
+				vq->vq_used_cons_idx);
+			break;
+		}
+
+		rte_prefetch0(cookie);
+		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
+		rx_pkts[i]  = cookie;
+		vq->vq_used_cons_idx++;
+		vq_ring_free_chain(vq, desc_idx);
+		vq->vq_descx[desc_idx].cookie = NULL;
+	}
+
+	return i;
+}
+
+#ifndef DEFAULT_TX_FREE_THRESH
+#define DEFAULT_TX_FREE_THRESH 32
+#endif
+
+/* Cleanup from completed transmits. */
+static void
+virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
+{
+	uint16_t i, used_idx, desc_idx;
+	for (i = 0; i < num; i++) {
+		struct vring_used_elem *uep;
+		struct vq_desc_extra *dxp;
+
+		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+		uep = &vq->vq_ring.used->ring[used_idx];
+
+		desc_idx = (uint16_t) uep->id;
+		dxp = &vq->vq_descx[desc_idx];
+		vq->vq_used_cons_idx++;
+		vq_ring_free_chain(vq, desc_idx);
+
+		if (dxp->cookie != NULL) {
+			rte_pktmbuf_free(dxp->cookie);
+			dxp->cookie = NULL;
+		}
+	}
+}
+
+
+static inline int
+virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
+{
+	struct vq_desc_extra *dxp;
+	struct virtio_hw *hw = vq->hw;
+	struct vring_desc *start_dp;
+	uint16_t needed = 1;
+	uint16_t head_idx, idx;
+
+	if (unlikely(vq->vq_free_cnt == 0))
+		return -ENOSPC;
+	if (unlikely(vq->vq_free_cnt < needed))
+		return -EMSGSIZE;
+
+	head_idx = vq->vq_desc_head_idx;
+	if (unlikely(head_idx >= vq->vq_nentries))
+		return -EFAULT;
+
+	idx = head_idx;
+	dxp = &vq->vq_descx[idx];
+	dxp->cookie = (void *)cookie;
+	dxp->ndescs = needed;
+
+	start_dp = vq->vq_ring.desc;
+	start_dp[idx].addr =
+		(uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM
+		- hw->vtnet_hdr_size);
+	start_dp[idx].len =
+		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
+	start_dp[idx].flags =  VRING_DESC_F_WRITE;
+	idx = start_dp[idx].next;
+	vq->vq_desc_head_idx = idx;
+	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
+		vq->vq_desc_tail_idx = idx;
+	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
+	vq_update_avail_ring(vq, head_idx);
+
+	return 0;
+}
+
+static int
+virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
+{
+	struct vq_desc_extra *dxp;
+	struct vring_desc *start_dp;
+	uint16_t seg_num = cookie->nb_segs;
+	uint16_t needed = 1 + seg_num;
+	uint16_t head_idx, idx;
+	uint16_t head_size = txvq->hw->vtnet_hdr_size;
+
+	if (unlikely(txvq->vq_free_cnt == 0))
+		return -ENOSPC;
+	if (unlikely(txvq->vq_free_cnt < needed))
+		return -EMSGSIZE;
+	head_idx = txvq->vq_desc_head_idx;
+	if (unlikely(head_idx >= txvq->vq_nentries))
+		return -EFAULT;
+
+	idx = head_idx;
+	dxp = &txvq->vq_descx[idx];
+	dxp->cookie = (void *)cookie;
+	dxp->ndescs = needed;
+
+	start_dp = txvq->vq_ring.desc;
+	start_dp[idx].addr =
+		txvq->virtio_net_hdr_mem + idx * head_size;
+	start_dp[idx].len = (uint32_t)head_size;
+	start_dp[idx].flags = VRING_DESC_F_NEXT;
+
+	for (; ((seg_num > 0) && (cookie != NULL)); seg_num--) {
+		idx = start_dp[idx].next;
+		start_dp[idx].addr  = RTE_MBUF_DATA_DMA_ADDR(cookie);
+		start_dp[idx].len   = cookie->data_len;
+		start_dp[idx].flags = VRING_DESC_F_NEXT;
+		cookie = cookie->next;
+	}
+
+	start_dp[idx].flags &= ~VRING_DESC_F_NEXT;
+	idx = start_dp[idx].next;
+	txvq->vq_desc_head_idx = idx;
+	if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
+		txvq->vq_desc_tail_idx = idx;
+	txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed);
+	vq_update_avail_ring(txvq, head_idx);
+
+	return 0;
+}
+
+static inline struct rte_mbuf *
+rte_rxmbuf_alloc(struct rte_mempool *mp)
+{
+	struct rte_mbuf *m;
+
+	m = __rte_mbuf_raw_alloc(mp);
+	__rte_mbuf_sanity_check_raw(m, 0);
+
+	return m;
+}
+
+static void
+virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
+{
+	struct rte_mbuf *m;
+	int i, nbufs, error, size = vq->vq_nentries;
+	struct vring *vr = &vq->vq_ring;
+	uint8_t *ring_mem = vq->vq_ring_virt_mem;
+
+	PMD_INIT_FUNC_TRACE();
+
+	/*
+	 * Reinitialise since virtio port might have been stopped and restarted
+	 */
+	memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
+	vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
+	vq->vq_used_cons_idx = 0;
+	vq->vq_desc_head_idx = 0;
+	vq->vq_avail_idx = 0;
+	vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
+	vq->vq_free_cnt = vq->vq_nentries;
+	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
+
+	/* Chain all the descriptors in the ring with an END */
+	for (i = 0; i < size - 1; i++)
+		vr->desc[i].next = (uint16_t)(i + 1);
+	vr->desc[i].next = VQ_RING_DESC_CHAIN_END;
+
+	/*
+	 * Disable device(host) interrupting guest
+	 */
+	virtqueue_disable_intr(vq);
+
+	/* Only rx virtqueue needs mbufs to be allocated at initialization */
+	if (queue_type == VTNET_RQ) {
+		if (vq->mpool == NULL)
+			rte_exit(EXIT_FAILURE,
+			"Cannot allocate initial mbufs for rx virtqueue");
+
+		/* Allocate blank mbufs for the each rx descriptor */
+		nbufs = 0;
+		error = ENOSPC;
+		while (!virtqueue_full(vq)) {
+			m = rte_rxmbuf_alloc(vq->mpool);
+			if (m == NULL)
+				break;
+
+			/******************************************
+			*         Enqueue allocated buffers        *
+			*******************************************/
+			error = virtqueue_enqueue_recv_refill(vq, m);
+
+			if (error) {
+				rte_pktmbuf_free(m);
+				break;
+			}
+			nbufs++;
+		}
+
+		vq_update_avail_idx(vq);
+
+		PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
+
+		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
+			vq->vq_queue_index);
+		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
+			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+	} else if (queue_type == VTNET_TQ) {
+		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
+			vq->vq_queue_index);
+		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
+			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+	} else {
+		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
+			vq->vq_queue_index);
+		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
+			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+	}
+}
+
+void
+virtio_dev_cq_start(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	if (hw->cvq) {
+		virtio_dev_vring_start(hw->cvq, VTNET_CQ);
+		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq);
+	}
+}
+
+void
+virtio_dev_rxtx_start(struct rte_eth_dev *dev)
+{
+	/*
+	 * Start receive and transmit vrings
+	 * -	Setup vring structure for all queues
+	 * -	Initialize descriptor for the rx vring
+	 * -	Allocate blank mbufs for the each rx descriptor
+	 *
+	 */
+	int i;
+
+	PMD_INIT_FUNC_TRACE();
+
+	/* Start rx vring. */
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		virtio_dev_vring_start(dev->data->rx_queues[i], VTNET_RQ);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+	}
+
+	/* Start tx vring. */
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		virtio_dev_vring_start(dev->data->tx_queues[i], VTNET_TQ);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+	}
+}
+
+int
+virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
+			uint16_t queue_idx,
+			uint16_t nb_desc,
+			unsigned int socket_id,
+			__rte_unused const struct rte_eth_rxconf *rx_conf,
+			struct rte_mempool *mp)
+{
+	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
+	struct virtqueue *vq;
+	int ret;
+
+	PMD_INIT_FUNC_TRACE();
+	ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx,
+			nb_desc, socket_id, &vq);
+	if (ret < 0) {
+		PMD_INIT_LOG(ERR, "rvq initialization failed");
+		return ret;
+	}
+
+	/* Record the caller-supplied mempool used for rx mbuf allocation */
+	vq->mpool = mp;
+
+	dev->data->rx_queues[queue_idx] = vq;
+	return 0;
+}
+
+/*
+ * struct rte_eth_dev *dev: Used to update dev
+ * uint16_t nb_desc: Defaults to values read from config space
+ * unsigned int socket_id: Used to allocate memzone
+ * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
+ * uint16_t queue_idx: Just used as an index in dev txq list
+ */
+int
+virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
+			uint16_t queue_idx,
+			uint16_t nb_desc,
+			unsigned int socket_id,
+			const struct rte_eth_txconf *tx_conf)
+{
+	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX; /* 16-bit like the rx path; 8 bits would truncate */
+	struct virtqueue *vq;
+	uint16_t tx_free_thresh;
+	int ret;
+
+	PMD_INIT_FUNC_TRACE();
+
+	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS)
+	    != ETH_TXQ_FLAGS_NOXSUMS) {
+		PMD_INIT_LOG(ERR, "TX checksum offload not supported\n");
+		return -EINVAL;
+	}
+
+	ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
+			nb_desc, socket_id, &vq);
+	if (ret < 0) {
+		PMD_INIT_LOG(ERR, "tvq initialization failed");
+		return ret;
+	}
+
+	tx_free_thresh = tx_conf->tx_free_thresh;
+	if (tx_free_thresh == 0) /* 0 means "use the default" */
+		tx_free_thresh =
+			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
+
+	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
+		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
+			"number of TX entries minus 3 (%u)."
+			" (tx_free_thresh=%u port=%u queue=%u)\n",
+			vq->vq_nentries - 3,
+			tx_free_thresh, dev->data->port_id, queue_idx);
+		return -EINVAL;
+	}
+
+	vq->vq_free_thresh = tx_free_thresh;
+
+	dev->data->tx_queues[queue_idx] = vq;
+	return 0;
+}
+
+static void
+virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
+{
+	int error;
+	/*
+	 * Requeue the discarded mbuf. This should always be
+	 * successful since it was just dequeued.
+	 */
+	error = virtqueue_enqueue_recv_refill(vq, m);
+	if (unlikely(error)) {
+		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
+		rte_pktmbuf_free(m); /* could not be reposted: release it so it is not leaked */
+	}
+}
+
+#define VIRTIO_MBUF_BURST_SZ 64
+#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
+uint16_t
+virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	struct virtqueue *rxvq = rx_queue;
+	struct virtio_hw *hw;
+	struct rte_mbuf *rxm, *new_mbuf;
+	uint16_t nb_used, num, nb_rx;
+	uint32_t len[VIRTIO_MBUF_BURST_SZ];
+	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
+	int error;
+	uint32_t i, nb_enqueued;
+	const uint32_t hdr_size = sizeof(struct virtio_net_hdr);
+
+	nb_used = VIRTQUEUE_NUSED(rxvq);
+
+	virtio_rmb();
+
+	num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
+	num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
+	if (likely(num > DESC_PER_CACHELINE))
+		num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
+
+	if (num == 0)
+		return 0;
+
+	num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num);
+	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
+
+	hw = rxvq->hw;
+	nb_rx = 0;
+	nb_enqueued = 0;
+
+	for (i = 0; i < num ; i++) {
+		rxm = rcv_pkts[i];
+
+		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
+
+		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
+			PMD_RX_LOG(ERR, "Packet drop");
+			nb_enqueued++;
+			virtio_discard_rxbuf(rxvq, rxm);
+			rxvq->errors++;
+			continue;
+		}
+
+		rxm->port = rxvq->port_id;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+
+		rxm->nb_segs = 1;
+		rxm->next = NULL;
+		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
+		rxm->data_len = (uint16_t)(len[i] - hdr_size);
+
+		if (hw->vlan_strip)
+			rte_vlan_strip(rxm);
+
+		VIRTIO_DUMP_PACKET(rxm, rxm->data_len);
+
+		rx_pkts[nb_rx++] = rxm;
+		rxvq->bytes += rx_pkts[nb_rx - 1]->pkt_len;
+	}
+
+	rxvq->packets += nb_rx;
+
+	/* Allocate new mbuf for the used descriptor */
+	error = ENOSPC;
+	while (likely(!virtqueue_full(rxvq))) {
+		new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
+		if (unlikely(new_mbuf == NULL)) {
+			struct rte_eth_dev *dev
+				= &rte_eth_devices[rxvq->port_id];
+			dev->data->rx_mbuf_alloc_failed++;
+			break;
+		}
+		error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
+		if (unlikely(error)) {
+			rte_pktmbuf_free(new_mbuf);
+			break;
+		}
+		nb_enqueued++;
+	}
+
+	if (likely(nb_enqueued)) {
+		vq_update_avail_idx(rxvq);
+
+		if (unlikely(virtqueue_kick_prepare(rxvq))) {
+			virtqueue_notify(rxvq);
+			PMD_RX_LOG(DEBUG, "Notified\n");
+		}
+	}
+
+	return nb_rx;
+}
+
+uint16_t
+virtio_recv_mergeable_pkts(void *rx_queue,
+			struct rte_mbuf **rx_pkts,
+			uint16_t nb_pkts)
+{
+	struct virtqueue *rxvq = rx_queue;
+	struct virtio_hw *hw;
+	struct rte_mbuf *rxm, *new_mbuf;
+	uint16_t nb_used, num, nb_rx;
+	uint32_t len[VIRTIO_MBUF_BURST_SZ];
+	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
+	struct rte_mbuf *prev;
+	int error;
+	uint32_t i, nb_enqueued;
+	uint32_t seg_num;
+	uint16_t extra_idx;
+	uint32_t seg_res;
+	const uint32_t hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+
+	nb_used = VIRTQUEUE_NUSED(rxvq);
+
+	virtio_rmb();
+
+	if (nb_used == 0)
+		return 0;
+
+	PMD_RX_LOG(DEBUG, "used:%d\n", nb_used);
+
+	hw = rxvq->hw;
+	nb_rx = 0;
+	i = 0;
+	nb_enqueued = 0;
+	seg_num = 0;
+	extra_idx = 0;
+	seg_res = 0;
+
+	while (i < nb_used) {
+		struct virtio_net_hdr_mrg_rxbuf *header;
+
+		if (nb_rx == nb_pkts)
+			break;
+
+		num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, 1);
+		if (num != 1)
+			break; /* nothing dequeued and queue state unchanged: retrying would spin forever */
+
+		i++;
+
+		PMD_RX_LOG(DEBUG, "dequeue:%d\n", num);
+		PMD_RX_LOG(DEBUG, "packet len:%d\n", len[0]);
+
+		rxm = rcv_pkts[0];
+
+		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
+			PMD_RX_LOG(ERR, "Packet drop\n");
+			nb_enqueued++;
+			virtio_discard_rxbuf(rxvq, rxm);
+			rxvq->errors++;
+			continue;
+		}
+
+		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
+			RTE_PKTMBUF_HEADROOM - hdr_size);
+		seg_num = header->num_buffers;
+
+		if (seg_num == 0)
+			seg_num = 1;
+
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		rxm->nb_segs = seg_num;
+		rxm->next = NULL;
+		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
+		rxm->data_len = (uint16_t)(len[0] - hdr_size);
+
+		rxm->port = rxvq->port_id;
+		rx_pkts[nb_rx] = rxm;
+		prev = rxm;
+
+		seg_res = seg_num - 1;
+
+		while (seg_res != 0) {
+			/*
+			 * Get extra segments for current uncompleted packet.
+			 */
+			uint16_t  rcv_cnt =
+				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
+			if (likely(VIRTQUEUE_NUSED(rxvq) >= rcv_cnt)) {
+				uint32_t rx_num =
+					virtqueue_dequeue_burst_rx(rxvq,
+					rcv_pkts, len, rcv_cnt);
+				i += rx_num;
+				rcv_cnt = rx_num;
+			} else {
+				PMD_RX_LOG(ERR,
+					"No enough segments for packet.\n");
+				nb_enqueued++;
+				virtio_discard_rxbuf(rxvq, rxm);
+				rxvq->errors++;
+				break; /* NOTE(review): requeued mbuf is still part of rx_pkts[nb_rx], which is returned below - confirm */
+			}
+
+			extra_idx = 0;
+
+			while (extra_idx < rcv_cnt) {
+				rxm = rcv_pkts[extra_idx];
+
+				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
+				rxm->next = NULL;
+				rxm->pkt_len = (uint32_t)(len[extra_idx]);
+				rxm->data_len = (uint16_t)(len[extra_idx]);
+
+				if (prev)
+					prev->next = rxm;
+
+				prev = rxm;
+				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
+				extra_idx++;
+			};
+			seg_res -= rcv_cnt;
+		}
+
+		if (hw->vlan_strip)
+			rte_vlan_strip(rx_pkts[nb_rx]);
+
+		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
+			rx_pkts[nb_rx]->data_len);
+
+		rxvq->bytes += rx_pkts[nb_rx]->pkt_len;
+		nb_rx++;
+	}
+
+	rxvq->packets += nb_rx;
+
+	/* Allocate new mbuf for the used descriptor */
+	error = ENOSPC;
+	while (likely(!virtqueue_full(rxvq))) {
+		new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
+		if (unlikely(new_mbuf == NULL)) {
+			struct rte_eth_dev *dev
+				= &rte_eth_devices[rxvq->port_id];
+			dev->data->rx_mbuf_alloc_failed++;
+			break;
+		}
+		error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
+		if (unlikely(error)) {
+			rte_pktmbuf_free(new_mbuf);
+			break;
+		}
+		nb_enqueued++;
+	}
+
+	if (likely(nb_enqueued)) {
+		vq_update_avail_idx(rxvq);
+
+		if (unlikely(virtqueue_kick_prepare(rxvq))) {
+			virtqueue_notify(rxvq);
+			PMD_RX_LOG(DEBUG, "Notified");
+		}
+	}
+
+	return nb_rx;
+}
+
+uint16_t
+virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct virtqueue *txvq = tx_queue;
+	struct rte_mbuf *txm;
+	uint16_t nb_used, nb_tx;
+	int error;
+
+	if (unlikely(nb_pkts < 1))
+		return nb_pkts;
+
+	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
+	nb_used = VIRTQUEUE_NUSED(txvq);
+
+	virtio_rmb();
+	if (likely(nb_used > txvq->vq_free_thresh))
+		virtio_xmit_cleanup(txvq, nb_used);
+
+	nb_tx = 0;
+
+	while (nb_tx < nb_pkts) {
+		/* Need one more descriptor for virtio header. */
+		int need = tx_pkts[nb_tx]->nb_segs - txvq->vq_free_cnt + 1;
+
+		/*Positive value indicates it need free vring descriptors */
+		if (unlikely(need > 0)) {
+			nb_used = VIRTQUEUE_NUSED(txvq);
+			virtio_rmb();
+			need = RTE_MIN(need, (int)nb_used);
+
+			virtio_xmit_cleanup(txvq, need);
+			need = (int)tx_pkts[nb_tx]->nb_segs -
+				txvq->vq_free_cnt + 1;
+		}
+
+		/*
+		 * Zero or negative value indicates it has enough free
+		 * descriptors to use for transmitting.
+		 */
+		if (likely(need <= 0)) {
+			txm = tx_pkts[nb_tx];
+
+			/* Do VLAN tag insertion */
+			if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
+				error = rte_vlan_insert(&txm);
+				if (unlikely(error)) {
+					rte_pktmbuf_free(txm);
+					++nb_tx;
+					continue;
+				}
+			}
+
+			/* Enqueue Packet buffers */
+			error = virtqueue_enqueue_xmit(txvq, txm);
+			if (unlikely(error)) {
+				if (error == -ENOSPC) /* enqueue returns negative errno values */
+					PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0");
+				else if (error == -EMSGSIZE)
+					PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1");
+				else
+					PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d", error);
+				break;
+			}
+			nb_tx++;
+			txvq->bytes += txm->pkt_len;
+		} else {
+			PMD_TX_LOG(ERR, "No free tx descriptors to transmit");
+			break;
+		}
+	}
+
+	txvq->packets += nb_tx;
+
+	if (likely(nb_tx)) {
+		vq_update_avail_idx(txvq);
+
+		if (unlikely(virtqueue_kick_prepare(txvq))) {
+			virtqueue_notify(txvq);
+			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
+		}
+	}
+
+	return nb_tx;
+}
diff --git a/drivers/virtio/virtqueue.c b/drivers/virtio/virtqueue.c
new file mode 100644
index 0000000..8a3005f
--- /dev/null
+++ b/drivers/virtio/virtqueue.c
@@ -0,0 +1,70 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+
+#include "virtqueue.h"
+#include "virtio_logs.h"
+#include "virtio_pci.h"
+
+void
+virtqueue_disable_intr(struct virtqueue *vq)
+{
+	/*
+	 * Set VRING_AVAIL_F_NO_INTERRUPT to hint host
+	 * not to interrupt when it consumes packets
+	 * Note: this is only considered a hint to the host
+	 */
+	vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
+}
+
+/*
+ * Two types of mbuf to be cleaned:
+ * 1) mbuf that has been consumed by backend but not used by virtio.
+ * 2) mbuf that hasn't been consumed by backend.
+ */
+struct rte_mbuf *
+virtqueue_detatch_unused(struct virtqueue *vq)
+{
+	struct rte_mbuf *cookie;
+	int idx;
+
+	for (idx = 0; idx < vq->vq_nentries; idx++) {
+		if ((cookie = vq->vq_descx[idx].cookie) != NULL) {
+			vq->vq_descx[idx].cookie = NULL;
+			return cookie;
+		}
+	}
+	return NULL;
+}
diff --git a/drivers/virtio/virtqueue.h b/drivers/virtio/virtqueue.h
new file mode 100644
index 0000000..9d6079e
--- /dev/null
+++ b/drivers/virtio/virtqueue.h
@@ -0,0 +1,325 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTQUEUE_H_
+#define _VIRTQUEUE_H_
+
+#include <stdint.h>
+
+#include <rte_atomic.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_mempool.h>
+
+#include "virtio_pci.h"
+#include "virtio_ring.h"
+#include "virtio_logs.h"
+
+struct rte_mbuf;
+
+/*
+ * Per virtio_config.h in Linux.
+ *     For virtio_pci on SMP, we don't need to order with respect to MMIO
+ *     accesses through relaxed memory I/O windows, so smp_mb() et al are
+ *     sufficient.
+ *
+ * This driver is for virtio_pci on SMP and therefore can assume
+ * weaker (compiler barriers)
+ */
+#define virtio_mb()	rte_mb()
+#define virtio_rmb()	rte_compiler_barrier()
+#define virtio_wmb()	rte_compiler_barrier()
+
+#ifdef RTE_PMD_PACKET_PREFETCH
+#define rte_packet_prefetch(p)  rte_prefetch1(p)
+#else
+#define rte_packet_prefetch(p)  do {} while(0)
+#endif
+
+#define VIRTQUEUE_MAX_NAME_SZ 32
+
+#define RTE_MBUF_DATA_DMA_ADDR(mb) \
+	(uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
+
+#define VTNET_SQ_RQ_QUEUE_IDX 0
+#define VTNET_SQ_TQ_QUEUE_IDX 1
+#define VTNET_SQ_CQ_QUEUE_IDX 2
+
+enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 };
+/**
+ * The maximum virtqueue size is 2^15. Use that value as the end of
+ * descriptor chain terminator since it will never be a valid index
+ * in the descriptor table. This is used to verify we are correctly
+ * handling vq_free_cnt.
+ */
+#define VQ_RING_DESC_CHAIN_END 32768
+
+/**
+ * Control the RX mode, ie. promiscuous, allmulti, etc...
+ * All commands require an "out" sg entry containing a 1 byte
+ * state value, zero = disable, non-zero = enable.  Commands
+ * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
+ * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
+ */
+#define VIRTIO_NET_CTRL_RX              0
+#define VIRTIO_NET_CTRL_RX_PROMISC      0
+#define VIRTIO_NET_CTRL_RX_ALLMULTI     1
+#define VIRTIO_NET_CTRL_RX_ALLUNI       2
+#define VIRTIO_NET_CTRL_RX_NOMULTI      3
+#define VIRTIO_NET_CTRL_RX_NOUNI        4
+#define VIRTIO_NET_CTRL_RX_NOBCAST      5
+
+/**
+ * Control the MAC
+ *
+ * The MAC filter table is managed by the hypervisor, the guest should
+ * assume the size is infinite.  Filtering should be considered
+ * non-perfect, ie. based on hypervisor resources, the guest may
+ * receive packets from sources not specified in the filter list.
+ *
+ * In addition to the class/cmd header, the TABLE_SET command requires
+ * two out scatterlists.  Each contains a 4 byte count of entries followed
+ * by a concatenated byte stream of the ETH_ALEN MAC addresses.  The
+ * first sg list contains unicast addresses, the second is for multicast.
+ * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
+ * is available.
+ *
+ * The ADDR_SET command requests one out scatterlist, it contains a
+ * 6 bytes MAC address. This functionality is present if the
+ * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
+ */
+struct virtio_net_ctrl_mac {
+	uint32_t entries;
+	uint8_t macs[][ETHER_ADDR_LEN];
+} __attribute__((__packed__));
+
+#define VIRTIO_NET_CTRL_MAC    1
+ #define VIRTIO_NET_CTRL_MAC_TABLE_SET        0
+ #define VIRTIO_NET_CTRL_MAC_ADDR_SET         1
+
+/**
+ * Control VLAN filtering
+ *
+ * The VLAN filter table is controlled via a simple ADD/DEL interface.
+ * VLAN IDs not added may be filtered by the hypervisor.  Del is the
+ * opposite of add.  Both commands expect an out entry containing a 2
+ * byte VLAN ID.  VLAN filtering is available with the
+ * VIRTIO_NET_F_CTRL_VLAN feature bit.
+ */
+#define VIRTIO_NET_CTRL_VLAN     2
+#define VIRTIO_NET_CTRL_VLAN_ADD 0
+#define VIRTIO_NET_CTRL_VLAN_DEL 1
+
+struct virtio_net_ctrl_hdr {
+	uint8_t class;
+	uint8_t cmd;
+} __attribute__((packed));
+
+typedef uint8_t virtio_net_ctrl_ack;
+
+#define VIRTIO_NET_OK     0
+#define VIRTIO_NET_ERR    1
+
+#define VIRTIO_MAX_CTRL_DATA 2048
+
+struct virtio_pmd_ctrl {
+	struct virtio_net_ctrl_hdr hdr;
+	virtio_net_ctrl_ack status;
+	uint8_t data[VIRTIO_MAX_CTRL_DATA];
+};
+
+struct virtqueue {
+	struct virtio_hw         *hw;     /**< virtio_hw structure pointer. */
+	const struct rte_memzone *mz;     /**< mem zone to populate RX ring. */
+	const struct rte_memzone *virtio_net_hdr_mz; /**< memzone to populate hdr. */
+	struct rte_mempool       *mpool;  /**< mempool for mbuf allocation */
+	uint16_t    queue_id;             /**< DPDK queue index. */
+	uint8_t     port_id;              /**< Device port identifier. */
+	uint16_t    vq_queue_index;       /**< PCI queue index */
+
+	void        *vq_ring_virt_mem;    /**< linear address of vring*/
+	unsigned int vq_ring_size;
+	phys_addr_t vq_ring_mem;          /**< physical address of vring */
+
+	struct vring vq_ring;    /**< vring keeping desc, used and avail */
+	uint16_t    vq_free_cnt; /**< num of desc available */
+	uint16_t    vq_nentries; /**< vring desc numbers */
+	uint16_t    vq_free_thresh; /**< free threshold */
+	/**
+	 * Head of the free chain in the descriptor table. If
+	 * there are no free descriptors, this will be set to
+	 * VQ_RING_DESC_CHAIN_END.
+	 */
+	uint16_t  vq_desc_head_idx;
+	uint16_t  vq_desc_tail_idx;
+	/**
+	 * Last consumed descriptor in the used table,
+	 * trails vq_ring.used->idx.
+	 */
+	uint16_t vq_used_cons_idx;
+	uint16_t vq_avail_idx;
+	phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
+
+	/* Statistics */
+	uint64_t	packets;
+	uint64_t	bytes;
+	uint64_t	errors;
+
+	struct vq_desc_extra {
+		void              *cookie;
+		uint16_t          ndescs;
+	} vq_descx[0];
+};
+
+/* If multiqueue is provided by host, then we support it. */
+#ifndef VIRTIO_NET_F_MQ
+/* Device supports Receive Flow Steering */
+#define VIRTIO_NET_F_MQ 0x400000
+#define VIRTIO_NET_CTRL_MQ   4
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
+#endif
+#ifndef VIRTIO_NET_F_CTRL_MAC_ADDR
+#define VIRTIO_NET_F_CTRL_MAC_ADDR 0x800000
+#define VIRTIO_NET_CTRL_MAC_ADDR_SET         1
+#endif
+
+/**
+ * This is the first element of the scatter-gather list.  If you don't
+ * specify GSO or CSUM features, you can simply ignore the header.
+ */
+struct virtio_net_hdr {
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1    /**< Use csum_start,csum_offset*/
+	uint8_t flags;
+#define VIRTIO_NET_HDR_GSO_NONE     0    /**< Not a GSO frame */
+#define VIRTIO_NET_HDR_GSO_TCPV4    1    /**< GSO frame, IPv4 TCP (TSO) */
+#define VIRTIO_NET_HDR_GSO_UDP      3    /**< GSO frame, IPv4 UDP (UFO) */
+#define VIRTIO_NET_HDR_GSO_TCPV6    4    /**< GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_ECN      0x80 /**< TCP has ECN set */
+	uint8_t gso_type;
+	uint16_t hdr_len;     /**< Ethernet + IP + tcp/udp hdrs */
+	uint16_t gso_size;    /**< Bytes to append to hdr_len per frame */
+	uint16_t csum_start;  /**< Position to start checksumming from */
+	uint16_t csum_offset; /**< Offset after that to place checksum */
+};
+
+/**
+ * This is the version of the header to use when the MRG_RXBUF
+ * feature has been negotiated.
+ */
+struct virtio_net_hdr_mrg_rxbuf {
+	struct   virtio_net_hdr hdr;
+	uint16_t num_buffers; /**< Number of merged rx buffers */
+};
+
+/**
+ * Tell the backend not to interrupt us.
+ */
+void virtqueue_disable_intr(struct virtqueue *vq);
+/**
+ *  Dump virtqueue internal structures, for debug purpose only.
+ */
+void virtqueue_dump(struct virtqueue *vq);
+/**
+ *  Get all mbufs to be freed.
+ */
+struct rte_mbuf *virtqueue_detatch_unused(struct virtqueue *vq);
+
+static inline int
+virtqueue_full(const struct virtqueue *vq)
+{
+	return vq->vq_free_cnt == 0;
+}
+
+#define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_ring.used->idx - (vq)->vq_used_cons_idx))
+
+static inline void
+vq_update_avail_idx(struct virtqueue *vq)
+{
+	virtio_wmb();
+	vq->vq_ring.avail->idx = vq->vq_avail_idx;
+}
+
+static inline void
+vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx)
+{
+	uint16_t avail_idx;
+	/*
+	 * Place the head of the descriptor chain into the next slot and make
+	 * it usable to the host. The chain is made available now rather than
+	 * deferring to virtqueue_notify() in the hopes that if the host is
+	 * currently running on another CPU, we can keep it processing the new
+	 * descriptor.
+	 */
+	avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1));
+	vq->vq_ring.avail->ring[avail_idx] = desc_idx;
+	vq->vq_avail_idx++;
+}
+
+static inline int
+virtqueue_kick_prepare(struct virtqueue *vq)
+{
+	return !(vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY);
+}
+
+static inline void
+virtqueue_notify(struct virtqueue *vq)
+{
+	/*
+	 * Ensure updated avail->idx is visible to host.
+	 * For virtio on IA, the notification is through io port operation
+	 * which is a serialization instruction itself.
+	 */
+	VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_NOTIFY, vq->vq_queue_index);
+}
+
+#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
+#define VIRTQUEUE_DUMP(vq) do { \
+	uint16_t used_idx, nused; \
+	used_idx = (vq)->vq_ring.used->idx; \
+	nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \
+	PMD_INIT_LOG(DEBUG, \
+	  "VQ: - size=%d; free=%d; used=%d; desc_head_idx=%d;" \
+	  " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \
+	  " avail.flags=0x%x; used.flags=0x%x", \
+	  (vq)->vq_nentries, (vq)->vq_free_cnt, nused, \
+	  (vq)->vq_desc_head_idx, (vq)->vq_ring.avail->idx, \
+	  (vq)->vq_used_cons_idx, (vq)->vq_ring.used->idx, \
+	  (vq)->vq_ring.avail->flags, (vq)->vq_ring.used->flags); \
+} while (0)
+#else
+#define VIRTQUEUE_DUMP(vq) do { } while (0)
+#endif
+
+#endif /* _VIRTQUEUE_H_ */
diff --git a/lib/Makefile b/lib/Makefile
index 68b6706..d0e7fa4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -41,7 +41,6 @@ DIRS-$(CONFIG_RTE_LIBRTE_TIMER) += librte_timer
 DIRS-$(CONFIG_RTE_LIBRTE_CFGFILE) += librte_cfgfile
 DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += librte_cmdline
 DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether
-DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += librte_pmd_virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += librte_pmd_vmxnet3
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt
 DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
diff --git a/lib/librte_pmd_virtio/Makefile b/lib/librte_pmd_virtio/Makefile
deleted file mode 100644
index 21ff7e5..0000000
--- a/lib/librte_pmd_virtio/Makefile
+++ /dev/null
@@ -1,60 +0,0 @@
-#   BSD LICENSE
-#
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
-#   All rights reserved.
-#
-#   Redistribution and use in source and binary forms, with or without
-#   modification, are permitted provided that the following conditions
-#   are met:
-#
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in
-#       the documentation and/or other materials provided with the
-#       distribution.
-#     * Neither the name of Intel Corporation nor the names of its
-#       contributors may be used to endorse or promote products derived
-#       from this software without specific prior written permission.
-#
-#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-include $(RTE_SDK)/mk/rte.vars.mk
-
-#
-# library name
-#
-LIB = librte_pmd_virtio.a
-
-CFLAGS += -O3
-CFLAGS += $(WERROR_FLAGS)
-
-EXPORT_MAP := rte_pmd_virtio_version.map
-
-LIBABIVER := 1
-
-#
-# all source are stored in SRCS-y
-#
-SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c
-SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c
-SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
-SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
-
-
-# this lib depends upon:
-DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
-DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
-DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_net lib/librte_malloc
-
-include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_pmd_virtio/rte_pmd_virtio_version.map b/lib/librte_pmd_virtio/rte_pmd_virtio_version.map
deleted file mode 100644
index ef35398..0000000
--- a/lib/librte_pmd_virtio/rte_pmd_virtio_version.map
+++ /dev/null
@@ -1,4 +0,0 @@
-DPDK_2.0 {
-
-	local: *;
-};
diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c b/lib/librte_pmd_virtio/virtio_ethdev.c
deleted file mode 100644
index e63dbfb..0000000
--- a/lib/librte_pmd_virtio/virtio_ethdev.c
+++ /dev/null
@@ -1,1504 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdint.h>
-#include <string.h>
-#include <stdio.h>
-#include <errno.h>
-#include <unistd.h>
-#ifdef RTE_EXEC_ENV_LINUXAPP
-#include <dirent.h>
-#include <fcntl.h>
-#endif
-
-#include <rte_ethdev.h>
-#include <rte_memcpy.h>
-#include <rte_string_fns.h>
-#include <rte_memzone.h>
-#include <rte_malloc.h>
-#include <rte_atomic.h>
-#include <rte_branch_prediction.h>
-#include <rte_pci.h>
-#include <rte_ether.h>
-#include <rte_common.h>
-
-#include <rte_memory.h>
-#include <rte_eal.h>
-#include <rte_dev.h>
-
-#include "virtio_ethdev.h"
-#include "virtio_pci.h"
-#include "virtio_logs.h"
-#include "virtqueue.h"
-
-
-static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
-static int  virtio_dev_configure(struct rte_eth_dev *dev);
-static int  virtio_dev_start(struct rte_eth_dev *dev);
-static void virtio_dev_stop(struct rte_eth_dev *dev);
-static void virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
-static void virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
-static void virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
-static void virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
-static void virtio_dev_info_get(struct rte_eth_dev *dev,
-				struct rte_eth_dev_info *dev_info);
-static int virtio_dev_link_update(struct rte_eth_dev *dev,
-	__rte_unused int wait_to_complete);
-
-static void virtio_set_hwaddr(struct virtio_hw *hw);
-static void virtio_get_hwaddr(struct virtio_hw *hw);
-
-static void virtio_dev_rx_queue_release(__rte_unused void *rxq);
-static void virtio_dev_tx_queue_release(__rte_unused void *txq);
-
-static void virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
-static void virtio_dev_stats_reset(struct rte_eth_dev *dev);
-static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
-static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
-				uint16_t vlan_id, int on);
-static void virtio_mac_addr_add(struct rte_eth_dev *dev,
-				struct ether_addr *mac_addr,
-				uint32_t index, uint32_t vmdq __rte_unused);
-static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
-static void virtio_mac_addr_set(struct rte_eth_dev *dev,
-				struct ether_addr *mac_addr);
-
-static int virtio_dev_queue_stats_mapping_set(
-	__rte_unused struct rte_eth_dev *eth_dev,
-	__rte_unused uint16_t queue_id,
-	__rte_unused uint8_t stat_idx,
-	__rte_unused uint8_t is_rx);
-
-/*
- * The set of PCI devices this driver supports
- */
-static const struct rte_pci_id pci_id_virtio_map[] = {
-
-#define RTE_PCI_DEV_ID_DECL_VIRTIO(vend, dev) {RTE_PCI_DEVICE(vend, dev)},
-#include "rte_pci_dev_ids.h"
-
-{ .vendor_id = 0, /* sentinel */ },
-};
-
-static int
-virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
-		int *dlen, int pkt_num)
-{
-	uint16_t head = vq->vq_desc_head_idx, i;
-	int k, sum = 0;
-	virtio_net_ctrl_ack status = ~0;
-	struct virtio_pmd_ctrl result;
-
-	ctrl->status = status;
-
-	if (!vq->hw->cvq) {
-		PMD_INIT_LOG(ERR,
-			     "%s(): Control queue is not supported.",
-			     __func__);
-		return -1;
-	}
-
-	PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
-		"vq->hw->cvq = %p vq = %p",
-		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
-
-	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
-		return -1;
-
-	memcpy(vq->virtio_net_hdr_mz->addr, ctrl,
-		sizeof(struct virtio_pmd_ctrl));
-
-	/*
-	 * Format is enforced in qemu code:
-	 * One TX packet for header;
-	 * At least one TX packet per argument;
-	 * One RX packet for ACK.
-	 */
-	vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT;
-	vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr;
-	vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
-	vq->vq_free_cnt--;
-	i = vq->vq_ring.desc[head].next;
-
-	for (k = 0; k < pkt_num; k++) {
-		vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT;
-		vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr
-			+ sizeof(struct virtio_net_ctrl_hdr)
-			+ sizeof(ctrl->status) + sizeof(uint8_t)*sum;
-		vq->vq_ring.desc[i].len = dlen[k];
-		sum += dlen[k];
-		vq->vq_free_cnt--;
-		i = vq->vq_ring.desc[i].next;
-	}
-
-	vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
-	vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr
-			+ sizeof(struct virtio_net_ctrl_hdr);
-	vq->vq_ring.desc[i].len = sizeof(ctrl->status);
-	vq->vq_free_cnt--;
-
-	vq->vq_desc_head_idx = vq->vq_ring.desc[i].next;
-
-	vq_update_avail_ring(vq, head);
-	vq_update_avail_idx(vq);
-
-	PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
-
-	virtqueue_notify(vq);
-
-	rte_rmb();
-	while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) {
-		rte_rmb();
-		usleep(100);
-	}
-
-	while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) {
-		uint32_t idx, desc_idx, used_idx;
-		struct vring_used_elem *uep;
-
-		used_idx = (uint32_t)(vq->vq_used_cons_idx
-				& (vq->vq_nentries - 1));
-		uep = &vq->vq_ring.used->ring[used_idx];
-		idx = (uint32_t) uep->id;
-		desc_idx = idx;
-
-		while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) {
-			desc_idx = vq->vq_ring.desc[desc_idx].next;
-			vq->vq_free_cnt++;
-		}
-
-		vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx;
-		vq->vq_desc_head_idx = idx;
-
-		vq->vq_used_cons_idx++;
-		vq->vq_free_cnt++;
-	}
-
-	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
-			vq->vq_free_cnt, vq->vq_desc_head_idx);
-
-	memcpy(&result, vq->virtio_net_hdr_mz->addr,
-			sizeof(struct virtio_pmd_ctrl));
-
-	return result.status;
-}
-
-static int
-virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct virtio_pmd_ctrl ctrl;
-	int dlen[1];
-	int ret;
-
-	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
-	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
-	memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
-
-	dlen[0] = sizeof(uint16_t);
-
-	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
-
-	if (ret) {
-		PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
-			  "failed, this is too late now...");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-int virtio_dev_queue_setup(struct rte_eth_dev *dev,
-			int queue_type,
-			uint16_t queue_idx,
-			uint16_t  vtpci_queue_idx,
-			uint16_t nb_desc,
-			unsigned int socket_id,
-			struct virtqueue **pvq)
-{
-	char vq_name[VIRTQUEUE_MAX_NAME_SZ];
-	const struct rte_memzone *mz;
-	uint16_t vq_size;
-	int size;
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct virtqueue  *vq = NULL;
-
-	/* Write the virtqueue index to the Queue Select Field */
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, vtpci_queue_idx);
-	PMD_INIT_LOG(DEBUG, "selecting queue: %d", vtpci_queue_idx);
-
-	/*
-	 * Read the virtqueue size from the Queue Size field
-	 * Always power of 2 and if 0 virtqueue does not exist
-	 */
-	vq_size = VIRTIO_READ_REG_2(hw, VIRTIO_PCI_QUEUE_NUM);
-	PMD_INIT_LOG(DEBUG, "vq_size: %d nb_desc:%d", vq_size, nb_desc);
-	if (nb_desc == 0)
-		nb_desc = vq_size;
-	if (vq_size == 0) {
-		PMD_INIT_LOG(ERR, "%s: virtqueue does not exist", __func__);
-		return -EINVAL;
-	} else if (!rte_is_power_of_2(vq_size)) {
-		PMD_INIT_LOG(ERR, "%s: virtqueue size is not powerof 2", __func__);
-		return -EINVAL;
-	} else if (nb_desc != vq_size) {
-		PMD_INIT_LOG(ERR, "Warning: nb_desc(%d) is not equal to vq size (%d), fall to vq size",
-			nb_desc, vq_size);
-		nb_desc = vq_size;
-	}
-
-	if (queue_type == VTNET_RQ) {
-		snprintf(vq_name, sizeof(vq_name), "port%d_rvq%d",
-			dev->data->port_id, queue_idx);
-		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
-			vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
-	} else if (queue_type == VTNET_TQ) {
-		snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d",
-			dev->data->port_id, queue_idx);
-		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
-			vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
-	} else if (queue_type == VTNET_CQ) {
-		snprintf(vq_name, sizeof(vq_name), "port%d_cvq",
-			dev->data->port_id);
-		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
-			vq_size * sizeof(struct vq_desc_extra),
-			RTE_CACHE_LINE_SIZE);
-	}
-	if (vq == NULL) {
-		PMD_INIT_LOG(ERR, "%s: Can not allocate virtqueue", __func__);
-		return (-ENOMEM);
-	}
-
-	vq->hw = hw;
-	vq->port_id = dev->data->port_id;
-	vq->queue_id = queue_idx;
-	vq->vq_queue_index = vtpci_queue_idx;
-	vq->vq_nentries = vq_size;
-	vq->vq_free_cnt = vq_size;
-
-	/*
-	 * Reserve a memzone for vring elements
-	 */
-	size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
-	vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
-	PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d", size, vq->vq_ring_size);
-
-	mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
-		socket_id, 0, VIRTIO_PCI_VRING_ALIGN);
-	if (mz == NULL) {
-		rte_free(vq);
-		return -ENOMEM;
-	}
-
-	/*
-	 * Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
-	 * and only accepts 32 bit page frame number.
-	 * Check if the allocated physical memory exceeds 16TB.
-	 */
-	if ((mz->phys_addr + vq->vq_ring_size - 1) >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
-		PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
-		rte_free(vq);
-		return -ENOMEM;
-	}
-
-	memset(mz->addr, 0, sizeof(mz->len));
-	vq->mz = mz;
-	vq->vq_ring_mem = mz->phys_addr;
-	vq->vq_ring_virt_mem = mz->addr;
-	PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem:      0x%"PRIx64, (uint64_t)mz->phys_addr);
-	PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%"PRIx64, (uint64_t)mz->addr);
-	vq->virtio_net_hdr_mz  = NULL;
-	vq->virtio_net_hdr_mem = 0;
-
-	if (queue_type == VTNET_TQ) {
-		/*
-		 * For each xmit packet, allocate a virtio_net_hdr
-		 */
-		snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone",
-			dev->data->port_id, queue_idx);
-		vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
-			vq_size * hw->vtnet_hdr_size,
-			socket_id, 0, RTE_CACHE_LINE_SIZE);
-		if (vq->virtio_net_hdr_mz == NULL) {
-			rte_free(vq);
-			return -ENOMEM;
-		}
-		vq->virtio_net_hdr_mem =
-			vq->virtio_net_hdr_mz->phys_addr;
-		memset(vq->virtio_net_hdr_mz->addr, 0,
-			vq_size * hw->vtnet_hdr_size);
-	} else if (queue_type == VTNET_CQ) {
-		/* Allocate a page for control vq command, data and status */
-		snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone",
-			dev->data->port_id);
-		vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
-			PAGE_SIZE, socket_id, 0, RTE_CACHE_LINE_SIZE);
-		if (vq->virtio_net_hdr_mz == NULL) {
-			rte_free(vq);
-			return -ENOMEM;
-		}
-		vq->virtio_net_hdr_mem =
-			vq->virtio_net_hdr_mz->phys_addr;
-		memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);
-	}
-
-	/*
-	 * Set guest physical address of the virtqueue
-	 * in VIRTIO_PCI_QUEUE_PFN config register of device
-	 */
-	VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_QUEUE_PFN,
-			mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
-	*pvq = vq;
-	return 0;
-}
-
-static int
-virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx,
-		uint32_t socket_id)
-{
-	struct virtqueue *vq;
-	uint16_t nb_desc = 0;
-	int ret;
-	struct virtio_hw *hw = dev->data->dev_private;
-
-	PMD_INIT_FUNC_TRACE();
-	ret = virtio_dev_queue_setup(dev, VTNET_CQ, VTNET_SQ_CQ_QUEUE_IDX,
-			vtpci_queue_idx, nb_desc, socket_id, &vq);
-
-	if (ret < 0) {
-		PMD_INIT_LOG(ERR, "control vq initialization failed");
-		return ret;
-	}
-
-	hw->cvq = vq;
-	return 0;
-}
-
-static void
-virtio_dev_close(struct rte_eth_dev *dev)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct rte_pci_device *pci_dev = dev->pci_dev;
-
-	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
-
-	/* reset the NIC */
-	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
-		vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
-	vtpci_reset(hw);
-	hw->started = 0;
-	virtio_dev_free_mbufs(dev);
-}
-
-static void
-virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct virtio_pmd_ctrl ctrl;
-	int dlen[1];
-	int ret;
-
-	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
-	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
-	ctrl.data[0] = 1;
-	dlen[0] = 1;
-
-	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
-
-	if (ret)
-		PMD_INIT_LOG(ERR, "Failed to enable promisc");
-}
-
-static void
-virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct virtio_pmd_ctrl ctrl;
-	int dlen[1];
-	int ret;
-
-	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
-	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
-	ctrl.data[0] = 0;
-	dlen[0] = 1;
-
-	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
-
-	if (ret)
-		PMD_INIT_LOG(ERR, "Failed to disable promisc");
-}
-
-static void
-virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct virtio_pmd_ctrl ctrl;
-	int dlen[1];
-	int ret;
-
-	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
-	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
-	ctrl.data[0] = 1;
-	dlen[0] = 1;
-
-	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
-
-	if (ret)
-		PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
-}
-
-static void
-virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct virtio_pmd_ctrl ctrl;
-	int dlen[1];
-	int ret;
-
-	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
-	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
-	ctrl.data[0] = 0;
-	dlen[0] = 1;
-
-	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
-
-	if (ret)
-		PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
-}
-
-/*
- * dev_ops for virtio, bare necessities for basic operation
- */
-static const struct eth_dev_ops virtio_eth_dev_ops = {
-	.dev_configure           = virtio_dev_configure,
-	.dev_start               = virtio_dev_start,
-	.dev_stop                = virtio_dev_stop,
-	.dev_close               = virtio_dev_close,
-	.promiscuous_enable      = virtio_dev_promiscuous_enable,
-	.promiscuous_disable     = virtio_dev_promiscuous_disable,
-	.allmulticast_enable     = virtio_dev_allmulticast_enable,
-	.allmulticast_disable    = virtio_dev_allmulticast_disable,
-
-	.dev_infos_get           = virtio_dev_info_get,
-	.stats_get               = virtio_dev_stats_get,
-	.stats_reset             = virtio_dev_stats_reset,
-	.link_update             = virtio_dev_link_update,
-	.rx_queue_setup          = virtio_dev_rx_queue_setup,
-	/* meaningfull only to multiple queue */
-	.rx_queue_release        = virtio_dev_rx_queue_release,
-	.tx_queue_setup          = virtio_dev_tx_queue_setup,
-	/* meaningfull only to multiple queue */
-	.tx_queue_release        = virtio_dev_tx_queue_release,
-	/* collect stats per queue */
-	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
-	.vlan_filter_set         = virtio_vlan_filter_set,
-	.mac_addr_add            = virtio_mac_addr_add,
-	.mac_addr_remove         = virtio_mac_addr_remove,
-	.mac_addr_set            = virtio_mac_addr_set,
-};
-
-static inline int
-virtio_dev_atomic_read_link_status(struct rte_eth_dev *dev,
-				struct rte_eth_link *link)
-{
-	struct rte_eth_link *dst = link;
-	struct rte_eth_link *src = &(dev->data->dev_link);
-
-	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-			*(uint64_t *)src) == 0)
-		return -1;
-
-	return 0;
-}
-
-/**
- * Atomically writes the link status information into global
- * structure rte_eth_dev.
- *
- * @param dev
- *   - Pointer to the structure rte_eth_dev to read from.
- *   - Pointer to the buffer to be saved with the link status.
- *
- * @return
- *   - On success, zero.
- *   - On failure, negative value.
- */
-static inline int
-virtio_dev_atomic_write_link_status(struct rte_eth_dev *dev,
-		struct rte_eth_link *link)
-{
-	struct rte_eth_link *dst = &(dev->data->dev_link);
-	struct rte_eth_link *src = link;
-
-	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-					*(uint64_t *)src) == 0)
-		return -1;
-
-	return 0;
-}
-
-static void
-virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
-{
-	unsigned i;
-
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		const struct virtqueue *txvq = dev->data->tx_queues[i];
-		if (txvq == NULL)
-			continue;
-
-		stats->opackets += txvq->packets;
-		stats->obytes += txvq->bytes;
-		stats->oerrors += txvq->errors;
-
-		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
-			stats->q_opackets[i] = txvq->packets;
-			stats->q_obytes[i] = txvq->bytes;
-		}
-	}
-
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		const struct virtqueue *rxvq = dev->data->rx_queues[i];
-		if (rxvq == NULL)
-			continue;
-
-		stats->ipackets += rxvq->packets;
-		stats->ibytes += rxvq->bytes;
-		stats->ierrors += rxvq->errors;
-
-		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
-			stats->q_ipackets[i] = rxvq->packets;
-			stats->q_ibytes[i] = rxvq->bytes;
-		}
-	}
-
-	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
-}
-
-static void
-virtio_dev_stats_reset(struct rte_eth_dev *dev)
-{
-	unsigned int i;
-
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		struct virtqueue *txvq = dev->data->tx_queues[i];
-		if (txvq == NULL)
-			continue;
-
-		txvq->packets = 0;
-		txvq->bytes = 0;
-		txvq->errors = 0;
-	}
-
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct virtqueue *rxvq = dev->data->rx_queues[i];
-		if (rxvq == NULL)
-			continue;
-
-		rxvq->packets = 0;
-		rxvq->bytes = 0;
-		rxvq->errors = 0;
-	}
-
-	dev->data->rx_mbuf_alloc_failed = 0;
-}
-
-static void
-virtio_set_hwaddr(struct virtio_hw *hw)
-{
-	vtpci_write_dev_config(hw,
-			offsetof(struct virtio_net_config, mac),
-			&hw->mac_addr, ETHER_ADDR_LEN);
-}
-
-static void
-virtio_get_hwaddr(struct virtio_hw *hw)
-{
-	if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) {
-		vtpci_read_dev_config(hw,
-			offsetof(struct virtio_net_config, mac),
-			&hw->mac_addr, ETHER_ADDR_LEN);
-	} else {
-		eth_random_addr(&hw->mac_addr[0]);
-		virtio_set_hwaddr(hw);
-	}
-}
-
-static int
-virtio_mac_table_set(struct virtio_hw *hw,
-		     const struct virtio_net_ctrl_mac *uc,
-		     const struct virtio_net_ctrl_mac *mc)
-{
-	struct virtio_pmd_ctrl ctrl;
-	int err, len[2];
-
-	ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
-	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
-
-	len[0] = uc->entries * ETHER_ADDR_LEN + sizeof(uc->entries);
-	memcpy(ctrl.data, uc, len[0]);
-
-	len[1] = mc->entries * ETHER_ADDR_LEN + sizeof(mc->entries);
-	memcpy(ctrl.data + len[0], mc, len[1]);
-
-	err = virtio_send_command(hw->cvq, &ctrl, len, 2);
-	if (err != 0)
-		PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
-
-	return err;
-}
-
-static void
-virtio_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
-		    uint32_t index, uint32_t vmdq __rte_unused)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	const struct ether_addr *addrs = dev->data->mac_addrs;
-	unsigned int i;
-	struct virtio_net_ctrl_mac *uc, *mc;
-
-	if (index >= VIRTIO_MAX_MAC_ADDRS) {
-		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
-		return;
-	}
-
-	uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
-	uc->entries = 0;
-	mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
-	mc->entries = 0;
-
-	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
-		const struct ether_addr *addr
-			= (i == index) ? mac_addr : addrs + i;
-		struct virtio_net_ctrl_mac *tbl
-			= is_multicast_ether_addr(addr) ? mc : uc;
-
-		memcpy(&tbl->macs[tbl->entries++], addr, ETHER_ADDR_LEN);
-	}
-
-	virtio_mac_table_set(hw, uc, mc);
-}
-
-static void
-virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct ether_addr *addrs = dev->data->mac_addrs;
-	struct virtio_net_ctrl_mac *uc, *mc;
-	unsigned int i;
-
-	if (index >= VIRTIO_MAX_MAC_ADDRS) {
-		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
-		return;
-	}
-
-	uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
-	uc->entries = 0;
-	mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
-	mc->entries = 0;
-
-	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
-		struct virtio_net_ctrl_mac *tbl;
-
-		if (i == index || is_zero_ether_addr(addrs + i))
-			continue;
-
-		tbl = is_multicast_ether_addr(addrs + i) ? mc : uc;
-		memcpy(&tbl->macs[tbl->entries++], addrs + i, ETHER_ADDR_LEN);
-	}
-
-	virtio_mac_table_set(hw, uc, mc);
-}
-
-static void
-virtio_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-
-	memcpy(hw->mac_addr, mac_addr, ETHER_ADDR_LEN);
-
-	/* Use atomic update if available */
-	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
-		struct virtio_pmd_ctrl ctrl;
-		int len = ETHER_ADDR_LEN;
-
-		ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
-		ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
-
-		memcpy(ctrl.data, mac_addr, ETHER_ADDR_LEN);
-		virtio_send_command(hw->cvq, &ctrl, &len, 1);
-	} else if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC))
-		virtio_set_hwaddr(hw);
-}
-
-static int
-virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct virtio_pmd_ctrl ctrl;
-	int len;
-
-	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
-		return -ENOTSUP;
-
-	ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
-	ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
-	memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
-	len = sizeof(vlan_id);
-
-	return virtio_send_command(hw->cvq, &ctrl, &len, 1);
-}
-
-static void
-virtio_negotiate_features(struct virtio_hw *hw)
-{
-	uint32_t host_features, mask;
-
-	/* checksum offload not implemented */
-	mask = VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
-
-	/* TSO and LRO are only available when their corresponding
-	 * checksum offload feature is also negotiated.
-	 */
-	mask |= VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_ECN;
-	mask |= VIRTIO_NET_F_GUEST_TSO4 | VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN;
-	mask |= VTNET_LRO_FEATURES;
-
-	/* not negotiating INDIRECT descriptor table support */
-	mask |= VIRTIO_RING_F_INDIRECT_DESC;
-
-	/* Prepare guest_features: feature that driver wants to support */
-	hw->guest_features = VTNET_FEATURES & ~mask;
-	PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %x",
-		hw->guest_features);
-
-	/* Read device(host) feature bits */
-	host_features = VIRTIO_READ_REG_4(hw, VIRTIO_PCI_HOST_FEATURES);
-	PMD_INIT_LOG(DEBUG, "host_features before negotiate = %x",
-		host_features);
-
-	/*
-	 * Negotiate features: Subset of device feature bits are written back
-	 * guest feature bits.
-	 */
-	hw->guest_features = vtpci_negotiate_features(hw, host_features);
-	PMD_INIT_LOG(DEBUG, "features after negotiate = %x",
-		hw->guest_features);
-}
-
-#ifdef RTE_EXEC_ENV_LINUXAPP
-static int
-parse_sysfs_value(const char *filename, unsigned long *val)
-{
-	FILE *f;
-	char buf[BUFSIZ];
-	char *end = NULL;
-
-	f = fopen(filename, "r");
-	if (f == NULL) {
-		PMD_INIT_LOG(ERR, "%s(): cannot open sysfs value %s",
-			     __func__, filename);
-		return -1;
-	}
-
-	if (fgets(buf, sizeof(buf), f) == NULL) {
-		PMD_INIT_LOG(ERR, "%s(): cannot read sysfs value %s",
-			     __func__, filename);
-		fclose(f);
-		return -1;
-	}
-	*val = strtoul(buf, &end, 0);
-	if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
-		PMD_INIT_LOG(ERR, "%s(): cannot parse sysfs value %s",
-			     __func__, filename);
-		fclose(f);
-		return -1;
-	}
-	fclose(f);
-	return 0;
-}
-
-static int get_uio_dev(struct rte_pci_addr *loc, char *buf, unsigned int buflen,
-			unsigned int *uio_num)
-{
-	struct dirent *e;
-	DIR *dir;
-	char dirname[PATH_MAX];
-
-	/* depending on kernel version, uio can be located in uio/uioX
-	 * or uio:uioX */
-	snprintf(dirname, sizeof(dirname),
-		     SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio",
-		     loc->domain, loc->bus, loc->devid, loc->function);
-	dir = opendir(dirname);
-	if (dir == NULL) {
-		/* retry with the parent directory */
-		snprintf(dirname, sizeof(dirname),
-			     SYSFS_PCI_DEVICES "/" PCI_PRI_FMT,
-			     loc->domain, loc->bus, loc->devid, loc->function);
-		dir = opendir(dirname);
-
-		if (dir == NULL) {
-			PMD_INIT_LOG(ERR, "Cannot opendir %s", dirname);
-			return -1;
-		}
-	}
-
-	/* take the first file starting with "uio" */
-	while ((e = readdir(dir)) != NULL) {
-		/* format could be uio%d ...*/
-		int shortprefix_len = sizeof("uio") - 1;
-		/* ... or uio:uio%d */
-		int longprefix_len = sizeof("uio:uio") - 1;
-		char *endptr;
-
-		if (strncmp(e->d_name, "uio", 3) != 0)
-			continue;
-
-		/* first try uio%d */
-		errno = 0;
-		*uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10);
-		if (errno == 0 && endptr != (e->d_name + shortprefix_len)) {
-			snprintf(buf, buflen, "%s/uio%u", dirname, *uio_num);
-			break;
-		}
-
-		/* then try uio:uio%d */
-		errno = 0;
-		*uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10);
-		if (errno == 0 && endptr != (e->d_name + longprefix_len)) {
-			snprintf(buf, buflen, "%s/uio:uio%u", dirname,
-				     *uio_num);
-			break;
-		}
-	}
-	closedir(dir);
-
-	/* No uio resource found */
-	if (e == NULL) {
-		PMD_INIT_LOG(ERR, "Could not find uio resource");
-		return -1;
-	}
-
-	return 0;
-}
-
-static int
-virtio_has_msix(const struct rte_pci_addr *loc)
-{
-	DIR *d;
-	char dirname[PATH_MAX];
-
-	snprintf(dirname, sizeof(dirname),
-		     SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/msi_irqs",
-		     loc->domain, loc->bus, loc->devid, loc->function);
-
-	d = opendir(dirname);
-	if (d)
-		closedir(d);
-
-	return (d != NULL);
-}
-
-/* Extract I/O port numbers from sysfs */
-static int virtio_resource_init_by_uio(struct rte_pci_device *pci_dev)
-{
-	char dirname[PATH_MAX];
-	char filename[PATH_MAX];
-	unsigned long start, size;
-	unsigned int uio_num;
-
-	if (get_uio_dev(&pci_dev->addr, dirname, sizeof(dirname), &uio_num) < 0)
-		return -1;
-
-	/* get portio size */
-	snprintf(filename, sizeof(filename),
-		     "%s/portio/port0/size", dirname);
-	if (parse_sysfs_value(filename, &size) < 0) {
-		PMD_INIT_LOG(ERR, "%s(): cannot parse size",
-			     __func__);
-		return -1;
-	}
-
-	/* get portio start */
-	snprintf(filename, sizeof(filename),
-		 "%s/portio/port0/start", dirname);
-	if (parse_sysfs_value(filename, &start) < 0) {
-		PMD_INIT_LOG(ERR, "%s(): cannot parse portio start",
-			     __func__);
-		return -1;
-	}
-	pci_dev->mem_resource[0].addr = (void *)(uintptr_t)start;
-	pci_dev->mem_resource[0].len =  (uint64_t)size;
-	PMD_INIT_LOG(DEBUG,
-		     "PCI Port IO found start=0x%lx with size=0x%lx",
-		     start, size);
-
-	/* save fd */
-	memset(dirname, 0, sizeof(dirname));
-	snprintf(dirname, sizeof(dirname), "/dev/uio%u", uio_num);
-	pci_dev->intr_handle.fd = open(dirname, O_RDWR);
-	if (pci_dev->intr_handle.fd < 0) {
-		PMD_INIT_LOG(ERR, "Cannot open %s: %s\n",
-			dirname, strerror(errno));
-		return -1;
-	}
-
-	pci_dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
-	pci_dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
-
-	return 0;
-}
-
-/* Extract port I/O numbers from proc/ioports */
-static int virtio_resource_init_by_ioports(struct rte_pci_device *pci_dev)
-{
-	uint16_t start, end;
-	int size;
-	FILE *fp;
-	char *line = NULL;
-	char pci_id[16];
-	int found = 0;
-	size_t linesz;
-
-	snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
-		 pci_dev->addr.domain,
-		 pci_dev->addr.bus,
-		 pci_dev->addr.devid,
-		 pci_dev->addr.function);
-
-	fp = fopen("/proc/ioports", "r");
-	if (fp == NULL) {
-		PMD_INIT_LOG(ERR, "%s(): can't open ioports", __func__);
-		return -1;
-	}
-
-	while (getdelim(&line, &linesz, '\n', fp) > 0) {
-		char *ptr = line;
-		char *left;
-		int n;
-
-		n = strcspn(ptr, ":");
-		ptr[n] = 0;
-		left = &ptr[n+1];
-
-		while (*left && isspace(*left))
-			left++;
-
-		if (!strncmp(left, pci_id, strlen(pci_id))) {
-			found = 1;
-
-			while (*ptr && isspace(*ptr))
-				ptr++;
-
-			sscanf(ptr, "%04hx-%04hx", &start, &end);
-			size = end - start + 1;
-
-			break;
-		}
-	}
-
-	free(line);
-	fclose(fp);
-
-	if (!found)
-		return -1;
-
-	pci_dev->mem_resource[0].addr = (void *)(uintptr_t)(uint32_t)start;
-	pci_dev->mem_resource[0].len =  (uint64_t)size;
-	PMD_INIT_LOG(DEBUG,
-		"PCI Port IO found start=0x%x with size=0x%x",
-		start, size);
-
-	/* can't support lsc interrupt without uio */
-	pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
-
-	return 0;
-}
-
-/* Extract I/O port numbers from sysfs */
-static int virtio_resource_init(struct rte_pci_device *pci_dev)
-{
-	if (virtio_resource_init_by_uio(pci_dev) == 0)
-		return 0;
-	else
-		return virtio_resource_init_by_ioports(pci_dev);
-}
-
-#else
-static int
-virtio_has_msix(const struct rte_pci_addr *loc __rte_unused)
-{
-	/* nic_uio does not enable interrupts, return 0 (false). */
-	return 0;
-}
-
-static int virtio_resource_init(struct rte_pci_device *pci_dev __rte_unused)
-{
-	/* no setup required */
-	return 0;
-}
-#endif
-
-/*
- * Process Virtio Config changed interrupt and call the callback
- * if link state changed.
- */
-static void
-virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
-			 void *param)
-{
-	struct rte_eth_dev *dev = param;
-	struct virtio_hw *hw = dev->data->dev_private;
-	uint8_t isr;
-
-	/* Read interrupt status which clears interrupt */
-	isr = vtpci_isr(hw);
-	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
-
-	if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
-		PMD_DRV_LOG(ERR, "interrupt enable failed");
-
-	if (isr & VIRTIO_PCI_ISR_CONFIG) {
-		if (virtio_dev_link_update(dev, 0) == 0)
-			_rte_eth_dev_callback_process(dev,
-						      RTE_ETH_EVENT_INTR_LSC);
-	}
-
-}
-
-static void
-rx_func_get(struct rte_eth_dev *eth_dev)
-{
-	struct virtio_hw *hw = eth_dev->data->dev_private;
-	if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF))
-		eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
-	else
-		eth_dev->rx_pkt_burst = &virtio_recv_pkts;
-}
-
-/*
- * This function is based on probe() function in virtio_pci.c
- * It returns 0 on success.
- */
-static int
-eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
-{
-	struct virtio_hw *hw = eth_dev->data->dev_private;
-	struct virtio_net_config *config;
-	struct virtio_net_config local_config;
-	uint32_t offset_conf = sizeof(config->mac);
-	struct rte_pci_device *pci_dev;
-
-	RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr));
-
-	eth_dev->dev_ops = &virtio_eth_dev_ops;
-	eth_dev->tx_pkt_burst = &virtio_xmit_pkts;
-
-	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
-		rx_func_get(eth_dev);
-		return 0;
-	}
-
-	/* Allocate memory for storing MAC addresses */
-	eth_dev->data->mac_addrs = rte_zmalloc("virtio", ETHER_ADDR_LEN, 0);
-	if (eth_dev->data->mac_addrs == NULL) {
-		PMD_INIT_LOG(ERR,
-			"Failed to allocate %d bytes needed to store MAC addresses",
-			ETHER_ADDR_LEN);
-		return -ENOMEM;
-	}
-
-	pci_dev = eth_dev->pci_dev;
-	if (virtio_resource_init(pci_dev) < 0)
-		return -1;
-
-	hw->use_msix = virtio_has_msix(&pci_dev->addr);
-	hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr;
-
-	/* Reset the device although not necessary at startup */
-	vtpci_reset(hw);
-
-	/* Tell the host we've noticed this device. */
-	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
-
-	/* Tell the host we've known how to drive the device. */
-	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
-	virtio_negotiate_features(hw);
-
-	rx_func_get(eth_dev);
-
-	/* Setting up rx_header size for the device */
-	if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF))
-		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
-	else
-		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
-
-	/* Copy the permanent MAC address to: virtio_hw */
-	virtio_get_hwaddr(hw);
-	ether_addr_copy((struct ether_addr *) hw->mac_addr,
-			&eth_dev->data->mac_addrs[0]);
-	PMD_INIT_LOG(DEBUG,
-		     "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
-		     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
-		     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
-
-	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
-		config = &local_config;
-
-		if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
-			offset_conf += sizeof(config->status);
-		} else {
-			PMD_INIT_LOG(DEBUG,
-				     "VIRTIO_NET_F_STATUS is not supported");
-			config->status = 0;
-		}
-
-		if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
-			offset_conf += sizeof(config->max_virtqueue_pairs);
-		} else {
-			PMD_INIT_LOG(DEBUG,
-				     "VIRTIO_NET_F_MQ is not supported");
-			config->max_virtqueue_pairs = 1;
-		}
-
-		vtpci_read_dev_config(hw, 0, (uint8_t *)config, offset_conf);
-
-		hw->max_rx_queues =
-			(VIRTIO_MAX_RX_QUEUES < config->max_virtqueue_pairs) ?
-			VIRTIO_MAX_RX_QUEUES : config->max_virtqueue_pairs;
-		hw->max_tx_queues =
-			(VIRTIO_MAX_TX_QUEUES < config->max_virtqueue_pairs) ?
-			VIRTIO_MAX_TX_QUEUES : config->max_virtqueue_pairs;
-
-		virtio_dev_cq_queue_setup(eth_dev,
-					config->max_virtqueue_pairs * 2,
-					SOCKET_ID_ANY);
-
-		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
-				config->max_virtqueue_pairs);
-		PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
-		PMD_INIT_LOG(DEBUG,
-				"PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
-				config->mac[0], config->mac[1],
-				config->mac[2], config->mac[3],
-				config->mac[4], config->mac[5]);
-	} else {
-		hw->max_rx_queues = 1;
-		hw->max_tx_queues = 1;
-	}
-
-	eth_dev->data->nb_rx_queues = hw->max_rx_queues;
-	eth_dev->data->nb_tx_queues = hw->max_tx_queues;
-
-	PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d   hw->max_tx_queues=%d",
-			hw->max_rx_queues, hw->max_tx_queues);
-	PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
-			eth_dev->data->port_id, pci_dev->id.vendor_id,
-			pci_dev->id.device_id);
-
-	/* Setup interrupt callback  */
-	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
-		rte_intr_callback_register(&pci_dev->intr_handle,
-				   virtio_interrupt_handler, eth_dev);
-
-	virtio_dev_cq_start(eth_dev);
-
-	return 0;
-}
-
-static struct eth_driver rte_virtio_pmd = {
-	{
-		.name = "rte_virtio_pmd",
-		.id_table = pci_id_virtio_map,
-	},
-	.eth_dev_init = eth_virtio_dev_init,
-	.dev_private_size = sizeof(struct virtio_hw),
-};
-
-/*
- * Driver initialization routine.
- * Invoked once at EAL init time.
- * Register itself as the [Poll Mode] Driver of PCI virtio devices.
- * Returns 0 on success.
- */
-static int
-rte_virtio_pmd_init(const char *name __rte_unused,
-		    const char *param __rte_unused)
-{
-	if (rte_eal_iopl_init() != 0) {
-		PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD");
-		return -1;
-	}
-
-	rte_eth_driver_register(&rte_virtio_pmd);
-	return 0;
-}
-
-/*
- * Only 1 queue is supported, no queue release related operation
- */
-static void
-virtio_dev_rx_queue_release(__rte_unused void *rxq)
-{
-}
-
-static void
-virtio_dev_tx_queue_release(__rte_unused void *txq)
-{
-}
-
-/*
- * Configure virtio device
- * It returns 0 on success.
- */
-static int
-virtio_dev_configure(struct rte_eth_dev *dev)
-{
-	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct rte_pci_device *pci_dev = dev->pci_dev;
-
-	PMD_INIT_LOG(DEBUG, "configure");
-
-	if (rxmode->hw_ip_checksum) {
-		PMD_DRV_LOG(ERR, "HW IP checksum not supported");
-		return (-EINVAL);
-	}
-
-	hw->vlan_strip = rxmode->hw_vlan_strip;
-
-	if (rxmode->hw_vlan_filter
-	    && !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
-		PMD_DRV_LOG(NOTICE,
-			    "vlan filtering not available on this host");
-		return -ENOTSUP;
-	}
-
-	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
-		if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) {
-			PMD_DRV_LOG(ERR, "failed to set config vector");
-			return -EBUSY;
-		}
-
-	return 0;
-}
-
-
-static int
-virtio_dev_start(struct rte_eth_dev *dev)
-{
-	uint16_t nb_queues, i;
-	struct virtio_hw *hw = dev->data->dev_private;
-	struct rte_pci_device *pci_dev = dev->pci_dev;
-
-	/* check if lsc interrupt feature is enabled */
-	if ((dev->data->dev_conf.intr_conf.lsc) &&
-		(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
-		if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
-			PMD_DRV_LOG(ERR, "link status not supported by host");
-			return -ENOTSUP;
-		}
-
-		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
-			PMD_DRV_LOG(ERR, "interrupt enable failed");
-			return -EIO;
-		}
-	}
-
-	/* Initialize Link state */
-	virtio_dev_link_update(dev, 0);
-
-	/* On restart after stop do not touch queues */
-	if (hw->started)
-		return 0;
-
-	/* Do final configuration before rx/tx engine starts */
-	virtio_dev_rxtx_start(dev);
-	vtpci_reinit_complete(hw);
-
-	hw->started = 1;
-
-	/*Notify the backend
-	 *Otherwise the tap backend might already stop its queue due to fullness.
-	 *vhost backend will have no chance to be waked up
-	 */
-	nb_queues = dev->data->nb_rx_queues;
-	if (nb_queues > 1) {
-		if (virtio_set_multiple_queues(dev, nb_queues) != 0)
-			return -EINVAL;
-	}
-
-	PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
-
-	for (i = 0; i < nb_queues; i++)
-		virtqueue_notify(dev->data->rx_queues[i]);
-
-	PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
-
-	for (i = 0; i < dev->data->nb_rx_queues; i++)
-		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
-
-	for (i = 0; i < dev->data->nb_tx_queues; i++)
-		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
-
-	return 0;
-}
-
-static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
-{
-	struct rte_mbuf *buf;
-	int i, mbuf_num = 0;
-
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		PMD_INIT_LOG(DEBUG,
-			     "Before freeing rxq[%d] used and unused buf", i);
-		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
-
-		while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(
-					dev->data->rx_queues[i])) != NULL) {
-			rte_pktmbuf_free(buf);
-			mbuf_num++;
-		}
-
-		PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num);
-		PMD_INIT_LOG(DEBUG,
-			     "After freeing rxq[%d] used and unused buf", i);
-		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
-	}
-
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		PMD_INIT_LOG(DEBUG,
-			     "Before freeing txq[%d] used and unused bufs",
-			     i);
-		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
-
-		mbuf_num = 0;
-		while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(
-					dev->data->tx_queues[i])) != NULL) {
-			rte_pktmbuf_free(buf);
-
-			mbuf_num++;
-		}
-
-		PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num);
-		PMD_INIT_LOG(DEBUG,
-			     "After freeing txq[%d] used and unused buf", i);
-		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
-	}
-}
-
-/*
- * Stop device: disable interrupt and mark link down
- */
-static void
-virtio_dev_stop(struct rte_eth_dev *dev)
-{
-	struct rte_eth_link link;
-
-	PMD_INIT_LOG(DEBUG, "stop");
-
-	if (dev->data->dev_conf.intr_conf.lsc)
-		rte_intr_disable(&dev->pci_dev->intr_handle);
-
-	memset(&link, 0, sizeof(link));
-	virtio_dev_atomic_write_link_status(dev, &link);
-}
-
-static int
-virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
-{
-	struct rte_eth_link link, old;
-	uint16_t status;
-	struct virtio_hw *hw = dev->data->dev_private;
-	memset(&link, 0, sizeof(link));
-	virtio_dev_atomic_read_link_status(dev, &link);
-	old = link;
-	link.link_duplex = FULL_DUPLEX;
-	link.link_speed  = SPEED_10G;
-
-	if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
-		PMD_INIT_LOG(DEBUG, "Get link status from hw");
-		vtpci_read_dev_config(hw,
-				offsetof(struct virtio_net_config, status),
-				&status, sizeof(status));
-		if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
-			link.link_status = 0;
-			PMD_INIT_LOG(DEBUG, "Port %d is down",
-				     dev->data->port_id);
-		} else {
-			link.link_status = 1;
-			PMD_INIT_LOG(DEBUG, "Port %d is up",
-				     dev->data->port_id);
-		}
-	} else {
-		link.link_status = 1;   /* Link up */
-	}
-	virtio_dev_atomic_write_link_status(dev, &link);
-
-	return (old.link_status == link.link_status) ? -1 : 0;
-}
-
-static void
-virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-
-	dev_info->driver_name = dev->driver->pci_drv.name;
-	dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
-	dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
-	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
-	dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
-	dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
-	dev_info->default_txconf = (struct rte_eth_txconf) {
-		.txq_flags = ETH_TXQ_FLAGS_NOOFFLOADS
-	};
-}
-
-/*
- * It enables testpmd to collect per queue stats.
- */
-static int
-virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
-__rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
-__rte_unused uint8_t is_rx)
-{
-	return 0;
-}
-
-static struct rte_driver rte_virtio_driver = {
-	.type = PMD_PDEV,
-	.init = rte_virtio_pmd_init,
-};
-
-PMD_REGISTER_DRIVER(rte_virtio_driver);
diff --git a/lib/librte_pmd_virtio/virtio_ethdev.h b/lib/librte_pmd_virtio/virtio_ethdev.h
deleted file mode 100644
index e6d4533..0000000
--- a/lib/librte_pmd_virtio/virtio_ethdev.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VIRTIO_ETHDEV_H_
-#define _VIRTIO_ETHDEV_H_
-
-#include <stdint.h>
-
-#include "virtio_pci.h"
-
-#define SPEED_10	10
-#define SPEED_100	100
-#define SPEED_1000	1000
-#define SPEED_10G	10000
-#define HALF_DUPLEX	1
-#define FULL_DUPLEX	2
-
-#ifndef PAGE_SIZE
-#define PAGE_SIZE 4096
-#endif
-
-#define VIRTIO_MAX_RX_QUEUES 128
-#define VIRTIO_MAX_TX_QUEUES 128
-#define VIRTIO_MAX_MAC_ADDRS 64
-#define VIRTIO_MIN_RX_BUFSIZE 64
-#define VIRTIO_MAX_RX_PKTLEN  9728
-
-/* Features desired/implemented by this driver. */
-#define VTNET_FEATURES \
-	(VIRTIO_NET_F_MAC       | \
-	VIRTIO_NET_F_STATUS     | \
-	VIRTIO_NET_F_MQ         | \
-	VIRTIO_NET_F_CTRL_MAC_ADDR | \
-	VIRTIO_NET_F_CTRL_VQ    | \
-	VIRTIO_NET_F_CTRL_RX    | \
-	VIRTIO_NET_F_CTRL_VLAN  | \
-	VIRTIO_NET_F_CSUM       | \
-	VIRTIO_NET_F_HOST_TSO4  | \
-	VIRTIO_NET_F_HOST_TSO6  | \
-	VIRTIO_NET_F_HOST_ECN   | \
-	VIRTIO_NET_F_GUEST_CSUM | \
-	VIRTIO_NET_F_GUEST_TSO4 | \
-	VIRTIO_NET_F_GUEST_TSO6 | \
-	VIRTIO_NET_F_GUEST_ECN  | \
-	VIRTIO_NET_F_MRG_RXBUF  | \
-	VIRTIO_RING_F_INDIRECT_DESC)
-
-/*
- * CQ function prototype
- */
-void virtio_dev_cq_start(struct rte_eth_dev *dev);
-
-/*
- * RX/TX function prototypes
- */
-void virtio_dev_rxtx_start(struct rte_eth_dev *dev);
-
-int virtio_dev_queue_setup(struct rte_eth_dev *dev,
-			int queue_type,
-			uint16_t queue_idx,
-			uint16_t  vtpci_queue_idx,
-			uint16_t nb_desc,
-			unsigned int socket_id,
-			struct virtqueue **pvq);
-
-int  virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
-		uint16_t nb_rx_desc, unsigned int socket_id,
-		const struct rte_eth_rxconf *rx_conf,
-		struct rte_mempool *mb_pool);
-
-int  virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
-		uint16_t nb_tx_desc, unsigned int socket_id,
-		const struct rte_eth_txconf *tx_conf);
-
-uint16_t virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
-		uint16_t nb_pkts);
-
-uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
-		uint16_t nb_pkts);
-
-uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
-		uint16_t nb_pkts);
-
-
-/*
- * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
- * frames larger than 1514 bytes. We do not yet support software LRO
- * via tcp_lro_rx().
- */
-#define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \
-			    VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN)
-
-
-#endif /* _VIRTIO_ETHDEV_H_ */
diff --git a/lib/librte_pmd_virtio/virtio_logs.h b/lib/librte_pmd_virtio/virtio_logs.h
deleted file mode 100644
index d6c33f7..0000000
--- a/lib/librte_pmd_virtio/virtio_logs.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VIRTIO_LOGS_H_
-#define _VIRTIO_LOGS_H_
-
-#include <rte_log.h>
-
-#ifdef RTE_LIBRTE_VIRTIO_DEBUG_INIT
-#define PMD_INIT_LOG(level, fmt, args...) \
-	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
-#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
-#else
-#define PMD_INIT_LOG(level, fmt, args...) do { } while(0)
-#define PMD_INIT_FUNC_TRACE() do { } while(0)
-#endif
-
-#ifdef RTE_LIBRTE_VIRTIO_DEBUG_RX
-#define PMD_RX_LOG(level, fmt, args...) \
-	RTE_LOG(level, PMD, "%s() rx: " fmt , __func__, ## args)
-#else
-#define PMD_RX_LOG(level, fmt, args...) do { } while(0)
-#endif
-
-#ifdef RTE_LIBRTE_VIRTIO_DEBUG_TX
-#define PMD_TX_LOG(level, fmt, args...) \
-	RTE_LOG(level, PMD, "%s() tx: " fmt , __func__, ## args)
-#else
-#define PMD_TX_LOG(level, fmt, args...) do { } while(0)
-#endif
-
-
-#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DRIVER
-#define PMD_DRV_LOG(level, fmt, args...) \
-	RTE_LOG(level, PMD, "%s(): " fmt , __func__, ## args)
-#else
-#define PMD_DRV_LOG(level, fmt, args...) do { } while(0)
-#endif
-
-#endif /* _VIRTIO_LOGS_H_ */
diff --git a/lib/librte_pmd_virtio/virtio_pci.c b/lib/librte_pmd_virtio/virtio_pci.c
deleted file mode 100644
index 2245bec..0000000
--- a/lib/librte_pmd_virtio/virtio_pci.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include <stdint.h>
-
-#include "virtio_pci.h"
-#include "virtio_logs.h"
-
-static uint8_t vtpci_get_status(struct virtio_hw *);
-
-void
-vtpci_read_dev_config(struct virtio_hw *hw, uint64_t offset,
-		void *dst, int length)
-{
-	uint64_t off;
-	uint8_t *d;
-	int size;
-
-	off = VIRTIO_PCI_CONFIG(hw) + offset;
-	for (d = dst; length > 0; d += size, off += size, length -= size) {
-		if (length >= 4) {
-			size = 4;
-			*(uint32_t *)d = VIRTIO_READ_REG_4(hw, off);
-		} else if (length >= 2) {
-			size = 2;
-			*(uint16_t *)d = VIRTIO_READ_REG_2(hw, off);
-		} else {
-			size = 1;
-			*d = VIRTIO_READ_REG_1(hw, off);
-		}
-	}
-}
-
-void
-vtpci_write_dev_config(struct virtio_hw *hw, uint64_t offset,
-		void *src, int length)
-{
-	uint64_t off;
-	uint8_t *s;
-	int size;
-
-	off = VIRTIO_PCI_CONFIG(hw) + offset;
-	for (s = src; length > 0; s += size, off += size, length -= size) {
-		if (length >= 4) {
-			size = 4;
-			VIRTIO_WRITE_REG_4(hw, off, *(uint32_t *)s);
-		} else if (length >= 2) {
-			size = 2;
-			VIRTIO_WRITE_REG_2(hw, off, *(uint16_t *)s);
-		} else {
-			size = 1;
-			VIRTIO_WRITE_REG_1(hw, off, *s);
-		}
-	}
-}
-
-uint32_t
-vtpci_negotiate_features(struct virtio_hw *hw, uint32_t host_features)
-{
-	uint32_t features;
-	/*
-	 * Limit negotiated features to what the driver, virtqueue, and
-	 * host all support.
-	 */
-	features = host_features & hw->guest_features;
-
-	VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_GUEST_FEATURES, features);
-	return features;
-}
-
-
-void
-vtpci_reset(struct virtio_hw *hw)
-{
-	/*
-	 * Setting the status to RESET sets the host device to
-	 * the original, uninitialized state.
-	 */
-	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
-	vtpci_get_status(hw);
-}
-
-void
-vtpci_reinit_complete(struct virtio_hw *hw)
-{
-	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
-}
-
-static uint8_t
-vtpci_get_status(struct virtio_hw *hw)
-{
-	return VIRTIO_READ_REG_1(hw, VIRTIO_PCI_STATUS);
-}
-
-void
-vtpci_set_status(struct virtio_hw *hw, uint8_t status)
-{
-	if (status != VIRTIO_CONFIG_STATUS_RESET)
-		status = (uint8_t)(status | vtpci_get_status(hw));
-
-	VIRTIO_WRITE_REG_1(hw, VIRTIO_PCI_STATUS, status);
-}
-
-uint8_t
-vtpci_isr(struct virtio_hw *hw)
-{
-
-	return VIRTIO_READ_REG_1(hw, VIRTIO_PCI_ISR);
-}
-
-
-/* Enable one vector (0) for Link State Intrerrupt */
-uint16_t
-vtpci_irq_config(struct virtio_hw *hw, uint16_t vec)
-{
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_MSI_CONFIG_VECTOR, vec);
-	return VIRTIO_READ_REG_2(hw, VIRTIO_MSI_CONFIG_VECTOR);
-}
diff --git a/lib/librte_pmd_virtio/virtio_pci.h b/lib/librte_pmd_virtio/virtio_pci.h
deleted file mode 100644
index 64d9c34..0000000
--- a/lib/librte_pmd_virtio/virtio_pci.h
+++ /dev/null
@@ -1,270 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VIRTIO_PCI_H_
-#define _VIRTIO_PCI_H_
-
-#include <stdint.h>
-
-#ifdef __FreeBSD__
-#include <sys/types.h>
-#include <machine/cpufunc.h>
-#else
-#include <sys/io.h>
-#endif
-
-#include <rte_ethdev.h>
-
-struct virtqueue;
-
-/* VirtIO PCI vendor/device ID. */
-#define VIRTIO_PCI_VENDORID     0x1AF4
-#define VIRTIO_PCI_DEVICEID_MIN 0x1000
-#define VIRTIO_PCI_DEVICEID_MAX 0x103F
-
-/* VirtIO ABI version, this must match exactly. */
-#define VIRTIO_PCI_ABI_VERSION 0
-
-/*
- * VirtIO Header, located in BAR 0.
- */
-#define VIRTIO_PCI_HOST_FEATURES  0  /* host's supported features (32bit, RO)*/
-#define VIRTIO_PCI_GUEST_FEATURES 4  /* guest's supported features (32, RW) */
-#define VIRTIO_PCI_QUEUE_PFN      8  /* physical address of VQ (32, RW) */
-#define VIRTIO_PCI_QUEUE_NUM      12 /* number of ring entries (16, RO) */
-#define VIRTIO_PCI_QUEUE_SEL      14 /* current VQ selection (16, RW) */
-#define VIRTIO_PCI_QUEUE_NOTIFY   16 /* notify host regarding VQ (16, RW) */
-#define VIRTIO_PCI_STATUS         18 /* device status register (8, RW) */
-#define VIRTIO_PCI_ISR		  19 /* interrupt status register, reading
-				      * also clears the register (8, RO) */
-/* Only if MSIX is enabled: */
-#define VIRTIO_MSI_CONFIG_VECTOR  20 /* configuration change vector (16, RW) */
-#define VIRTIO_MSI_QUEUE_VECTOR	  22 /* vector for selected VQ notifications
-				      (16, RW) */
-
-/* The bit of the ISR which indicates a device has an interrupt. */
-#define VIRTIO_PCI_ISR_INTR   0x1
-/* The bit of the ISR which indicates a device configuration change. */
-#define VIRTIO_PCI_ISR_CONFIG 0x2
-/* Vector value used to disable MSI for queue. */
-#define VIRTIO_MSI_NO_VECTOR 0xFFFF
-
-/* VirtIO device IDs. */
-#define VIRTIO_ID_NETWORK  0x01
-#define VIRTIO_ID_BLOCK    0x02
-#define VIRTIO_ID_CONSOLE  0x03
-#define VIRTIO_ID_ENTROPY  0x04
-#define VIRTIO_ID_BALLOON  0x05
-#define VIRTIO_ID_IOMEMORY 0x06
-#define VIRTIO_ID_9P       0x09
-
-/* Status byte for guest to report progress. */
-#define VIRTIO_CONFIG_STATUS_RESET     0x00
-#define VIRTIO_CONFIG_STATUS_ACK       0x01
-#define VIRTIO_CONFIG_STATUS_DRIVER    0x02
-#define VIRTIO_CONFIG_STATUS_DRIVER_OK 0x04
-#define VIRTIO_CONFIG_STATUS_FAILED    0x80
-
-/*
- * Generate interrupt when the virtqueue ring is
- * completely used, even if we've suppressed them.
- */
-#define VIRTIO_F_NOTIFY_ON_EMPTY (1 << 24)
-
-/*
- * The guest should never negotiate this feature; it
- * is used to detect faulty drivers.
- */
-#define VIRTIO_F_BAD_FEATURE (1 << 30)
-
-/*
- * Some VirtIO feature bits (currently bits 28 through 31) are
- * reserved for the transport being used (eg. virtio_ring), the
- * rest are per-device feature bits.
- */
-#define VIRTIO_TRANSPORT_F_START 28
-#define VIRTIO_TRANSPORT_F_END   32
-
-/*
- * Each virtqueue indirect descriptor list must be physically contiguous.
- * To allow us to malloc(9) each list individually, limit the number
- * supported to what will fit in one page. With 4KB pages, this is a limit
- * of 256 descriptors. If there is ever a need for more, we can switch to
- * contigmalloc(9) for the larger allocations, similar to what
- * bus_dmamem_alloc(9) does.
- *
- * Note the sizeof(struct vring_desc) is 16 bytes.
- */
-#define VIRTIO_MAX_INDIRECT ((int) (PAGE_SIZE / 16))
-
-/* The feature bitmap for virtio net */
-#define VIRTIO_NET_F_CSUM       0x00001 /* Host handles pkts w/ partial csum */
-#define VIRTIO_NET_F_GUEST_CSUM 0x00002 /* Guest handles pkts w/ partial csum*/
-#define VIRTIO_NET_F_MAC        0x00020 /* Host has given MAC address. */
-#define VIRTIO_NET_F_GSO        0x00040 /* Host handles pkts w/ any GSO type */
-#define VIRTIO_NET_F_GUEST_TSO4 0x00080 /* Guest can handle TSOv4 in. */
-#define VIRTIO_NET_F_GUEST_TSO6 0x00100 /* Guest can handle TSOv6 in. */
-#define VIRTIO_NET_F_GUEST_ECN  0x00200 /* Guest can handle TSO[6] w/ ECN in.*/
-#define VIRTIO_NET_F_GUEST_UFO  0x00400 /* Guest can handle UFO in. */
-#define VIRTIO_NET_F_HOST_TSO4  0x00800 /* Host can handle TSOv4 in. */
-#define VIRTIO_NET_F_HOST_TSO6  0x01000 /* Host can handle TSOv6 in. */
-#define VIRTIO_NET_F_HOST_ECN   0x02000 /* Host can handle TSO[6] w/ ECN in. */
-#define VIRTIO_NET_F_HOST_UFO   0x04000 /* Host can handle UFO in. */
-#define VIRTIO_NET_F_MRG_RXBUF  0x08000 /* Host can merge receive buffers. */
-#define VIRTIO_NET_F_STATUS     0x10000 /* virtio_net_config.status available*/
-#define VIRTIO_NET_F_CTRL_VQ    0x20000 /* Control channel available */
-#define VIRTIO_NET_F_CTRL_RX    0x40000 /* Control channel RX mode support */
-#define VIRTIO_NET_F_CTRL_VLAN  0x80000 /* Control channel VLAN filtering */
-#define VIRTIO_NET_F_CTRL_RX_EXTRA  0x100000 /* Extra RX mode control support */
-#define VIRTIO_RING_F_INDIRECT_DESC 0x10000000 /* Support for indirect buffer descriptors. */
-/* The guest publishes the used index for which it expects an interrupt
- * at the end of the avail ring. Host should ignore the avail->flags field.
- * The host publishes the avail index for which it expects a kick
- * at the end of the used ring. Guest should ignore the used->flags field.
- */
-#define VIRTIO_RING_F_EVENT_IDX 0x20000000
-
-#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */
-
-/*
- * Maximum number of virtqueues per device.
- */
-#define VIRTIO_MAX_VIRTQUEUES 8
-
-struct virtio_hw {
-	struct virtqueue *cvq;
-	uint32_t    io_base;
-	uint32_t    guest_features;
-	uint32_t    max_tx_queues;
-	uint32_t    max_rx_queues;
-	uint16_t    vtnet_hdr_size;
-	uint8_t	    vlan_strip;
-	uint8_t	    use_msix;
-	uint8_t     started;
-	uint8_t     mac_addr[ETHER_ADDR_LEN];
-};
-
-/*
- * This structure is just a reference to read
- * net device specific config space; it just a chodu structure
- *
- */
-struct virtio_net_config {
-	/* The config defining mac address (if VIRTIO_NET_F_MAC) */
-	uint8_t    mac[ETHER_ADDR_LEN];
-	/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
-	uint16_t   status;
-	uint16_t   max_virtqueue_pairs;
-} __attribute__((packed));
-
-/*
- * The remaining space is defined by each driver as the per-driver
- * configuration space.
- */
-#define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20)
-
-/*
- * How many bits to shift physical queue address written to QUEUE_PFN.
- * 12 is historical, and due to x86 page size.
- */
-#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12
-
-/* The alignment to use between consumer and producer parts of vring. */
-#define VIRTIO_PCI_VRING_ALIGN 4096
-
-#ifdef __FreeBSD__
-
-static inline void
-outb_p(unsigned char data, unsigned int port)
-{
-
-	outb(port, (u_char)data);
-}
-
-static inline void
-outw_p(unsigned short data, unsigned int port)
-{
-	outw(port, (u_short)data);
-}
-
-static inline void
-outl_p(unsigned int data, unsigned int port)
-{
-	outl(port, (u_int)data);
-}
-#endif
-
-#define VIRTIO_PCI_REG_ADDR(hw, reg) \
-	(unsigned short)((hw)->io_base + (reg))
-
-#define VIRTIO_READ_REG_1(hw, reg) \
-	inb((VIRTIO_PCI_REG_ADDR((hw), (reg))))
-#define VIRTIO_WRITE_REG_1(hw, reg, value) \
-	outb_p((unsigned char)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
-
-#define VIRTIO_READ_REG_2(hw, reg) \
-	inw((VIRTIO_PCI_REG_ADDR((hw), (reg))))
-#define VIRTIO_WRITE_REG_2(hw, reg, value) \
-	outw_p((unsigned short)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
-
-#define VIRTIO_READ_REG_4(hw, reg) \
-	inl((VIRTIO_PCI_REG_ADDR((hw), (reg))))
-#define VIRTIO_WRITE_REG_4(hw, reg, value) \
-	outl_p((unsigned int)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
-
-static inline int
-vtpci_with_feature(struct virtio_hw *hw, uint32_t feature)
-{
-	return (hw->guest_features & feature) != 0;
-}
-
-/*
- * Function declaration from virtio_pci.c
- */
-void vtpci_reset(struct virtio_hw *);
-
-void vtpci_reinit_complete(struct virtio_hw *);
-
-void vtpci_set_status(struct virtio_hw *, uint8_t);
-
-uint32_t vtpci_negotiate_features(struct virtio_hw *, uint32_t);
-
-void vtpci_write_dev_config(struct virtio_hw *, uint64_t, void *, int);
-
-void vtpci_read_dev_config(struct virtio_hw *, uint64_t, void *, int);
-
-uint8_t vtpci_isr(struct virtio_hw *);
-
-uint16_t vtpci_irq_config(struct virtio_hw *, uint16_t);
-
-#endif /* _VIRTIO_PCI_H_ */
diff --git a/lib/librte_pmd_virtio/virtio_ring.h b/lib/librte_pmd_virtio/virtio_ring.h
deleted file mode 100644
index a16c499..0000000
--- a/lib/librte_pmd_virtio/virtio_ring.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VIRTIO_RING_H_
-#define _VIRTIO_RING_H_
-
-#include <stdint.h>
-
-#include <rte_common.h>
-
-/* This marks a buffer as continuing via the next field. */
-#define VRING_DESC_F_NEXT       1
-/* This marks a buffer as write-only (otherwise read-only). */
-#define VRING_DESC_F_WRITE      2
-/* This means the buffer contains a list of buffer descriptors. */
-#define VRING_DESC_F_INDIRECT   4
-
-/* The Host uses this in used->flags to advise the Guest: don't kick me
- * when you add a buffer.  It's unreliable, so it's simply an
- * optimization.  Guest will still kick if it's out of buffers. */
-#define VRING_USED_F_NO_NOTIFY  1
-/* The Guest uses this in avail->flags to advise the Host: don't
- * interrupt me when you consume a buffer.  It's unreliable, so it's
- * simply an optimization.  */
-#define VRING_AVAIL_F_NO_INTERRUPT  1
-
-/* VirtIO ring descriptors: 16 bytes.
- * These can chain together via "next". */
-struct vring_desc {
-	uint64_t addr;  /*  Address (guest-physical). */
-	uint32_t len;   /* Length. */
-	uint16_t flags; /* The flags as indicated above. */
-	uint16_t next;  /* We chain unused descriptors via this. */
-};
-
-struct vring_avail {
-	uint16_t flags;
-	uint16_t idx;
-	uint16_t ring[0];
-};
-
-/* id is a 16bit index. uint32_t is used here for ids for padding reasons. */
-struct vring_used_elem {
-	/* Index of start of used descriptor chain. */
-	uint32_t id;
-	/* Total length of the descriptor chain which was written to. */
-	uint32_t len;
-};
-
-struct vring_used {
-	uint16_t flags;
-	uint16_t idx;
-	struct vring_used_elem ring[0];
-};
-
-struct vring {
-	unsigned int num;
-	struct vring_desc  *desc;
-	struct vring_avail *avail;
-	struct vring_used  *used;
-};
-
-/* The standard layout for the ring is a continuous chunk of memory which
- * looks like this.  We assume num is a power of 2.
- *
- * struct vring {
- *      // The actual descriptors (16 bytes each)
- *      struct vring_desc desc[num];
- *
- *      // A ring of available descriptor heads with free-running index.
- *      __u16 avail_flags;
- *      __u16 avail_idx;
- *      __u16 available[num];
- *      __u16 used_event_idx;
- *
- *      // Padding to the next align boundary.
- *      char pad[];
- *
- *      // A ring of used descriptor heads with free-running index.
- *      __u16 used_flags;
- *      __u16 used_idx;
- *      struct vring_used_elem used[num];
- *      __u16 avail_event_idx;
- * };
- *
- * NOTE: for VirtIO PCI, align is 4096.
- */
-
-/*
- * We publish the used event index at the end of the available ring, and vice
- * versa. They are at the end for backwards compatibility.
- */
-#define vring_used_event(vr)  ((vr)->avail->ring[(vr)->num])
-#define vring_avail_event(vr) (*(uint16_t *)&(vr)->used->ring[(vr)->num])
-
-static inline int
-vring_size(unsigned int num, unsigned long align)
-{
-	int size;
-
-	size = num * sizeof(struct vring_desc);
-	size += sizeof(struct vring_avail) + (num * sizeof(uint16_t));
-	size = RTE_ALIGN_CEIL(size, align);
-	size += sizeof(struct vring_used) +
-		(num * sizeof(struct vring_used_elem));
-	return size;
-}
-
-static inline void
-vring_init(struct vring *vr, unsigned int num, uint8_t *p,
-	unsigned long align)
-{
-	vr->num = num;
-	vr->desc = (struct vring_desc *) p;
-	vr->avail = (struct vring_avail *) (p +
-		num * sizeof(struct vring_desc));
-	vr->used = (void *)
-		RTE_ALIGN_CEIL((uintptr_t)(&vr->avail->ring[num]), align);
-}
-
-/*
- * The following is used with VIRTIO_RING_F_EVENT_IDX.
- * Assuming a given event_idx value from the other size, if we have
- * just incremented index from old to new_idx, should we trigger an
- * event?
- */
-static inline int
-vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
-{
-	return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
-}
-
-#endif /* _VIRTIO_RING_H_ */
diff --git a/lib/librte_pmd_virtio/virtio_rxtx.c b/lib/librte_pmd_virtio/virtio_rxtx.c
deleted file mode 100644
index 3ff275c..0000000
--- a/lib/librte_pmd_virtio/virtio_rxtx.c
+++ /dev/null
@@ -1,815 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-
-#include <rte_cycles.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_branch_prediction.h>
-#include <rte_mempool.h>
-#include <rte_malloc.h>
-#include <rte_mbuf.h>
-#include <rte_ether.h>
-#include <rte_ethdev.h>
-#include <rte_prefetch.h>
-#include <rte_string_fns.h>
-#include <rte_errno.h>
-#include <rte_byteorder.h>
-
-#include "virtio_logs.h"
-#include "virtio_ethdev.h"
-#include "virtqueue.h"
-
-#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
-#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
-#else
-#define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
-#endif
-
-static void
-vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
-{
-	struct vring_desc *dp, *dp_tail;
-	struct vq_desc_extra *dxp;
-	uint16_t desc_idx_last = desc_idx;
-
-	dp  = &vq->vq_ring.desc[desc_idx];
-	dxp = &vq->vq_descx[desc_idx];
-	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
-	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
-		while (dp->flags & VRING_DESC_F_NEXT) {
-			desc_idx_last = dp->next;
-			dp = &vq->vq_ring.desc[dp->next];
-		}
-	}
-	dxp->ndescs = 0;
-
-	/*
-	 * We must append the existing free chain, if any, to the end of
-	 * newly freed chain. If the virtqueue was completely used, then
-	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
-	 */
-	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
-		vq->vq_desc_head_idx = desc_idx;
-	} else {
-		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
-		dp_tail->next = desc_idx;
-	}
-
-	vq->vq_desc_tail_idx = desc_idx_last;
-	dp->next = VQ_RING_DESC_CHAIN_END;
-}
-
-static uint16_t
-virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
-			   uint32_t *len, uint16_t num)
-{
-	struct vring_used_elem *uep;
-	struct rte_mbuf *cookie;
-	uint16_t used_idx, desc_idx;
-	uint16_t i;
-
-	/*  Caller does the check */
-	for (i = 0; i < num ; i++) {
-		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
-		uep = &vq->vq_ring.used->ring[used_idx];
-		desc_idx = (uint16_t) uep->id;
-		len[i] = uep->len;
-		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
-
-		if (unlikely(cookie == NULL)) {
-			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n",
-				vq->vq_used_cons_idx);
-			break;
-		}
-
-		rte_prefetch0(cookie);
-		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
-		rx_pkts[i]  = cookie;
-		vq->vq_used_cons_idx++;
-		vq_ring_free_chain(vq, desc_idx);
-		vq->vq_descx[desc_idx].cookie = NULL;
-	}
-
-	return i;
-}
-
-#ifndef DEFAULT_TX_FREE_THRESH
-#define DEFAULT_TX_FREE_THRESH 32
-#endif
-
-/* Cleanup from completed transmits. */
-static void
-virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
-{
-	uint16_t i, used_idx, desc_idx;
-	for (i = 0; i < num; i++) {
-		struct vring_used_elem *uep;
-		struct vq_desc_extra *dxp;
-
-		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
-		uep = &vq->vq_ring.used->ring[used_idx];
-
-		desc_idx = (uint16_t) uep->id;
-		dxp = &vq->vq_descx[desc_idx];
-		vq->vq_used_cons_idx++;
-		vq_ring_free_chain(vq, desc_idx);
-
-		if (dxp->cookie != NULL) {
-			rte_pktmbuf_free(dxp->cookie);
-			dxp->cookie = NULL;
-		}
-	}
-}
-
-
-static inline int
-virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
-{
-	struct vq_desc_extra *dxp;
-	struct virtio_hw *hw = vq->hw;
-	struct vring_desc *start_dp;
-	uint16_t needed = 1;
-	uint16_t head_idx, idx;
-
-	if (unlikely(vq->vq_free_cnt == 0))
-		return -ENOSPC;
-	if (unlikely(vq->vq_free_cnt < needed))
-		return -EMSGSIZE;
-
-	head_idx = vq->vq_desc_head_idx;
-	if (unlikely(head_idx >= vq->vq_nentries))
-		return -EFAULT;
-
-	idx = head_idx;
-	dxp = &vq->vq_descx[idx];
-	dxp->cookie = (void *)cookie;
-	dxp->ndescs = needed;
-
-	start_dp = vq->vq_ring.desc;
-	start_dp[idx].addr =
-		(uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM
-		- hw->vtnet_hdr_size);
-	start_dp[idx].len =
-		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
-	start_dp[idx].flags =  VRING_DESC_F_WRITE;
-	idx = start_dp[idx].next;
-	vq->vq_desc_head_idx = idx;
-	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
-		vq->vq_desc_tail_idx = idx;
-	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
-	vq_update_avail_ring(vq, head_idx);
-
-	return 0;
-}
-
-static int
-virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
-{
-	struct vq_desc_extra *dxp;
-	struct vring_desc *start_dp;
-	uint16_t seg_num = cookie->nb_segs;
-	uint16_t needed = 1 + seg_num;
-	uint16_t head_idx, idx;
-	uint16_t head_size = txvq->hw->vtnet_hdr_size;
-
-	if (unlikely(txvq->vq_free_cnt == 0))
-		return -ENOSPC;
-	if (unlikely(txvq->vq_free_cnt < needed))
-		return -EMSGSIZE;
-	head_idx = txvq->vq_desc_head_idx;
-	if (unlikely(head_idx >= txvq->vq_nentries))
-		return -EFAULT;
-
-	idx = head_idx;
-	dxp = &txvq->vq_descx[idx];
-	dxp->cookie = (void *)cookie;
-	dxp->ndescs = needed;
-
-	start_dp = txvq->vq_ring.desc;
-	start_dp[idx].addr =
-		txvq->virtio_net_hdr_mem + idx * head_size;
-	start_dp[idx].len = (uint32_t)head_size;
-	start_dp[idx].flags = VRING_DESC_F_NEXT;
-
-	for (; ((seg_num > 0) && (cookie != NULL)); seg_num--) {
-		idx = start_dp[idx].next;
-		start_dp[idx].addr  = RTE_MBUF_DATA_DMA_ADDR(cookie);
-		start_dp[idx].len   = cookie->data_len;
-		start_dp[idx].flags = VRING_DESC_F_NEXT;
-		cookie = cookie->next;
-	}
-
-	start_dp[idx].flags &= ~VRING_DESC_F_NEXT;
-	idx = start_dp[idx].next;
-	txvq->vq_desc_head_idx = idx;
-	if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
-		txvq->vq_desc_tail_idx = idx;
-	txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed);
-	vq_update_avail_ring(txvq, head_idx);
-
-	return 0;
-}
-
-static inline struct rte_mbuf *
-rte_rxmbuf_alloc(struct rte_mempool *mp)
-{
-	struct rte_mbuf *m;
-
-	m = __rte_mbuf_raw_alloc(mp);
-	__rte_mbuf_sanity_check_raw(m, 0);
-
-	return m;
-}
-
-static void
-virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
-{
-	struct rte_mbuf *m;
-	int i, nbufs, error, size = vq->vq_nentries;
-	struct vring *vr = &vq->vq_ring;
-	uint8_t *ring_mem = vq->vq_ring_virt_mem;
-
-	PMD_INIT_FUNC_TRACE();
-
-	/*
-	 * Reinitialise since virtio port might have been stopped and restarted
-	 */
-	memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
-	vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
-	vq->vq_used_cons_idx = 0;
-	vq->vq_desc_head_idx = 0;
-	vq->vq_avail_idx = 0;
-	vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
-	vq->vq_free_cnt = vq->vq_nentries;
-	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
-
-	/* Chain all the descriptors in the ring with an END */
-	for (i = 0; i < size - 1; i++)
-		vr->desc[i].next = (uint16_t)(i + 1);
-	vr->desc[i].next = VQ_RING_DESC_CHAIN_END;
-
-	/*
-	 * Disable device(host) interrupting guest
-	 */
-	virtqueue_disable_intr(vq);
-
-	/* Only rx virtqueue needs mbufs to be allocated at initialization */
-	if (queue_type == VTNET_RQ) {
-		if (vq->mpool == NULL)
-			rte_exit(EXIT_FAILURE,
-			"Cannot allocate initial mbufs for rx virtqueue");
-
-		/* Allocate blank mbufs for the each rx descriptor */
-		nbufs = 0;
-		error = ENOSPC;
-		while (!virtqueue_full(vq)) {
-			m = rte_rxmbuf_alloc(vq->mpool);
-			if (m == NULL)
-				break;
-
-			/******************************************
-			*         Enqueue allocated buffers        *
-			*******************************************/
-			error = virtqueue_enqueue_recv_refill(vq, m);
-
-			if (error) {
-				rte_pktmbuf_free(m);
-				break;
-			}
-			nbufs++;
-		}
-
-		vq_update_avail_idx(vq);
-
-		PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
-
-		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
-			vq->vq_queue_index);
-		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
-			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
-	} else if (queue_type == VTNET_TQ) {
-		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
-			vq->vq_queue_index);
-		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
-			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
-	} else {
-		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
-			vq->vq_queue_index);
-		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
-			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
-	}
-}
-
-void
-virtio_dev_cq_start(struct rte_eth_dev *dev)
-{
-	struct virtio_hw *hw = dev->data->dev_private;
-
-	if (hw->cvq) {
-		virtio_dev_vring_start(hw->cvq, VTNET_CQ);
-		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq);
-	}
-}
-
-void
-virtio_dev_rxtx_start(struct rte_eth_dev *dev)
-{
-	/*
-	 * Start receive and transmit vrings
-	 * -	Setup vring structure for all queues
-	 * -	Initialize descriptor for the rx vring
-	 * -	Allocate blank mbufs for the each rx descriptor
-	 *
-	 */
-	int i;
-
-	PMD_INIT_FUNC_TRACE();
-
-	/* Start rx vring. */
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		virtio_dev_vring_start(dev->data->rx_queues[i], VTNET_RQ);
-		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
-	}
-
-	/* Start tx vring. */
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		virtio_dev_vring_start(dev->data->tx_queues[i], VTNET_TQ);
-		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
-	}
-}
-
-int
-virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
-			uint16_t queue_idx,
-			uint16_t nb_desc,
-			unsigned int socket_id,
-			__rte_unused const struct rte_eth_rxconf *rx_conf,
-			struct rte_mempool *mp)
-{
-	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
-	struct virtqueue *vq;
-	int ret;
-
-	PMD_INIT_FUNC_TRACE();
-	ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx,
-			nb_desc, socket_id, &vq);
-	if (ret < 0) {
-		PMD_INIT_LOG(ERR, "tvq initialization failed");
-		return ret;
-	}
-
-	/* Create mempool for rx mbuf allocation */
-	vq->mpool = mp;
-
-	dev->data->rx_queues[queue_idx] = vq;
-	return 0;
-}
-
-/*
- * struct rte_eth_dev *dev: Used to update dev
- * uint16_t nb_desc: Defaults to values read from config space
- * unsigned int socket_id: Used to allocate memzone
- * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
- * uint16_t queue_idx: Just used as an index in dev txq list
- */
-int
-virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
-			uint16_t queue_idx,
-			uint16_t nb_desc,
-			unsigned int socket_id,
-			const struct rte_eth_txconf *tx_conf)
-{
-	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
-	struct virtqueue *vq;
-	uint16_t tx_free_thresh;
-	int ret;
-
-	PMD_INIT_FUNC_TRACE();
-
-	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS)
-	    != ETH_TXQ_FLAGS_NOXSUMS) {
-		PMD_INIT_LOG(ERR, "TX checksum offload not supported\n");
-		return -EINVAL;
-	}
-
-	ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
-			nb_desc, socket_id, &vq);
-	if (ret < 0) {
-		PMD_INIT_LOG(ERR, "rvq initialization failed");
-		return ret;
-	}
-
-	tx_free_thresh = tx_conf->tx_free_thresh;
-	if (tx_free_thresh == 0)
-		tx_free_thresh =
-			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
-
-	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
-		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
-			"number of TX entries minus 3 (%u)."
-			" (tx_free_thresh=%u port=%u queue=%u)\n",
-			vq->vq_nentries - 3,
-			tx_free_thresh, dev->data->port_id, queue_idx);
-		return -EINVAL;
-	}
-
-	vq->vq_free_thresh = tx_free_thresh;
-
-	dev->data->tx_queues[queue_idx] = vq;
-	return 0;
-}
-
-static void
-virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
-{
-	int error;
-	/*
-	 * Requeue the discarded mbuf. This should always be
-	 * successful since it was just dequeued.
-	 */
-	error = virtqueue_enqueue_recv_refill(vq, m);
-	if (unlikely(error)) {
-		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
-		rte_pktmbuf_free(m);
-	}
-}
-
-#define VIRTIO_MBUF_BURST_SZ 64
-#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
-uint16_t
-virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
-{
-	struct virtqueue *rxvq = rx_queue;
-	struct virtio_hw *hw;
-	struct rte_mbuf *rxm, *new_mbuf;
-	uint16_t nb_used, num, nb_rx;
-	uint32_t len[VIRTIO_MBUF_BURST_SZ];
-	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
-	int error;
-	uint32_t i, nb_enqueued;
-	const uint32_t hdr_size = sizeof(struct virtio_net_hdr);
-
-	nb_used = VIRTQUEUE_NUSED(rxvq);
-
-	virtio_rmb();
-
-	num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
-	num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
-	if (likely(num > DESC_PER_CACHELINE))
-		num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
-
-	if (num == 0)
-		return 0;
-
-	num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num);
-	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
-
-	hw = rxvq->hw;
-	nb_rx = 0;
-	nb_enqueued = 0;
-
-	for (i = 0; i < num ; i++) {
-		rxm = rcv_pkts[i];
-
-		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
-
-		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
-			PMD_RX_LOG(ERR, "Packet drop");
-			nb_enqueued++;
-			virtio_discard_rxbuf(rxvq, rxm);
-			rxvq->errors++;
-			continue;
-		}
-
-		rxm->port = rxvq->port_id;
-		rxm->data_off = RTE_PKTMBUF_HEADROOM;
-
-		rxm->nb_segs = 1;
-		rxm->next = NULL;
-		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
-		rxm->data_len = (uint16_t)(len[i] - hdr_size);
-
-		if (hw->vlan_strip)
-			rte_vlan_strip(rxm);
-
-		VIRTIO_DUMP_PACKET(rxm, rxm->data_len);
-
-		rx_pkts[nb_rx++] = rxm;
-		rxvq->bytes += rx_pkts[nb_rx - 1]->pkt_len;
-	}
-
-	rxvq->packets += nb_rx;
-
-	/* Allocate new mbuf for the used descriptor */
-	error = ENOSPC;
-	while (likely(!virtqueue_full(rxvq))) {
-		new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
-		if (unlikely(new_mbuf == NULL)) {
-			struct rte_eth_dev *dev
-				= &rte_eth_devices[rxvq->port_id];
-			dev->data->rx_mbuf_alloc_failed++;
-			break;
-		}
-		error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
-		if (unlikely(error)) {
-			rte_pktmbuf_free(new_mbuf);
-			break;
-		}
-		nb_enqueued++;
-	}
-
-	if (likely(nb_enqueued)) {
-		vq_update_avail_idx(rxvq);
-
-		if (unlikely(virtqueue_kick_prepare(rxvq))) {
-			virtqueue_notify(rxvq);
-			PMD_RX_LOG(DEBUG, "Notified\n");
-		}
-	}
-
-	return nb_rx;
-}
-
-uint16_t
-virtio_recv_mergeable_pkts(void *rx_queue,
-			struct rte_mbuf **rx_pkts,
-			uint16_t nb_pkts)
-{
-	struct virtqueue *rxvq = rx_queue;
-	struct virtio_hw *hw;
-	struct rte_mbuf *rxm, *new_mbuf;
-	uint16_t nb_used, num, nb_rx;
-	uint32_t len[VIRTIO_MBUF_BURST_SZ];
-	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
-	struct rte_mbuf *prev;
-	int error;
-	uint32_t i, nb_enqueued;
-	uint32_t seg_num;
-	uint16_t extra_idx;
-	uint32_t seg_res;
-	const uint32_t hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
-
-	nb_used = VIRTQUEUE_NUSED(rxvq);
-
-	virtio_rmb();
-
-	if (nb_used == 0)
-		return 0;
-
-	PMD_RX_LOG(DEBUG, "used:%d\n", nb_used);
-
-	hw = rxvq->hw;
-	nb_rx = 0;
-	i = 0;
-	nb_enqueued = 0;
-	seg_num = 0;
-	extra_idx = 0;
-	seg_res = 0;
-
-	while (i < nb_used) {
-		struct virtio_net_hdr_mrg_rxbuf *header;
-
-		if (nb_rx == nb_pkts)
-			break;
-
-		num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, 1);
-		if (num != 1)
-			continue;
-
-		i++;
-
-		PMD_RX_LOG(DEBUG, "dequeue:%d\n", num);
-		PMD_RX_LOG(DEBUG, "packet len:%d\n", len[0]);
-
-		rxm = rcv_pkts[0];
-
-		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
-			PMD_RX_LOG(ERR, "Packet drop\n");
-			nb_enqueued++;
-			virtio_discard_rxbuf(rxvq, rxm);
-			rxvq->errors++;
-			continue;
-		}
-
-		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
-			RTE_PKTMBUF_HEADROOM - hdr_size);
-		seg_num = header->num_buffers;
-
-		if (seg_num == 0)
-			seg_num = 1;
-
-		rxm->data_off = RTE_PKTMBUF_HEADROOM;
-		rxm->nb_segs = seg_num;
-		rxm->next = NULL;
-		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
-		rxm->data_len = (uint16_t)(len[0] - hdr_size);
-
-		rxm->port = rxvq->port_id;
-		rx_pkts[nb_rx] = rxm;
-		prev = rxm;
-
-		seg_res = seg_num - 1;
-
-		while (seg_res != 0) {
-			/*
-			 * Get extra segments for current uncompleted packet.
-			 */
-			uint16_t  rcv_cnt =
-				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
-			if (likely(VIRTQUEUE_NUSED(rxvq) >= rcv_cnt)) {
-				uint32_t rx_num =
-					virtqueue_dequeue_burst_rx(rxvq,
-					rcv_pkts, len, rcv_cnt);
-				i += rx_num;
-				rcv_cnt = rx_num;
-			} else {
-				PMD_RX_LOG(ERR,
-					"No enough segments for packet.\n");
-				nb_enqueued++;
-				virtio_discard_rxbuf(rxvq, rxm);
-				rxvq->errors++;
-				break;
-			}
-
-			extra_idx = 0;
-
-			while (extra_idx < rcv_cnt) {
-				rxm = rcv_pkts[extra_idx];
-
-				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
-				rxm->next = NULL;
-				rxm->pkt_len = (uint32_t)(len[extra_idx]);
-				rxm->data_len = (uint16_t)(len[extra_idx]);
-
-				if (prev)
-					prev->next = rxm;
-
-				prev = rxm;
-				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
-				extra_idx++;
-			};
-			seg_res -= rcv_cnt;
-		}
-
-		if (hw->vlan_strip)
-			rte_vlan_strip(rx_pkts[nb_rx]);
-
-		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
-			rx_pkts[nb_rx]->data_len);
-
-		rxvq->bytes += rx_pkts[nb_rx]->pkt_len;
-		nb_rx++;
-	}
-
-	rxvq->packets += nb_rx;
-
-	/* Allocate new mbuf for the used descriptor */
-	error = ENOSPC;
-	while (likely(!virtqueue_full(rxvq))) {
-		new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
-		if (unlikely(new_mbuf == NULL)) {
-			struct rte_eth_dev *dev
-				= &rte_eth_devices[rxvq->port_id];
-			dev->data->rx_mbuf_alloc_failed++;
-			break;
-		}
-		error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
-		if (unlikely(error)) {
-			rte_pktmbuf_free(new_mbuf);
-			break;
-		}
-		nb_enqueued++;
-	}
-
-	if (likely(nb_enqueued)) {
-		vq_update_avail_idx(rxvq);
-
-		if (unlikely(virtqueue_kick_prepare(rxvq))) {
-			virtqueue_notify(rxvq);
-			PMD_RX_LOG(DEBUG, "Notified");
-		}
-	}
-
-	return nb_rx;
-}
-
-uint16_t
-virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
-{
-	struct virtqueue *txvq = tx_queue;
-	struct rte_mbuf *txm;
-	uint16_t nb_used, nb_tx;
-	int error;
-
-	if (unlikely(nb_pkts < 1))
-		return nb_pkts;
-
-	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
-	nb_used = VIRTQUEUE_NUSED(txvq);
-
-	virtio_rmb();
-	if (likely(nb_used > txvq->vq_free_thresh))
-		virtio_xmit_cleanup(txvq, nb_used);
-
-	nb_tx = 0;
-
-	while (nb_tx < nb_pkts) {
-		/* Need one more descriptor for virtio header. */
-		int need = tx_pkts[nb_tx]->nb_segs - txvq->vq_free_cnt + 1;
-
-		/*Positive value indicates it need free vring descriptors */
-		if (unlikely(need > 0)) {
-			nb_used = VIRTQUEUE_NUSED(txvq);
-			virtio_rmb();
-			need = RTE_MIN(need, (int)nb_used);
-
-			virtio_xmit_cleanup(txvq, need);
-			need = (int)tx_pkts[nb_tx]->nb_segs -
-				txvq->vq_free_cnt + 1;
-		}
-
-		/*
-		 * Zero or negative value indicates it has enough free
-		 * descriptors to use for transmitting.
-		 */
-		if (likely(need <= 0)) {
-			txm = tx_pkts[nb_tx];
-
-			/* Do VLAN tag insertion */
-			if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
-				error = rte_vlan_insert(&txm);
-				if (unlikely(error)) {
-					rte_pktmbuf_free(txm);
-					++nb_tx;
-					continue;
-				}
-			}
-
-			/* Enqueue Packet buffers */
-			error = virtqueue_enqueue_xmit(txvq, txm);
-			if (unlikely(error)) {
-				if (error == ENOSPC)
-					PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0");
-				else if (error == EMSGSIZE)
-					PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1");
-				else
-					PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d", error);
-				break;
-			}
-			nb_tx++;
-			txvq->bytes += txm->pkt_len;
-		} else {
-			PMD_TX_LOG(ERR, "No free tx descriptors to transmit");
-			break;
-		}
-	}
-
-	txvq->packets += nb_tx;
-
-	if (likely(nb_tx)) {
-		vq_update_avail_idx(txvq);
-
-		if (unlikely(virtqueue_kick_prepare(txvq))) {
-			virtqueue_notify(txvq);
-			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
-		}
-	}
-
-	return nb_tx;
-}
diff --git a/lib/librte_pmd_virtio/virtqueue.c b/lib/librte_pmd_virtio/virtqueue.c
deleted file mode 100644
index 8a3005f..0000000
--- a/lib/librte_pmd_virtio/virtqueue.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include <stdint.h>
-
-#include <rte_mbuf.h>
-
-#include "virtqueue.h"
-#include "virtio_logs.h"
-#include "virtio_pci.h"
-
-void
-virtqueue_disable_intr(struct virtqueue *vq)
-{
-	/*
-	 * Set VRING_AVAIL_F_NO_INTERRUPT to hint host
-	 * not to interrupt when it consumes packets
-	 * Note: this is only considered a hint to the host
-	 */
-	vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
-}
-
-/*
- * Two types of mbuf to be cleaned:
- * 1) mbuf that has been consumed by backend but not used by virtio.
- * 2) mbuf that hasn't been consued by backend.
- */
-struct rte_mbuf *
-virtqueue_detatch_unused(struct virtqueue *vq)
-{
-	struct rte_mbuf *cookie;
-	int idx;
-
-	for (idx = 0; idx < vq->vq_nentries; idx++) {
-		if ((cookie = vq->vq_descx[idx].cookie) != NULL) {
-			vq->vq_descx[idx].cookie = NULL;
-			return cookie;
-		}
-	}
-	return NULL;
-}
diff --git a/lib/librte_pmd_virtio/virtqueue.h b/lib/librte_pmd_virtio/virtqueue.h
deleted file mode 100644
index 9d6079e..0000000
--- a/lib/librte_pmd_virtio/virtqueue.h
+++ /dev/null
@@ -1,325 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VIRTQUEUE_H_
-#define _VIRTQUEUE_H_
-
-#include <stdint.h>
-
-#include <rte_atomic.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_mempool.h>
-
-#include "virtio_pci.h"
-#include "virtio_ring.h"
-#include "virtio_logs.h"
-
-struct rte_mbuf;
-
-/*
- * Per virtio_config.h in Linux.
- *     For virtio_pci on SMP, we don't need to order with respect to MMIO
- *     accesses through relaxed memory I/O windows, so smp_mb() et al are
- *     sufficient.
- *
- * This driver is for virtio_pci on SMP and therefore can assume
- * weaker (compiler barriers)
- */
-#define virtio_mb()	rte_mb()
-#define virtio_rmb()	rte_compiler_barrier()
-#define virtio_wmb()	rte_compiler_barrier()
-
-#ifdef RTE_PMD_PACKET_PREFETCH
-#define rte_packet_prefetch(p)  rte_prefetch1(p)
-#else
-#define rte_packet_prefetch(p)  do {} while(0)
-#endif
-
-#define VIRTQUEUE_MAX_NAME_SZ 32
-
-#define RTE_MBUF_DATA_DMA_ADDR(mb) \
-	(uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
-
-#define VTNET_SQ_RQ_QUEUE_IDX 0
-#define VTNET_SQ_TQ_QUEUE_IDX 1
-#define VTNET_SQ_CQ_QUEUE_IDX 2
-
-enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 };
-/**
- * The maximum virtqueue size is 2^15. Use that value as the end of
- * descriptor chain terminator since it will never be a valid index
- * in the descriptor table. This is used to verify we are correctly
- * handling vq_free_cnt.
- */
-#define VQ_RING_DESC_CHAIN_END 32768
-
-/**
- * Control the RX mode, ie. promiscuous, allmulti, etc...
- * All commands require an "out" sg entry containing a 1 byte
- * state value, zero = disable, non-zero = enable.  Commands
- * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
- * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
- */
-#define VIRTIO_NET_CTRL_RX              0
-#define VIRTIO_NET_CTRL_RX_PROMISC      0
-#define VIRTIO_NET_CTRL_RX_ALLMULTI     1
-#define VIRTIO_NET_CTRL_RX_ALLUNI       2
-#define VIRTIO_NET_CTRL_RX_NOMULTI      3
-#define VIRTIO_NET_CTRL_RX_NOUNI        4
-#define VIRTIO_NET_CTRL_RX_NOBCAST      5
-
-/**
- * Control the MAC
- *
- * The MAC filter table is managed by the hypervisor, the guest should
- * assume the size is infinite.  Filtering should be considered
- * non-perfect, ie. based on hypervisor resources, the guest may
- * received packets from sources not specified in the filter list.
- *
- * In addition to the class/cmd header, the TABLE_SET command requires
- * two out scatterlists.  Each contains a 4 byte count of entries followed
- * by a concatenated byte stream of the ETH_ALEN MAC addresses.  The
- * first sg list contains unicast addresses, the second is for multicast.
- * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
- * is available.
- *
- * The ADDR_SET command requests one out scatterlist, it contains a
- * 6 bytes MAC address. This functionality is present if the
- * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
- */
-struct virtio_net_ctrl_mac {
-	uint32_t entries;
-	uint8_t macs[][ETHER_ADDR_LEN];
-} __attribute__((__packed__));
-
-#define VIRTIO_NET_CTRL_MAC    1
- #define VIRTIO_NET_CTRL_MAC_TABLE_SET        0
- #define VIRTIO_NET_CTRL_MAC_ADDR_SET         1
-
-/**
- * Control VLAN filtering
- *
- * The VLAN filter table is controlled via a simple ADD/DEL interface.
- * VLAN IDs not added may be filtered by the hypervisor.  Del is the
- * opposite of add.  Both commands expect an out entry containing a 2
- * byte VLAN ID.  VLAN filtering is available with the
- * VIRTIO_NET_F_CTRL_VLAN feature bit.
- */
-#define VIRTIO_NET_CTRL_VLAN     2
-#define VIRTIO_NET_CTRL_VLAN_ADD 0
-#define VIRTIO_NET_CTRL_VLAN_DEL 1
-
-struct virtio_net_ctrl_hdr {
-	uint8_t class;
-	uint8_t cmd;
-} __attribute__((packed));
-
-typedef uint8_t virtio_net_ctrl_ack;
-
-#define VIRTIO_NET_OK     0
-#define VIRTIO_NET_ERR    1
-
-#define VIRTIO_MAX_CTRL_DATA 2048
-
-struct virtio_pmd_ctrl {
-	struct virtio_net_ctrl_hdr hdr;
-	virtio_net_ctrl_ack status;
-	uint8_t data[VIRTIO_MAX_CTRL_DATA];
-};
-
-struct virtqueue {
-	struct virtio_hw         *hw;     /**< virtio_hw structure pointer. */
-	const struct rte_memzone *mz;     /**< mem zone to populate RX ring. */
-	const struct rte_memzone *virtio_net_hdr_mz; /**< memzone to populate hdr. */
-	struct rte_mempool       *mpool;  /**< mempool for mbuf allocation */
-	uint16_t    queue_id;             /**< DPDK queue index. */
-	uint8_t     port_id;              /**< Device port identifier. */
-	uint16_t    vq_queue_index;       /**< PCI queue index */
-
-	void        *vq_ring_virt_mem;    /**< linear address of vring*/
-	unsigned int vq_ring_size;
-	phys_addr_t vq_ring_mem;          /**< physical address of vring */
-
-	struct vring vq_ring;    /**< vring keeping desc, used and avail */
-	uint16_t    vq_free_cnt; /**< num of desc available */
-	uint16_t    vq_nentries; /**< vring desc numbers */
-	uint16_t    vq_free_thresh; /**< free threshold */
-	/**
-	 * Head of the free chain in the descriptor table. If
-	 * there are no free descriptors, this will be set to
-	 * VQ_RING_DESC_CHAIN_END.
-	 */
-	uint16_t  vq_desc_head_idx;
-	uint16_t  vq_desc_tail_idx;
-	/**
-	 * Last consumed descriptor in the used table,
-	 * trails vq_ring.used->idx.
-	 */
-	uint16_t vq_used_cons_idx;
-	uint16_t vq_avail_idx;
-	phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
-
-	/* Statistics */
-	uint64_t	packets;
-	uint64_t	bytes;
-	uint64_t	errors;
-
-	struct vq_desc_extra {
-		void              *cookie;
-		uint16_t          ndescs;
-	} vq_descx[0];
-};
-
-/* If multiqueue is provided by host, then we suppport it. */
-#ifndef VIRTIO_NET_F_MQ
-/* Device supports Receive Flow Steering */
-#define VIRTIO_NET_F_MQ 0x400000
-#define VIRTIO_NET_CTRL_MQ   4
-#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
-#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
-#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
-#endif
-#ifndef VIRTIO_NET_F_CTRL_MAC_ADDR
-#define VIRTIO_NET_F_CTRL_MAC_ADDR 0x800000
-#define VIRTIO_NET_CTRL_MAC_ADDR_SET         1
-#endif
-
-/**
- * This is the first element of the scatter-gather list.  If you don't
- * specify GSO or CSUM features, you can simply ignore the header.
- */
-struct virtio_net_hdr {
-#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1    /**< Use csum_start,csum_offset*/
-	uint8_t flags;
-#define VIRTIO_NET_HDR_GSO_NONE     0    /**< Not a GSO frame */
-#define VIRTIO_NET_HDR_GSO_TCPV4    1    /**< GSO frame, IPv4 TCP (TSO) */
-#define VIRTIO_NET_HDR_GSO_UDP      3    /**< GSO frame, IPv4 UDP (UFO) */
-#define VIRTIO_NET_HDR_GSO_TCPV6    4    /**< GSO frame, IPv6 TCP */
-#define VIRTIO_NET_HDR_GSO_ECN      0x80 /**< TCP has ECN set */
-	uint8_t gso_type;
-	uint16_t hdr_len;     /**< Ethernet + IP + tcp/udp hdrs */
-	uint16_t gso_size;    /**< Bytes to append to hdr_len per frame */
-	uint16_t csum_start;  /**< Position to start checksumming from */
-	uint16_t csum_offset; /**< Offset after that to place checksum */
-};
-
-/**
- * This is the version of the header to use when the MRG_RXBUF
- * feature has been negotiated.
- */
-struct virtio_net_hdr_mrg_rxbuf {
-	struct   virtio_net_hdr hdr;
-	uint16_t num_buffers; /**< Number of merged rx buffers */
-};
-
-/**
- * Tell the backend not to interrupt us.
- */
-void virtqueue_disable_intr(struct virtqueue *vq);
-/**
- *  Dump virtqueue internal structures, for debug purpose only.
- */
-void virtqueue_dump(struct virtqueue *vq);
-/**
- *  Get all mbufs to be freed.
- */
-struct rte_mbuf *virtqueue_detatch_unused(struct virtqueue *vq);
-
-static inline int
-virtqueue_full(const struct virtqueue *vq)
-{
-	return vq->vq_free_cnt == 0;
-}
-
-#define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_ring.used->idx - (vq)->vq_used_cons_idx))
-
-static inline void
-vq_update_avail_idx(struct virtqueue *vq)
-{
-	virtio_wmb();
-	vq->vq_ring.avail->idx = vq->vq_avail_idx;
-}
-
-static inline void
-vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx)
-{
-	uint16_t avail_idx;
-	/*
-	 * Place the head of the descriptor chain into the next slot and make
-	 * it usable to the host. The chain is made available now rather than
-	 * deferring to virtqueue_notify() in the hopes that if the host is
-	 * currently running on another CPU, we can keep it processing the new
-	 * descriptor.
-	 */
-	avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1));
-	vq->vq_ring.avail->ring[avail_idx] = desc_idx;
-	vq->vq_avail_idx++;
-}
-
-static inline int
-virtqueue_kick_prepare(struct virtqueue *vq)
-{
-	return !(vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY);
-}
-
-static inline void
-virtqueue_notify(struct virtqueue *vq)
-{
-	/*
-	 * Ensure updated avail->idx is visible to host.
-	 * For virtio on IA, the notificaiton is through io port operation
-	 * which is a serialization instruction itself.
-	 */
-	VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_NOTIFY, vq->vq_queue_index);
-}
-
-#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
-#define VIRTQUEUE_DUMP(vq) do { \
-	uint16_t used_idx, nused; \
-	used_idx = (vq)->vq_ring.used->idx; \
-	nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \
-	PMD_INIT_LOG(DEBUG, \
-	  "VQ: - size=%d; free=%d; used=%d; desc_head_idx=%d;" \
-	  " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \
-	  " avail.flags=0x%x; used.flags=0x%x", \
-	  (vq)->vq_nentries, (vq)->vq_free_cnt, nused, \
-	  (vq)->vq_desc_head_idx, (vq)->vq_ring.avail->idx, \
-	  (vq)->vq_used_cons_idx, (vq)->vq_ring.used->idx, \
-	  (vq)->vq_ring.avail->flags, (vq)->vq_ring.used->flags); \
-} while (0)
-#else
-#define VIRTQUEUE_DUMP(vq) do { } while (0)
-#endif
-
-#endif /* _VIRTQUEUE_H_ */
-- 
2.1.0

^ permalink raw reply	[relevance 1%]

* Re: [dpdk-dev] [RFC PATCH 0/2] dynamic memzones
  2015-05-08 16:37  4% [dpdk-dev] [RFC PATCH 0/2] dynamic memzones Sergio Gonzalez Monroy
  2015-05-08 16:37  1% ` [dpdk-dev] [RFC PATCH 2/2] eal: memzone allocated by malloc Sergio Gonzalez Monroy
@ 2015-05-12 16:30  0% ` Olivier MATZ
  1 sibling, 0 replies; 200+ results
From: Olivier MATZ @ 2015-05-12 16:30 UTC (permalink / raw)
  To: Sergio Gonzalez Monroy, dev

Hi Sergio,

On 05/08/2015 06:37 PM, Sergio Gonzalez Monroy wrote:
> Please NOTE that this series is meant to illustrate an idea/approach and start
> discussion on the topic.
>
> Current implementation allows reserving/creating memzones but not the opposite
> (unreserve/delete). This affects mempools and other memzone based objects.
>
>  From my point of view, implementing unreserve functionality for memzones would
> look like malloc over memsegs.
> Thus, this approach moves malloc inside eal (which in turn removes a circular
> dependency), where malloc heaps are composed of memsegs.
> We keep both malloc and memzone APIs as they are, but memzones allocate their
> memory by calling malloc_heap_alloc (there would be some ABI changes, see below).
> Some extra functionality is required in malloc to allow for boundary constrained
> memory requests.
> In summary, currently malloc is based on memzones, and with this approach
> memzones are based on malloc.
>
> An alternative would be to move malloc internals (malloc_heap, malloc_elem)
> to the eal, but keeping the malloc library as is, where malloc is based on
> memzones. This way we could avoid ABI changes while keeping the existing
> circular dependency between malloc and eal.
>
> TODOs:
>   - Implement memzone_unreserve, simply call rte_malloc_free.
>   - Implement mempool_delete, simply call rte_memzone_unreserve.
>   - Init heaps with all available memsegs at once.
>   - Review symbols in version map.
>
> ABI changes:
>   - Removed support for rte_memzone_reserve_xxxx with len=0 (not needed?).
>   - Removed librte_malloc as single library (linker script as work around?).
>
> IDEAS FOR FUTURE WORK:
>   - More control over requested memory, ie. shared/private, phys_contig, etc.
>     One of the goals would be trying to reduce the need of physically contiguous
>     memory when not required.
>   - Attach/unattach hugepages at runtime (faster VM migration).
>   - Improve malloc algorithm? ie. jemalloc (or any other).
>
>
> Any comments/thoughts and/or different approaches are welcome.

I like the idea and I don't see any issue on the principle. It
will clearly help to have dynamic pools or rings.

(I didn't dive in the second patch very deep, it's just a
high-level thought).

Regards,
Olivier

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v4 0/6] update jhash function
  @ 2015-05-12 15:33  4%   ` Neil Horman
  2015-05-13 13:52  0%     ` De Lara Guarch, Pablo
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-05-12 15:33 UTC (permalink / raw)
  To: Pablo de Lara; +Cc: dev

On Tue, May 12, 2015 at 12:02:32PM +0100, Pablo de Lara wrote:
> Jenkins hash function was developed originally in 1996,
> and was integrated in first versions of DPDK.
> The function has been improved in 2006,
> achieving up to 60% better performance, compared to the original one.
> 
> This patchset updates the current jhash in DPDK,
> including two new functions that generate two hashes from a single key.
> 
> It also separates the existing hash function performance tests to
> another file, to make it quicker to run.
> 
> changes in v4:
> - Simplify key alignment checks
> - Include missing x86 arch check
> 
> changes in v3:
> 
> - Update rte_jhash_1word, rte_jhash_2words and rte_jhash_3words
>   functions
> 
> changes in v2:
> 
> - Split single commit in three commits, one that updates the existing functions
>   and another that adds two new functions and use one of those functions
>   as a base to be called by the other ones.
> - Remove some unnecessary ifdefs in the code.
> - Add new macros to help on the reutilization of constants
> - Separate hash function performance tests to another file
>   and improve cycle measurements.
> - Rename existing function rte_jhash2 to rte_jhash_32b
>   (something more meaningful) and mark rte_jhash2 as
>   deprecated
> 
> Pablo de Lara (6):
>   test/hash: move hash function perf tests to separate file
>   test/hash: improve accuracy on cycle measurements
>   hash: update jhash function with the latest available
>   hash: add two new functions to jhash library
>   hash: remove duplicated code
>   hash: rename rte_jhash2 to rte_jhash_32b
> 
>  app/test/Makefile               |    1 +
>  app/test/test_func_reentrancy.c |    2 +-
>  app/test/test_hash.c            |    4 +-
>  app/test/test_hash_func_perf.c  |  145 +++++++++++++++++
>  app/test/test_hash_perf.c       |   71 +--------
>  lib/librte_hash/rte_jhash.h     |  338 +++++++++++++++++++++++++++++----------
>  6 files changed, 402 insertions(+), 159 deletions(-)
>  create mode 100644 app/test/test_hash_func_perf.c
> 
> -- 
> 1.7.4.1
> 
> 
did you run this through the ABI checker?  I see you're removing several symbols
that will likely need to go through the ABI deprecation process.

Neil

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [PATCH 2/6] rte_sched: expand scheduler hierarchy for more VLAN's
  2015-05-11 17:32  4%     ` Stephen Hemminger
@ 2015-05-11 17:43  4%       ` Neil Horman
  0 siblings, 0 replies; 200+ results
From: Neil Horman @ 2015-05-11 17:43 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Stephen Hemminger

On Mon, May 11, 2015 at 10:32:59AM -0700, Stephen Hemminger wrote:
> On Mon, 11 May 2015 17:20:07 +0000
> Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > Have you run this through the ABI checker?  Seems like this would alter lots of
> > pointer offsets.
> > Neil
> 
> No, I have not run it through ABI checker.
> It would change the ABI for applications using qos_sched but will not
> change layout of mbuf.
> 
> But my assumption was that as part of release process the ABI version
> would change rather than doing for each patch that gets merged.
> 

You're correct that the ABI version can change, but the process is to make an
update to doc/guides/rel_notes/abi.rst documenting the proposed change, wait
for that to be published in an official release, then make the change for the
following release.  That way downstream adopters have some lead time to prep for
upstream changes.

Neil

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [PATCH 2/6] rte_sched: expand scheduler hierarchy for more VLAN's
       [not found]       ` <8edea4c81f624728bb5f0476b680c410@BRMWP-EXMB11.corp.brocade.com>
@ 2015-05-11 17:32  4%     ` Stephen Hemminger
  2015-05-11 17:43  4%       ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Stephen Hemminger @ 2015-05-11 17:32 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev, Stephen Hemminger

On Mon, 11 May 2015 17:20:07 +0000
Neil Horman <nhorman@tuxdriver.com> wrote:

> Have you run this through the ABI checker?  Seems like this would alter lots of
> pointer offsets.
> Neil

No, I have not run it through ABI checker.
It would change the ABI for applications using qos_sched but will not
change layout of mbuf.

But my assumption was that as part of release process the ABI version
would change rather than doing for each patch that gets merged.

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [PATCH 2/6] rte_sched: expand scheduler hierarchy for more VLAN's
  @ 2015-05-11 17:20  3%   ` Neil Horman
       [not found]       ` <8edea4c81f624728bb5f0476b680c410@BRMWP-EXMB11.corp.brocade.com>
  1 sibling, 0 replies; 200+ results
From: Neil Horman @ 2015-05-11 17:20 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Stephen Hemminger

On Mon, May 11, 2015 at 10:07:47AM -0700, Stephen Hemminger wrote:
> From: Stephen Hemminger <shemming@brocade.com>
> 
> The QoS subport is limited to 8 bits in original code.
> But customers demanded ability to support full number of VLAN's (4096)
> therefore use the full part of the tag field of mbuf.
> 
> Resize the pipe as well to allow for more pipes in future and
> avoid expensive bitfield access.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>  lib/librte_mbuf/rte_mbuf.h   |  5 ++++-
>  lib/librte_sched/rte_sched.h | 38 ++++++++++++++++++++++++--------------
>  2 files changed, 28 insertions(+), 15 deletions(-)
> 
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index ab6de67..cc0658d 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -295,7 +295,10 @@ struct rte_mbuf {
>  			/**< First 4 flexible bytes or FD ID, dependent on
>  			     PKT_RX_FDIR_* flag in ol_flags. */
>  		} fdir;           /**< Filter identifier if FDIR enabled */
> -		uint32_t sched;   /**< Hierarchical scheduler */
> +		struct {
> +			uint32_t lo;
> +			uint32_t hi;
> +		} sched;          /**< Hierarchical scheduler */
>  		uint32_t usr;	  /**< User defined tags. See rte_distributor_process() */
>  	} hash;                   /**< hash information */
>  
> diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h
> index e6bba22..bf5ef8d 100644
> --- a/lib/librte_sched/rte_sched.h
> +++ b/lib/librte_sched/rte_sched.h
> @@ -195,16 +195,20 @@ struct rte_sched_port_params {
>  #endif
>  };
>  
> -/** Path through the scheduler hierarchy used by the scheduler enqueue operation to
> -identify the destination queue for the current packet. Stored in the field hash.sched
> -of struct rte_mbuf of each packet, typically written by the classification stage and read by
> -scheduler enqueue.*/
> +/*
> + * Path through the scheduler hierarchy used by the scheduler enqueue
> + * operation to identify the destination queue for the current
> + * packet. Stored in the field pkt.hash.sched of struct rte_mbuf of
> + * each packet, typically written by the classification stage and read
> + * by scheduler enqueue.
> + */
>  struct rte_sched_port_hierarchy {
> -	uint32_t queue:2;                /**< Queue ID (0 .. 3) */
> -	uint32_t traffic_class:2;        /**< Traffic class ID (0 .. 3)*/
> -	uint32_t pipe:20;                /**< Pipe ID */
> -	uint32_t subport:6;              /**< Subport ID */
> -	uint32_t color:2;                /**< Color */
> +	uint16_t queue:2;		 /**< Queue ID (0 .. 3) */
> +	uint16_t traffic_class:2;	 /**< Traffic class ID (0 .. 3)*/
> +	uint16_t color:2;		 /**< Color */
> +	uint16_t unused:10;
> +	uint16_t subport;		 /**< Subport ID */
> +	uint32_t pipe;			 /**< Pipe ID */
>  };
Have you run this through the ABI checker?  Seems like this would alter lots of
pointer offsets.
Neil

>  
>  /*
> @@ -350,12 +354,15 @@ rte_sched_queue_read_stats(struct rte_sched_port *port,
>   */
>  static inline void
>  rte_sched_port_pkt_write(struct rte_mbuf *pkt,
> -	uint32_t subport, uint32_t pipe, uint32_t traffic_class, uint32_t queue, enum rte_meter_color color)
> +			 uint32_t subport, uint32_t pipe,
> +			 uint32_t traffic_class,
> +			 uint32_t queue, enum rte_meter_color color)
>  {
> -	struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->hash.sched;
> +	struct rte_sched_port_hierarchy *sched
> +		= (struct rte_sched_port_hierarchy *) &pkt->hash.sched;
>  
> -	sched->color = (uint32_t) color;
>  	sched->subport = subport;
> +	sched->color = (uint32_t) color;
>  	sched->pipe = pipe;
>  	sched->traffic_class = traffic_class;
>  	sched->queue = queue;
> @@ -379,9 +386,12 @@ rte_sched_port_pkt_write(struct rte_mbuf *pkt,
>   *
>   */
>  static inline void
> -rte_sched_port_pkt_read_tree_path(struct rte_mbuf *pkt, uint32_t *subport, uint32_t *pipe, uint32_t *traffic_class, uint32_t *queue)
> +rte_sched_port_pkt_read_tree_path(struct rte_mbuf *pkt, uint32_t *subport,
> +				  uint32_t *pipe, uint32_t *traffic_class,
> +				  uint32_t *queue)
>  {
> -	struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->hash.sched;
> +	struct rte_sched_port_hierarchy *sched
> +		= (struct rte_sched_port_hierarchy *) &pkt->hash.sched;
>  
>  	*subport = sched->subport;
>  	*pipe = sched->pipe;
> -- 
> 2.1.4
> 
> 

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH 4/6] rte_sched: allow reading without clearing
  @ 2015-05-11 12:53  3%   ` Thomas Monjalon
  0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2015-05-11 12:53 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev

2015-04-29 10:04, Stephen Hemminger:
> The rte_sched statistics API should allow reading statistics without
> clearing. Make auto-clear optional.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>  app/test/test_sched.c        |  4 ++--
>  examples/qos_sched/stats.c   | 16 +++++++++++-----
>  lib/librte_sched/rte_sched.c | 44 ++++++++++++++++++++++----------------------
>  lib/librte_sched/rte_sched.h | 18 ++++++++++--------
[...]

This API change needs more adjustments in the example app:

examples/qos_sched/stats.c: In function ‘subport_stat’:
examples/qos_sched/stats.c:263:9: error: too few arguments to function ‘rte_sched_subport_read_stats’
         rte_sched_subport_read_stats(port, subport_id, &stats, tc_ov);
         ^
examples/qos_sched/stats.c: In function ‘pipe_stat’:
examples/qos_sched/stats.c:309:25: error: too few arguments to function ‘rte_sched_queue_read_stats’
                         rte_sched_queue_read_stats(port, queue_id + (i * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + j), &stats, &qlen);
                         ^

[...]
> --- a/lib/librte_sched/rte_sched.h
> +++ b/lib/librte_sched/rte_sched.h
> @@ -308,14 +308,15 @@ rte_sched_port_get_memory_footprint(struct rte_sched_port_params *params);
>   * @param tc_ov
>   *   Pointer to pre-allocated 4-entry array where the oversubscription status for
>   *   each of the 4 subport traffic classes should be stored.
> + * @parm clear
> + *   Reset statistics after read
>   * @return
>   *   0 upon success, error code otherwise
>   */
>  int
> -rte_sched_subport_read_stats(struct rte_sched_port *port,
> -	uint32_t subport_id,
> -	struct rte_sched_subport_stats *stats,
> -	uint32_t *tc_ov);
> +rte_sched_subport_read_stats(struct rte_sched_port *port, uint32_t subport_id,
> +			     struct rte_sched_subport_stats *stats,
> +			     uint32_t *tc_ov, int clear);
>  
>  /**
>   * Hierarchical scheduler queue statistics read
> @@ -329,14 +330,15 @@ rte_sched_subport_read_stats(struct rte_sched_port *port,
>   *   counters should be stored
>   * @param qlen
>   *   Pointer to pre-allocated variable where the current queue length should be stored.
> + * @parm clear
> + *   Reset statistics after read
>   * @return
>   *   0 upon success, error code otherwise
>   */
>  int
> -rte_sched_queue_read_stats(struct rte_sched_port *port,
> -	uint32_t queue_id,
> -	struct rte_sched_queue_stats *stats,
> -	uint16_t *qlen);
> +rte_sched_queue_read_stats(struct rte_sched_port *port, uint32_t queue_id,
> +			   struct rte_sched_queue_stats *stats,
> +			   uint16_t *qlen, int clear);
>  
>  /*
>   * Run-time
> 

What about ABI versioning? compatibility?

^ permalink raw reply	[relevance 3%]

* [dpdk-dev] [RFC PATCH 0/2] dynamic memzones
@ 2015-05-08 16:37  4% Sergio Gonzalez Monroy
  2015-05-08 16:37  1% ` [dpdk-dev] [RFC PATCH 2/2] eal: memzone allocated by malloc Sergio Gonzalez Monroy
  2015-05-12 16:30  0% ` [dpdk-dev] [RFC PATCH 0/2] dynamic memzones Olivier MATZ
  0 siblings, 2 replies; 200+ results
From: Sergio Gonzalez Monroy @ 2015-05-08 16:37 UTC (permalink / raw)
  To: dev

Please NOTE that this series is meant to illustrate an idea/approach and start
discussion on the topic.

Current implementation allows reserving/creating memzones but not the opposite
(unreserve/delete). This affects mempools and other memzone based objects.

From my point of view, implementing unreserve functionality for memzones would
look like malloc over memsegs.
Thus, this approach moves malloc inside eal (which in turn removes a circular
dependency), where malloc heaps are composed of memsegs.
We keep both malloc and memzone APIs as they are, but memzones allocate their
memory by calling malloc_heap_alloc (there would be some ABI changes, see below).
Some extra functionality is required in malloc to allow for boundary constrained
memory requests.
In summary, currently malloc is based on memzones, and with this approach
memzones are based on malloc.

An alternative would be to move malloc internals (malloc_heap, malloc_elem)
to the eal, but keeping the malloc library as is, where malloc is based on
memzones. This way we could avoid ABI changes while keeping the existing
circular dependency between malloc and eal.

TODOs:
 - Implement memzone_unreserve, simply call rte_malloc_free.
 - Implement mempool_delete, simply call rte_memzone_unreserve.
 - Init heaps with all available memsegs at once.
 - Review symbols in version map.

ABI changes:
 - Removed support for rte_memzone_reserve_xxxx with len=0 (not needed?).
 - Removed librte_malloc as single library (linker script as work around?).

IDEAS FOR FUTURE WORK:
 - More control over requested memory, ie. shared/private, phys_contig, etc.
   One of the goals would be trying to reduce the need of physically contiguous
   memory when not required.
 - Attach/unattach hugepages at runtime (faster VM migration).
 - Improve malloc algorithm? ie. jemalloc (or any other).


Any comments/thoughts and/or different approaches are welcome.


Sergio Gonzalez Monroy (2):
  eal: move librte_malloc to eal/common
  eal: memzone allocated by malloc

 config/common_bsdapp                            |   9 +-
 config/common_linuxapp                          |   9 +-
 lib/Makefile                                    |   1 -
 lib/librte_acl/Makefile                         |   2 +-
 lib/librte_eal/bsdapp/eal/Makefile              |   4 +-
 lib/librte_eal/bsdapp/eal/rte_eal_version.map   |  18 ++
 lib/librte_eal/common/Makefile                  |   1 +
 lib/librte_eal/common/eal_common_memzone.c      | 233 ++--------------
 lib/librte_eal/common/include/rte_malloc.h      | 342 ++++++++++++++++++++++++
 lib/librte_eal/common/include/rte_malloc_heap.h |   4 +-
 lib/librte_eal/common/include/rte_memory.h      |   1 +
 lib/librte_eal/common/malloc_elem.c             | 342 ++++++++++++++++++++++++
 lib/librte_eal/common/malloc_elem.h             | 192 +++++++++++++
 lib/librte_eal/common/malloc_heap.c             | 287 ++++++++++++++++++++
 lib/librte_eal/common/malloc_heap.h             |  70 +++++
 lib/librte_eal/common/rte_malloc.c              | 259 ++++++++++++++++++
 lib/librte_eal/linuxapp/eal/Makefile            |   4 +-
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |  18 ++
 lib/librte_hash/Makefile                        |   2 +-
 lib/librte_lpm/Makefile                         |   2 +-
 lib/librte_malloc/Makefile                      |  52 ----
 lib/librte_malloc/malloc_elem.c                 | 320 ----------------------
 lib/librte_malloc/malloc_elem.h                 | 190 -------------
 lib/librte_malloc/malloc_heap.c                 | 209 ---------------
 lib/librte_malloc/malloc_heap.h                 |  70 -----
 lib/librte_malloc/rte_malloc.c                  | 260 ------------------
 lib/librte_malloc/rte_malloc.h                  | 342 ------------------------
 lib/librte_malloc/rte_malloc_version.map        |  19 --
 lib/librte_mempool/Makefile                     |   2 -
 lib/librte_pmd_af_packet/Makefile               |   1 -
 lib/librte_pmd_bond/Makefile                    |   1 -
 lib/librte_pmd_e1000/Makefile                   |   2 +-
 lib/librte_pmd_enic/Makefile                    |   2 +-
 lib/librte_pmd_fm10k/Makefile                   |   2 +-
 lib/librte_pmd_i40e/Makefile                    |   2 +-
 lib/librte_pmd_ixgbe/Makefile                   |   2 +-
 lib/librte_pmd_mlx4/Makefile                    |   1 -
 lib/librte_pmd_null/Makefile                    |   1 -
 lib/librte_pmd_pcap/Makefile                    |   1 -
 lib/librte_pmd_virtio/Makefile                  |   2 +-
 lib/librte_pmd_vmxnet3/Makefile                 |   2 +-
 lib/librte_pmd_xenvirt/Makefile                 |   2 +-
 lib/librte_port/Makefile                        |   1 -
 lib/librte_ring/Makefile                        |   3 +-
 lib/librte_table/Makefile                       |   1 -
 45 files changed, 1571 insertions(+), 1719 deletions(-)
 create mode 100644 lib/librte_eal/common/include/rte_malloc.h
 create mode 100644 lib/librte_eal/common/malloc_elem.c
 create mode 100644 lib/librte_eal/common/malloc_elem.h
 create mode 100644 lib/librte_eal/common/malloc_heap.c
 create mode 100644 lib/librte_eal/common/malloc_heap.h
 create mode 100644 lib/librte_eal/common/rte_malloc.c
 delete mode 100644 lib/librte_malloc/Makefile
 delete mode 100644 lib/librte_malloc/malloc_elem.c
 delete mode 100644 lib/librte_malloc/malloc_elem.h
 delete mode 100644 lib/librte_malloc/malloc_heap.c
 delete mode 100644 lib/librte_malloc/malloc_heap.h
 delete mode 100644 lib/librte_malloc/rte_malloc.c
 delete mode 100644 lib/librte_malloc/rte_malloc.h
 delete mode 100644 lib/librte_malloc/rte_malloc_version.map

-- 
1.9.3

^ permalink raw reply	[relevance 4%]

* [dpdk-dev] [RFC PATCH 2/2] eal: memzone allocated by malloc
  2015-05-08 16:37  4% [dpdk-dev] [RFC PATCH 0/2] dynamic memzones Sergio Gonzalez Monroy
@ 2015-05-08 16:37  1% ` Sergio Gonzalez Monroy
  2015-05-12 16:30  0% ` [dpdk-dev] [RFC PATCH 0/2] dynamic memzones Olivier MATZ
  1 sibling, 0 replies; 200+ results
From: Sergio Gonzalez Monroy @ 2015-05-08 16:37 UTC (permalink / raw)
  To: dev

In the current memory hierarchy, memsegs are groups of physically contiguous
hugepages, memzone are slices of memsegs and malloc further slices memzones
into smaller memory chunks.

This patch modifies malloc so it slices/partitions memsegs instead of memzones.
Thus memzones would call malloc internally for memory allocation while
maintaining its ABI. The only exception is that reserving a memzone with
len=0 is not supported anymore.

Signed-off-by: Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>
---
 lib/librte_eal/common/eal_common_memzone.c      | 233 ++----------------------
 lib/librte_eal/common/include/rte_malloc_heap.h |   4 +-
 lib/librte_eal/common/include/rte_memory.h      |   1 +
 lib/librte_eal/common/malloc_elem.c             |  60 ++++--
 lib/librte_eal/common/malloc_elem.h             |  14 +-
 lib/librte_eal/common/malloc_heap.c             | 188 +++++++++++++------
 lib/librte_eal/common/malloc_heap.h             |   4 +-
 lib/librte_eal/common/rte_malloc.c              |   7 +-
 8 files changed, 207 insertions(+), 304 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 888f9e5..3dc8133 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -50,11 +50,10 @@
 #include <rte_string_fns.h>
 #include <rte_common.h>
 
+#include "malloc_heap.h"
+#include "malloc_elem.h"
 #include "eal_private.h"
 
-/* internal copy of free memory segments */
-static struct rte_memseg *free_memseg = NULL;
-
 static inline const struct rte_memzone *
 memzone_lookup_thread_unsafe(const char *name)
 {
@@ -88,53 +87,12 @@ rte_memzone_reserve(const char *name, size_t len, int socket_id,
 			len, socket_id, flags, RTE_CACHE_LINE_SIZE);
 }
 
-/*
- * Helper function for memzone_reserve_aligned_thread_unsafe().
- * Calculate address offset from the start of the segment.
- * Align offset in that way that it satisfy istart alignmnet and
- * buffer of the  requested length would not cross specified boundary.
- */
-static inline phys_addr_t
-align_phys_boundary(const struct rte_memseg *ms, size_t len, size_t align,
-	size_t bound)
-{
-	phys_addr_t addr_offset, bmask, end, start;
-	size_t step;
-
-	step = RTE_MAX(align, bound);
-	bmask = ~((phys_addr_t)bound - 1);
-
-	/* calculate offset to closest alignment */
-	start = RTE_ALIGN_CEIL(ms->phys_addr, align);
-	addr_offset = start - ms->phys_addr;
-
-	while (addr_offset + len < ms->len) {
-
-		/* check, do we meet boundary condition */
-		end = start + len - (len != 0);
-		if ((start & bmask) == (end & bmask))
-			break;
-
-		/* calculate next offset */
-		start = RTE_ALIGN_CEIL(start + 1, step);
-		addr_offset = start - ms->phys_addr;
-	}
-
-	return (addr_offset);
-}
-
 static const struct rte_memzone *
 memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 		int socket_id, unsigned flags, unsigned align, unsigned bound)
 {
 	struct rte_mem_config *mcfg;
-	unsigned i = 0;
-	int memseg_idx = -1;
-	uint64_t addr_offset, seg_offset = 0;
 	size_t requested_len;
-	size_t memseg_len = 0;
-	phys_addr_t memseg_physaddr;
-	void *memseg_addr;
 
 	/* get pointer to global configuration */
 	mcfg = rte_eal_get_configuration()->mem_config;
@@ -166,10 +124,10 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 	if (align < RTE_CACHE_LINE_SIZE)
 		align = RTE_CACHE_LINE_SIZE;
 
-
-	/* align length on cache boundary. Check for overflow before doing so */
-	if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
-		rte_errno = EINVAL; /* requested size too big */
+	/* align length on cache boundary. Check for overflow before doing so
+	 * FIXME need to update API doc regarding len value*/
+	if ((len > SIZE_MAX - RTE_CACHE_LINE_MASK) || (len == 0)){
+		rte_errno = EINVAL;
 		return NULL;
 	}
 
@@ -186,123 +144,29 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 		return NULL;
 	}
 
-	/* find the smallest segment matching requirements */
-	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
-		/* last segment */
-		if (free_memseg[i].addr == NULL)
-			break;
-
-		/* empty segment, skip it */
-		if (free_memseg[i].len == 0)
-			continue;
-
-		/* bad socket ID */
-		if (socket_id != SOCKET_ID_ANY &&
-		    free_memseg[i].socket_id != SOCKET_ID_ANY &&
-		    socket_id != free_memseg[i].socket_id)
-			continue;
-
-		/*
-		 * calculate offset to closest alignment that
-		 * meets boundary conditions.
-		 */
-		addr_offset = align_phys_boundary(free_memseg + i,
-			requested_len, align, bound);
-
-		/* check len */
-		if ((requested_len + addr_offset) > free_memseg[i].len)
-			continue;
-
-		/* check flags for hugepage sizes */
-		if ((flags & RTE_MEMZONE_2MB) &&
-				free_memseg[i].hugepage_sz == RTE_PGSIZE_1G)
-			continue;
-		if ((flags & RTE_MEMZONE_1GB) &&
-				free_memseg[i].hugepage_sz == RTE_PGSIZE_2M)
-			continue;
-		if ((flags & RTE_MEMZONE_16MB) &&
-				free_memseg[i].hugepage_sz == RTE_PGSIZE_16G)
-			continue;
-		if ((flags & RTE_MEMZONE_16GB) &&
-				free_memseg[i].hugepage_sz == RTE_PGSIZE_16M)
-			continue;
-
-		/* this segment is the best until now */
-		if (memseg_idx == -1) {
-			memseg_idx = i;
-			memseg_len = free_memseg[i].len;
-			seg_offset = addr_offset;
-		}
-		/* find the biggest contiguous zone */
-		else if (len == 0) {
-			if (free_memseg[i].len > memseg_len) {
-				memseg_idx = i;
-				memseg_len = free_memseg[i].len;
-				seg_offset = addr_offset;
-			}
-		}
-		/*
-		 * find the smallest (we already checked that current
-		 * zone length is > len
-		 */
-		else if (free_memseg[i].len + align < memseg_len ||
-				(free_memseg[i].len <= memseg_len + align &&
-				addr_offset < seg_offset)) {
-			memseg_idx = i;
-			memseg_len = free_memseg[i].len;
-			seg_offset = addr_offset;
-		}
-	}
 
-	/* no segment found */
-	if (memseg_idx == -1) {
-		/*
-		 * If RTE_MEMZONE_SIZE_HINT_ONLY flag is specified,
-		 * try allocating again without the size parameter otherwise -fail.
-		 */
-		if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY)  &&
-		    ((flags & RTE_MEMZONE_1GB) || (flags & RTE_MEMZONE_2MB)
-		|| (flags & RTE_MEMZONE_16MB) || (flags & RTE_MEMZONE_16GB)))
-			return memzone_reserve_aligned_thread_unsafe(name,
-				len, socket_id, 0, align, bound);
+	/* get socket heap */
 
+	/* allocate memory on heap */
+	void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket_id], NULL,
+			requested_len, flags, align, bound);
+	if (mz_addr == NULL) {
 		rte_errno = ENOMEM;
 		return NULL;
 	}
 
-	/* save aligned physical and virtual addresses */
-	memseg_physaddr = free_memseg[memseg_idx].phys_addr + seg_offset;
-	memseg_addr = RTE_PTR_ADD(free_memseg[memseg_idx].addr,
-			(uintptr_t) seg_offset);
-
-	/* if we are looking for a biggest memzone */
-	if (len == 0) {
-		if (bound == 0)
-			requested_len = memseg_len - seg_offset;
-		else
-			requested_len = RTE_ALIGN_CEIL(memseg_physaddr + 1,
-				bound) - memseg_physaddr;
-	}
-
-	/* set length to correct value */
-	len = (size_t)seg_offset + requested_len;
-
-	/* update our internal state */
-	free_memseg[memseg_idx].len -= len;
-	free_memseg[memseg_idx].phys_addr += len;
-	free_memseg[memseg_idx].addr =
-		(char *)free_memseg[memseg_idx].addr + len;
+	const struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
 
 	/* fill the zone in config */
 	struct rte_memzone *mz = &mcfg->memzone[mcfg->memzone_idx++];
 	snprintf(mz->name, sizeof(mz->name), "%s", name);
-	mz->phys_addr = memseg_physaddr;
-	mz->addr = memseg_addr;
+	mz->phys_addr = rte_malloc_virt2phy(mz_addr);
+	mz->addr = mz_addr;
 	mz->len = requested_len;
-	mz->hugepage_sz = free_memseg[memseg_idx].hugepage_sz;
-	mz->socket_id = free_memseg[memseg_idx].socket_id;
+	mz->hugepage_sz = elem->ms->hugepage_sz;
+	mz->socket_id = elem->ms->socket_id;
 	mz->flags = 0;
-	mz->memseg_id = memseg_idx;
+	mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg;
 
 	return mz;
 }
@@ -419,45 +283,6 @@ rte_memzone_dump(FILE *f)
 }
 
 /*
- * called by init: modify the free memseg list to have cache-aligned
- * addresses and cache-aligned lengths
- */
-static int
-memseg_sanitize(struct rte_memseg *memseg)
-{
-	unsigned phys_align;
-	unsigned virt_align;
-	unsigned off;
-
-	phys_align = memseg->phys_addr & RTE_CACHE_LINE_MASK;
-	virt_align = (unsigned long)memseg->addr & RTE_CACHE_LINE_MASK;
-
-	/*
-	 * sanity check: phys_addr and addr must have the same
-	 * alignment
-	 */
-	if (phys_align != virt_align)
-		return -1;
-
-	/* memseg is really too small, don't bother with it */
-	if (memseg->len < (2 * RTE_CACHE_LINE_SIZE)) {
-		memseg->len = 0;
-		return 0;
-	}
-
-	/* align start address */
-	off = (RTE_CACHE_LINE_SIZE - phys_align) & RTE_CACHE_LINE_MASK;
-	memseg->phys_addr += off;
-	memseg->addr = (char *)memseg->addr + off;
-	memseg->len -= off;
-
-	/* align end address */
-	memseg->len &= ~((uint64_t)RTE_CACHE_LINE_MASK);
-
-	return 0;
-}
-
-/*
  * Init the memzone subsystem
  */
 int
@@ -465,14 +290,10 @@ rte_eal_memzone_init(void)
 {
 	struct rte_mem_config *mcfg;
 	const struct rte_memseg *memseg;
-	unsigned i = 0;
 
 	/* get pointer to global configuration */
 	mcfg = rte_eal_get_configuration()->mem_config;
 
-	/* mirror the runtime memsegs from config */
-	free_memseg = mcfg->free_memseg;
-
 	/* secondary processes don't need to initialise anything */
 	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
 		return 0;
@@ -485,26 +306,6 @@ rte_eal_memzone_init(void)
 
 	rte_rwlock_write_lock(&mcfg->mlock);
 
-	/* fill in uninitialized free_memsegs */
-	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
-		if (memseg[i].addr == NULL)
-			break;
-		if (free_memseg[i].addr != NULL)
-			continue;
-		memcpy(&free_memseg[i], &memseg[i], sizeof(struct rte_memseg));
-	}
-
-	/* make all zones cache-aligned */
-	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
-		if (free_memseg[i].addr == NULL)
-			break;
-		if (memseg_sanitize(&free_memseg[i]) < 0) {
-			RTE_LOG(ERR, EAL, "%s(): Sanity check failed\n", __func__);
-			rte_rwlock_write_unlock(&mcfg->mlock);
-			return -1;
-		}
-	}
-
 	/* delete all zones */
 	mcfg->memzone_idx = 0;
 	memset(mcfg->memzone, 0, sizeof(mcfg->memzone));
diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h
index 716216f..5333348 100644
--- a/lib/librte_eal/common/include/rte_malloc_heap.h
+++ b/lib/librte_eal/common/include/rte_malloc_heap.h
@@ -40,7 +40,7 @@
 #include <rte_memory.h>
 
 /* Number of free lists per heap, grouped by size. */
-#define RTE_HEAP_NUM_FREELISTS  5
+#define RTE_HEAP_NUM_FREELISTS  10
 
 /**
  * Structure to hold malloc heap
@@ -48,7 +48,7 @@
 struct malloc_heap {
 	rte_spinlock_t lock;
 	LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
-	unsigned mz_count;
+	unsigned ms_count;
 	unsigned alloc_count;
 	size_t total_size;
 } __rte_cache_aligned;
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 7f8103f..ab13d04 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -100,6 +100,7 @@ struct rte_memseg {
 	 /**< store segment MFNs */
 	uint64_t mfn[DOM0_NUM_MEMBLOCK];
 #endif
+	uint8_t used;               /**< already used by a heap */
 } __attribute__((__packed__));
 
 /**
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
index a5e1248..5e95abb 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -37,7 +37,6 @@
 #include <sys/queue.h>
 
 #include <rte_memory.h>
-#include <rte_memzone.h>
 #include <rte_eal.h>
 #include <rte_launch.h>
 #include <rte_per_lcore.h>
@@ -56,10 +55,10 @@
  */
 void
 malloc_elem_init(struct malloc_elem *elem,
-		struct malloc_heap *heap, const struct rte_memzone *mz, size_t size)
+		struct malloc_heap *heap, const struct rte_memseg *ms, size_t size)
 {
 	elem->heap = heap;
-	elem->mz = mz;
+	elem->ms = ms;
 	elem->prev = NULL;
 	memset(&elem->free_list, 0, sizeof(elem->free_list));
 	elem->state = ELEM_FREE;
@@ -70,12 +69,12 @@ malloc_elem_init(struct malloc_elem *elem,
 }
 
 /*
- * initialise a dummy malloc_elem header for the end-of-memzone marker
+ * initialise a dummy malloc_elem header for the end-of-memseg marker
  */
 void
 malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev)
 {
-	malloc_elem_init(elem, prev->heap, prev->mz, 0);
+	malloc_elem_init(elem, prev->heap, prev->ms, 0);
 	elem->prev = prev;
 	elem->state = ELEM_BUSY; /* mark busy so its never merged */
 }
@@ -86,12 +85,24 @@ malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev)
  * fit, return NULL.
  */
 static void *
-elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align)
+elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
+		size_t bound)
 {
-	const uintptr_t end_pt = (uintptr_t)elem +
+	const size_t bmask = ~(bound - 1);
+	uintptr_t end_pt = (uintptr_t)elem +
 			elem->size - MALLOC_ELEM_TRAILER_LEN;
-	const uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
-	const uintptr_t new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;
+	uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
+	uintptr_t new_elem_start;
+
+	/* check boundary */
+	if ((new_data_start & bmask) != (end_pt & bmask)) {
+		end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
+		new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
+		if ((end_pt & bmask) != (new_data_start & bmask))
+			return NULL;
+	}
+
+	new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;
 
 	/* if the new start point is before the exist start, it won't fit */
 	return (new_elem_start < (uintptr_t)elem) ? NULL : (void *)new_elem_start;
@@ -102,9 +113,10 @@ elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align)
  * alignment request from the current element
  */
 int
-malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align)
+malloc_elem_can_hold(struct malloc_elem *elem, size_t size,	unsigned align,
+		size_t bound)
 {
-	return elem_start_pt(elem, size, align) != NULL;
+	return elem_start_pt(elem, size, align, bound) != NULL;
 }
 
 /*
@@ -118,7 +130,7 @@ split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
 	const unsigned old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
 	const unsigned new_elem_size = elem->size - old_elem_size;
 
-	malloc_elem_init(split_pt, elem->heap, elem->mz, new_elem_size);
+	malloc_elem_init(split_pt, elem->heap, elem->ms, new_elem_size);
 	split_pt->prev = elem;
 	next_elem->prev = split_pt;
 	elem->size = old_elem_size;
@@ -190,12 +202,25 @@ elem_free_list_remove(struct malloc_elem *elem)
  * is not done here, as it's done there previously.
  */
 struct malloc_elem *
-malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align)
+malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
+		size_t bound)
 {
-	struct malloc_elem *new_elem = elem_start_pt(elem, size, align);
-	const unsigned old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
+	struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound);
+	const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
+	const size_t trailer_size = elem->size - old_elem_size - size;
+
+	elem_free_list_remove(elem);
 
-	if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE){
+	if (trailer_size > MALLOC_ELEM_OVERHEAD * 2 + MIN_DATA_SIZE) {
+		/* split it, too much free space after elem */
+		struct malloc_elem *new_free_elem =
+				RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD);
+
+		split_elem(elem, new_free_elem);
+		malloc_elem_free_list_insert(new_free_elem);
+	}
+
+	if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
 		/* don't split it, pad the element instead */
 		elem->state = ELEM_BUSY;
 		elem->pad = old_elem_size;
@@ -208,8 +233,6 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align)
 			new_elem->size = elem->size - elem->pad;
 			set_header(new_elem);
 		}
-		/* remove element from free list */
-		elem_free_list_remove(elem);
 
 		return new_elem;
 	}
@@ -219,7 +242,6 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align)
 	 * Re-insert original element, in case its new size makes it
 	 * belong on a different list.
 	 */
-	elem_free_list_remove(elem);
 	split_elem(elem, new_elem);
 	new_elem->state = ELEM_BUSY;
 	malloc_elem_free_list_insert(elem);
diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h
index 9790b1a..e05d2ea 100644
--- a/lib/librte_eal/common/malloc_elem.h
+++ b/lib/librte_eal/common/malloc_elem.h
@@ -47,9 +47,9 @@ enum elem_state {
 
 struct malloc_elem {
 	struct malloc_heap *heap;
-	struct malloc_elem *volatile prev;      /* points to prev elem in memzone */
+	struct malloc_elem *volatile prev;      /* points to prev elem in memseg */
 	LIST_ENTRY(malloc_elem) free_list;      /* list of free elements in heap */
-	const struct rte_memzone *mz;
+	const struct rte_memseg *ms;
 	volatile enum elem_state state;
 	uint32_t pad;
 	size_t size;
@@ -136,11 +136,11 @@ malloc_elem_from_data(const void *data)
 void
 malloc_elem_init(struct malloc_elem *elem,
 		struct malloc_heap *heap,
-		const struct rte_memzone *mz,
+		const struct rte_memseg *ms,
 		size_t size);
 
 /*
- * initialise a dummy malloc_elem header for the end-of-memzone marker
+ * initialise a dummy malloc_elem header for the end-of-memseg marker
  */
 void
 malloc_elem_mkend(struct malloc_elem *elem,
@@ -151,14 +151,16 @@ malloc_elem_mkend(struct malloc_elem *elem,
  * of the requested size and with the requested alignment
  */
 int
-malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align);
+malloc_elem_can_hold(struct malloc_elem *elem, size_t size,
+		unsigned align, size_t bound);
 
 /*
  * reserve a block of data in an existing malloc_elem. If the malloc_elem
  * is much larger than the data block requested, we split the element in two.
  */
 struct malloc_elem *
-malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align);
+malloc_elem_alloc(struct malloc_elem *elem, size_t size,
+		unsigned align, size_t bound);
 
 /*
  * free a malloc_elem block by adding it to the free list. If the
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index defb903..b79e0e9 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -39,7 +39,6 @@
 #include <sys/queue.h>
 
 #include <rte_memory.h>
-#include <rte_memzone.h>
 #include <rte_eal.h>
 #include <rte_eal_memconfig.h>
 #include <rte_launch.h>
@@ -54,69 +53,136 @@
 #include "malloc_elem.h"
 #include "malloc_heap.h"
 
-/* since the memzone size starts with a digit, it will appear unquoted in
- * rte_config.h, so quote it so it can be passed to rte_str_to_size */
-#define MALLOC_MEMZONE_SIZE RTE_STR(RTE_MALLOC_MEMZONE_SIZE)
+static unsigned
+check_hugepage_sz(unsigned flags, size_t hugepage_sz)
+{
+	unsigned ret = 1;
 
-/*
- * returns the configuration setting for the memzone size as a size_t value
- */
-static inline size_t
-get_malloc_memzone_size(void)
+	if ((flags & RTE_MEMZONE_2MB) && hugepage_sz == RTE_PGSIZE_1G)
+		ret = 0;
+	if ((flags & RTE_MEMZONE_1GB) && hugepage_sz == RTE_PGSIZE_2M)
+		ret = 0;
+	if ((flags & RTE_MEMZONE_16MB) && hugepage_sz == RTE_PGSIZE_16G)
+		ret = 0;
+	if ((flags & RTE_MEMZONE_16GB) && hugepage_sz == RTE_PGSIZE_16M)
+		ret = 0;
+
+	return ret;
+}
+
+static struct rte_memseg*
+find_suitable_memseg(int socket_id, size_t size, unsigned flags,
+		size_t align, size_t bound)
 {
-	return rte_str_to_size(MALLOC_MEMZONE_SIZE);
+	struct rte_memseg *ms = rte_eal_get_configuration()->mem_config->memseg;
+	uintptr_t data_end, data_start;
+	size_t bmask = ~(bound - 1);
+	unsigned i;
+	int ms_idx = -1, alt_ms_idx = -1;
+	size_t ms_len = 0, alt_ms_len = 0;
+	size_t min_size;
+
+	min_size = size + align + MALLOC_ELEM_OVERHEAD * 2;
+
+	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+		/* last segment */
+		if (ms[i].addr == NULL)
+			break;
+
+		/* in use */
+		if (ms[i].used)
+			continue;
+
+		/* bad socket ID */
+		if (socket_id != SOCKET_ID_ANY && ms[i].socket_id != SOCKET_ID_ANY &&
+		    socket_id != ms[i].socket_id)
+			continue;
+
+		/* check len */
+		if (min_size > ms[i].len)
+			continue;
+
+		/* check boundary */
+		data_end = (uintptr_t)ms[i].addr + ms[i].len -
+			MALLOC_ELEM_OVERHEAD - MALLOC_ELEM_TRAILER_LEN ;
+		data_end = RTE_ALIGN_FLOOR(data_end, RTE_CACHE_LINE_SIZE);
+		data_start = RTE_ALIGN_FLOOR((data_end - size), align);
+		if ((data_end & bmask) != (data_start & bmask)) {
+			/* check we have enough space before boudnary */
+			data_end = RTE_ALIGN_FLOOR(data_end, bound);
+			data_start = RTE_ALIGN_FLOOR((data_end - size), align);
+			if (((data_end & bmask) != (data_start & bmask)) ||
+					((uintptr_t)ms[i].addr > (data_start - MALLOC_ELEM_HEADER_LEN)))
+				continue;
+		}
+
+		/* at this point, we have a memseg */
+
+		/* keep best memseg found */
+		if ((alt_ms_idx == -1) ||
+			(ms[i].len < alt_ms_len)) {
+			alt_ms_idx = i;
+			alt_ms_len = ms[i].len;
+		}
+
+		/* check flags for hugepage sizes */
+		if (!check_hugepage_sz(flags, ms[i].hugepage_sz))
+			continue;
+
+		/* keep best memseg found with requested hugepage size */
+		if ((ms_idx == -1) ||
+			(ms[i].len < ms_len)) {
+			ms_idx = i;
+			ms_len = ms[i].len;
+		}
+	}
+
+	if ((ms_idx == -1) && (flags & RTE_MEMZONE_SIZE_HINT_ONLY))
+		ms_idx = alt_ms_idx;
+
+	if (ms_idx == -1)
+		return NULL;
+
+	return &ms[ms_idx];
 }
 
+/* This function expects correct values:
+ * - size: >= RTE_CACHE_LINE_SIZE
+ * - align: power_of_two && >= RTE_CACHE_LINE_SIZE
+ * - bound: power_of_two && >= size
+ */
 /*
+ * find a suitable memory segment available to expand the heap
  * reserve an extra memory zone and make it available for use by a particular
  * heap. This reserves the zone and sets a dummy malloc_elem header at the end
  * to prevent overflow. The rest of the zone is added to free list as a single
  * large free block
  */
 static int
-malloc_heap_add_memzone(struct malloc_heap *heap, size_t size, unsigned align)
+malloc_heap_add_memseg(struct malloc_heap *heap, size_t size,
+		unsigned flags, size_t align, size_t bound)
 {
-	const unsigned mz_flags = 0;
-	const size_t block_size = get_malloc_memzone_size();
-	/* ensure the data we want to allocate will fit in the memzone */
-	const size_t min_size = size + align + MALLOC_ELEM_OVERHEAD * 2;
-	const struct rte_memzone *mz = NULL;
+	struct rte_memseg *ms = NULL;
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-	unsigned numa_socket = heap - mcfg->malloc_heaps;
-
-	size_t mz_size = min_size;
-	if (mz_size < block_size)
-		mz_size = block_size;
-
-	char mz_name[RTE_MEMZONE_NAMESIZE];
-	snprintf(mz_name, sizeof(mz_name), "MALLOC_S%u_HEAP_%u",
-		     numa_socket, heap->mz_count++);
-
-	/* try getting a block. if we fail and we don't need as big a block
-	 * as given in the config, we can shrink our request and try again
-	 */
-	do {
-		mz = rte_memzone_reserve(mz_name, mz_size, numa_socket,
-					 mz_flags);
-		if (mz == NULL)
-			mz_size /= 2;
-	} while (mz == NULL && mz_size > min_size);
-	if (mz == NULL)
+	unsigned socket_id = heap - mcfg->malloc_heaps;
+
+	ms = find_suitable_memseg(socket_id, size, flags, align, bound);
+	if (ms == NULL)
 		return -1;
 
+	ms->used = 1;
 	/* allocate the memory block headers, one at end, one at start */
-	struct malloc_elem *start_elem = (struct malloc_elem *)mz->addr;
-	struct malloc_elem *end_elem = RTE_PTR_ADD(mz->addr,
-			mz_size - MALLOC_ELEM_OVERHEAD);
+	struct malloc_elem *start_elem = (struct malloc_elem *)ms->addr;
+	struct malloc_elem *end_elem = RTE_PTR_ADD(ms->addr,
+			ms->len - MALLOC_ELEM_OVERHEAD);
 	end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE);
 
 	const unsigned elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem;
-	malloc_elem_init(start_elem, heap, mz, elem_size);
+	malloc_elem_init(start_elem, heap, ms, elem_size);
 	malloc_elem_mkend(end_elem, start_elem);
 	malloc_elem_free_list_insert(start_elem);
 
-	/* increase heap total size by size of new memzone */
-	heap->total_size+=mz_size - MALLOC_ELEM_OVERHEAD;
+	heap->total_size += ms->len - MALLOC_ELEM_OVERHEAD;
 	return 0;
 }
 
@@ -126,10 +192,11 @@ malloc_heap_add_memzone(struct malloc_heap *heap, size_t size, unsigned align)
  * Returns null on failure, or pointer to element on success.
  */
 static struct malloc_elem *
-find_suitable_element(struct malloc_heap *heap, size_t size, unsigned align)
+find_suitable_element(struct malloc_heap *heap, size_t size,
+		unsigned flags, size_t align, size_t bound)
 {
 	size_t idx;
-	struct malloc_elem *elem;
+	struct malloc_elem *elem, *alt_elem = NULL;
 
 	for (idx = malloc_elem_free_list_index(size);
 		idx < RTE_HEAP_NUM_FREELISTS; idx++)
@@ -137,40 +204,51 @@ find_suitable_element(struct malloc_heap *heap, size_t size, unsigned align)
 		for (elem = LIST_FIRST(&heap->free_head[idx]);
 			!!elem; elem = LIST_NEXT(elem, free_list))
 		{
-			if (malloc_elem_can_hold(elem, size, align))
-				return elem;
+			if (malloc_elem_can_hold(elem, size, align, bound)) {
+				if (check_hugepage_sz(flags, elem->ms->hugepage_sz))
+					return elem;
+				else
+					alt_elem = elem;
+			}
 		}
 	}
+
+	if ((alt_elem != NULL) && (flags & RTE_MEMZONE_SIZE_HINT_ONLY))
+		return alt_elem;
+
 	return NULL;
 }
 
 /*
- * Main function called by malloc to allocate a block of memory from the
- * heap. It locks the free list, scans it, and adds a new memzone if the
- * scan fails. Once the new memzone is added, it re-scans and should return
+ * Main function to allocate a block of memory from the heap.
+ * It locks the free list, scans it, and adds a new memseg if the
+ * scan fails. Once the new memseg is added, it re-scans and should return
  * the new element after releasing the lock.
  */
 void *
 malloc_heap_alloc(struct malloc_heap *heap,
-		const char *type __attribute__((unused)), size_t size, unsigned align)
+		const char *type __attribute__((unused)), size_t size, unsigned flags,
+		size_t align, size_t bound)
 {
 	size = RTE_CACHE_LINE_ROUNDUP(size);
 	align = RTE_CACHE_LINE_ROUNDUP(align);
+
 	rte_spinlock_lock(&heap->lock);
-	struct malloc_elem *elem = find_suitable_element(heap, size, align);
+	struct malloc_elem *elem = find_suitable_element(heap, size, flags,
+			align, bound);
 	if (elem == NULL){
-		if ((malloc_heap_add_memzone(heap, size, align)) == 0)
-			elem = find_suitable_element(heap, size, align);
+		if ((malloc_heap_add_memseg(heap, size, flags, align, bound)) == 0)
+			elem = find_suitable_element(heap, size, flags, align, bound);
 	}
 
 	if (elem != NULL){
-		elem = malloc_elem_alloc(elem, size, align);
+		elem = malloc_elem_alloc(elem, size, align, bound);
 		/* increase heap's count of allocated elements */
 		heap->alloc_count++;
 	}
 	rte_spinlock_unlock(&heap->lock);
-	return elem == NULL ? NULL : (void *)(&elem[1]);
 
+	return elem == NULL ? NULL : (void *)(&elem[1]);
 }
 
 /*
diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h
index a47136d..4ba3353 100644
--- a/lib/librte_eal/common/malloc_heap.h
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -53,8 +53,8 @@ malloc_get_numa_socket(void)
 }
 
 void *
-malloc_heap_alloc(struct malloc_heap *heap, const char *type,
-		size_t size, unsigned align);
+malloc_heap_alloc(struct malloc_heap *heap,	const char *type, size_t size,
+		unsigned flags, size_t align, size_t bound);
 
 int
 malloc_heap_get_stats(const struct malloc_heap *heap,
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
index c313a57..54c2bd8 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -39,7 +39,6 @@
 
 #include <rte_memcpy.h>
 #include <rte_memory.h>
-#include <rte_memzone.h>
 #include <rte_eal.h>
 #include <rte_eal_memconfig.h>
 #include <rte_branch_prediction.h>
@@ -87,7 +86,7 @@ rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg)
 		return NULL;
 
 	ret = malloc_heap_alloc(&mcfg->malloc_heaps[socket], type,
-				size, align == 0 ? 1 : align);
+				size, 0, align == 0 ? 1 : align, 0);
 	if (ret != NULL || socket_arg != SOCKET_ID_ANY)
 		return ret;
 
@@ -98,7 +97,7 @@ rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg)
 			continue;
 
 		ret = malloc_heap_alloc(&mcfg->malloc_heaps[i], type,
-					size, align == 0 ? 1 : align);
+					size, 0, align == 0 ? 1 : align, 0);
 		if (ret != NULL)
 			return ret;
 	}
@@ -256,5 +255,5 @@ rte_malloc_virt2phy(const void *addr)
 	const struct malloc_elem *elem = malloc_elem_from_data(addr);
 	if (elem == NULL)
 		return 0;
-	return elem->mz->phys_addr + ((uintptr_t)addr - (uintptr_t)elem->mz->addr);
+	return elem->ms->phys_addr + ((uintptr_t)addr - (uintptr_t)elem->ms->addr);
 }
-- 
1.9.3

^ permalink raw reply	[relevance 1%]

* [dpdk-dev] [PATCH v7 00/10] Interrupt mode PMD
  2015-02-27  4:56  3% ` [dpdk-dev] [PATCH v6 0/8] Interrupt mode PMD Cunming Liang
                     ` (2 preceding siblings ...)
  2015-05-05  5:39  3%   ` [dpdk-dev] From: Cunming Liang <cunming.liang@intel.com> Cunming Liang
@ 2015-05-05  5:53  3%   ` Cunming Liang
  3 siblings, 0 replies; 200+ results
From: Cunming Liang @ 2015-05-05  5:53 UTC (permalink / raw)
  To: dev; +Cc: shemming

v7 changes
 - decouple epoll event and intr operation
 - add condition check in the case intr vector is disabled
 - renaming some APIs

v6 changes
 - split rte_intr_wait_rx_pkt into two APIs 'wait' and 'set'.
 - rewrite rte_intr_rx_wait/rte_intr_rx_set.
 - using vector number instead of queue_id as interrupt API params.
 - patch reorder and split.

v5 changes
 - Rebase the patchset onto the HEAD
 - Isolate ethdev from EAL for new-added wait-for-rx interrupt function
 - Export wait-for-rx interrupt function for shared libraries
 - Split-off a new patch file for changed struct rte_intr_handle that
   other patches depend on, to avoid breaking git bisect
 - Change sample application to accommodate EAL function spec change
   accordingly

v4 changes
 - Export interrupt enable/disable functions for shared libraries
 - Adjust position of new-added structure fields and functions to
   avoid breaking ABI
 
v3 changes
 - Add return value for interrupt enable/disable functions
 - Move spinlock from PMD to L3fwd-power
 - Remove unnecessary variables in e1000_mac_info
 - Fix miscellaneous review comments
 
v2 changes
 - Fix compilation issue in Makefile for missed header file.
 - Consolidate internal and community review comments of v1 patch set.
 
The patch series introduces low-latency one-shot rx interrupts into DPDK, with
an example of polling/interrupt mode switch control.
 
The DPDK userspace interrupt notification and handling mechanism is based on UIO,
which has the following limitations:
1) It is designed to handle the LSC interrupt only, using an inefficient suspended
   pthread wakeup procedure (e.g. UIO wakes up the LSC interrupt handling thread,
   which then wakes up the DPDK polling thread). This introduces
   non-deterministic wakeup latency for the DPDK polling thread, as well as packet
   latency, if it is used to handle Rx interrupts.
2) UIO only supports a single interrupt vector, which has to be shared by the
   LSC interrupt and the interrupts assigned to dedicated rx queues.
 
This patchset includes the following features:
1) Enable one-shot rx queue interrupts in ixgbe PMD (PF & VF) and igb PMD (PF only).
2) Build on top of the VFIO mechanism instead of UIO, so it can support
   up to 64 interrupt vectors for rx queue interrupts.
3) Have one DPDK polling thread handle each Rx queue interrupt with a dedicated
   VFIO eventfd, which eliminates non-deterministic pthread wakeup latency in
   user space.
4) Demonstrate the interrupt control APIs and userspace NAPI-like polling/interrupt
   switch algorithms in the L3fwd-power example.

Known limitations:
1) It does not work for UIO, because a single interrupt eventfd shared by the LSC
   and rx queue interrupt handlers causes a mess.
2) The LSC interrupt is not supported by the VF driver, so it is disabled by default
   in L3fwd-power for now. Feel free to turn it on if you want to support both LSC
   and rx queue interrupts on a PF.

Cunming Liang (10):
  eal/linux: add interrupt vectors support in intr_handle
  eal/linux: add rte_epoll_wait/ctl support
  eal/linux: add API to set rx interrupt event monitor
  eal/bsd: dummy for new intr definition
  eal/linux: fix comments typo on vfio msi
  eal/linux: add interrupt vectors handling on VFIO
  ethdev: add rx intr enable, disable and ctl functions
  ixgbe: enable rx queue interrupts for both PF and VF
  igb: enable rx queue interrupts for PF
  l3fwd-power: enable one-shot rx interrupt and polling/interrupt mode
    switch

 examples/l3fwd-power/main.c                        | 206 ++++++++--
 .../bsdapp/eal/include/exec-env/rte_interrupts.h   |   6 +
 lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 232 +++++++++--
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         |  12 +
 .../linuxapp/eal/include/exec-env/rte_interrupts.h |  97 +++++
 lib/librte_eal/linuxapp/eal/rte_eal_version.map    |   4 +
 lib/librte_ether/rte_ethdev.c                      | 132 +++++++
 lib/librte_ether/rte_ethdev.h                      | 104 +++++
 lib/librte_ether/rte_ether_version.map             |   4 +
 lib/librte_pmd_e1000/e1000_ethdev.h                |   3 +
 lib/librte_pmd_e1000/igb_ethdev.c                  | 256 +++++++++++--
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c                | 425 ++++++++++++++++++++-
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h                |   7 +
 13 files changed, 1394 insertions(+), 94 deletions(-)

-- 
1.8.1.4

^ permalink raw reply	[relevance 3%]

* [dpdk-dev] [PATCH v7 07/10] ethdev: add rx intr enable, disable and ctl functions
  2015-05-05  5:39  3%   ` [dpdk-dev] From: Cunming Liang <cunming.liang@intel.com> Cunming Liang
@ 2015-05-05  5:39  2%     ` Cunming Liang
  2015-05-21  8:55  2%     ` [dpdk-dev] [PATCH v8 00/11] Interrupt mode PMD Cunming Liang
  1 sibling, 0 replies; 200+ results
From: Cunming Liang @ 2015-05-05  5:39 UTC (permalink / raw)
  To: dev; +Cc: shemming

The patch adds two dev_ops functions to enable and disable rx queue interrupts.
In addition, it adds rte_eth_dev_rx_intr_ctl/rte_eth_dev_rx_intr_ctl_q to support per-port or per-queue rx intr event set.

Signed-off-by: Danny Zhou <danny.zhou@intel.com>
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
v7 changes
 - remove rx_intr_vec_get
 - add rx_intr_ctl and rx_intr_ctl_q

v6 changes
 - add rx_intr_vec_get to retrieve the vector num of the queue.

v5 changes
 - Rebase the patchset onto the HEAD

v4 changes
 - Export interrupt enable/disable functions for shared libraries
 - Put new functions at the end of eth_dev_ops to avoid breaking ABI

v3 changes
 - Add return value for interrupt enable/disable functions

 lib/librte_ether/rte_ethdev.c          | 132 +++++++++++++++++++++++++++++++++
 lib/librte_ether/rte_ethdev.h          | 104 ++++++++++++++++++++++++++
 lib/librte_ether/rte_ether_version.map |   4 +
 3 files changed, 240 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 024fe8b..cdde14c 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -3281,6 +3281,138 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 	}
 	rte_spinlock_unlock(&rte_eth_dev_cb_lock);
 }
+
+int
+rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data)
+{
+	uint32_t vec;
+	struct rte_eth_dev *dev;
+	struct rte_intr_handle *intr_handle;
+	uint16_t qid;
+	int rc;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (dev == NULL) {
+		PMD_DEBUG_TRACE("Invalid port device\n");
+		return -ENODEV;
+	}
+
+	intr_handle = &dev->pci_dev->intr_handle;
+	if (!intr_handle->intr_vec) {
+		PMD_DEBUG_TRACE("RX Intr vector unset\n");
+		return -EPERM;
+	}
+
+	for (qid = 0; qid < dev->data->nb_rx_queues; qid++) {
+		if (intr_handle->intr_vec[qid] < 0) {
+			PMD_DEBUG_TRACE("RX Intr vector invalid on %d\n", qid);
+			continue;
+		}
+
+		vec = intr_handle->intr_vec[qid];
+		rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec,
+				     data, rte_eth_dev_socket_id(port_id));
+		if (rc) {
+			PMD_DEBUG_TRACE("p %d q %d rx ctl error"
+					" op %d epfd %d vec %u\n",
+					port_id, qid, op, epfd, vec);
+		}
+	}
+
+	return 0;
+}
+
+int
+rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
+			  int epfd, int op, void *data)
+{
+	uint32_t vec;
+	struct rte_eth_dev *dev;
+	struct rte_intr_handle *intr_handle;
+	int rc;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (dev == NULL) {
+		PMD_DEBUG_TRACE("Invalid port device\n");
+		return -ENODEV;
+	}
+
+	if (queue_id >= dev->data->nb_rx_queues) {
+		PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
+		return -EINVAL;
+	}
+
+	intr_handle = &dev->pci_dev->intr_handle;
+	if (!intr_handle->intr_vec || intr_handle->intr_vec[queue_id] < 0) {
+		PMD_DEBUG_TRACE("RX Intr vector unset on %d\n", rx_queue_id);
+		return -EPERM;
+	}
+
+	vec = intr_handle->intr_vec[queue_id];
+	rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec,
+			     data, rte_eth_dev_socket_id(port_id));
+	if (rc) {
+		PMD_DEBUG_TRACE("p %d q %d rx ctl error"
+				" op %d epfd %d vec %u\n",
+				port_id, queue_id, op, epfd, vec);
+		return rc;
+	}
+
+	return 0;
+}
+
+int
+rte_eth_dev_rx_intr_enable(uint8_t port_id,
+			   uint16_t queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (dev == NULL) {
+		PMD_DEBUG_TRACE("Invalid port device\n");
+		return -ENODEV;
+	}
+
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP);
+	return (*dev->dev_ops->rx_queue_intr_enable)(dev, queue_id);
+}
+
+int
+rte_eth_dev_rx_intr_disable(uint8_t port_id,
+			    uint16_t queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (dev == NULL) {
+		PMD_DEBUG_TRACE("Invalid port device\n");
+		return -ENODEV;
+	}
+
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP);
+	return (*dev->dev_ops->rx_queue_intr_disable)(dev, queue_id);
+}
+
 #ifdef RTE_NIC_BYPASS
 int rte_eth_dev_bypass_init(uint8_t port_id)
 {
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 4648290..e5efec0 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -829,6 +829,8 @@ struct rte_eth_fdir {
 struct rte_intr_conf {
 	/** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */
 	uint16_t lsc;
+	/** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */
+	uint16_t rxq;
 };
 
 /**
@@ -1034,6 +1036,14 @@ typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
 				    const struct rte_eth_txconf *tx_conf);
 /**< @internal Setup a transmit queue of an Ethernet device. */
 
+typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
+				    uint16_t rx_queue_id);
+/**< @internal Enable interrupt of a receive queue of an Ethernet device. */
+
+typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
+				    uint16_t rx_queue_id);
+/**< @internal Disable interrupt of a receive queue of an Ethernet device. */
+
 typedef void (*eth_queue_release_t)(void *queue);
 /**< @internal Release memory resources allocated by given RX/TX queue. */
 
@@ -1385,6 +1395,10 @@ struct eth_dev_ops {
 	/** Get current RSS hash configuration. */
 	rss_hash_conf_get_t rss_hash_conf_get;
 	eth_filter_ctrl_t              filter_ctrl;          /**< common filter control*/
+
+	/** Enable/disable Rx queue interrupt. */
+	eth_rx_enable_intr_t       rx_queue_intr_enable; /**< Enable Rx queue interrupt. */
+	eth_rx_disable_intr_t      rx_queue_intr_disable; /**< Disable Rx queue interrupt.*/
 };
 
 /**
@@ -2867,6 +2881,96 @@ void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 				enum rte_eth_event_type event);
 
 /**
+ * When there is no rx packet coming in Rx Queue for a long time, we can
+ * sleep lcore related to RX Queue for power saving, and enable rx interrupt
+ * to be triggered when rx packect arrives.
+ *
+ * The rte_eth_dev_rx_intr_enable() function enables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_intr_enable(uint8_t port_id,
+			       uint16_t queue_id);
+
+/**
+ * When lcore wakes up from rx interrupt indicating packet coming, disable rx
+ * interrupt and returns to polling mode.
+ *
+ * The rte_eth_dev_rx_intr_disable() function disables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_intr_disable(uint8_t port_id,
+				uint16_t queue_id);
+
+/**
+ * RX Interrupt control per port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param epfd
+ *   Epoll instance fd which the intr vector associated to.
+ *   Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance.
+ * @param op
+ *   The operation be performed for the vector.
+ *   Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}.
+ * @param data
+ *   User raw data.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data);
+
+/**
+ * RX Interrupt control per queue.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param epfd
+ *   Epoll instance fd which the intr vector associated to.
+ *   Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance.
+ * @param op
+ *   The operation be performed for the vector.
+ *   Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}.
+ * @param data
+ *   User raw data.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
+			  int epfd, int op, void *data);
+
+/**
  * Turn on the LED on the Ethernet device.
  * This function turns on the LED on the Ethernet device.
  *
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index a2d25a6..2799b99 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -48,6 +48,10 @@ DPDK_2.0 {
 	rte_eth_dev_rss_hash_update;
 	rte_eth_dev_rss_reta_query;
 	rte_eth_dev_rss_reta_update;
+	rte_eth_dev_rx_intr_ctl;
+	rte_eth_dev_rx_intr_ctl_q;
+	rte_eth_dev_rx_intr_disable;
+	rte_eth_dev_rx_intr_enable;
 	rte_eth_dev_rx_queue_start;
 	rte_eth_dev_rx_queue_stop;
 	rte_eth_dev_set_link_down;
-- 
1.8.1.4

^ permalink raw reply	[relevance 2%]

* [dpdk-dev] From: Cunming Liang <cunming.liang@intel.com>
  2015-02-27  4:56  3% ` [dpdk-dev] [PATCH v6 0/8] Interrupt mode PMD Cunming Liang
  2015-02-27  4:56  2%   ` [dpdk-dev] [PATCH v6 5/8] ethdev: add rx interrupt enable/disable functions Cunming Liang
  2015-02-27  8:00  0%   ` [dpdk-dev] [PATCH v6 0/8] Interrupt mode PMD Liu, Yong
@ 2015-05-05  5:39  3%   ` Cunming Liang
  2015-05-05  5:39  2%     ` [dpdk-dev] [PATCH v7 07/10] ethdev: add rx intr enable, disable and ctl functions Cunming Liang
  2015-05-21  8:55  2%     ` [dpdk-dev] [PATCH v8 00/11] Interrupt mode PMD Cunming Liang
  2015-05-05  5:53  3%   ` [dpdk-dev] [PATCH v7 00/10] Interrupt mode PMD Cunming Liang
  3 siblings, 2 replies; 200+ results
From: Cunming Liang @ 2015-05-05  5:39 UTC (permalink / raw)
  To: dev; +Cc: shemming

v7 changes
 - decouple epoll event and intr operation
 - add condition check in the case intr vector is disabled
 - renaming some APIs

v6 changes
 - split rte_intr_wait_rx_pkt into two APIs 'wait' and 'set'.
 - rewrite rte_intr_rx_wait/rte_intr_rx_set.
 - using vector number instead of queue_id as interrupt API params.
 - patch reorder and split.

v5 changes
 - Rebase the patchset onto the HEAD
 - Isolate ethdev from EAL for new-added wait-for-rx interrupt function
 - Export wait-for-rx interrupt function for shared libraries
 - Split-off a new patch file for changed struct rte_intr_handle that
   other patches depend on, to avoid breaking git bisect
 - Change sample application to accommodate EAL function spec change
   accordingly

v4 changes
 - Export interrupt enable/disable functions for shared libraries
 - Adjust position of new-added structure fields and functions to
   avoid breaking ABI
 
v3 changes
 - Add return value for interrupt enable/disable functions
 - Move spinlock from PMD to L3fwd-power
 - Remove unnecessary variables in e1000_mac_info
 - Fix miscellaneous review comments
 
v2 changes
 - Fix compilation issue in Makefile for missed header file.
 - Consolidate internal and community review comments of v1 patch set.
 
The patch series introduces low-latency one-shot rx interrupts into DPDK, with
an example of polling/interrupt mode switch control.
 
The DPDK userspace interrupt notification and handling mechanism is based on UIO,
which has the following limitations:
1) It is designed to handle the LSC interrupt only, using an inefficient suspended
   pthread wakeup procedure (e.g. UIO wakes up the LSC interrupt handling thread,
   which then wakes up the DPDK polling thread). This introduces
   non-deterministic wakeup latency for the DPDK polling thread, as well as packet
   latency, if it is used to handle Rx interrupts.
2) UIO only supports a single interrupt vector, which has to be shared by the
   LSC interrupt and the interrupts assigned to dedicated rx queues.
 
This patchset includes the following features:
1) Enable one-shot rx queue interrupts in ixgbe PMD (PF & VF) and igb PMD (PF only).
2) Build on top of the VFIO mechanism instead of UIO, so it can support
   up to 64 interrupt vectors for rx queue interrupts.
3) Have one DPDK polling thread handle each Rx queue interrupt with a dedicated
   VFIO eventfd, which eliminates non-deterministic pthread wakeup latency in
   user space.
4) Demonstrate the interrupt control APIs and userspace NAPI-like polling/interrupt
   switch algorithms in the L3fwd-power example.

Known limitations:
1) It does not work for UIO, because a single interrupt eventfd shared by the LSC
   and rx queue interrupt handlers causes a mess.
2) The LSC interrupt is not supported by the VF driver, so it is disabled by default
   in L3fwd-power for now. Feel free to turn it on if you want to support both LSC
   and rx queue interrupts on a PF.

Cunming Liang (10):
  eal/linux: add interrupt vectors support in intr_handle
  eal/linux: add rte_epoll_wait/ctl support
  eal/linux: add API to set rx interrupt event monitor
  eal/bsd: dummy for new intr definition
  eal/linux: fix comments typo on vfio msi
  eal/linux: add interrupt vectors handling on VFIO
  ethdev: add rx intr enable, disable and ctl functions
  ixgbe: enable rx queue interrupts for both PF and VF
  igb: enable rx queue interrupts for PF
  l3fwd-power: enable one-shot rx interrupt and polling/interrupt mode
    switch

 examples/l3fwd-power/main.c                        | 206 ++++++++--
 .../bsdapp/eal/include/exec-env/rte_interrupts.h   |   6 +
 lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 232 +++++++++--
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         |  12 +
 .../linuxapp/eal/include/exec-env/rte_interrupts.h |  97 +++++
 lib/librte_eal/linuxapp/eal/rte_eal_version.map    |   4 +
 lib/librte_ether/rte_ethdev.c                      | 132 +++++++
 lib/librte_ether/rte_ethdev.h                      | 104 +++++
 lib/librte_ether/rte_ether_version.map             |   4 +
 lib/librte_pmd_e1000/e1000_ethdev.h                |   3 +
 lib/librte_pmd_e1000/igb_ethdev.c                  | 256 +++++++++++--
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c                | 425 ++++++++++++++++++++-
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h                |   7 +
 13 files changed, 1394 insertions(+), 94 deletions(-)

-- 
1.8.1.4

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  2015-04-30 16:00  3%                                       ` Neil Horman
@ 2015-05-01  0:15  4%                                         ` Ravi Kerur
  0 siblings, 0 replies; 200+ results
From: Ravi Kerur @ 2015-05-01  0:15 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On Thu, Apr 30, 2015 at 9:00 AM, Neil Horman <nhorman@tuxdriver.com> wrote:

> On Wed, Apr 29, 2015 at 10:47:04AM -0700, Ravi Kerur wrote:
> > > > I tried to run validate-abi.sh on BSD but ran into errors. If there
> is a
> > > > way to check against BSD please let me know.
> > > >
> > > The ABI checker should work on BSD as far as I know, since it only
> relies
> > > on
> > > dwarf information in the output binary.  What errors are you seeing?
> > >
> >
> > dpdk-bsd:/home/rkerur/dpdk-validate-abi-1/dpdk # sh
> > ./scripts/validate-abi.sh v2.0.0-rc3 v2.0.0-abi
> x86_64-native-bsdapp-clang
> > mktemp: illegal option -- p
> Ah, bsd mktemp doesn't support the -p option.  I'll see if I can fix that.
>

I think there are a couple of other issues that I found:

FreeBSD sed is different from Linux (GNU sed), and I get the following error
with the script:

"sed 1 command c expects \ followed by text".

I have to use gsed (GNU sed) on FreeBSD to get rid of that error; similarly,
FreeBSD uses gmake instead of make.  I have made those minor changes and am
sending them with this email as an attachment.


> > usage: mktemp [-d] [-q] [-t prefix] [-u] template ...
> >        mktemp [-d] [-q] [-u] -t prefix
> > Cant find abi-compliance-checker utility
> >
> > abi-compliance-checker is installed as shown below.
> >
> > dpdk-bsd:/home/rkerur/dpdk-validate-abi-1/dpdk # pkg install
> > devel/abi-compliance-checker
> > Updating FreeBSD repository catalogue...
> > FreeBSD repository is up-to-date.
> > All repositories are up-to-date.
> > Checking integrity... done (0 conflicting)
> > The most recent version of packages are already installed
> >
>
> Whats the path for abi-compliance checker there?  It would seem that the
> binary
> isn't in your path, as which isn't locating it.
>

I am using the regular FreeBSD port install, which doesn't install into either
/usr/bin or /usr/local/bin. I finally decided to build both abi-dumper
and abi-compliance-checker from source and install them in the correct
directory. The above error is fixed after that; however, the ABI utilities use
"eu-readelf", and I can't find that utility to install on FreeBSD. I get the
following error:

ERROR: can't find "eu-readelf" command

freeBSD has only readelf. Please let me know if there is a way to get rid
of this error.

Thanks,
Ravi

>
> > >
> > > Neil
> > >
> > >
>

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  2015-04-29 17:47  5%                                     ` Ravi Kerur
@ 2015-04-30 16:00  3%                                       ` Neil Horman
  2015-05-01  0:15  4%                                         ` Ravi Kerur
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-04-30 16:00 UTC (permalink / raw)
  To: Ravi Kerur; +Cc: dev

On Wed, Apr 29, 2015 at 10:47:04AM -0700, Ravi Kerur wrote:
> > > I tried to run validate-abi.sh on BSD but ran into errors. If there is a
> > > way to check against BSD please let me know.
> > >
> > The ABI checker should work on BSD as far as I know, since it only relies
> > on
> > dwarf information in the output binary.  What errors are you seeing?
> >
> 
> dpdk-bsd:/home/rkerur/dpdk-validate-abi-1/dpdk # sh
> ./scripts/validate-abi.sh v2.0.0-rc3 v2.0.0-abi x86_64-native-bsdapp-clang
> mktemp: illegal option -- p
Ah, bsd mktemp doesn't support the -p option.  I'll see if I can fix that.

> usage: mktemp [-d] [-q] [-t prefix] [-u] template ...
>        mktemp [-d] [-q] [-u] -t prefix
> Cant find abi-compliance-checker utility
> 
> abi-compliance-checker is installed as shown below.
> 
> dpdk-bsd:/home/rkerur/dpdk-validate-abi-1/dpdk # pkg install
> devel/abi-compliance-checker
> Updating FreeBSD repository catalogue...
> FreeBSD repository is up-to-date.
> All repositories are up-to-date.
> Checking integrity... done (0 conflicting)
> The most recent version of packages are already installed
> 

What's the path for abi-compliance-checker there?  It would seem that the binary
isn't in your path, as `which` isn't locating it.
Neil

> 
> >
> > Neil
> >
> >

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] gmake test on freeBSD
  2015-04-29  8:29  0% ` [dpdk-dev] gmake test on freeBSD Bruce Richardson
@ 2015-04-29 17:58  0%   ` Ravi Kerur
  0 siblings, 0 replies; 200+ results
From: Ravi Kerur @ 2015-04-29 17:58 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: dev

On Wed, Apr 29, 2015 at 1:29 AM, Bruce Richardson <
bruce.richardson@intel.com> wrote:

> On Tue, Apr 28, 2015 at 06:15:53PM -0700, Ravi Kerur wrote:
> > DPDK team,
> >
> > Is there a automated tests to run on freeBSD similar to Linux (make
> test).
> >
> > I ran "gmake test T=x86_64-native-bsdapp-clang CC=clang" I get following
> > output
> >
> > /usr/home/rkerur/dpdk-validate-abi-1/dpdk/build/app/test -c f -n 4
> >
> > Test name                      Test result                      Test
> > Total
> >
> ================================================================================
> > Start group_1:                 Fail [Can't run]              [00m 00s]
> > Timer autotest:                Fail [Can't run]              [00m 00s]
> > Debug autotest:                Fail [Can't run]              [00m 00s]
> > Errno autotest:                Fail [Can't run]              [00m 00s]
> > Meter autotest:                Fail [Can't run]              [00m 00s]
> > Common autotest:               Fail [Can't run]              [00m 00s]
> > Dump log history:              Fail [Can't run]              [00m 00s]
> > ...
> > Start memcpy_perf:             Fail [No prompt]              [00m 00s]
> > Memcpy performance autotest:   Fail [No prompt]              [00m 00s]
> [00m
> > 01s]
> > Start hash_perf:               Fail [No prompt]              [00m 00s]
> > Hash performance autotest:     Fail [No prompt]              [00m 00s]
> [00m
> > 01s]
> > Start power:                   Fail [No prompt]              [00m 00s]
> > Power autotest:                Fail [No prompt]              [00m 00s]
> [00m
> > 01s]
> > ...
> >
> > I have contigmem and nic_uio installed. I know some applications are
> > linuxapp specific but wanted to know if there is a similar automated test
> > tool like Linux?
> >
> > Thanks,
> > Ravi
>
> There is no separate test tool for FreeBSD. Unfortunately there are a
> number of little
> things that don't really work on FreeBSD - and this looks to be one of
> them. We
> probably need to look to fix this.
>
> Thanks Bruce. Is it due to missing infra for BSD or some minor fixes in
app/test?

> /Bruce
>

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  2015-04-29 10:04  3%                                   ` Neil Horman
@ 2015-04-29 17:47  5%                                     ` Ravi Kerur
  2015-04-30 16:00  3%                                       ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Ravi Kerur @ 2015-04-29 17:47 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

> > I tried to run validate-abi.sh on BSD but ran into errors. If there is a
> > way to check against BSD please let me know.
> >
> The ABI checker should work on BSD as far as I know, since it only relies
> on
> dwarf information in the output binary.  What errors are you seeing?
>

dpdk-bsd:/home/rkerur/dpdk-validate-abi-1/dpdk # sh
./scripts/validate-abi.sh v2.0.0-rc3 v2.0.0-abi x86_64-native-bsdapp-clang
mktemp: illegal option -- p
usage: mktemp [-d] [-q] [-t prefix] [-u] template ...
       mktemp [-d] [-q] [-u] -t prefix
Cant find abi-compliance-checker utility

abi-compliance-checker is installed as shown below.

dpdk-bsd:/home/rkerur/dpdk-validate-abi-1/dpdk # pkg install
devel/abi-compliance-checker
Updating FreeBSD repository catalogue...
FreeBSD repository is up-to-date.
All repositories are up-to-date.
Checking integrity... done (0 conflicting)
The most recent version of packages are already installed


>
> Neil
>
>

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [PATCH v8 0/6] Move common functions in EAL
  2015-04-28 23:46  4% [dpdk-dev] [PATCH v8 0/6] Move common functions in EAL Ravi Kerur
  2015-04-28 23:46  2% ` [dpdk-dev] [PATCH v8 1/6] Move common functions in eal_thread.c Ravi Kerur
@ 2015-04-29 10:14  0% ` Neil Horman
  1 sibling, 0 replies; 200+ results
From: Neil Horman @ 2015-04-29 10:14 UTC (permalink / raw)
  To: Ravi Kerur; +Cc: dev

On Tue, Apr 28, 2015 at 04:46:21PM -0700, Ravi Kerur wrote:
> Changes in v8 includes
> Re-ordering source file compilation to fix ABI warning.
> Ran validate-abi against x86_64-native-linuxapp-gcc,
> x86_64-native-linuxapp-clang and x86_64-ivshmem-linuxapp-gcc
> environments.
> 
> Testing:
> Linux - Ubuntu x86_64 14.04
> Compilation successful (x86_64-native-linuxapp-gcc and
> x86_64-native-linuxapp-clang).
> "make test" results match baseline code.
> testpmd utility on I217/I218 Intel chipset.
> 
> FreeBSD 10.0 x86_64
> Compilation successful (x86_64-native-bsdapp-gcc and
> x86_64-native-bsdapp-clang).
> Tested with helloworld, timer and cmdline examples.
> 
> Ravi Kerur (6):
>   Move common functions in eal_thread.c
>   Move common functions in eal.c
>   Move common functions in eal_lcore.c
>   Move common functions in eal_timer.c
>   Move common functions in eal_memory.c
>   Move common functions in eal_pci.c
> 
>  lib/librte_eal/bsdapp/eal/Makefile           |   9 +-
>  lib/librte_eal/bsdapp/eal/eal.c              | 271 +++---------------------
>  lib/librte_eal/bsdapp/eal/eal_lcore.c        |  72 ++-----
>  lib/librte_eal/bsdapp/eal/eal_memory.c       |  47 ++---
>  lib/librte_eal/bsdapp/eal/eal_pci.c          |  72 +------
>  lib/librte_eal/bsdapp/eal/eal_thread.c       | 152 --------------
>  lib/librte_eal/bsdapp/eal/eal_timer.c        |  52 +----
>  lib/librte_eal/common/eal_common_app_usage.c |  63 ++++++
>  lib/librte_eal/common/eal_common_lcore.c     | 107 ++++++++++
>  lib/librte_eal/common/eal_common_mem_cfg.c   | 224 ++++++++++++++++++++
>  lib/librte_eal/common/eal_common_memory.c    |  38 +++-
>  lib/librte_eal/common/eal_common_pci.c       |  72 +++++++
>  lib/librte_eal/common/eal_common_proc_type.c |  58 ++++++
>  lib/librte_eal/common/eal_common_sysfs.c     | 148 ++++++++++++++
>  lib/librte_eal/common/eal_common_thread.c    | 147 ++++++++++++-
>  lib/librte_eal/common/eal_common_timer.c     | 102 +++++++++
>  lib/librte_eal/common/eal_hugepages.h        |   1 +
>  lib/librte_eal/common/eal_private.h          | 171 +++++++++++++++-
>  lib/librte_eal/common/include/rte_eal.h      |   4 +
>  lib/librte_eal/linuxapp/eal/Makefile         |  10 +-
>  lib/librte_eal/linuxapp/eal/eal.c            | 296 ++++-----------------------
>  lib/librte_eal/linuxapp/eal/eal_lcore.c      |  66 +-----
>  lib/librte_eal/linuxapp/eal/eal_memory.c     |  36 +---
>  lib/librte_eal/linuxapp/eal/eal_pci.c        |  75 +------
>  lib/librte_eal/linuxapp/eal/eal_thread.c     | 152 +-------------
>  lib/librte_eal/linuxapp/eal/eal_timer.c      |  55 +----
>  26 files changed, 1277 insertions(+), 1223 deletions(-)
>  create mode 100644 lib/librte_eal/common/eal_common_app_usage.c
>  create mode 100644 lib/librte_eal/common/eal_common_lcore.c
>  create mode 100644 lib/librte_eal/common/eal_common_mem_cfg.c
>  create mode 100644 lib/librte_eal/common/eal_common_proc_type.c
>  create mode 100644 lib/librte_eal/common/eal_common_sysfs.c
>  create mode 100644 lib/librte_eal/common/eal_common_timer.c
> 
> -- 
> 1.9.1
> 
> 

Series
Acked-by: Neil Horman <nhorman@tuxdriver.com>

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  2015-04-28 23:52  4%                                 ` Ravi Kerur
@ 2015-04-29 10:04  3%                                   ` Neil Horman
  2015-04-29 17:47  5%                                     ` Ravi Kerur
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-04-29 10:04 UTC (permalink / raw)
  To: Ravi Kerur; +Cc: dev

On Tue, Apr 28, 2015 at 04:52:37PM -0700, Ravi Kerur wrote:
> On Tue, Apr 28, 2015 at 12:35 PM, Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > On Mon, Apr 27, 2015 at 03:39:41PM -0700, Ravi Kerur wrote:
> > > On Mon, Apr 27, 2015 at 6:44 AM, Neil Horman <nhorman@tuxdriver.com>
> > wrote:
> > >
> > > > On Sat, Apr 25, 2015 at 05:09:01PM -0700, Ravi Kerur wrote:
> > > > > On Sat, Apr 25, 2015 at 6:02 AM, Neil Horman <nhorman@tuxdriver.com>
> > > > wrote:
> > > > >
> > > > > > On Sat, Apr 25, 2015 at 08:32:42AM -0400, Neil Horman wrote:
> > > > > > > On Fri, Apr 24, 2015 at 06:45:06PM -0700, Ravi Kerur wrote:
> > > > > > > > On Fri, Apr 24, 2015 at 2:24 PM, Ravi Kerur <rkerur@gmail.com>
> > > > wrote:
> > > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > On Fri, Apr 24, 2015 at 12:51 PM, Neil Horman <
> > > > nhorman@tuxdriver.com
> > > > > > >
> > > > > > > > > wrote:
> > > > > > > > >
> > > > > > > > >> On Fri, Apr 24, 2015 at 12:21:23PM -0700, Ravi Kerur wrote:
> > > > > > > > >> > On Fri, Apr 24, 2015 at 11:53 AM, Neil Horman <
> > > > > > nhorman@tuxdriver.com>
> > > > > > > > >> wrote:
> > > > > > > > >> >
> > > > > > > > >> > > On Fri, Apr 24, 2015 at 09:45:24AM -0700, Ravi Kerur
> > wrote:
> > > > > > > > >> > > > On Fri, Apr 24, 2015 at 8:22 AM, Neil Horman <
> > > > > > nhorman@tuxdriver.com
> > > > > > > > >> >
> > > > > > > > >> > > wrote:
> > > > > > > > >> > > >
> > > > > > > > >> > > > > On Fri, Apr 24, 2015 at 08:14:04AM -0700, Ravi Kerur
> > > > wrote:
> > > > > > > > >> > > > > > On Fri, Apr 24, 2015 at 6:51 AM, Neil Horman <
> > > > > > > > >> nhorman@tuxdriver.com>
> > > > > > > > >> > > > > wrote:
> > > > > > > > >> > > > > >
> > > > > > > > >> > > > > > > On Thu, Apr 23, 2015 at 02:35:31PM -0700, Ravi
> > Kerur
> > > > > > wrote:
> > > > > > > > >> > > > > > > > Changes in v7
> > > > > > > > >> > > > > > > > Remove _setname_ pthread calls.
> > > > > > > > >> > > > > > > > Use rte_gettid() API in RTE_LOG to print
> > > > thread_id.
> > > > > > > > >> > > > > > > >
> > > > > > > > >> > > > > > > > Changes in v6
> > > > > > > > >> > > > > > > > Remove RTE_EXEC_ENV_BSDAPP from
> > > > eal_common_thread.c
> > > > > > file.
> > > > > > > > >> > > > > > > > Add pthread_setname_np/pthread_set_name_np for
> > > > > > Linux/FreeBSD
> > > > > > > > >> > > > > > > > respectively. Plan to use _getname_ in RTE_LOG
> > > > when
> > > > > > > > >> available.
> > > > > > > > >> > > > > > > > Use existing rte_get_systid() in RTE_LOG to
> > print
> > > > > > thread_id.
> > > > > > > > >> > > > > > > >
> > > > > > > > >> > > > > > > > Changes in v5
> > > > > > > > >> > > > > > > > Rebase to latest code.
> > > > > > > > >> > > > > > > >
> > > > > > > > >> > > > > > > > Changes in v4
> > > > > > > > >> > > > > > > > None
> > > > > > > > >> > > > > > > >
> > > > > > > > >> > > > > > > > Changes in v3
> > > > > > > > >> > > > > > > > Changed subject to be more explicit on file
> > name
> > > > > > inclusion.
> > > > > > > > >> > > > > > > >
> > > > > > > > >> > > > > > > > Changes in v2
> > > > > > > > >> > > > > > > > None
> > > > > > > > >> > > > > > > >
> > > > > > > > >> > > > > > > > Changes in v1
> > > > > > > > >> > > > > > > > eal_thread.c has minor differences between
> > Linux
> > > > and
> > > > > > BSD,
> > > > > > > > >> move
> > > > > > > > >> > > > > > > > entire file into common directory.
> > > > > > > > >> > > > > > > > Use RTE_EXEC_ENV_BSDAPP to differentiate on
> > minor
> > > > > > > > >> differences.
> > > > > > > > >> > > > > > > > Rename eal_thread.c to eal_common_thread.c
> > > > > > > > >> > > > > > > > Makefile changes to reflect file move and name
> > > > change.
> > > > > > > > >> > > > > > > > Fix checkpatch warnings.
> > > > > > > > >> > > > > > > >
> > > > > > > > >> > > > > > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > > > > > > >> > > > > > > > ---
> > > > > > > > >> > > > > > > >  lib/librte_eal/bsdapp/eal/Makefile        |
> >  2
> > > > +-
> > > > > > > > >> > > > > > > >  lib/librte_eal/bsdapp/eal/eal_thread.c    |
> > 152
> > > > > > > > >> > > > > > > ------------------------------
> > > > > > > > >> > > > > > > >  lib/librte_eal/common/eal_common_thread.c |
> > 147
> > > > > > > > >> > > > > > > ++++++++++++++++++++++++++++-
> > > > > > > > >> > > > > > > >  lib/librte_eal/linuxapp/eal/eal_thread.c  |
> > 152
> > > > > > > > >> > > > > > > +-----------------------------
> > > > > > > > >> > > > > > > >  4 files changed, 148 insertions(+), 305
> > > > deletions(-)
> > > > > > > > >> > > > > > > >
> > > > > > > > >> > > > > > > > diff --git
> > a/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > > > >> > > > > > > b/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > > > >> > > > > > > > index 2357cfa..55971b9 100644
> > > > > > > > >> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > > > >> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > > > >> > > > > > > > @@ -87,7 +87,7 @@ CFLAGS_eal_common_log.o :=
> > > > > > -D_GNU_SOURCE
> > > > > > > > >> > > > > > > >  # workaround for a gcc bug with noreturn
> > > > attribute
> > > > > > > > >> > > > > > > >  #
> > > > http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
> > > > > > > > >> > > > > > > >  ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
> > > > > > > > >> > > > > > > > -CFLAGS_eal_thread.o += -Wno-return-type
> > > > > > > > >> > > > > > > > +CFLAGS_eal_common_thread.o +=
> > -Wno-return-type
> > > > > > > > >> > > > > > > >  CFLAGS_eal_hpet.o += -Wno-return-type
> > > > > > > > >> > > > > > > >  endif
> > > > > > > > >> > > > > > > >
> > > > > > > > >> > > > > > > > diff --git
> > > > a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > > > >> > > > > > > b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > > > >> > > > > > > > index 9a03437..5714b8f 100644
> > > > > > > > >> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > > > >> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > > > >> > > > > > > > @@ -35,163 +35,11 @@
> > > > > > > > >> > > > > > > >  #include <stdio.h>
> > > > > > > > >> > > > > > > >  #include <stdlib.h>
> > > > > > > > >> > > > > > > >  #include <stdint.h>
> > > > > > > > >> > > > > > > > -#include <unistd.h>
> > > > > > > > >> > > > > > > > -#include <sched.h>
> > > > > > > > >> > > > > > > > -#include <pthread_np.h>
> > > > > > > > >> > > > > > > > -#include <sys/queue.h>
> > > > > > > > >> > > > > > > >  #include <sys/thr.h>
> > > > > > > > >> > > > > > > >
> > > > > > > > >> > > > > > > > -#include <rte_debug.h>
> > > > > > > > >> > > > > > > > -#include <rte_atomic.h>
> > > > > > > > >> > > > > > > > -#include <rte_launch.h>
> > > > > > > > >> > > > > > > > -#include <rte_log.h>
> > > > > > > > >> > > > > > > > -#include <rte_memory.h>
> > > > > > > > >> > > > > > > > -#include <rte_memzone.h>
> > > > > > > > >> > > > > > > > -#include <rte_per_lcore.h>
> > > > > > > > >> > > > > > > > -#include <rte_eal.h>
> > > > > > > > >> > > > > > > > -#include <rte_per_lcore.h>
> > > > > > > > >> > > > > > > > -#include <rte_lcore.h>
> > > > > > > > >> > > > > > > > -
> > > > > > > > >> > > > > > > >  #include "eal_private.h"
> > > > > > > > >> > > > > > > >  #include "eal_thread.h"
> > > > > > > > >> > > > > > > >
> > > > > > > > >> > > > > > > > -RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) =
> > > > > > LCORE_ID_ANY;
> > > > > > > > >> > > > > > > NAK, these are exported symbols, you can't
> > remove
> > > > them
> > > > > > without
> > > > > > > > >> > > going
> > > > > > > > >> > > > > > > through the
> > > > > > > > >> > > > > > > deprecation process.
> > > > > > > > >> > > > > > >
> > > > > > > > >> > > > > > >
> > > > > > > > >> > > > > > They are not removed/deleted, they are moved from
> > > > > > eal_thread.c
> > > > > > > > >> to
> > > > > > > > >> > > > > > eal_common_thread.c file since it is common to
> > both
> > > > Linux
> > > > > > and
> > > > > > > > >> BSD.
> > > > > > > > >> > > > > >
> > > > > > > > >> > > > > Then perhaps you forgot to export the symbol?  Its
> > > > showing
> > > > > > up as
> > > > > > > > >> > > removed
> > > > > > > > >> > > > > on the
> > > > > > > > >> > > > > ABI checker utility.
> > > > > > > > >> > > > >
> > > > > > > > >> > > > > Neil
> > > > > > > > >> > > > >
> > > > > > > > >> > > >
> > > > > > > > >> > > > Can you please show me in the current code where it is
> > > > being
> > > > > > > > >> exported? I
> > > > > > > > >> > > > have only moved definitions to _common_ files, not
> > sure
> > > > why it
> > > > > > > > >> should be
> > > > > > > > >> > > > exported now.  I searched in the current code for
> > > > > > > > >> RTE_DEFINE_PER_LCORE
> > > > > > > > >> > > >
> > > > > > > > >> > > > #home/rkerur/dpdk-tmp/dpdk# grep -ir
> > RTE_DEFINE_PER_LCORE
> > > > *
> > > > > > > > >> > > > app/test/test_per_lcore.c:static
> > > > > > RTE_DEFINE_PER_LCORE(unsigned,
> > > > > > > > >> test) =
> > > > > > > > >> > > > 0x12345678;
> > > > > > > > >> > > >
> > > > > > > > >>
> > > > > >
> > lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > > > > > >> > > > _lcore_id) = LCORE_ID_ANY;
> > > > > > > > >> > > >
> > > > > > > > >>
> > > > > >
> > lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > > > > > >> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > > > > > > > >> > > >
> > > > > > > > >> > >
> > > > > > > > >>
> > > > > >
> > > >
> > lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > > > > > > > >> > > > _cpuset);
> > > > > > > > >> > > >
> > > > > > > > >>
> > > > > >
> > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > > > > > >> > > > _lcore_id) = LCORE_ID_ANY;
> > > > > > > > >> > > >
> > > > > > > > >>
> > > > > >
> > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > > > > > >> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > > > > > > > >> > > >
> > > > > > > > >>
> > > > > >
> > > >
> > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > > > > > > > >> > > > _cpuset);
> > > > > > > > >> > > > lib/librte_eal/common/include/rte_per_lcore.h:#define
> > > > > > > > >> > > > RTE_DEFINE_PER_LCORE(type, name)            \
> > > > > > > > >> > > > lib/librte_eal/common/include/rte_eal.h:    static
> > > > > > > > >> > > > RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
> > > > > > > > >> > > >
> > > > > > lib/librte_eal/common/eal_common_errno.c:RTE_DEFINE_PER_LCORE(int,
> > > > > > > > >> > > > _rte_errno);
> > > > > > > > >> > > > lib/librte_eal/common/eal_common_errno.c:    static
> > > > > > > > >> > > > RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
> > > > > > > > >> > > >
> > > > > > > > >> > > >
> > > > > > > > >> > > > > > Thanks
> > > > > > > > >> > > > > > Ravi
> > > > > > > > >> > > > > >
> > > > > > > > >> > > > > > Regards
> > > > > > > > >> > > > > > > Neil
> > > > > > > > >> > > > > > >
> > > > > > > > >> > > > > > >
> > > > > > > > >> > > > >
> > > > > > > > > >> > > It's exported in the version map file:
> > > > > > > > >> > >  per_lcore__lcore_id;
> > > > > > > > >> > >
> > > > > > > > >> > >
> > > > > > > > >> > Thanks Neil, I checked and both linux and bsd
> > > > rte_eal_version.map
> > > > > > have
> > > > > > > > >> it.
> > > > > > > > >> > I compared .map file between "changed code" and the
> > original,
> > > > > > they are
> > > > > > > > >> same
> > > > > > > > >> > for both linux and bsd. In fact you had ACK'd v4 version
> > of
> > > > this
> > > > > > patch
> > > > > > > > >> > series and no major changes after that. Please let me
> > know if
> > > > I
> > > > > > missed
> > > > > > > > >> > something.
> > > > > > > > >> >
> > > > > > > > >> I did, and I'm retracting that, because I didn't think to
> > check
> > > > the
> > > > > > ABI
> > > > > > > > >> compatibility on this.  But I ran it throught the ABI
> > checking
> > > > > > script
> > > > > > > > >> this and
> > > > > > > > >> this error popped out.  You should run it as well, its in
> > the
> > > > > > scripts
> > > > > > > > >> directory.
> > > > > > > > >>
> > > > > > > > >>
> > > > > > > > >> I see in your first patch you removed it and re-added it in
> > the
> > > > > > common
> > > > > > > > >> section.
> > > > > > > > >> But something about how its building is causing it to not
> > show
> > > > up
> > > > > > as an
> > > > > > > > >> exported
> > > > > > > > >> symbol, which is problematic, as other applications are
> > going to
> > > > > > want
> > > > > > > > >> access to
> > > > > > > > >> it.
> > > > > > > > >>
> > > > > > > > >> It also possible that the ABI checker is throwing a false
> > > > positive,
> > > > > > but
> > > > > > > > >> either
> > > > > > > > >> way, it needs to be looked into prior to moving forward with
> > > > this.
> > > > > > > > >>
> > > > > > > > >>
> > > > > > > > > I did following things.
> > > > > > > > >
> > > > > > > > > Put a tag (v2.0.0-before-common-eal)  before EAL common
> > functions
> > > > > > changes
> > > > > > > > > for commit (3c0c807038ad642f4be7deb9370293c39d12f029 net:
> > remove
> > > > > > unneeded
> > > > > > > > > include)
> > > > > > > > >
> > > > > > > > > Put a tag (v2.0.0-common-eal) after EAL common functions
> > changes
> > > > for
> > > > > > > > > commit (25737e5a7212630a7b5d8ca756860a062f403789 Move common
> > > > > > functions in
> > > > > > > > > eal_pci.c)
> > > > > > > > >
> > > > > > > > > Ran validate-abi against x86_64-native-linuxapp-gcc and
> > > > > > > > >
> > > > > > > > > v2.0.0-rc3 and v2.0.0-before-common-eal, html report for
> > > > > > librte_eal.so
> > > > > > > > > shows removed symbols for "per_lcore__cpuset"
> > > > > > > > >
> > > > > > > > > v2.0.0-rc3 and v2.0.0-common-eal, html report for
> > librte_eal.so
> > > > shows
> > > > > > > > > removed symbols for "per_lcore__cpuset"
> > > > > > > > >
> > > > > > > > > Removed symbol is different from what you have reported and
> > in my
> > > > > > case I
> > > > > > > > > see it even before my commit. If you are interested I can
> > unicast
> > > > > > you html
> > > > > > > > > report file. Please let me know how to proceed.
> > > > > > > > >
> > > > > > > > >
> > > > > > > >
> > > > > > > > I did some experiment and found some interesting things.  I
> > will
> > > > take
> > > > > > eal.c
> > > > > > > > as an example
> > > > > > > >
> > > > > > > > eal.c is split into eal_common_sysfs.c eal_common_mem_cfg.c
> > > > > > > > eal_common_proc_type.c and eal_common_app_usage.c. In
> > > > > > linuxapp/eal/Makefile
> > > > > > > > if I compile new files right after eal.c as shown below
> > > > > > > >
> > > > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) := eal.c
> > > > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_sysfs.c
> > > > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_mem_cfg.c
> > > > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) +=
> > eal_common_proc_type.c
> > > > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) +=
> > eal_common_app_usage.c
> > > > > > > > ...
> > > > > > > >
> > > > > > > > validate-abi results matches baseline. Instead if i place new
> > > > _common_
> > > > > > > > files in common area in linuxapp/eal/Makefile as shown below
> > > > > > > >
> > > > > > > > # from common dir
> > > > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
> > > > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_log.c
> > > > > > > > ...
> > > > > > > >
> > > > > > > > validate-abi reports problems in binary compatibility and source
> > > > > > > > compatibility
> > > > > > > >
> > > > > > > > eal_filesystem.h, librte_eal.so.1
> > > > > > > >  [+] eal_parse_sysfs_value ( char const* filename, unsigned
> > long*
> > > > val )
> > > > > > > >  @@ DPDK_2.0 (2)
> > > > > > > >
> > > > > > > > I believe files in common and linuxapp directory are compiled
> > same
> > > > way
> > > > > > so
> > > > > > > > not sure why placement in makefile makes difference.
> > > > > > > >
> > > > > > > > Could this be false-positive from validate-abi script??
> > > > > > > >
> > > > > > > It could be, yes.  Though I'm more inclined to think that
> > perhaps in
> > > > the
> > > > > > new
> > > > > > > version of the code we're not generating the same dwarf
> > information
> > > > out
> > > > > > of it.
> > > > > > > In fact for some reason, I've checked both the build before and
> > after
> > > > > > your
> > > > > > > patch series, and the exported CFLAGS aren't getting passed to
> > the
> > > > build
> > > > > > > properly, implying that we're not building all the code in the
> > > > validator
> > > > > > with
> > > > > > > the -g flag, which the validator needs to function properly.  I'm
> > > > looking
> > > > > > into
> > > > > > > that
> > > > > > > Neil
> > > > > > >
> > > > > > >
> > > > > > Found the problem, I was stupidly reading the report incorrectly.
> > The
> > > > > > problem
> > > > > > regarding _lcore_id is a source compatibility issue (because the
> > symbol
> > > > > > moved to
> > > > > > a new location), which is irrelevant to us.  Its not in any way a
> > > > binary
> > > > > > compat
> > > > > > problem, which is what we care about.  Sorry for the noise.
> > > > > >
> > > > > > I do still have a few concerns about some changed calling
> > conventions
> > > > with
> > > > > > a few
> > > > > > other functions, which I'll look into on monday.
> > > > > >
> > > > > >
> > > > > Please let me know your inputs on changed calling conventions. Most
> > of
> > > > them
> > > > > can be fixed by re-arranging moved code in _common_ files and order
> > of
> > > > > compilation.
> > > > >
> > > > If moving the order of compilation around fixes the problem, then I am
> > > > reasonably convinced that it is, if not a false positive, a minor issue
> > > > with the
> > > > compilers dwarf information (The compiler just can't sanely change the
> > > > location
> > > > in which parameters are passed).  If you make those changes, I'll ACK
> > > > them, and
> > > > look into whats going on with the calling conventions
> > > >
> > >
> > > Issues like the one shown below are taken care by reordering the code
> > > compilation.
> > >
> > > eal_parse_sysfs_value ( char const* filename, unsigned long* val )
> > >
> > > Change
> > > The parameter filename became passed on stack instead of rdi register
> > >
> > > Effect
> > > Violation of the calling convention. This may result in crash or
> > incorrect
> > > behavior of applications.
> > >
> > > Last one that is left out is in
> > >
> > > rte_thread_set_affinity ( rte_cpuset_t* p1 )
> > >
> > > Change
> > > The parameter *p1* became passed in *rdi* register instead of stack.
> > >
> > > Effect
> > > Violation of the calling convention. This may result in crash or
> > incorrect
> > > behavior of applications.
> > >
> > > After checking abi-0.99.pdf (x86-64.org) looks like for
> > > "rte_thread_set_affinity" new code is doing the right thing by passing
> > the
> > > parameter in "rdi" register since pointer is classified as
> > "integer_class".
> > > Nothing needs to be fixed here. After you confirm that warning can be
> > > ignored I will work on sending new revision.
> > >
> > ACK then, send the new revision, this appears to be a false positive.
> >
> > Thanks for taking the time to confirm.
> >
> 
> Thanks Neil. I have sent v8 which fixes ABI warnings. I have tested it with
> x86_64-native-linuxapp-gcc, x86_64-native-linuxapp-clang and
> x86_64-ivshmem-gcc targets. ABI results look fine to me.
> 
> I tried to run validate-abi.sh on BSD but ran into errors. If there is a
> way to check against BSD please let me know.
> 
The ABI checker should work on BSD as far as I know, since it only relies on
dwarf information in the output binary.  What errors are you seeing?

Neil

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] gmake test on freeBSD
       [not found]     <CAFb4SLBGcR1EHL5FkJ7r6-7mqWR9UJ7GLD2cm18SJ8AuoWu_Og@mail.gmail.com>
@ 2015-04-29  8:29  0% ` Bruce Richardson
  2015-04-29 17:58  0%   ` Ravi Kerur
  0 siblings, 1 reply; 200+ results
From: Bruce Richardson @ 2015-04-29  8:29 UTC (permalink / raw)
  To: Ravi Kerur; +Cc: dev

On Tue, Apr 28, 2015 at 06:15:53PM -0700, Ravi Kerur wrote:
> DPDK team,
> 
> Are there automated tests to run on FreeBSD similar to Linux (make test)?
> 
> I ran "gmake test T=x86_64-native-bsdapp-clang CC=clang" I get following
> output
> 
> /usr/home/rkerur/dpdk-validate-abi-1/dpdk/build/app/test -c f -n 4
> 
> Test name                      Test result                      Test
> Total
> ================================================================================
> Start group_1:                 Fail [Can't run]              [00m 00s]
> Timer autotest:                Fail [Can't run]              [00m 00s]
> Debug autotest:                Fail [Can't run]              [00m 00s]
> Errno autotest:                Fail [Can't run]              [00m 00s]
> Meter autotest:                Fail [Can't run]              [00m 00s]
> Common autotest:               Fail [Can't run]              [00m 00s]
> Dump log history:              Fail [Can't run]              [00m 00s]
> ...
> Start memcpy_perf:             Fail [No prompt]              [00m 00s]
> Memcpy performance autotest:   Fail [No prompt]              [00m 00s] [00m
> 01s]
> Start hash_perf:               Fail [No prompt]              [00m 00s]
> Hash performance autotest:     Fail [No prompt]              [00m 00s] [00m
> 01s]
> Start power:                   Fail [No prompt]              [00m 00s]
> Power autotest:                Fail [No prompt]              [00m 00s] [00m
> 01s]
> ...
> 
> I have contigmem and nic_uio installed. I know some applications are
> linuxapp specific but wanted to know if there is a similar automated test
> tool like Linux?
> 
> Thanks,
> Ravi

There is no separate test tool for FreeBSD. Unfortunately there are a number of little
things that don't really work on FreeBSD - and this looks to be one of them. We
probably need to look to fix this.

/Bruce

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH 1/3] pcap: utilize underlying real interface properties
@ 2015-04-29  0:30  2% Nicolás Pernas Maradei
  0 siblings, 0 replies; 200+ results
From: Nicolás Pernas Maradei @ 2015-04-29  0:30 UTC (permalink / raw)
  To: dev, tero.aho

Hi Tero,

Just a few comments on one of your patches - see inline comments below. Interesting features btw.

Nico.

-- 
Nicolás Pernas Maradei


On 27 February 2015 at 13:43:14, dev-request@dpdk.org (dev-request@dpdk.org) wrote:

Message: 5 
Date: Fri, 27 Feb 2015 15:42:38 +0200 
From: Tero Aho <tero.aho@coriant.com> 
To: <dev@dpdk.org> 
Subject: [dpdk-dev] [PATCH 1/3] pcap: utilize underlying real 
interface	properties 
Message-ID: <1425044560-23397-2-git-send-email-tero.aho@coriant.com> 
Content-Type: text/plain 

These changes set pcap interface mac address to the real underlying 
interface address instead of the default one. Also real interface link 
status, speed and duplex are reported when eth_link_update is called 
for the pcap interface. 

Signed-off-by: Tero Aho <tero.aho@coriant.com> 
--- 
lib/librte_pmd_pcap/rte_eth_pcap.c | 51 +++++++++++++++++++++++++++++++++++--- 
1 file changed, 47 insertions(+), 4 deletions(-) 

diff --git a/lib/librte_pmd_pcap/rte_eth_pcap.c b/lib/librte_pmd_pcap/rte_eth_pcap.c 
index 5e94930..289af28 100644 
--- a/lib/librte_pmd_pcap/rte_eth_pcap.c 
+++ b/lib/librte_pmd_pcap/rte_eth_pcap.c 
@@ -43,6 +43,11 @@ 
#include <rte_dev.h> 

#include <net/if.h> 
+#include <sys/socket.h> 
+#include <sys/ioctl.h> 
+#include <string.h> 
+#include <linux/ethtool.h> 
+#include <linux/sockios.h> 

#include <pcap.h> 

@@ -102,6 +107,8 @@ struct pmd_internals { 
unsigned nb_tx_queues; 
int if_index; 
int single_iface; 
+ const char *if_name; 
+ int if_fd; 
}; 

const char *valid_arguments[] = { 
@@ -451,6 +458,26 @@ static int 
eth_link_update(struct rte_eth_dev *dev __rte_unused, 
*dev is being used. Remove __rte_unused


int wait_to_complete __rte_unused) 
{ 
+ struct ifreq ifr; 
+ struct ethtool_cmd cmd; 
+ struct pmd_internals *internals = dev->data->dev_private; 
+ 
+ if (internals->if_name && (internals->if_fd != -1)) { 
+ /* get link status, speed and duplex from the underlying interface */ 
+ 
+ strncpy(ifr.ifr_name, internals->if_name, sizeof(ifr.ifr_name)-1); 
+ ifr.ifr_name[sizeof(ifr.ifr_name)-1] = 0; 
Use snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", internals->if_name) instead. It’s safer and cleaner.


+ if (!ioctl(internals->if_fd, SIOCGIFFLAGS, &ifr)) 
+ dev->data->dev_link.link_status = (ifr.ifr_flags & IFF_UP) ? 1 : 0; 
+ 
+ cmd.cmd = ETHTOOL_GSET; 
+ ifr.ifr_data = (void *)&cmd; 
+ if (!ioctl(internals->if_fd, SIOCETHTOOL, &ifr)) { 
+ dev->data->dev_link.link_speed = ethtool_cmd_speed(&cmd); 
+ dev->data->dev_link.link_duplex = 
+ cmd.duplex ? ETH_LINK_FULL_DUPLEX : ETH_LINK_HALF_DUPLEX; 
+ } 
+ } 
return 0; 
} 

@@ -736,11 +763,24 @@ rte_pmd_init_internals(const char *name, const unsigned nb_rx_queues, 
(*internals)->nb_rx_queues = nb_rx_queues; 
(*internals)->nb_tx_queues = nb_tx_queues; 

- if (pair == NULL) 
+ if (pair == NULL) { 
(*internals)->if_index = 0; 
- else 
+ } else { 
+ /* use real inteface mac addr, save name and fd for eth_link_update */ 
(*internals)->if_index = if_nametoindex(pair->value); 
+ (*internals)->if_name = strdup(pair->value); 
+ (*internals)->if_fd = socket(AF_INET, SOCK_DGRAM, 0); 
I see you are using a socket and ioctl calls to get the info you need from the interface. I’m not a big fan of opening a socket at this point just to get some parameters of the NIC. I’d rather read those from sysfs. Is there a reason why you’d prefer to open a socket?

These would be the files you’d need to open and read to get the info you are looking for. 

# cat /sys/class/net/eth0/address
# cat /sys/class/net/eth0/duplex
# cat /sys/class/net/eth0/speed

In my opinion the code would be cleaner doing it this way. DPDK already manipulates sysfs in other places too. 
What do you think?


+ if ((*internals)->if_fd != -1) { 
+ struct ifreq ifr; 
+ strncpy(ifr.ifr_name, pair->value, sizeof(ifr.ifr_name)-1); 
+ ifr.ifr_name[sizeof(ifr.ifr_name)-1] = 0; 
Use snprintf() like before.


+ if (!ioctl((*internals)->if_fd, SIOCGIFHWADDR, &ifr)) { 
+ data->mac_addrs = rte_zmalloc_socket(NULL, ETHER_ADDR_LEN, 0, numa_node); 
+ if (data->mac_addrs) 
+ rte_memcpy(data->mac_addrs, ifr.ifr_addr.sa_data, ETHER_ADDR_LEN); 
+ } 
+ } 
+ } 
pci_dev->numa_node = numa_node; 

data->dev_private = *internals; 
@@ -749,7 +789,8 @@ rte_pmd_init_internals(const char *name, const unsigned nb_rx_queues, 
data->nb_rx_queues = (uint16_t)nb_rx_queues; 
data->nb_tx_queues = (uint16_t)nb_tx_queues; 
data->dev_link = pmd_link; 
- data->mac_addrs = &eth_addr; 
+ if (data->mac_addrs == NULL) 
+ data->mac_addrs = &eth_addr; 
strncpy(data->name, 
(*eth_dev)->data->name, strlen((*eth_dev)->data->name)); 

@@ -758,6 +799,8 @@ rte_pmd_init_internals(const char *name, const unsigned nb_rx_queues, 
(*eth_dev)->pci_dev = pci_dev; 
(*eth_dev)->driver = &rte_pcap_pmd; 

+ eth_link_update((*eth_dev), 0); 
+ 
return 0; 

error: if (data) 
-- 
1.9.1 


============================================================ 
The information contained in this message may be privileged 
and confidential and protected from disclosure. If the reader 
of this message is not the intended recipient, or an employee 
or agent responsible for delivering this message to the 
intended recipient, you are hereby notified that any reproduction, 
dissemination or distribution of this communication is strictly 
prohibited. If you have received this communication in error, 
please notify us immediately by replying to the message and 
deleting it from your computer. Thank you. Coriant-Tellabs 
============================================================ 


------------------------------ 

Subject: Digest Footer 

_______________________________________________ 
dev mailing list 
dev@dpdk.org 
http://dpdk.org/ml/listinfo/dev 


------------------------------ 

End of dev Digest, Vol 29, Issue 99 
*********************************** 
From rkerur@gmail.com  Wed Apr 29 03:15:55 2015
Return-Path: <rkerur@gmail.com>
Received: from mail-ob0-f177.google.com (mail-ob0-f177.google.com
 [209.85.214.177]) by dpdk.org (Postfix) with ESMTP id 5986EC740
 for <dev@dpdk.org>; Wed, 29 Apr 2015 03:15:55 +0200 (CEST)
Received: by obcux3 with SMTP id ux3so9679357obc.2
 for <dev@dpdk.org>; Tue, 28 Apr 2015 18:15:53 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113;
 h=mime-version:date:message-id:subject:from:to:content-type;
 bh=MZ4S6lBJpl67DyjGKoevJH7hj1P2/ahlV1nfxWJ6CPw=;
 b=MlHdvUs4IlMw5URRnd8FCPs1PxkZyuqSk6Wem3+AbXe0sX3eOHd3SDgmk2/oSL9TPc
 Lnrs+5BGw+bS9WPki9UN2tg6QokPszOxpfxfodEpqa0PgWFeRZbRrqE5fEwJbcFNkeyD
 E1SatooWejmZst7JVyACc+lnno8tm4KVhebLtnX7uCsUzrRZ4cTIwY/S26NhCSpUzDjO
 HU4BQgzDMMShyYavYUeK8TIaRfVz3EnQs1yBeEJFNZrDvu8fucl4CKGofmJHu94a1+cs
 eIO0DM1sqnBQstBzuVH21PkmBmn2f2CgZ04G0HzmanmWvwhYi+fEnskb12ZJ1fnE/Xs2
 eH0w==
MIME-Version: 1.0
X-Received: by 10.182.39.168 with SMTP id q8mr16869793obk.23.1430270153684;
 Tue, 28 Apr 2015 18:15:53 -0700 (PDT)
Received: by 10.202.179.195 with HTTP; Tue, 28 Apr 2015 18:15:53 -0700 (PDT)
Date: Tue, 28 Apr 2015 18:15:53 -0700
Message-ID: <CAFb4SLBGcR1EHL5FkJ7r6-7mqWR9UJ7GLD2cm18SJ8AuoWu_Og@mail.gmail.com>
From: Ravi Kerur <rkerur@gmail.com>
To: "dev@dpdk.org" <dev@dpdk.org>
Content-Type: text/plain; charset=ISO-8859-1
X-Content-Filtered-By: Mailman/MimeDel 2.1.15
Subject: [dpdk-dev] gmake test on freeBSD
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: patches and discussions about DPDK <dev.dpdk.org>
List-Unsubscribe: <http://dpdk.org/ml/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://dpdk.org/ml/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <http://dpdk.org/ml/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
X-List-Received-Date: Wed, 29 Apr 2015 01:15:55 -0000

DPDK team,

Are there automated tests to run on FreeBSD similar to Linux (make test)?

I ran "gmake test T=x86_64-native-bsdapp-clang CC=clang" I get following
output

/usr/home/rkerur/dpdk-validate-abi-1/dpdk/build/app/test -c f -n 4

Test name                      Test result                      Test
Total
================================================================================
Start group_1:                 Fail [Can't run]              [00m 00s]
Timer autotest:                Fail [Can't run]              [00m 00s]
Debug autotest:                Fail [Can't run]              [00m 00s]
Errno autotest:                Fail [Can't run]              [00m 00s]
Meter autotest:                Fail [Can't run]              [00m 00s]
Common autotest:               Fail [Can't run]              [00m 00s]
Dump log history:              Fail [Can't run]              [00m 00s]
...
Start memcpy_perf:             Fail [No prompt]              [00m 00s]
Memcpy performance autotest:   Fail [No prompt]              [00m 00s] [00m
01s]
Start hash_perf:               Fail [No prompt]              [00m 00s]
Hash performance autotest:     Fail [No prompt]              [00m 00s] [00m
01s]
Start power:                   Fail [No prompt]              [00m 00s]
Power autotest:                Fail [No prompt]              [00m 00s] [00m
01s]
...

I have contigmem and nic_uio installed. I know some applications are
linuxapp specific but wanted to know if there is a similar automated test
tool like Linux?

Thanks,
Ravi

^ permalink raw reply	[relevance 2%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  2015-04-28 19:35  0%                               ` Neil Horman
@ 2015-04-28 23:52  4%                                 ` Ravi Kerur
  2015-04-29 10:04  3%                                   ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Ravi Kerur @ 2015-04-28 23:52 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On Tue, Apr 28, 2015 at 12:35 PM, Neil Horman <nhorman@tuxdriver.com> wrote:

> On Mon, Apr 27, 2015 at 03:39:41PM -0700, Ravi Kerur wrote:
> > On Mon, Apr 27, 2015 at 6:44 AM, Neil Horman <nhorman@tuxdriver.com>
> wrote:
> >
> > > On Sat, Apr 25, 2015 at 05:09:01PM -0700, Ravi Kerur wrote:
> > > > On Sat, Apr 25, 2015 at 6:02 AM, Neil Horman <nhorman@tuxdriver.com>
> > > wrote:
> > > >
> > > > > On Sat, Apr 25, 2015 at 08:32:42AM -0400, Neil Horman wrote:
> > > > > > On Fri, Apr 24, 2015 at 06:45:06PM -0700, Ravi Kerur wrote:
> > > > > > > On Fri, Apr 24, 2015 at 2:24 PM, Ravi Kerur <rkerur@gmail.com>
> > > wrote:
> > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > On Fri, Apr 24, 2015 at 12:51 PM, Neil Horman <
> > > nhorman@tuxdriver.com
> > > > > >
> > > > > > > > wrote:
> > > > > > > >
> > > > > > > >> On Fri, Apr 24, 2015 at 12:21:23PM -0700, Ravi Kerur wrote:
> > > > > > > >> > On Fri, Apr 24, 2015 at 11:53 AM, Neil Horman <
> > > > > nhorman@tuxdriver.com>
> > > > > > > >> wrote:
> > > > > > > >> >
> > > > > > > >> > > On Fri, Apr 24, 2015 at 09:45:24AM -0700, Ravi Kerur
> wrote:
> > > > > > > >> > > > On Fri, Apr 24, 2015 at 8:22 AM, Neil Horman <
> > > > > nhorman@tuxdriver.com
> > > > > > > >> >
> > > > > > > >> > > wrote:
> > > > > > > >> > > >
> > > > > > > >> > > > > On Fri, Apr 24, 2015 at 08:14:04AM -0700, Ravi Kerur
> > > wrote:
> > > > > > > >> > > > > > On Fri, Apr 24, 2015 at 6:51 AM, Neil Horman <
> > > > > > > >> nhorman@tuxdriver.com>
> > > > > > > >> > > > > wrote:
> > > > > > > >> > > > > >
> > > > > > > >> > > > > > > On Thu, Apr 23, 2015 at 02:35:31PM -0700, Ravi
> Kerur
> > > > > wrote:
> > > > > > > >> > > > > > > > Changes in v7
> > > > > > > >> > > > > > > > Remove _setname_ pthread calls.
> > > > > > > >> > > > > > > > Use rte_gettid() API in RTE_LOG to print
> > > thread_id.
> > > > > > > >> > > > > > > >
> > > > > > > >> > > > > > > > Changes in v6
> > > > > > > >> > > > > > > > Remove RTE_EXEC_ENV_BSDAPP from
> > > eal_common_thread.c
> > > > > file.
> > > > > > > >> > > > > > > > Add pthread_setname_np/pthread_set_name_np for
> > > > > Linux/FreeBSD
> > > > > > > >> > > > > > > > respectively. Plan to use _getname_ in RTE_LOG
> > > when
> > > > > > > >> available.
> > > > > > > >> > > > > > > > Use existing rte_get_systid() in RTE_LOG to
> print
> > > > > thread_id.
> > > > > > > >> > > > > > > >
> > > > > > > >> > > > > > > > Changes in v5
> > > > > > > >> > > > > > > > Rebase to latest code.
> > > > > > > >> > > > > > > >
> > > > > > > >> > > > > > > > Changes in v4
> > > > > > > >> > > > > > > > None
> > > > > > > >> > > > > > > >
> > > > > > > >> > > > > > > > Changes in v3
> > > > > > > >> > > > > > > > Changed subject to be more explicit on file
> name
> > > > > inclusion.
> > > > > > > >> > > > > > > >
> > > > > > > >> > > > > > > > Changes in v2
> > > > > > > >> > > > > > > > None
> > > > > > > >> > > > > > > >
> > > > > > > >> > > > > > > > Changes in v1
> > > > > > > >> > > > > > > > eal_thread.c has minor differences between
> Linux
> > > and
> > > > > BSD,
> > > > > > > >> move
> > > > > > > >> > > > > > > > entire file into common directory.
> > > > > > > >> > > > > > > > Use RTE_EXEC_ENV_BSDAPP to differentiate on
> minor
> > > > > > > >> differences.
> > > > > > > >> > > > > > > > Rename eal_thread.c to eal_common_thread.c
> > > > > > > >> > > > > > > > Makefile changes to reflect file move and name
> > > change.
> > > > > > > >> > > > > > > > Fix checkpatch warnings.
> > > > > > > >> > > > > > > >
> > > > > > > >> > > > > > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > > > > > >> > > > > > > > ---
> > > > > > > >> > > > > > > >  lib/librte_eal/bsdapp/eal/Makefile        |
>  2
> > > +-
> > > > > > > >> > > > > > > >  lib/librte_eal/bsdapp/eal/eal_thread.c    |
> 152
> > > > > > > >> > > > > > > ------------------------------
> > > > > > > >> > > > > > > >  lib/librte_eal/common/eal_common_thread.c |
> 147
> > > > > > > >> > > > > > > ++++++++++++++++++++++++++++-
> > > > > > > >> > > > > > > >  lib/librte_eal/linuxapp/eal/eal_thread.c  |
> 152
> > > > > > > >> > > > > > > +-----------------------------
> > > > > > > >> > > > > > > >  4 files changed, 148 insertions(+), 305
> > > deletions(-)
> > > > > > > >> > > > > > > >
> > > > > > > >> > > > > > > > diff --git
> a/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > > >> > > > > > > b/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > > >> > > > > > > > index 2357cfa..55971b9 100644
> > > > > > > >> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > > >> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > > >> > > > > > > > @@ -87,7 +87,7 @@ CFLAGS_eal_common_log.o :=
> > > > > -D_GNU_SOURCE
> > > > > > > >> > > > > > > >  # workaround for a gcc bug with noreturn
> > > attribute
> > > > > > > >> > > > > > > >  #
> > > http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
> > > > > > > >> > > > > > > >  ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
> > > > > > > >> > > > > > > > -CFLAGS_eal_thread.o += -Wno-return-type
> > > > > > > >> > > > > > > > +CFLAGS_eal_common_thread.o +=
> -Wno-return-type
> > > > > > > >> > > > > > > >  CFLAGS_eal_hpet.o += -Wno-return-type
> > > > > > > >> > > > > > > >  endif
> > > > > > > >> > > > > > > >
> > > > > > > >> > > > > > > > diff --git
> > > a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > > >> > > > > > > b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > > >> > > > > > > > index 9a03437..5714b8f 100644
> > > > > > > >> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > > >> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > > >> > > > > > > > @@ -35,163 +35,11 @@
> > > > > > > >> > > > > > > >  #include <stdio.h>
> > > > > > > >> > > > > > > >  #include <stdlib.h>
> > > > > > > >> > > > > > > >  #include <stdint.h>
> > > > > > > >> > > > > > > > -#include <unistd.h>
> > > > > > > >> > > > > > > > -#include <sched.h>
> > > > > > > >> > > > > > > > -#include <pthread_np.h>
> > > > > > > >> > > > > > > > -#include <sys/queue.h>
> > > > > > > >> > > > > > > >  #include <sys/thr.h>
> > > > > > > >> > > > > > > >
> > > > > > > >> > > > > > > > -#include <rte_debug.h>
> > > > > > > >> > > > > > > > -#include <rte_atomic.h>
> > > > > > > >> > > > > > > > -#include <rte_launch.h>
> > > > > > > >> > > > > > > > -#include <rte_log.h>
> > > > > > > >> > > > > > > > -#include <rte_memory.h>
> > > > > > > >> > > > > > > > -#include <rte_memzone.h>
> > > > > > > >> > > > > > > > -#include <rte_per_lcore.h>
> > > > > > > >> > > > > > > > -#include <rte_eal.h>
> > > > > > > >> > > > > > > > -#include <rte_per_lcore.h>
> > > > > > > >> > > > > > > > -#include <rte_lcore.h>
> > > > > > > >> > > > > > > > -
> > > > > > > >> > > > > > > >  #include "eal_private.h"
> > > > > > > >> > > > > > > >  #include "eal_thread.h"
> > > > > > > >> > > > > > > >
> > > > > > > >> > > > > > > > -RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) =
> > > > > LCORE_ID_ANY;
> > > > > > > >> > > > > > > NAK, these are exported symbols, you can't
> remove
> > > them
> > > > > without
> > > > > > > >> > > going
> > > > > > > >> > > > > > > through the
> > > > > > > >> > > > > > > deprecation process.
> > > > > > > >> > > > > > >
> > > > > > > >> > > > > > >
> > > > > > > >> > > > > > They are not removed/deleted, they are moved from
> > > > > eal_thread.c
> > > > > > > >> to
> > > > > > > >> > > > > > eal_common_thread.c file since it is common to
> both
> > > Linux
> > > > > and
> > > > > > > >> BSD.
> > > > > > > >> > > > > >
> > > > > > > >> > > > > Then perhaps you forgot to export the symbol?  Its
> > > showing
> > > > > up as
> > > > > > > >> > > removed
> > > > > > > >> > > > > on the
> > > > > > > >> > > > > ABI checker utility.
> > > > > > > >> > > > >
> > > > > > > >> > > > > Neil
> > > > > > > >> > > > >
> > > > > > > >> > > >
> > > > > > > >> > > > Can you please show me in the current code where it is
> > > being
> > > > > > > >> exported? I
> > > > > > > >> > > > have only moved definitions to _common_ files, not
> sure
> > > why it
> > > > > > > >> should be
> > > > > > > >> > > > exported now.  I searched in the current code for
> > > > > > > >> RTE_DEFINE_PER_LCORE
> > > > > > > >> > > >
> > > > > > > >> > > > #home/rkerur/dpdk-tmp/dpdk# grep -ir
> RTE_DEFINE_PER_LCORE
> > > *
> > > > > > > >> > > > app/test/test_per_lcore.c:static
> > > > > RTE_DEFINE_PER_LCORE(unsigned,
> > > > > > > >> test) =
> > > > > > > >> > > > 0x12345678;
> > > > > > > >> > > >
> > > > > > > >>
> > > > >
> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > > > > >> > > > _lcore_id) = LCORE_ID_ANY;
> > > > > > > >> > > >
> > > > > > > >>
> > > > >
> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > > > > >> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > > > > > > >> > > >
> > > > > > > >> > >
> > > > > > > >>
> > > > >
> > >
> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > > > > > > >> > > > _cpuset);
> > > > > > > >> > > >
> > > > > > > >>
> > > > >
> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > > > > >> > > > _lcore_id) = LCORE_ID_ANY;
> > > > > > > >> > > >
> > > > > > > >>
> > > > >
> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > > > > >> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > > > > > > >> > > >
> > > > > > > >>
> > > > >
> > >
> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > > > > > > >> > > > _cpuset);
> > > > > > > >> > > > lib/librte_eal/common/include/rte_per_lcore.h:#define
> > > > > > > >> > > > RTE_DEFINE_PER_LCORE(type, name)            \
> > > > > > > >> > > > lib/librte_eal/common/include/rte_eal.h:    static
> > > > > > > >> > > > RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
> > > > > > > >> > > >
> > > > > lib/librte_eal/common/eal_common_errno.c:RTE_DEFINE_PER_LCORE(int,
> > > > > > > >> > > > _rte_errno);
> > > > > > > >> > > > lib/librte_eal/common/eal_common_errno.c:    static
> > > > > > > >> > > > RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
> > > > > > > >> > > >
> > > > > > > >> > > >
> > > > > > > >> > > > > > Thanks
> > > > > > > >> > > > > > Ravi
> > > > > > > >> > > > > >
> > > > > > > >> > > > > > Regards
> > > > > > > >> > > > > > > Neil
> > > > > > > >> > > > > > >
> > > > > > > >> > > > > > >
> > > > > > > >> > > > >
> > > > > > > >> > > Its exported in the version map file:
> > > > > > > >> > >  per_lcore__lcore_id;
> > > > > > > >> > >
> > > > > > > >> > >
> > > > > > > >> > Thanks Neil, I checked and both linux and bsd
> > > rte_eal_version.map
> > > > > have
> > > > > > > >> it.
> > > > > > > >> > I compared .map file between "changed code" and the
> original,
> > > > > they are
> > > > > > > >> same
> > > > > > > >> > for both linux and bsd. In fact you had ACK'd v4 version
> of
> > > this
> > > > > patch
> > > > > > > >> > series and no major changes after that. Please let me
> know if
> > > I
> > > > > missed
> > > > > > > >> > something.
> > > > > > > >> >
> > > > > > > >> I did, and I'm retracting that, because I didn't think to
> check
> > > the
> > > > > ABI
> > > > > > > >> compatibility on this.  But I ran it throught the ABI
> checking
> > > > > script
> > > > > > > >> this and
> > > > > > > >> this error popped out.  You should run it as well, its in
> the
> > > > > scripts
> > > > > > > >> directory.
> > > > > > > >>
> > > > > > > >>
> > > > > > > >> I see in your first patch you removed it and re-added it in
> the
> > > > > common
> > > > > > > >> section.
> > > > > > > >> But something about how its building is causing it to not
> show
> > > up
> > > > > as an
> > > > > > > >> exported
> > > > > > > >> symbol, which is problematic, as other applications are
> going to
> > > > > want
> > > > > > > >> access to
> > > > > > > >> it.
> > > > > > > >>
> > > > > > > >> It also possible that the ABI checker is throwing a false
> > > positive,
> > > > > but
> > > > > > > >> either
> > > > > > > >> way, it needs to be looked into prior to moving forward with
> > > this.
> > > > > > > >>
> > > > > > > >>
> > > > > > > > I did following things.
> > > > > > > >
> > > > > > > > Put a tag (v2.0.0-before-common-eal)  before EAL common
> functions
> > > > > changes
> > > > > > > > for commit (3c0c807038ad642f4be7deb9370293c39d12f029 net:
> remove
> > > > > unneeded
> > > > > > > > include)
> > > > > > > >
> > > > > > > > Put a tag (v2.0.0-common-eal) after EAL common functions
> changes
> > > for
> > > > > > > > commit (25737e5a7212630a7b5d8ca756860a062f403789 Move common
> > > > > functions in
> > > > > > > > eal_pci.c)
> > > > > > > >
> > > > > > > > Ran validate-abi against x86_64-native-linuxapp-gcc and
> > > > > > > >
> > > > > > > > v2.0.0-rc3 and v2.0.0-before-common-eal, html report for
> > > > > librte_eal.so
> > > > > > > > shows removed symbols for "per_lcore__cpuset"
> > > > > > > >
> > > > > > > > v2.0.0-rc3 and v2.0.0-common-eal, html report for
> librte_eal.so
> > > shows
> > > > > > > > removed symbols for "per_lcore__cpuset"
> > > > > > > >
> > > > > > > > Removed symbol is different from what you have reported and
> in my
> > > > > case I
> > > > > > > > see it even before my commit. If you are interested I can
> unicast
> > > > > you html
> > > > > > > > report file. Please let me know how to proceed.
> > > > > > > >
> > > > > > > >
> > > > > > >
> > > > > > > I did some experiment and found some interesting things.  I
> will
> > > take
> > > > > eal.c
> > > > > > > as an example
> > > > > > >
> > > > > > > eal.c is split into eal_common_sysfs.c eal_common_mem_cfg.c
> > > > > > > eal_common_proc_type.c and eal_common_app_usage.c. In
> > > > > linuxapp/eal/Makefile
> > > > > > > if I compile new files right after eal.c as shown below
> > > > > > >
> > > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) := eal.c
> > > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_sysfs.c
> > > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_mem_cfg.c
> > > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) +=
> eal_common_proc_type.c
> > > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) +=
> eal_common_app_usage.c
> > > > > > > ...
> > > > > > >
> > > > > > > validate-abi results matches baseline. Instead if i place new
> > > _common_
> > > > > > > files in common area in linuxapp/eal/Makefile as shown below
> > > > > > >
> > > > > > > # from common dir
> > > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
> > > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_log.c
> > > > > > > ...
> > > > > > >
> > > > > > > validate-abi reports problem in binary compatibility and source
> > > > > > > compatiblity
> > > > > > >
> > > > > > > eal_filesystem.h, librte_eal.so.1
> > > > > > >  [+] eal_parse_sysfs_value ( char const* filename, unsigned
> long*
> > > val )
> > > > > > >  @@ DPDK_2.0 (2)
> > > > > > >
> > > > > > > I believe files in common and linuxapp directory are compiled
> same
> > > way
> > > > > so
> > > > > > > not sure why placement in makefile makes difference.
> > > > > > >
> > > > > > > Could this be false-positive from validate-abi script??
> > > > > > >
> > > > > > It could be, yes.  Though I'm more inclined to think that
> perhaps in
> > > the
> > > > > new
> > > > > > version of the code we're not generating ithe same dwarf
> information
> > > out
> > > > > of it.
> > > > > > In fact for some reason, I've checked both the build before and
> after
> > > > > your
> > > > > > patch series, and the exported CFLAGS aren't getting passed to
> the
> > > build
> > > > > > properly, implying that we're not building all the code in the
> > > validator
> > > > > with
> > > > > > the -g flag, which the validator need to function properly.  I'm
> > > looking
> > > > > into
> > > > > > that
> > > > > > Neil
> > > > > >
> > > > > >
> > > > > Found the problem, I was stupidly reading the report incorrectly.
> The
> > > > > problem
> > > > > regarding _lcore_id is a source compatibilty issue (because the
> symbol
> > > > > moved to
> > > > > a new location), which is irrelevant to us.  Its not in any way a
> > > binary
> > > > > compat
> > > > > problem, which is what we care about.  Sorry for the noise.
> > > > >
> > > > > I do still have a few concerns about some changed calling
> conventions
> > > with
> > > > > a few
> > > > > other functions, which I'll look into on monday.
> > > > >
> > > > >
> > > > Please let me know your inputs on changed calling conventions. Most
> of
> > > them
> > > > can be fixed by re-arranging moved code in _common_ files and order
> of
> > > > compilation.
> > > >
> > > If moving the order of compliation around fixes the problem, then I am
> > > reasonably convinced that it is, if not a false positive, a minor issue
> > > with the
> > > compilers dwarf information (The compiler just can't sanely change the
> > > location
> > > in which parameters are passed).  If you make those changes, I'll ACK
> > > them, and
> > > look into whats going on with the calling conventions
> > >
> >
> > Issues like the one shown below are taken care by reordering the code
> > compilation.
> >
> > eal_parse_sysfs_value ( char const* filename, unsigned long* val )
> >
> > Change
> > The parameter filename became passed on stack instead of rdi register
> >
> > Effect
> > Violation of the calling convention. This may result in crash or
> incorrect
> > behavior of applications.
> >
> > Last one that is left out is in
> >
> > rte_thread_set_affinity ( rte_cpuset_t* p1 )
> >
> > Change
> > The parameter *p1* became passed in *rdi* register instead of stack.
> >
> > Effect
> > Violation of the calling convention. This may result in crash or
> incorrect
> > behavior of applications.
> >
> > After checking abi-0.99.pdf (x86-64.org) looks like for
> > "rte_thread_set_affinity" new code is doing the right thing by passing
> the
> > parameter in "rdi" register since pointer is classified as
> "integer_class".
> > Nothing needs to be fixed here. After you confirm that warning can be
> > ignored I will work on sending new revision.
> >
> ACK then, send the new revision, this appears to be a false positive.
>
> Thanks for taking the time to confirm.
>

Thanks Neil. I have sent v8 which fixes ABI warnings. I have tested it with
x86_64-native-linuxapp-gcc, x86_64-native-linuxapp-clang and
x86_64-ivshmem-gcc targets. ABI results look fine to me.

I tried to run validate-abi.sh on BSD but ran into errors. If there is a
way to check against BSD please let me know.

>
> Best
> Neil
>
> > Thanks,
> > Ravi
> >
> >
> > > Thanks!
> > > Neil
> > >
> > > > Thanks,
> > > > Ravi
> > > >
> > > > Regards
> > > > > Neil
> > > > >
> > > > >
> > >
>

^ permalink raw reply	[relevance 4%]

* [dpdk-dev] [PATCH v8 2/6] Move common functions in eal.c
  2015-04-28 23:46  2% ` [dpdk-dev] [PATCH v8 1/6] Move common functions in eal_thread.c Ravi Kerur
@ 2015-04-28 23:46  1%   ` Ravi Kerur
  0 siblings, 0 replies; 200+ results
From: Ravi Kerur @ 2015-04-28 23:46 UTC (permalink / raw)
  To: dev

Changes in v8
Fix ABI warnings by reordering compilation of
eal_common_sysfs.c
eal_common_mem_cfg.c
eal_common_proc_type.c
eal_common_app_usage.c

Changes in v7
Fix compilation errors in clang.

Changes in v6
Split eal_common_system.c and eal_common_runtime.c into
eal_common_sysfs.c
eal_common_mem_cfg.c
eal_common_proc_type.c
eal_common_app_usage.c
based on functionality.

Changes in v5
Rebase to latest code.

Changes in v4
Remove eal_externs.h file, instead use  _get_ and _set_ APIS
to access those variables.
Split eal_common.c into eal_common_system.c and
eal_common_runtime.c.
rte_eal prefix functions are moved to _runtime_ and
eal prefix functions are moved to _system_ files respectively.

Changes in v3
Changed subject to be more explicit on file name inclusion.

Changes in v2
In function rte_eal_config_create remove #ifdef _BSDAPP_
and initialize mem_cfg_addr unconditionally.

Changes in v1
Move common functions in eal.c to librte_eal/common/eal_common.c.

Following functions are moved to eal_common.c file.

struct rte_config *rte_eal_get_configuration(void);
int eal_parse_sysfs_value(const char *filename, unsigned long *val);
static void rte_eal_config_create(void);
enum rte_proc_type_t eal_proc_type_detect(void);
void rte_config_init(void);
rte_usage_hook_t rte_set_application_usage_hook(rte_usage_hook_t
usage_func);
inline size_t eal_get_hugepage_mem_size(void);
void eal_check_mem_on_local_socket(void);
int sync_func(__attribute__((unused)) void *arg);
inline void rte_eal_mcfg_complete(void);
int rte_eal_has_hugepages(void);
enum rte_lcore_role_t rte_eal_lcore_role(unsigned lcore_id);
enum rte_proc_type_t rte_eal_process_type(void);

Makefile changes to reflect new files added.
Fix checkpatch warnings and errors.

Signed-off-by: Ravi Kerur <rkerur@gmail.com>
---
 lib/librte_eal/bsdapp/eal/Makefile           |   4 +
 lib/librte_eal/bsdapp/eal/eal.c              | 271 +++---------------------
 lib/librte_eal/common/eal_common_app_usage.c |  63 ++++++
 lib/librte_eal/common/eal_common_mem_cfg.c   | 224 ++++++++++++++++++++
 lib/librte_eal/common/eal_common_proc_type.c |  58 ++++++
 lib/librte_eal/common/eal_common_sysfs.c     | 148 ++++++++++++++
 lib/librte_eal/common/eal_hugepages.h        |   1 +
 lib/librte_eal/common/eal_private.h          |  78 +++++++
 lib/librte_eal/common/include/rte_eal.h      |   4 +
 lib/librte_eal/linuxapp/eal/Makefile         |   4 +
 lib/librte_eal/linuxapp/eal/eal.c            | 296 ++++-----------------------
 11 files changed, 660 insertions(+), 491 deletions(-)
 create mode 100644 lib/librte_eal/common/eal_common_app_usage.c
 create mode 100644 lib/librte_eal/common/eal_common_mem_cfg.c
 create mode 100644 lib/librte_eal/common/eal_common_proc_type.c
 create mode 100644 lib/librte_eal/common/eal_common_sysfs.c

diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index b7ca47c..67abc54 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -52,6 +52,10 @@ LIBABIVER := 1
 
 # specific to linuxapp exec-env
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) := eal.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_sysfs.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_mem_cfg.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_proc_type.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_app_usage.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_memory.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_hugepage_info.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_thread.c
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index 43e8a47..a9b1f38 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -80,29 +80,6 @@
 #include "eal_hugepages.h"
 #include "eal_options.h"
 
-#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)
-
-/* Allow the application to print its usage message too if set */
-static rte_usage_hook_t	rte_application_usage_hook = NULL;
-/* early configuration structure, when memory config is not mmapped */
-static struct rte_mem_config early_mem_config;
-
-/* define fd variable here, because file needs to be kept open for the
- * duration of the program, as we hold a write lock on it in the primary proc */
-static int mem_cfg_fd = -1;
-
-static struct flock wr_lock = {
-		.l_type = F_WRLCK,
-		.l_whence = SEEK_SET,
-		.l_start = offsetof(struct rte_mem_config, memseg),
-		.l_len = sizeof(early_mem_config.memseg),
-};
-
-/* Address of global and public configuration */
-static struct rte_config rte_config = {
-		.mem_config = &early_mem_config,
-};
-
 /* internal configuration (per-core) */
 struct lcore_config lcore_config[RTE_MAX_LCORE];
 
@@ -112,160 +89,57 @@ struct internal_config internal_config;
 /* used by rte_rdtsc() */
 int rte_cycles_vmware_tsc_map;
 
-/* Return a pointer to the configuration structure */
-struct rte_config *
-rte_eal_get_configuration(void)
-{
-	return &rte_config;
-}
-
-/* parse a sysfs (or other) file containing one integer value */
-int
-eal_parse_sysfs_value(const char *filename, unsigned long *val)
-{
-	FILE *f;
-	char buf[BUFSIZ];
-	char *end = NULL;
-
-	if ((f = fopen(filename, "r")) == NULL) {
-		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
-			__func__, filename);
-		return -1;
-	}
-
-	if (fgets(buf, sizeof(buf), f) == NULL) {
-		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
-			__func__, filename);
-		fclose(f);
-		return -1;
-	}
-	*val = strtoul(buf, &end, 0);
-	if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
-		RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
-				__func__, filename);
-		fclose(f);
-		return -1;
-	}
-	fclose(f);
-	return 0;
-}
-
-
-/* create memory configuration in shared/mmap memory. Take out
- * a write lock on the memsegs, so we can auto-detect primary/secondary.
- * This means we never close the file while running (auto-close on exit).
- * We also don't lock the whole file, so that in future we can use read-locks
- * on other parts, e.g. memzones, to detect if there are running secondary
- * processes. */
-static void
-rte_eal_config_create(void)
+inline void *
+rte_eal_get_mem_cfg_addr(void)
 {
-	void *rte_mem_cfg_addr;
-	int retval;
-
-	const char *pathname = eal_runtime_config_path();
-
-	if (internal_config.no_shconf)
-		return;
-
-	if (mem_cfg_fd < 0){
-		mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
-		if (mem_cfg_fd < 0)
-			rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
-	}
-
-	retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
-	if (retval < 0){
-		close(mem_cfg_fd);
-		rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
-	}
-
-	retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
-	if (retval < 0){
-		close(mem_cfg_fd);
-		rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
-				"process running?\n", pathname);
-	}
-
-	rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
-				PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
-
-	if (rte_mem_cfg_addr == MAP_FAILED){
-		rte_panic("Cannot mmap memory for rte_config\n");
-	}
-	memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
-	rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
+	return NULL;
 }
 
 /* attach to an existing shared memory config */
-static void
+void
 rte_eal_config_attach(void)
 {
-	void *rte_mem_cfg_addr;
+	struct rte_mem_config *mem_config;
+	struct rte_config *rte_config;
 	const char *pathname = eal_runtime_config_path();
+	int *mem_cfg_fd = eal_get_mem_cfg_fd();
 
 	if (internal_config.no_shconf)
 		return;
 
-	if (mem_cfg_fd < 0){
-		mem_cfg_fd = open(pathname, O_RDWR);
-		if (mem_cfg_fd < 0)
+	rte_config = rte_eal_get_configuration();
+	if (rte_config == NULL)
+		return;
+
+	if (*mem_cfg_fd < 0) {
+		*mem_cfg_fd = open(pathname, O_RDWR);
+		if (*mem_cfg_fd < 0)
 			rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
 	}
 
-	rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
-				PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
-	close(mem_cfg_fd);
-	if (rte_mem_cfg_addr == MAP_FAILED)
+	mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config),
+				PROT_READ | PROT_WRITE,
+				MAP_SHARED, *mem_cfg_fd, 0);
+	close(*mem_cfg_fd);
+	if (mem_config == MAP_FAILED)
 		rte_panic("Cannot mmap memory for rte_config\n");
 
-	rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
-}
-
-/* Detect if we are a primary or a secondary process */
-enum rte_proc_type_t
-eal_proc_type_detect(void)
-{
-	enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
-	const char *pathname = eal_runtime_config_path();
-
-	/* if we can open the file but not get a write-lock we are a secondary
-	 * process. NOTE: if we get a file handle back, we keep that open
-	 * and don't close it to prevent a race condition between multiple opens */
-	if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
-			(fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
-		ptype = RTE_PROC_SECONDARY;
-
-	RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
-			ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
-
-	return ptype;
+	rte_config->mem_config = mem_config;
 }
 
-/* Sets up rte_config structure with the pointer to shared memory config.*/
-static void
-rte_config_init(void)
+/* NOP for BSD */
+void
+rte_eal_config_reattach(void)
 {
-	rte_config.process_type = internal_config.process_type;
-
-	switch (rte_config.process_type){
-	case RTE_PROC_PRIMARY:
-		rte_eal_config_create();
-		break;
-	case RTE_PROC_SECONDARY:
-		rte_eal_config_attach();
-		rte_eal_mcfg_wait_complete(rte_config.mem_config);
-		break;
-	case RTE_PROC_AUTO:
-	case RTE_PROC_INVALID:
-		rte_panic("Invalid process type\n");
-	}
 }
 
 /* display usage */
 static void
 eal_usage(const char *prgname)
 {
+	rte_usage_hook_t rte_application_usage_hook =
+		rte_get_application_usage_hook();
+
 	printf("\nUsage: %s ", prgname);
 	eal_common_usage();
 	/* Allow the application to print its usage message too if hook is set */
@@ -275,37 +149,6 @@ eal_usage(const char *prgname)
 	}
 }
 
-/* Set a per-application usage message */
-rte_usage_hook_t
-rte_set_application_usage_hook( rte_usage_hook_t usage_func )
-{
-	rte_usage_hook_t	old_func;
-
-	/* Will be NULL on the first call to denote the last usage routine. */
-	old_func					= rte_application_usage_hook;
-	rte_application_usage_hook	= usage_func;
-
-	return old_func;
-}
-
-static inline size_t
-eal_get_hugepage_mem_size(void)
-{
-	uint64_t size = 0;
-	unsigned i, j;
-
-	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
-		struct hugepage_info *hpi = &internal_config.hugepage_info[i];
-		if (hpi->hugedir != NULL) {
-			for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
-				size += hpi->hugepage_sz * hpi->num_pages[j];
-			}
-		}
-	}
-
-	return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
-}
-
 /* Parse the argument given in the command line of the application */
 static int
 eal_parse_args(int argc, char **argv)
@@ -378,45 +221,6 @@ eal_parse_args(int argc, char **argv)
 	return ret;
 }
 
-static void
-eal_check_mem_on_local_socket(void)
-{
-	const struct rte_memseg *ms;
-	int i, socket_id;
-
-	socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);
-
-	ms = rte_eal_get_physmem_layout();
-
-	for (i = 0; i < RTE_MAX_MEMSEG; i++)
-		if (ms[i].socket_id == socket_id &&
-				ms[i].len > 0)
-			return;
-
-	RTE_LOG(WARNING, EAL, "WARNING: Master core has no "
-			"memory on local socket!\n");
-}
-
-static int
-sync_func(__attribute__((unused)) void *arg)
-{
-	return 0;
-}
-
-inline static void
-rte_eal_mcfg_complete(void)
-{
-	/* ALL shared mem_config related INIT DONE */
-	if (rte_config.process_type == RTE_PROC_PRIMARY)
-		rte_config.mem_config->magic = RTE_MAGIC;
-}
-
-/* return non-zero if hugepages are enabled. */
-int rte_eal_has_hugepages(void)
-{
-	return !internal_config.no_hugetlbfs;
-}
-
 /* Abstraction for port I/0 privilege */
 int
 rte_eal_iopl_init(void)
@@ -437,8 +241,13 @@ rte_eal_init(int argc, char **argv)
 	int i, fctret, ret;
 	pthread_t thread_id;
 	static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
+	struct rte_config *rte_config;
 	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
 
+	rte_config = rte_eal_get_configuration();
+	if (rte_config == NULL)
+		return -1;
+
 	if (!rte_atomic32_test_and_set(&run_once))
 		return -1;
 
@@ -512,12 +321,12 @@ rte_eal_init(int argc, char **argv)
 
 	rte_eal_mcfg_complete();
 
-	eal_thread_init_master(rte_config.master_lcore);
+	eal_thread_init_master(rte_config->master_lcore);
 
 	ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
 
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
-		rte_config.master_lcore, thread_id, cpuset,
+		rte_config->master_lcore, thread_id, cpuset,
 		ret == 0 ? "" : "...");
 
 	if (rte_eal_dev_init() < 0)
@@ -556,17 +365,3 @@ rte_eal_init(int argc, char **argv)
 
 	return fctret;
 }
-
-/* get core role */
-enum rte_lcore_role_t
-rte_eal_lcore_role(unsigned lcore_id)
-{
-	return (rte_config.lcore_role[lcore_id]);
-}
-
-enum rte_proc_type_t
-rte_eal_process_type(void)
-{
-	return (rte_config.process_type);
-}
-
diff --git a/lib/librte_eal/common/eal_common_app_usage.c b/lib/librte_eal/common/eal_common_app_usage.c
new file mode 100644
index 0000000..5f64d35
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_app_usage.c
@@ -0,0 +1,63 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2014 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "eal_private.h"
+
+/* Allow the application to print its usage message too if set */
+rte_usage_hook_t rte_application_usage_hook = NULL;
+
+/* Get per-application usage message */
+rte_usage_hook_t
+rte_get_application_usage_hook(void)
+{
+	return rte_application_usage_hook;
+}
+
+/* Set a per-application usage message */
+rte_usage_hook_t
+rte_set_application_usage_hook(rte_usage_hook_t usage_func)
+{
+	rte_usage_hook_t	old_func;
+
+	/* Will be NULL on the first call to denote the last usage routine. */
+	old_func	= rte_application_usage_hook;
+	rte_application_usage_hook	= usage_func;
+
+	return old_func;
+}
diff --git a/lib/librte_eal/common/eal_common_mem_cfg.c b/lib/librte_eal/common/eal_common_mem_cfg.c
new file mode 100644
index 0000000..c8bf218
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_mem_cfg.c
@@ -0,0 +1,224 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2014 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <syslog.h>
+#include <getopt.h>
+#include <sys/file.h>
+#include <stddef.h>
+#include <errno.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+
+#include <rte_debug.h>
+#include <rte_eal_memconfig.h>
+#include <rte_log.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+#include "eal_options.h"
+
+/* early configuration structure, when memory config is not mmapped */
+static struct rte_mem_config early_mem_config;
+
+/* define fd variable here, because file needs to be kept open for the
+ * duration of the program, as we hold a write lock on it in the primary proc */
+static int mem_cfg_fd = -1;
+
+static struct flock wr_lock = {
+		.l_type = F_WRLCK,
+		.l_whence = SEEK_SET,
+		.l_start = offsetof(struct rte_mem_config, memseg),
+		.l_len = sizeof(((struct rte_mem_config *)0)->memseg),
+};
+
+/* Address of global and public configuration */
+static struct rte_config rte_config = {
+		.mem_config = &early_mem_config,
+};
+
+/* Return a pointer to the configuration structure */
+struct rte_config *
+rte_eal_get_configuration(void)
+{
+	return &rte_config;
+}
+
+/* Return memory config file descriptor */
+int*
+eal_get_mem_cfg_fd(void)
+{
+	return &mem_cfg_fd;
+}
+
+/* get core role */
+enum rte_lcore_role_t
+rte_eal_lcore_role(unsigned lcore_id)
+{
+	return rte_config.lcore_role[lcore_id];
+}
+
+/* create memory configuration in shared/mmap memory. Take out
+ * a write lock on the memsegs, so we can auto-detect primary/secondary.
+ * This means we never close the file while running (auto-close on exit).
+ * We also don't lock the whole file, so that in future we can use read-locks
+ * on other parts, e.g. memzones, to detect if there are running secondary
+ * processes. */
+static void
+rte_eal_config_create(void)
+{
+	void *rte_mem_cfg_addr;
+	int retval;
+
+	const char *pathname = eal_runtime_config_path();
+
+	if (internal_config.no_shconf)
+		return;
+
+	rte_mem_cfg_addr = rte_eal_get_mem_cfg_addr();
+
+	if (mem_cfg_fd < 0) {
+		mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
+		if (mem_cfg_fd < 0)
+			rte_panic("Cannot open '%s' for rte_mem_config\n",
+					pathname);
+	}
+
+	retval = eal_ftruncate_and_fcntl(sizeof(*rte_config.mem_config));
+
+	if (retval == -1) {
+		close(mem_cfg_fd);
+		rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
+	} else if (retval == -2) {
+		close(mem_cfg_fd);
+		rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. "
+			"Is another primary process running?\n", pathname);
+	}
+
+	rte_mem_cfg_addr = mmap(rte_mem_cfg_addr,
+			sizeof(*rte_config.mem_config), PROT_READ | PROT_WRITE,
+			MAP_SHARED, mem_cfg_fd, 0);
+
+	if (rte_mem_cfg_addr == MAP_FAILED)
+		rte_panic("Cannot mmap memory for rte_config\n");
+
+	memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
+	rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
+
+	/* store address of the config in the config itself so that secondary
+	 * processes could later map the config into this exact location
+	 */
+	rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
+}
+
+/* Sets up rte_config structure with the pointer to shared memory config.*/
+void
+rte_config_init(void)
+{
+	rte_config.process_type = internal_config.process_type;
+
+	switch (rte_config.process_type) {
+	case RTE_PROC_PRIMARY:
+		rte_eal_config_create();
+		break;
+	case RTE_PROC_SECONDARY:
+		rte_eal_config_attach();
+		rte_eal_mcfg_wait_complete(rte_config.mem_config);
+		rte_eal_config_reattach();
+		break;
+	case RTE_PROC_AUTO:
+	case RTE_PROC_INVALID:
+		rte_panic("Invalid process type\n");
+	}
+}
+
+inline void
+rte_eal_mcfg_complete(void)
+{
+	/* ALL shared mem_config related INIT DONE */
+	if (rte_config.process_type == RTE_PROC_PRIMARY)
+		rte_config.mem_config->magic = RTE_MAGIC;
+}
+
+/* Detect if we are a primary or a secondary process */
+enum rte_proc_type_t
+eal_proc_type_detect(void)
+{
+	enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
+	const char *pathname = eal_runtime_config_path();
+
+	/* if we can open the file but not get a write-lock we are
+	 * a secondary process. NOTE: if we get a file handle back,
+	 * we keep that open and don't close it to prevent a race
+	 * condition between multiple opens
+	 */
+	mem_cfg_fd = open(pathname, O_RDWR);
+	if ((mem_cfg_fd >= 0) &&
+			(fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
+		ptype = RTE_PROC_SECONDARY;
+
+	RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
+			ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
+
+	return ptype;
+}
+
+/*
+ * Perform ftruncate and fcntl operations on
+ * memory config file descriptor.
+ */
+int
+eal_ftruncate_and_fcntl(size_t size)
+{
+	int retval;
+
+	retval = ftruncate(mem_cfg_fd, size);
+	if (retval < 0)
+		return -1;
+
+	retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
+	if (retval < 0)
+		return -2;
+	return 0;
+}
diff --git a/lib/librte_eal/common/eal_common_proc_type.c b/lib/librte_eal/common/eal_common_proc_type.c
new file mode 100644
index 0000000..f8bb47f
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_proc_type.c
@@ -0,0 +1,58 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2014 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+
+#include "eal_private.h"
+
+#include <rte_log.h>
+
+enum rte_proc_type_t
+rte_eal_process_type(void)
+{
+	struct rte_config *rte_config =
+		rte_eal_get_configuration();
+
+	if (rte_config == NULL) {
+		RTE_LOG(WARNING, EAL, "WARNING: rte_config NULL!\n");
+		return RTE_PROC_INVALID;
+	}
+
+	return rte_config->process_type;
+}
diff --git a/lib/librte_eal/common/eal_common_sysfs.c b/lib/librte_eal/common/eal_common_sysfs.c
new file mode 100644
index 0000000..e4dcd55
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_sysfs.c
@@ -0,0 +1,148 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2014 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <syslog.h>
+#include <getopt.h>
+#include <sys/file.h>
+#include <stddef.h>
+#include <errno.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+#include "eal_hugepages.h"
+#include "eal_options.h"
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_lcore.h>
+
+/* parse a sysfs (or other) file containing one integer value */
+int
+eal_parse_sysfs_value(const char *filename, unsigned long *val)
+{
+	FILE *f;
+	char buf[BUFSIZ];
+	char *end = NULL;
+
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+			__func__, filename);
+		return -1;
+	}
+
+	if (fgets(buf, sizeof(buf), f) == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+			__func__, filename);
+		fclose(f);
+		return -1;
+	}
+	*val = strtoul(buf, &end, 0);
+	if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
+		RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
+				__func__, filename);
+		fclose(f);
+		return -1;
+	}
+	fclose(f);
+	return 0;
+}
+
+inline size_t
+eal_get_hugepage_mem_size(void)
+{
+	uint64_t size = 0;
+	unsigned i, j;
+
+	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
+		struct hugepage_info *hpi = &internal_config.hugepage_info[i];
+
+		if (hpi->hugedir != NULL) {
+			for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
+				size += hpi->hugepage_sz * hpi->num_pages[j];
+		}
+	}
+
+	return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
+}
+
+void
+eal_check_mem_on_local_socket(void)
+{
+	const struct rte_memseg *ms;
+	int i, socket_id;
+	struct rte_config *rte_config =
+		rte_eal_get_configuration();
+
+	if (rte_config == NULL) {
+		RTE_LOG(WARNING, EAL, "WARNING: rte_config NULL!\n");
+		return;
+	}
+
+	socket_id = rte_lcore_to_socket_id(rte_config->master_lcore);
+
+	ms = rte_eal_get_physmem_layout();
+
+	for (i = 0; i < RTE_MAX_MEMSEG; i++)
+		if (ms[i].socket_id == socket_id &&
+				ms[i].len > 0)
+			return;
+
+	RTE_LOG(WARNING, EAL, "WARNING: Master core has no "
+			"memory on local socket!\n");
+}
+
+int
+sync_func(__attribute__((unused)) void *arg)
+{
+	return 0;
+}
+
+/* return non-zero if hugepages are enabled. */
+int rte_eal_has_hugepages(void)
+{
+	return !internal_config.no_hugetlbfs;
+}
diff --git a/lib/librte_eal/common/eal_hugepages.h b/lib/librte_eal/common/eal_hugepages.h
index 38edac0..d79ef8a 100644
--- a/lib/librte_eal/common/eal_hugepages.h
+++ b/lib/librte_eal/common/eal_hugepages.h
@@ -63,5 +63,6 @@ struct hugepage_file {
  * for the EAL to use
  */
 int eal_hugepage_info_init(void);
+size_t eal_get_hugepage_mem_size(void);
 
 #endif /* EAL_HUGEPAGES_H */
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 4acf5a0..bcf603f 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -36,6 +36,8 @@
 
 #include <stdio.h>
 
+#include <rte_eal.h>
+
 /**
  * Initialize the memzone subsystem (private to eal).
  *
@@ -232,4 +234,80 @@ int rte_eal_dev_init(void);
  */
 int rte_eal_check_module(const char *module_name);
 
+/**
+ * This function sets up rte_config structure
+ *
+ * This function is private to the EAL.
+ */
+void rte_config_init(void);
+
+/**
+ * This function checks memory on local socket(NUMA)
+ *
+ * This function is private to the EAL.
+ */
+void eal_check_mem_on_local_socket(void);
+
+/**
+ * This function updates shared mem_config INIT DONE
+ *
+ * This function is private to the EAL.
+ */
+void rte_eal_mcfg_complete(void);
+
+/**
+ *
+ * This function is private to the EAL.
+ */
+int sync_func(__attribute__((unused)) void *arg);
+
+/**
+ *
+ * This function is private to the EAL.
+ */
+void *rte_eal_get_mem_cfg_addr(void);
+
+/**
+ * Return a pointer to the configuration structure
+ *
+ * This function is private to the EAL.
+ */
+struct rte_config *rte_eal_get_configuration(void);
+
+/**
+ * Return memory config file descriptor
+ *
+ * This function is private to the EAL.
+ */
+int *eal_get_mem_cfg_fd(void);
+
+/**
+ * Perform ftruncate and fcntl operations on
+ * memory config file descriptor.
+ *
+ * This function is private to the EAL.
+ */
+int eal_ftruncate_and_fcntl(size_t size);
+
+/**
+ * Get per-application usage message
+ *
+ * This function is private to the EAL.
+ */
+rte_usage_hook_t rte_get_application_usage_hook(void);
+
+/**
+ * This function attaches shared memory config
+ *
+ * This function is private to the EAL.
+ */
+void rte_eal_config_attach(void);
+
+/**
+ * This function reattaches shared memory config
+ *
+ * This function is private to the EAL.
+ */
+void rte_eal_config_reattach(void);
+
 #endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index 1385a73..daf2ee0 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -51,6 +51,10 @@ extern "C" {
 
 #define RTE_MAGIC 19820526 /**< Magic number written by the main partition when ready. */
 
+#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)
+
+#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10)
+
 /**
  * The lcore role (used in RTE or not).
  */
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index f11ef59..8e872e0 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -55,6 +55,10 @@ CFLAGS += $(WERROR_FLAGS) -O3
 
 # specific to linuxapp exec-env
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) := eal.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_sysfs.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_mem_cfg.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_proc_type.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_app_usage.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_hugepage_info.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_memory.c
 ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index bd770cf..a7de8e0 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -84,13 +84,6 @@
 #include "eal_hugepages.h"
 #include "eal_options.h"
 
-#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)
-
-#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10)
-
-/* Allow the application to print its usage message too if set */
-static rte_usage_hook_t	rte_application_usage_hook = NULL;
-
 TAILQ_HEAD(shared_driver_list, shared_driver);
 
 /* Definition for shared object drivers. */
@@ -105,25 +98,6 @@ struct shared_driver {
 static struct shared_driver_list solib_list =
 TAILQ_HEAD_INITIALIZER(solib_list);
 
-/* early configuration structure, when memory config is not mmapped */
-static struct rte_mem_config early_mem_config;
-
-/* define fd variable here, because file needs to be kept open for the
- * duration of the program, as we hold a write lock on it in the primary proc */
-static int mem_cfg_fd = -1;
-
-static struct flock wr_lock = {
-		.l_type = F_WRLCK,
-		.l_whence = SEEK_SET,
-		.l_start = offsetof(struct rte_mem_config, memseg),
-		.l_len = sizeof(early_mem_config.memseg),
-};
-
-/* Address of global and public configuration */
-static struct rte_config rte_config = {
-		.mem_config = &early_mem_config,
-};
-
 /* internal configuration (per-core) */
 struct lcore_config lcore_config[RTE_MAX_LCORE];
 
@@ -133,196 +107,85 @@ struct internal_config internal_config;
 /* used by rte_rdtsc() */
 int rte_cycles_vmware_tsc_map;
 
-/* Return a pointer to the configuration structure */
-struct rte_config *
-rte_eal_get_configuration(void)
-{
-	return &rte_config;
-}
-
-/* parse a sysfs (or other) file containing one integer value */
-int
-eal_parse_sysfs_value(const char *filename, unsigned long *val)
-{
-	FILE *f;
-	char buf[BUFSIZ];
-	char *end = NULL;
-
-	if ((f = fopen(filename, "r")) == NULL) {
-		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
-			__func__, filename);
-		return -1;
-	}
-
-	if (fgets(buf, sizeof(buf), f) == NULL) {
-		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
-			__func__, filename);
-		fclose(f);
-		return -1;
-	}
-	*val = strtoul(buf, &end, 0);
-	if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
-		RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
-				__func__, filename);
-		fclose(f);
-		return -1;
-	}
-	fclose(f);
-	return 0;
-}
-
-
-/* create memory configuration in shared/mmap memory. Take out
- * a write lock on the memsegs, so we can auto-detect primary/secondary.
- * This means we never close the file while running (auto-close on exit).
- * We also don't lock the whole file, so that in future we can use read-locks
- * on other parts, e.g. memzones, to detect if there are running secondary
- * processes. */
-static void
-rte_eal_config_create(void)
+inline void *
+rte_eal_get_mem_cfg_addr(void)
 {
-	void *rte_mem_cfg_addr;
-	int retval;
-
-	const char *pathname = eal_runtime_config_path();
-
-	if (internal_config.no_shconf)
-		return;
+	void *mem_cfg_addr;
 
-	/* map the config before hugepage address so that we don't waste a page */
 	if (internal_config.base_virtaddr != 0)
-		rte_mem_cfg_addr = (void *)
+		mem_cfg_addr = (void *)
 			RTE_ALIGN_FLOOR(internal_config.base_virtaddr -
 			sizeof(struct rte_mem_config), sysconf(_SC_PAGE_SIZE));
 	else
-		rte_mem_cfg_addr = NULL;
-
-	if (mem_cfg_fd < 0){
-		mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
-		if (mem_cfg_fd < 0)
-			rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
-	}
-
-	retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
-	if (retval < 0){
-		close(mem_cfg_fd);
-		rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
-	}
-
-	retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
-	if (retval < 0){
-		close(mem_cfg_fd);
-		rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
-				"process running?\n", pathname);
-	}
-
-	rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config),
-				PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
-
-	if (rte_mem_cfg_addr == MAP_FAILED){
-		rte_panic("Cannot mmap memory for rte_config\n");
-	}
-	memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
-	rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
-
-	/* store address of the config in the config itself so that secondary
-	 * processes could later map the config into this exact location */
-	rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
+		mem_cfg_addr = NULL;
 
+	return mem_cfg_addr;
 }
 
 /* attach to an existing shared memory config */
-static void
+void
 rte_eal_config_attach(void)
 {
 	struct rte_mem_config *mem_config;
+	struct rte_config *rte_config;
+	int *mem_cfg_fd = eal_get_mem_cfg_fd();
 
 	const char *pathname = eal_runtime_config_path();
 
 	if (internal_config.no_shconf)
 		return;
 
-	if (mem_cfg_fd < 0){
-		mem_cfg_fd = open(pathname, O_RDWR);
-		if (mem_cfg_fd < 0)
+	rte_config = rte_eal_get_configuration();
+	if (rte_config == NULL)
+		return;
+
+	if (*mem_cfg_fd < 0) {
+		*mem_cfg_fd = open(pathname, O_RDWR);
+		if (*mem_cfg_fd < 0)
 			rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
 	}
 
 	/* map it as read-only first */
 	mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config),
-			PROT_READ, MAP_SHARED, mem_cfg_fd, 0);
+			PROT_READ, MAP_SHARED, *mem_cfg_fd, 0);
 	if (mem_config == MAP_FAILED)
 		rte_panic("Cannot mmap memory for rte_config\n");
 
-	rte_config.mem_config = mem_config;
+	rte_config->mem_config = mem_config;
 }
 
 /* reattach the shared config at exact memory location primary process has it */
-static void
+void
 rte_eal_config_reattach(void)
 {
 	struct rte_mem_config *mem_config;
 	void *rte_mem_cfg_addr;
+	struct rte_config *rte_config;
+	int *mem_cfg_fd = eal_get_mem_cfg_fd();
 
 	if (internal_config.no_shconf)
 		return;
 
+	rte_config = rte_eal_get_configuration();
+	if (rte_config == NULL)
+		return;
+
 	/* save the address primary process has mapped shared config to */
-	rte_mem_cfg_addr = (void *) (uintptr_t) rte_config.mem_config->mem_cfg_addr;
+	rte_mem_cfg_addr =
+		(void *) (uintptr_t) rte_config->mem_config->mem_cfg_addr;
 
 	/* unmap original config */
-	munmap(rte_config.mem_config, sizeof(struct rte_mem_config));
+	munmap(rte_config->mem_config, sizeof(struct rte_mem_config));
 
 	/* remap the config at proper address */
 	mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr,
-			sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED,
-			mem_cfg_fd, 0);
-	close(mem_cfg_fd);
+			sizeof(*mem_config), PROT_READ | PROT_WRITE,
+			MAP_SHARED, *mem_cfg_fd, 0);
+	close(*mem_cfg_fd);
 	if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr)
 		rte_panic("Cannot mmap memory for rte_config\n");
 
-	rte_config.mem_config = mem_config;
-}
-
-/* Detect if we are a primary or a secondary process */
-enum rte_proc_type_t
-eal_proc_type_detect(void)
-{
-	enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
-	const char *pathname = eal_runtime_config_path();
-
-	/* if we can open the file but not get a write-lock we are a secondary
-	 * process. NOTE: if we get a file handle back, we keep that open
-	 * and don't close it to prevent a race condition between multiple opens */
-	if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
-			(fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
-		ptype = RTE_PROC_SECONDARY;
-
-	RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
-			ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
-
-	return ptype;
-}
-
-/* Sets up rte_config structure with the pointer to shared memory config.*/
-static void
-rte_config_init(void)
-{
-	rte_config.process_type = internal_config.process_type;
-
-	switch (rte_config.process_type){
-	case RTE_PROC_PRIMARY:
-		rte_eal_config_create();
-		break;
-	case RTE_PROC_SECONDARY:
-		rte_eal_config_attach();
-		rte_eal_mcfg_wait_complete(rte_config.mem_config);
-		rte_eal_config_reattach();
-		break;
-	case RTE_PROC_AUTO:
-	case RTE_PROC_INVALID:
-		rte_panic("Invalid process type\n");
-	}
+	rte_config->mem_config = mem_config;
 }
 
 /* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
@@ -348,6 +211,9 @@ eal_hugedirs_unlock(void)
 static void
 eal_usage(const char *prgname)
 {
+	rte_usage_hook_t rte_application_usage_hook =
+		rte_get_application_usage_hook();
+
 	printf("\nUsage: %s ", prgname);
 	eal_common_usage();
 	printf("EAL Linux options:\n"
@@ -367,19 +233,6 @@ eal_usage(const char *prgname)
 	}
 }
 
-/* Set a per-application usage message */
-rte_usage_hook_t
-rte_set_application_usage_hook( rte_usage_hook_t usage_func )
-{
-	rte_usage_hook_t	old_func;
-
-	/* Will be NULL on the first call to denote the last usage routine. */
-	old_func					= rte_application_usage_hook;
-	rte_application_usage_hook	= usage_func;
-
-	return old_func;
-}
-
 static int
 eal_parse_socket_mem(char *socket_mem)
 {
@@ -481,24 +334,6 @@ eal_parse_vfio_intr(const char *mode)
 	return -1;
 }
 
-static inline size_t
-eal_get_hugepage_mem_size(void)
-{
-	uint64_t size = 0;
-	unsigned i, j;
-
-	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
-		struct hugepage_info *hpi = &internal_config.hugepage_info[i];
-		if (hpi->hugedir != NULL) {
-			for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
-				size += hpi->hugepage_sz * hpi->num_pages[j];
-			}
-		}
-	}
-
-	return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
-}
-
 /* Parse the argument given in the command line of the application */
 static int
 eal_parse_args(int argc, char **argv)
@@ -645,39 +480,6 @@ eal_parse_args(int argc, char **argv)
 	return ret;
 }
 
-static void
-eal_check_mem_on_local_socket(void)
-{
-	const struct rte_memseg *ms;
-	int i, socket_id;
-
-	socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);
-
-	ms = rte_eal_get_physmem_layout();
-
-	for (i = 0; i < RTE_MAX_MEMSEG; i++)
-		if (ms[i].socket_id == socket_id &&
-				ms[i].len > 0)
-			return;
-
-	RTE_LOG(WARNING, EAL, "WARNING: Master core has no "
-			"memory on local socket!\n");
-}
-
-static int
-sync_func(__attribute__((unused)) void *arg)
-{
-	return 0;
-}
-
-inline static void
-rte_eal_mcfg_complete(void)
-{
-	/* ALL shared mem_config related INIT DONE */
-	if (rte_config.process_type == RTE_PROC_PRIMARY)
-		rte_config.mem_config->magic = RTE_MAGIC;
-}
-
 /*
  * Request iopl privilege for all RPL, returns 0 on success
  * iopl() call is mostly for the i386 architecture. For other architectures,
@@ -706,6 +508,12 @@ rte_eal_init(int argc, char **argv)
 	const char *logid;
 	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
 
+	struct rte_config *rte_config;
+
+	rte_config = rte_eal_get_configuration();
+	if (rte_config == NULL)
+		return -1;
+
 	if (!rte_atomic32_test_and_set(&run_once))
 		return -1;
 
@@ -803,12 +611,12 @@ rte_eal_init(int argc, char **argv)
 			RTE_LOG(WARNING, EAL, "%s\n", dlerror());
 	}
 
-	eal_thread_init_master(rte_config.master_lcore);
+	eal_thread_init_master(rte_config->master_lcore);
 
 	ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
 
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
-		rte_config.master_lcore, (int)thread_id, cpuset,
+		rte_config->master_lcore, (int)thread_id, cpuset,
 		ret == 0 ? "" : "...");
 
 	if (rte_eal_dev_init() < 0)
@@ -848,24 +656,6 @@ rte_eal_init(int argc, char **argv)
 	return fctret;
 }
 
-/* get core role */
-enum rte_lcore_role_t
-rte_eal_lcore_role(unsigned lcore_id)
-{
-	return (rte_config.lcore_role[lcore_id]);
-}
-
-enum rte_proc_type_t
-rte_eal_process_type(void)
-{
-	return (rte_config.process_type);
-}
-
-int rte_eal_has_hugepages(void)
-{
-	return ! internal_config.no_hugetlbfs;
-}
-
 int
 rte_eal_check_module(const char *module_name)
 {
-- 
1.9.1

^ permalink raw reply	[relevance 1%]

* [dpdk-dev] [PATCH v8 1/6] Move common functions in eal_thread.c
  2015-04-28 23:46  4% [dpdk-dev] [PATCH v8 0/6] Move common functions in EAL Ravi Kerur
@ 2015-04-28 23:46  2% ` Ravi Kerur
  2015-04-28 23:46  1%   ` [dpdk-dev] [PATCH v8 2/6] Move common functions in eal.c Ravi Kerur
  2015-04-29 10:14  0% ` [dpdk-dev] [PATCH v8 0/6] Move common functions in EAL Neil Horman
  1 sibling, 1 reply; 200+ results
From: Ravi Kerur @ 2015-04-28 23:46 UTC (permalink / raw)
  To: dev

Changes in v8
Fixed ABI warnings by reordering compilation of
eal_common_thread.c.

Changes in v7
Remove _setname_ pthread calls.
Use rte_gettid() API in RTE_LOG to print thread_id.

Changes in v6
Remove RTE_EXEC_ENV_BSDAPP from eal_common_thread.c file.
Add pthread_setname_np/pthread_set_name_np for Linux/FreeBSD
respectively. Plan to use _getname_ in RTE_LOG when available.
Use existing rte_get_systid() in RTE_LOG to print thread_id.

Changes in v5
Rebase to latest code.

Changes in v4
None

Changes in v3
Changed subject to be more explicit on file name inclusion.

Changes in v2
None

Changes in v1
eal_thread.c has only minor differences between Linux and BSD, so move
the entire file into the common directory.
Use RTE_EXEC_ENV_BSDAPP to handle the minor differences.
Rename eal_thread.c to eal_common_thread.c
Makefile changes to reflect file move and name change.
Fix checkpatch warnings.

Signed-off-by: Ravi Kerur <rkerur@gmail.com>
---
 lib/librte_eal/bsdapp/eal/Makefile        |   3 +-
 lib/librte_eal/bsdapp/eal/eal_thread.c    | 152 ------------------------------
 lib/librte_eal/common/eal_common_thread.c | 147 ++++++++++++++++++++++++++++-
 lib/librte_eal/linuxapp/eal/Makefile      |   3 +-
 lib/librte_eal/linuxapp/eal/eal_thread.c  | 152 +-----------------------------
 5 files changed, 151 insertions(+), 306 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index 2357cfa..b7ca47c 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -55,6 +55,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) := eal.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_memory.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_hugepage_info.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_thread.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_thread.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_log.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_pci.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_debug.c
@@ -77,7 +78,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_hexdump.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_devargs.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_dev.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_options.c
-SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_thread.c
 
 CFLAGS_eal.o := -D_GNU_SOURCE
 #CFLAGS_eal_thread.o := -D_GNU_SOURCE
@@ -88,6 +88,7 @@ CFLAGS_eal_common_log.o := -D_GNU_SOURCE
 # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
 ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
 CFLAGS_eal_thread.o += -Wno-return-type
+CFLAGS_eal_common_thread.o += -Wno-return-type
 CFLAGS_eal_hpet.o += -Wno-return-type
 endif
 
diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c
index 9a03437..5714b8f 100644
--- a/lib/librte_eal/bsdapp/eal/eal_thread.c
+++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
@@ -35,163 +35,11 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
-#include <unistd.h>
-#include <sched.h>
-#include <pthread_np.h>
-#include <sys/queue.h>
 #include <sys/thr.h>
 
-#include <rte_debug.h>
-#include <rte_atomic.h>
-#include <rte_launch.h>
-#include <rte_log.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_per_lcore.h>
-#include <rte_eal.h>
-#include <rte_per_lcore.h>
-#include <rte_lcore.h>
-
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
-/*
- * Send a message to a slave lcore identified by slave_id to call a
- * function f with argument arg. Once the execution is done, the
- * remote lcore switch in FINISHED state.
- */
-int
-rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
-{
-	int n;
-	char c = 0;
-	int m2s = lcore_config[slave_id].pipe_master2slave[1];
-	int s2m = lcore_config[slave_id].pipe_slave2master[0];
-
-	if (lcore_config[slave_id].state != WAIT)
-		return -EBUSY;
-
-	lcore_config[slave_id].f = f;
-	lcore_config[slave_id].arg = arg;
-
-	/* send message */
-	n = 0;
-	while (n == 0 || (n < 0 && errno == EINTR))
-		n = write(m2s, &c, 1);
-	if (n < 0)
-		rte_panic("cannot write on configuration pipe\n");
-
-	/* wait ack */
-	do {
-		n = read(s2m, &c, 1);
-	} while (n < 0 && errno == EINTR);
-
-	if (n <= 0)
-		rte_panic("cannot read on configuration pipe\n");
-
-	return 0;
-}
-
-/* set affinity for current thread */
-static int
-eal_thread_set_affinity(void)
-{
-	unsigned lcore_id = rte_lcore_id();
-
-	/* acquire system unique id  */
-	rte_gettid();
-
-	/* update EAL thread core affinity */
-	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
-}
-
-void eal_thread_init_master(unsigned lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
-}
-
-/* main loop of threads */
-__attribute__((noreturn)) void *
-eal_thread_loop(__attribute__((unused)) void *arg)
-{
-	char c;
-	int n, ret;
-	unsigned lcore_id;
-	pthread_t thread_id;
-	int m2s, s2m;
-	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
-
-	thread_id = pthread_self();
-
-	/* retrieve our lcore_id from the configuration structure */
-	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
-		if (thread_id == lcore_config[lcore_id].thread_id)
-			break;
-	}
-	if (lcore_id == RTE_MAX_LCORE)
-		rte_panic("cannot retrieve lcore id\n");
-
-	m2s = lcore_config[lcore_id].pipe_master2slave[0];
-	s2m = lcore_config[lcore_id].pipe_slave2master[1];
-
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
-
-	ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
-
-	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
-		lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
-
-	/* read on our pipe to get commands */
-	while (1) {
-		void *fct_arg;
-
-		/* wait command */
-		do {
-			n = read(m2s, &c, 1);
-		} while (n < 0 && errno == EINTR);
-
-		if (n <= 0)
-			rte_panic("cannot read on configuration pipe\n");
-
-		lcore_config[lcore_id].state = RUNNING;
-
-		/* send ack */
-		n = 0;
-		while (n == 0 || (n < 0 && errno == EINTR))
-			n = write(s2m, &c, 1);
-		if (n < 0)
-			rte_panic("cannot write on configuration pipe\n");
-
-		if (lcore_config[lcore_id].f == NULL)
-			rte_panic("NULL function pointer\n");
-
-		/* call the function and store the return value */
-		fct_arg = lcore_config[lcore_id].arg;
-		ret = lcore_config[lcore_id].f(fct_arg);
-		lcore_config[lcore_id].ret = ret;
-		rte_wmb();
-		lcore_config[lcore_id].state = FINISHED;
-	}
-
-	/* never reached */
-	/* pthread_exit(NULL); */
-	/* return NULL; */
-}
-
 /* require calling thread tid by gettid() */
 int rte_sys_gettid(void)
 {
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 2405e93..5e55401 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -31,11 +31,12 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <unistd.h>
-#include <pthread.h>
+#include <sys/queue.h>
 #include <sched.h>
 #include <assert.h>
 #include <string.h>
@@ -43,10 +44,21 @@
 #include <rte_lcore.h>
 #include <rte_memory.h>
 #include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_atomic.h>
+#include <rte_launch.h>
+#include <rte_memzone.h>
+#include <rte_per_lcore.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
 
+#include "eal_private.h"
 #include "eal_thread.h"
 
 RTE_DECLARE_PER_LCORE(unsigned , _socket_id);
+RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
+RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
+RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
 
 unsigned rte_socket_id(void)
 {
@@ -155,3 +167,136 @@ exit:
 
 	return ret;
 }
+
+/*
+ * Send a message to a slave lcore identified by slave_id to call a
+ * function f with argument arg. Once the execution is done, the
+ * remote lcore switch in FINISHED state.
+ */
+int
+rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
+{
+	int n;
+	char c = 0;
+	int m2s = lcore_config[slave_id].pipe_master2slave[1];
+	int s2m = lcore_config[slave_id].pipe_slave2master[0];
+
+	if (lcore_config[slave_id].state != WAIT)
+		return -EBUSY;
+
+	lcore_config[slave_id].f = f;
+	lcore_config[slave_id].arg = arg;
+
+	/* send message */
+	n = 0;
+	while (n == 0 || (n < 0 && errno == EINTR))
+		n = write(m2s, &c, 1);
+	if (n < 0)
+		rte_panic("cannot write on configuration pipe\n");
+
+	/* wait ack */
+	do {
+		n = read(s2m, &c, 1);
+	} while (n < 0 && errno == EINTR);
+
+	if (n <= 0)
+		rte_panic("cannot read on configuration pipe\n");
+
+	return 0;
+}
+
+/* set affinity for current EAL thread */
+static int
+eal_thread_set_affinity(void)
+{
+	unsigned lcore_id = rte_lcore_id();
+
+	/* acquire system unique id  */
+	rte_gettid();
+
+	/* update EAL thread core affinity */
+	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
+}
+
+void eal_thread_init_master(unsigned lcore_id)
+{
+	/* set the lcore ID in per-lcore memory area */
+	RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+	/* set CPU affinity */
+	if (eal_thread_set_affinity() < 0)
+		rte_panic("cannot set affinity\n");
+}
+
+/* main loop of threads */
+__attribute__((noreturn)) void *
+eal_thread_loop(__attribute__((unused)) void *arg)
+{
+	char c;
+	int n, ret;
+	unsigned lcore_id;
+	pthread_t thread_id;
+	int m2s, s2m;
+	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+
+	thread_id = pthread_self();
+
+	/* retrieve our lcore_id from the configuration structure */
+	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+		if (thread_id == lcore_config[lcore_id].thread_id)
+			break;
+	}
+	if (lcore_id == RTE_MAX_LCORE)
+		rte_panic("cannot retrieve lcore id\n");
+
+	m2s = lcore_config[lcore_id].pipe_master2slave[0];
+	s2m = lcore_config[lcore_id].pipe_slave2master[1];
+
+	/* set the lcore ID in per-lcore memory area */
+	RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+	/* set CPU affinity */
+	if (eal_thread_set_affinity() < 0)
+		rte_panic("cannot set affinity\n");
+
+	ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
+
+	RTE_LOG(DEBUG, EAL, "lcore %u is ready (thread=%d;cpuset=[%s%s])\n",
+		lcore_id, rte_gettid(), cpuset, ret == 0 ? "" : "...");
+
+	/* read on our pipe to get commands */
+	while (1) {
+		void *fct_arg;
+
+		/* wait command */
+		do {
+			n = read(m2s, &c, 1);
+		} while (n < 0 && errno == EINTR);
+
+		if (n <= 0)
+			rte_panic("cannot read on configuration pipe\n");
+
+		lcore_config[lcore_id].state = RUNNING;
+
+		/* send ack */
+		n = 0;
+		while (n == 0 || (n < 0 && errno == EINTR))
+			n = write(s2m, &c, 1);
+		if (n < 0)
+			rte_panic("cannot write on configuration pipe\n");
+
+		if (lcore_config[lcore_id].f == NULL)
+			rte_panic("NULL function pointer\n");
+
+		/* call the function and store the return value */
+		fct_arg = lcore_config[lcore_id].arg;
+		ret = lcore_config[lcore_id].f(fct_arg);
+		lcore_config[lcore_id].ret = ret;
+		rte_wmb();
+		lcore_config[lcore_id].state = FINISHED;
+	}
+
+	/* never reached */
+	/* pthread_exit(NULL); */
+	/* return NULL; */
+}
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 01f7b70..f11ef59 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -61,6 +61,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_xen_memory.c
 endif
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_thread.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_thread.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_log.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_uio.c
@@ -89,7 +90,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_hexdump.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_devargs.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_dev.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_options.c
-SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_thread.c
 
 CFLAGS_eal.o := -D_GNU_SOURCE
 CFLAGS_eal_interrupts.o := -D_GNU_SOURCE
@@ -109,6 +109,7 @@ CFLAGS_eal_common_thread.o := -D_GNU_SOURCE
 # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
 ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
 CFLAGS_eal_thread.o += -Wno-return-type
+CFLAGS_eal_common_thread.o += -Wno-return-type
 endif
 
 INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h
diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c
index 18bd8e0..51dca37 100644
--- a/lib/librte_eal/linuxapp/eal/eal_thread.c
+++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
@@ -34,163 +34,13 @@
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <stdint.h>
 #include <unistd.h>
-#include <pthread.h>
-#include <sched.h>
-#include <sys/queue.h>
 #include <sys/syscall.h>
 
-#include <rte_debug.h>
-#include <rte_atomic.h>
-#include <rte_launch.h>
-#include <rte_log.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_per_lcore.h>
-#include <rte_eal.h>
-#include <rte_per_lcore.h>
-#include <rte_lcore.h>
-
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
-/*
- * Send a message to a slave lcore identified by slave_id to call a
- * function f with argument arg. Once the execution is done, the
- * remote lcore switch in FINISHED state.
- */
-int
-rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
-{
-	int n;
-	char c = 0;
-	int m2s = lcore_config[slave_id].pipe_master2slave[1];
-	int s2m = lcore_config[slave_id].pipe_slave2master[0];
-
-	if (lcore_config[slave_id].state != WAIT)
-		return -EBUSY;
-
-	lcore_config[slave_id].f = f;
-	lcore_config[slave_id].arg = arg;
-
-	/* send message */
-	n = 0;
-	while (n == 0 || (n < 0 && errno == EINTR))
-		n = write(m2s, &c, 1);
-	if (n < 0)
-		rte_panic("cannot write on configuration pipe\n");
-
-	/* wait ack */
-	do {
-		n = read(s2m, &c, 1);
-	} while (n < 0 && errno == EINTR);
-
-	if (n <= 0)
-		rte_panic("cannot read on configuration pipe\n");
-
-	return 0;
-}
-
-/* set affinity for current EAL thread */
-static int
-eal_thread_set_affinity(void)
-{
-	unsigned lcore_id = rte_lcore_id();
-
-	/* acquire system unique id  */
-	rte_gettid();
-
-	/* update EAL thread core affinity */
-	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
-}
-
-void eal_thread_init_master(unsigned lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
-}
-
-/* main loop of threads */
-__attribute__((noreturn)) void *
-eal_thread_loop(__attribute__((unused)) void *arg)
-{
-	char c;
-	int n, ret;
-	unsigned lcore_id;
-	pthread_t thread_id;
-	int m2s, s2m;
-	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
-
-	thread_id = pthread_self();
-
-	/* retrieve our lcore_id from the configuration structure */
-	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
-		if (thread_id == lcore_config[lcore_id].thread_id)
-			break;
-	}
-	if (lcore_id == RTE_MAX_LCORE)
-		rte_panic("cannot retrieve lcore id\n");
-
-	m2s = lcore_config[lcore_id].pipe_master2slave[0];
-	s2m = lcore_config[lcore_id].pipe_slave2master[1];
-
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
-
-	ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
-
-	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
-		lcore_id, (int)thread_id, cpuset, ret == 0 ? "" : "...");
-
-	/* read on our pipe to get commands */
-	while (1) {
-		void *fct_arg;
-
-		/* wait command */
-		do {
-			n = read(m2s, &c, 1);
-		} while (n < 0 && errno == EINTR);
-
-		if (n <= 0)
-			rte_panic("cannot read on configuration pipe\n");
-
-		lcore_config[lcore_id].state = RUNNING;
-
-		/* send ack */
-		n = 0;
-		while (n == 0 || (n < 0 && errno == EINTR))
-			n = write(s2m, &c, 1);
-		if (n < 0)
-			rte_panic("cannot write on configuration pipe\n");
-
-		if (lcore_config[lcore_id].f == NULL)
-			rte_panic("NULL function pointer\n");
-
-		/* call the function and store the return value */
-		fct_arg = lcore_config[lcore_id].arg;
-		ret = lcore_config[lcore_id].f(fct_arg);
-		lcore_config[lcore_id].ret = ret;
-		rte_wmb();
-		lcore_config[lcore_id].state = FINISHED;
-	}
-
-	/* never reached */
-	/* pthread_exit(NULL); */
-	/* return NULL; */
-}
+#include <rte_log.h>
 
 /* require calling thread tid by gettid() */
 int rte_sys_gettid(void)
-- 
1.9.1

^ permalink raw reply	[relevance 2%]

* [dpdk-dev] [PATCH v8 0/6] Move common functions in EAL
@ 2015-04-28 23:46  4% Ravi Kerur
  2015-04-28 23:46  2% ` [dpdk-dev] [PATCH v8 1/6] Move common functions in eal_thread.c Ravi Kerur
  2015-04-29 10:14  0% ` [dpdk-dev] [PATCH v8 0/6] Move common functions in EAL Neil Horman
  0 siblings, 2 replies; 200+ results
From: Ravi Kerur @ 2015-04-28 23:46 UTC (permalink / raw)
  To: dev

Changes in v8 include:
Re-ordered source file compilation to fix the ABI warning.
Ran validate-abi against the x86_64-native-linuxapp-gcc,
x86_64-native-linuxapp-clang and x86_64-ivshmem-linuxapp-gcc
environments.

Testing:
Linux - Ubuntu x86_64 14.04
Compilation successful (x86_64-native-linuxapp-gcc and
x86_64-native-linuxapp-clang).
"make test" results match baseline code.
testpmd utility on I217/I218 Intel chipset.

FreeBSD 10.0 x86_64
Compilation successful (x86_64-native-bsdapp-gcc and
x86_64-native-bsdapp-clang).
Tested with helloworld, timer and cmdline examples.

Ravi Kerur (6):
  Move common functions in eal_thread.c
  Move common functions in eal.c
  Move common functions in eal_lcore.c
  Move common functions in eal_timer.c
  Move common functions in eal_memory.c
  Move common functions in eal_pci.c

 lib/librte_eal/bsdapp/eal/Makefile           |   9 +-
 lib/librte_eal/bsdapp/eal/eal.c              | 271 +++---------------------
 lib/librte_eal/bsdapp/eal/eal_lcore.c        |  72 ++-----
 lib/librte_eal/bsdapp/eal/eal_memory.c       |  47 ++---
 lib/librte_eal/bsdapp/eal/eal_pci.c          |  72 +------
 lib/librte_eal/bsdapp/eal/eal_thread.c       | 152 --------------
 lib/librte_eal/bsdapp/eal/eal_timer.c        |  52 +----
 lib/librte_eal/common/eal_common_app_usage.c |  63 ++++++
 lib/librte_eal/common/eal_common_lcore.c     | 107 ++++++++++
 lib/librte_eal/common/eal_common_mem_cfg.c   | 224 ++++++++++++++++++++
 lib/librte_eal/common/eal_common_memory.c    |  38 +++-
 lib/librte_eal/common/eal_common_pci.c       |  72 +++++++
 lib/librte_eal/common/eal_common_proc_type.c |  58 ++++++
 lib/librte_eal/common/eal_common_sysfs.c     | 148 ++++++++++++++
 lib/librte_eal/common/eal_common_thread.c    | 147 ++++++++++++-
 lib/librte_eal/common/eal_common_timer.c     | 102 +++++++++
 lib/librte_eal/common/eal_hugepages.h        |   1 +
 lib/librte_eal/common/eal_private.h          | 171 +++++++++++++++-
 lib/librte_eal/common/include/rte_eal.h      |   4 +
 lib/librte_eal/linuxapp/eal/Makefile         |  10 +-
 lib/librte_eal/linuxapp/eal/eal.c            | 296 ++++-----------------------
 lib/librte_eal/linuxapp/eal/eal_lcore.c      |  66 +-----
 lib/librte_eal/linuxapp/eal/eal_memory.c     |  36 +---
 lib/librte_eal/linuxapp/eal/eal_pci.c        |  75 +------
 lib/librte_eal/linuxapp/eal/eal_thread.c     | 152 +-------------
 lib/librte_eal/linuxapp/eal/eal_timer.c      |  55 +----
 26 files changed, 1277 insertions(+), 1223 deletions(-)
 create mode 100644 lib/librte_eal/common/eal_common_app_usage.c
 create mode 100644 lib/librte_eal/common/eal_common_lcore.c
 create mode 100644 lib/librte_eal/common/eal_common_mem_cfg.c
 create mode 100644 lib/librte_eal/common/eal_common_proc_type.c
 create mode 100644 lib/librte_eal/common/eal_common_sysfs.c
 create mode 100644 lib/librte_eal/common/eal_common_timer.c

-- 
1.9.1

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  2015-04-27 22:39  3%                             ` Ravi Kerur
@ 2015-04-28 19:35  0%                               ` Neil Horman
  2015-04-28 23:52  4%                                 ` Ravi Kerur
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-04-28 19:35 UTC (permalink / raw)
  To: Ravi Kerur; +Cc: dev

On Mon, Apr 27, 2015 at 03:39:41PM -0700, Ravi Kerur wrote:
> On Mon, Apr 27, 2015 at 6:44 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > On Sat, Apr 25, 2015 at 05:09:01PM -0700, Ravi Kerur wrote:
> > > On Sat, Apr 25, 2015 at 6:02 AM, Neil Horman <nhorman@tuxdriver.com>
> > wrote:
> > >
> > > > On Sat, Apr 25, 2015 at 08:32:42AM -0400, Neil Horman wrote:
> > > > > On Fri, Apr 24, 2015 at 06:45:06PM -0700, Ravi Kerur wrote:
> > > > > > On Fri, Apr 24, 2015 at 2:24 PM, Ravi Kerur <rkerur@gmail.com>
> > wrote:
> > > > > >
> > > > > > >
> > > > > > >
> > > > > > > On Fri, Apr 24, 2015 at 12:51 PM, Neil Horman <
> > nhorman@tuxdriver.com
> > > > >
> > > > > > > wrote:
> > > > > > >
> > > > > > >> On Fri, Apr 24, 2015 at 12:21:23PM -0700, Ravi Kerur wrote:
> > > > > > >> > On Fri, Apr 24, 2015 at 11:53 AM, Neil Horman <
> > > > nhorman@tuxdriver.com>
> > > > > > >> wrote:
> > > > > > >> >
> > > > > > >> > > On Fri, Apr 24, 2015 at 09:45:24AM -0700, Ravi Kerur wrote:
> > > > > > >> > > > On Fri, Apr 24, 2015 at 8:22 AM, Neil Horman <
> > > > nhorman@tuxdriver.com
> > > > > > >> >
> > > > > > >> > > wrote:
> > > > > > >> > > >
> > > > > > >> > > > > On Fri, Apr 24, 2015 at 08:14:04AM -0700, Ravi Kerur
> > wrote:
> > > > > > >> > > > > > On Fri, Apr 24, 2015 at 6:51 AM, Neil Horman <
> > > > > > >> nhorman@tuxdriver.com>
> > > > > > >> > > > > wrote:
> > > > > > >> > > > > >
> > > > > > >> > > > > > > On Thu, Apr 23, 2015 at 02:35:31PM -0700, Ravi Kerur
> > > > wrote:
> > > > > > >> > > > > > > > Changes in v7
> > > > > > >> > > > > > > > Remove _setname_ pthread calls.
> > > > > > >> > > > > > > > Use rte_gettid() API in RTE_LOG to print
> > thread_id.
> > > > > > >> > > > > > > >
> > > > > > >> > > > > > > > Changes in v6
> > > > > > >> > > > > > > > Remove RTE_EXEC_ENV_BSDAPP from
> > eal_common_thread.c
> > > > file.
> > > > > > >> > > > > > > > Add pthread_setname_np/pthread_set_name_np for
> > > > Linux/FreeBSD
> > > > > > >> > > > > > > > respectively. Plan to use _getname_ in RTE_LOG
> > when
> > > > > > >> available.
> > > > > > >> > > > > > > > Use existing rte_get_systid() in RTE_LOG to print
> > > > thread_id.
> > > > > > >> > > > > > > >
> > > > > > >> > > > > > > > Changes in v5
> > > > > > >> > > > > > > > Rebase to latest code.
> > > > > > >> > > > > > > >
> > > > > > >> > > > > > > > Changes in v4
> > > > > > >> > > > > > > > None
> > > > > > >> > > > > > > >
> > > > > > >> > > > > > > > Changes in v3
> > > > > > >> > > > > > > > Changed subject to be more explicit on file name
> > > > inclusion.
> > > > > > >> > > > > > > >
> > > > > > >> > > > > > > > Changes in v2
> > > > > > >> > > > > > > > None
> > > > > > >> > > > > > > >
> > > > > > >> > > > > > > > Changes in v1
> > > > > > >> > > > > > > > eal_thread.c has minor differences between Linux
> > and
> > > > BSD,
> > > > > > >> move
> > > > > > >> > > > > > > > entire file into common directory.
> > > > > > >> > > > > > > > Use RTE_EXEC_ENV_BSDAPP to differentiate on minor
> > > > > > >> differences.
> > > > > > >> > > > > > > > Rename eal_thread.c to eal_common_thread.c
> > > > > > >> > > > > > > > Makefile changes to reflect file move and name
> > change.
> > > > > > >> > > > > > > > Fix checkpatch warnings.
> > > > > > >> > > > > > > >
> > > > > > >> > > > > > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > > > > >> > > > > > > > ---
> > > > > > >> > > > > > > >  lib/librte_eal/bsdapp/eal/Makefile        |   2
> > +-
> > > > > > >> > > > > > > >  lib/librte_eal/bsdapp/eal/eal_thread.c    | 152
> > > > > > >> > > > > > > ------------------------------
> > > > > > >> > > > > > > >  lib/librte_eal/common/eal_common_thread.c | 147
> > > > > > >> > > > > > > ++++++++++++++++++++++++++++-
> > > > > > >> > > > > > > >  lib/librte_eal/linuxapp/eal/eal_thread.c  | 152
> > > > > > >> > > > > > > +-----------------------------
> > > > > > >> > > > > > > >  4 files changed, 148 insertions(+), 305
> > deletions(-)
> > > > > > >> > > > > > > >
> > > > > > >> > > > > > > > diff --git a/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > >> > > > > > > b/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > >> > > > > > > > index 2357cfa..55971b9 100644
> > > > > > >> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > >> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > >> > > > > > > > @@ -87,7 +87,7 @@ CFLAGS_eal_common_log.o :=
> > > > -D_GNU_SOURCE
> > > > > > >> > > > > > > >  # workaround for a gcc bug with noreturn
> > attribute
> > > > > > >> > > > > > > >  #
> > http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
> > > > > > >> > > > > > > >  ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
> > > > > > >> > > > > > > > -CFLAGS_eal_thread.o += -Wno-return-type
> > > > > > >> > > > > > > > +CFLAGS_eal_common_thread.o += -Wno-return-type
> > > > > > >> > > > > > > >  CFLAGS_eal_hpet.o += -Wno-return-type
> > > > > > >> > > > > > > >  endif
> > > > > > >> > > > > > > >
> > > > > > >> > > > > > > > diff --git
> > a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > >> > > > > > > b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > >> > > > > > > > index 9a03437..5714b8f 100644
> > > > > > >> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > >> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > >> > > > > > > > @@ -35,163 +35,11 @@
> > > > > > >> > > > > > > >  #include <stdio.h>
> > > > > > >> > > > > > > >  #include <stdlib.h>
> > > > > > >> > > > > > > >  #include <stdint.h>
> > > > > > >> > > > > > > > -#include <unistd.h>
> > > > > > >> > > > > > > > -#include <sched.h>
> > > > > > >> > > > > > > > -#include <pthread_np.h>
> > > > > > >> > > > > > > > -#include <sys/queue.h>
> > > > > > >> > > > > > > >  #include <sys/thr.h>
> > > > > > >> > > > > > > >
> > > > > > >> > > > > > > > -#include <rte_debug.h>
> > > > > > >> > > > > > > > -#include <rte_atomic.h>
> > > > > > >> > > > > > > > -#include <rte_launch.h>
> > > > > > >> > > > > > > > -#include <rte_log.h>
> > > > > > >> > > > > > > > -#include <rte_memory.h>
> > > > > > >> > > > > > > > -#include <rte_memzone.h>
> > > > > > >> > > > > > > > -#include <rte_per_lcore.h>
> > > > > > >> > > > > > > > -#include <rte_eal.h>
> > > > > > >> > > > > > > > -#include <rte_per_lcore.h>
> > > > > > >> > > > > > > > -#include <rte_lcore.h>
> > > > > > >> > > > > > > > -
> > > > > > >> > > > > > > >  #include "eal_private.h"
> > > > > > >> > > > > > > >  #include "eal_thread.h"
> > > > > > >> > > > > > > >
> > > > > > >> > > > > > > > -RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) =
> > > > LCORE_ID_ANY;
> > > > > > >> > > > > > > NAK, these are exported symbols, you can't remove
> > them
> > > > without
> > > > > > >> > > going
> > > > > > >> > > > > > > through the
> > > > > > >> > > > > > > deprecation process.
> > > > > > >> > > > > > >
> > > > > > >> > > > > > >
> > > > > > >> > > > > > They are not removed/deleted, they are moved from
> > > > eal_thread.c
> > > > > > >> to
> > > > > > >> > > > > > eal_common_thread.c file since it is common to both
> > Linux
> > > > and
> > > > > > >> BSD.
> > > > > > >> > > > > >
> > > > > > >> > > > > Then perhaps you forgot to export the symbol?  Its
> > showing
> > > > up as
> > > > > > >> > > removed
> > > > > > >> > > > > on the
> > > > > > >> > > > > ABI checker utility.
> > > > > > >> > > > >
> > > > > > >> > > > > Neil
> > > > > > >> > > > >
> > > > > > >> > > >
> > > > > > >> > > > Can you please show me in the current code where it is
> > being
> > > > > > >> exported? I
> > > > > > >> > > > have only moved definitions to _common_ files, not sure
> > why it
> > > > > > >> should be
> > > > > > >> > > > exported now.  I searched in the current code for
> > > > > > >> RTE_DEFINE_PER_LCORE
> > > > > > >> > > >
> > > > > > >> > > > #home/rkerur/dpdk-tmp/dpdk# grep -ir RTE_DEFINE_PER_LCORE
> > *
> > > > > > >> > > > app/test/test_per_lcore.c:static
> > > > RTE_DEFINE_PER_LCORE(unsigned,
> > > > > > >> test) =
> > > > > > >> > > > 0x12345678;
> > > > > > >> > > >
> > > > > > >>
> > > > lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > > > >> > > > _lcore_id) = LCORE_ID_ANY;
> > > > > > >> > > >
> > > > > > >>
> > > > lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > > > >> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > > > > > >> > > >
> > > > > > >> > >
> > > > > > >>
> > > >
> > lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > > > > > >> > > > _cpuset);
> > > > > > >> > > >
> > > > > > >>
> > > > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > > > >> > > > _lcore_id) = LCORE_ID_ANY;
> > > > > > >> > > >
> > > > > > >>
> > > > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > > > >> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > > > > > >> > > >
> > > > > > >>
> > > >
> > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > > > > > >> > > > _cpuset);
> > > > > > >> > > > lib/librte_eal/common/include/rte_per_lcore.h:#define
> > > > > > >> > > > RTE_DEFINE_PER_LCORE(type, name)            \
> > > > > > >> > > > lib/librte_eal/common/include/rte_eal.h:    static
> > > > > > >> > > > RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
> > > > > > >> > > >
> > > > lib/librte_eal/common/eal_common_errno.c:RTE_DEFINE_PER_LCORE(int,
> > > > > > >> > > > _rte_errno);
> > > > > > >> > > > lib/librte_eal/common/eal_common_errno.c:    static
> > > > > > >> > > > RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
> > > > > > >> > > >
> > > > > > >> > > >
> > > > > > >> > > > > > Thanks
> > > > > > >> > > > > > Ravi
> > > > > > >> > > > > >
> > > > > > >> > > > > > Regards
> > > > > > >> > > > > > > Neil
> > > > > > >> > > > > > >
> > > > > > >> > > > > > >
> > > > > > >> > > > >
> > > > > > >> > > Its exported in the version map file:
> > > > > > >> > >  per_lcore__lcore_id;
> > > > > > >> > >
> > > > > > >> > >
> > > > > > >> > Thanks Neil, I checked and both linux and bsd
> > rte_eal_version.map
> > > > have
> > > > > > >> it.
> > > > > > >> > I compared .map file between "changed code" and the original,
> > > > they are
> > > > > > >> same
> > > > > > >> > for both linux and bsd. In fact you had ACK'd v4 version of
> > this
> > > > patch
> > > > > > >> > series and no major changes after that. Please let me know if
> > I
> > > > missed
> > > > > > >> > something.
> > > > > > >> >
> > > > > > >> I did, and I'm retracting that, because I didn't think to check
> > the
> > > > ABI
> > > > > > >> compatibility on this.  But I ran it through the ABI checking
> > > > script
> > > > > > >> this and
> > > > > > >> this error popped out.  You should run it as well, its in the
> > > > scripts
> > > > > > >> directory.
> > > > > > >>
> > > > > > >>
> > > > > > >> I see in your first patch you removed it and re-added it in the
> > > > common
> > > > > > >> section.
> > > > > > >> But something about how its building is causing it to not show
> > up
> > > > as an
> > > > > > >> exported
> > > > > > >> symbol, which is problematic, as other applications are going to
> > > > want
> > > > > > >> access to
> > > > > > >> it.
> > > > > > >>
> > > > > > >> It also possible that the ABI checker is throwing a false
> > positive,
> > > > but
> > > > > > >> either
> > > > > > >> way, it needs to be looked into prior to moving forward with
> > this.
> > > > > > >>
> > > > > > >>
> > > > > > > I did following things.
> > > > > > >
> > > > > > > Put a tag (v2.0.0-before-common-eal)  before EAL common functions
> > > > changes
> > > > > > > for commit (3c0c807038ad642f4be7deb9370293c39d12f029 net: remove
> > > > unneeded
> > > > > > > include)
> > > > > > >
> > > > > > > Put a tag (v2.0.0-common-eal) after EAL common functions changes
> > for
> > > > > > > commit (25737e5a7212630a7b5d8ca756860a062f403789 Move common
> > > > functions in
> > > > > > > eal_pci.c)
> > > > > > >
> > > > > > > Ran validate-abi against x86_64-native-linuxapp-gcc and
> > > > > > >
> > > > > > > v2.0.0-rc3 and v2.0.0-before-common-eal, html report for
> > > > librte_eal.so
> > > > > > > shows removed symbols for "per_lcore__cpuset"
> > > > > > >
> > > > > > > v2.0.0-rc3 and v2.0.0-common-eal, html report for librte_eal.so
> > shows
> > > > > > > removed symbols for "per_lcore__cpuset"
> > > > > > >
> > > > > > > Removed symbol is different from what you have reported and in my
> > > > case I
> > > > > > > see it even before my commit. If you are interested I can unicast
> > > > you html
> > > > > > > report file. Please let me know how to proceed.
> > > > > > >
> > > > > > >
> > > > > >
> > > > > > I did some experiment and found some interesting things.  I will
> > take
> > > > eal.c
> > > > > > as an example
> > > > > >
> > > > > > eal.c is split into eal_common_sysfs.c eal_common_mem_cfg.c
> > > > > > eal_common_proc_type.c and eal_common_app_usage.c. In
> > > > linuxapp/eal/Makefile
> > > > > > if I compile new files right after eal.c as shown below
> > > > > >
> > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) := eal.c
> > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_sysfs.c
> > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_mem_cfg.c
> > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_proc_type.c
> > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_app_usage.c
> > > > > > ...
> > > > > >
> > > > > > validate-abi results matches baseline. Instead if i place new
> > _common_
> > > > > > files in common area in linuxapp/eal/Makefile as shown below
> > > > > >
> > > > > > # from common dir
> > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
> > > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_log.c
> > > > > > ...
> > > > > >
> > > > > > validate-abi reports problem in binary compatibility and source
> > > > > > compatibility
> > > > > >
> > > > > > eal_filesystem.h, librte_eal.so.1
> > > > > >  [+] eal_parse_sysfs_value ( char const* filename, unsigned long*
> > val )
> > > > > >  @@ DPDK_2.0 (2)
> > > > > >
> > > > > > I believe files in common and linuxapp directory are compiled same
> > way
> > > > so
> > > > > > not sure why placement in makefile makes difference.
> > > > > >
> > > > > > Could this be false-positive from validate-abi script??
> > > > > >
> > > > > It could be, yes.  Though I'm more inclined to think that perhaps in
> > the
> > > > new
> > > > > version of the code we're not generating the same dwarf information
> > out
> > > > of it.
> > > > > In fact for some reason, I've checked both the build before and after
> > > > your
> > > > > patch series, and the exported CFLAGS aren't getting passed to the
> > build
> > > > > properly, implying that we're not building all the code in the
> > validator
> > > > with
> > > > > the -g flag, which the validator need to function properly.  I'm
> > looking
> > > > into
> > > > > that
> > > > > Neil
> > > > >
> > > > >
> > > > Found the problem, I was stupidly reading the report incorrectly.  The
> > > > problem
> > > > regarding _lcore_id is a source compatibility issue (because the symbol
> > > > moved to
> > > > a new location), which is irrelevant to us.  It's not in any way a
> > binary
> > > > compat
> > > > problem, which is what we care about.  Sorry for the noise.
> > > >
> > > > I do still have a few concerns about some changed calling conventions
> > with
> > > > a few
> > > > other functions, which I'll look into on monday.
> > > >
> > > >
> > > Please let me know your inputs on changed calling conventions. Most of
> > them
> > > can be fixed by re-arranging moved code in _common_ files and order of
> > > compilation.
> > >
> > If moving the order of compilation around fixes the problem, then I am
> > reasonably convinced that it is, if not a false positive, a minor issue
> > with the
> > compiler's DWARF information (The compiler just can't sanely change the
> > location
> > in which parameters are passed).  If you make those changes, I'll ACK
> > them, and
> > look into what's going on with the calling conventions
> >
> 
> Issues like the one shown below are taken care of by reordering the code
> compilation.
> 
> eal_parse_sysfs_value ( char const* filename, unsigned long* val )
> 
> Change
> The parameter filename became passed on stack instead of rdi register
> 
> Effect
> Violation of the calling convention. This may result in crash or incorrect
> behavior of applications.
> 
> Last one that is left out is in
> 
> rte_thread_set_affinity ( rte_cpuset_t* p1 )
> 
> Change
> The parameter *p1* became passed in *rdi* register instead of stack.
> 
> Effect
> Violation of the calling convention. This may result in crash or incorrect
> behavior of applications.
> 
> After checking abi-0.99.pdf (x86-64.org) looks like for
> "rte_thread_set_affinity" new code is doing the right thing by passing the
> parameter in "rdi" register since pointer is classified as "integer_class".
> Nothing needs to be fixed here. After you confirm that warning can be
> ignored I will work on sending new revision.
> 
ACK then, send the new revision, this appears to be a false positive.

Thanks for taking the time to confirm.

Best
Neil

> Thanks,
> Ravi
> 
> 
> > Thanks!
> > Neil
> >
> > > Thanks,
> > > Ravi
> > >
> > > Regards
> > > > Neil
> > > >
> > > >
> >

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  2015-04-27 13:44  0%                           ` Neil Horman
@ 2015-04-27 22:39  3%                             ` Ravi Kerur
  2015-04-28 19:35  0%                               ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Ravi Kerur @ 2015-04-27 22:39 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On Mon, Apr 27, 2015 at 6:44 AM, Neil Horman <nhorman@tuxdriver.com> wrote:

> On Sat, Apr 25, 2015 at 05:09:01PM -0700, Ravi Kerur wrote:
> > On Sat, Apr 25, 2015 at 6:02 AM, Neil Horman <nhorman@tuxdriver.com>
> wrote:
> >
> > > On Sat, Apr 25, 2015 at 08:32:42AM -0400, Neil Horman wrote:
> > > > On Fri, Apr 24, 2015 at 06:45:06PM -0700, Ravi Kerur wrote:
> > > > > On Fri, Apr 24, 2015 at 2:24 PM, Ravi Kerur <rkerur@gmail.com>
> wrote:
> > > > >
> > > > > >
> > > > > >
> > > > > > On Fri, Apr 24, 2015 at 12:51 PM, Neil Horman <
> nhorman@tuxdriver.com
> > > >
> > > > > > wrote:
> > > > > >
> > > > > >> On Fri, Apr 24, 2015 at 12:21:23PM -0700, Ravi Kerur wrote:
> > > > > >> > On Fri, Apr 24, 2015 at 11:53 AM, Neil Horman <
> > > nhorman@tuxdriver.com>
> > > > > >> wrote:
> > > > > >> >
> > > > > >> > > On Fri, Apr 24, 2015 at 09:45:24AM -0700, Ravi Kerur wrote:
> > > > > >> > > > On Fri, Apr 24, 2015 at 8:22 AM, Neil Horman <
> > > nhorman@tuxdriver.com
> > > > > >> >
> > > > > >> > > wrote:
> > > > > >> > > >
> > > > > >> > > > > On Fri, Apr 24, 2015 at 08:14:04AM -0700, Ravi Kerur
> wrote:
> > > > > >> > > > > > On Fri, Apr 24, 2015 at 6:51 AM, Neil Horman <
> > > > > >> nhorman@tuxdriver.com>
> > > > > >> > > > > wrote:
> > > > > >> > > > > >
> > > > > >> > > > > > > On Thu, Apr 23, 2015 at 02:35:31PM -0700, Ravi Kerur
> > > wrote:
> > > > > >> > > > > > > > Changes in v7
> > > > > >> > > > > > > > Remove _setname_ pthread calls.
> > > > > >> > > > > > > > Use rte_gettid() API in RTE_LOG to print
> thread_id.
> > > > > >> > > > > > > >
> > > > > >> > > > > > > > Changes in v6
> > > > > >> > > > > > > > Remove RTE_EXEC_ENV_BSDAPP from
> eal_common_thread.c
> > > file.
> > > > > >> > > > > > > > Add pthread_setname_np/pthread_set_name_np for
> > > Linux/FreeBSD
> > > > > >> > > > > > > > respectively. Plan to use _getname_ in RTE_LOG
> when
> > > > > >> available.
> > > > > >> > > > > > > > Use existing rte_get_systid() in RTE_LOG to print
> > > thread_id.
> > > > > >> > > > > > > >
> > > > > >> > > > > > > > Changes in v5
> > > > > >> > > > > > > > Rebase to latest code.
> > > > > >> > > > > > > >
> > > > > >> > > > > > > > Changes in v4
> > > > > >> > > > > > > > None
> > > > > >> > > > > > > >
> > > > > >> > > > > > > > Changes in v3
> > > > > >> > > > > > > > Changed subject to be more explicit on file name
> > > inclusion.
> > > > > >> > > > > > > >
> > > > > >> > > > > > > > Changes in v2
> > > > > >> > > > > > > > None
> > > > > >> > > > > > > >
> > > > > >> > > > > > > > Changes in v1
> > > > > >> > > > > > > > eal_thread.c has minor differences between Linux
> and
> > > BSD,
> > > > > >> move
> > > > > >> > > > > > > > entire file into common directory.
> > > > > >> > > > > > > > Use RTE_EXEC_ENV_BSDAPP to differentiate on minor
> > > > > >> differences.
> > > > > >> > > > > > > > Rename eal_thread.c to eal_common_thread.c
> > > > > >> > > > > > > > Makefile changes to reflect file move and name
> change.
> > > > > >> > > > > > > > Fix checkpatch warnings.
> > > > > >> > > > > > > >
> > > > > >> > > > > > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > > > >> > > > > > > > ---
> > > > > >> > > > > > > >  lib/librte_eal/bsdapp/eal/Makefile        |   2
> +-
> > > > > >> > > > > > > >  lib/librte_eal/bsdapp/eal/eal_thread.c    | 152
> > > > > >> > > > > > > ------------------------------
> > > > > >> > > > > > > >  lib/librte_eal/common/eal_common_thread.c | 147
> > > > > >> > > > > > > ++++++++++++++++++++++++++++-
> > > > > >> > > > > > > >  lib/librte_eal/linuxapp/eal/eal_thread.c  | 152
> > > > > >> > > > > > > +-----------------------------
> > > > > >> > > > > > > >  4 files changed, 148 insertions(+), 305
> deletions(-)
> > > > > >> > > > > > > >
> > > > > >> > > > > > > > diff --git a/lib/librte_eal/bsdapp/eal/Makefile
> > > > > >> > > > > > > b/lib/librte_eal/bsdapp/eal/Makefile
> > > > > >> > > > > > > > index 2357cfa..55971b9 100644
> > > > > >> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/Makefile
> > > > > >> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/Makefile
> > > > > >> > > > > > > > @@ -87,7 +87,7 @@ CFLAGS_eal_common_log.o :=
> > > -D_GNU_SOURCE
> > > > > >> > > > > > > >  # workaround for a gcc bug with noreturn
> attribute
> > > > > >> > > > > > > >  #
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
> > > > > >> > > > > > > >  ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
> > > > > >> > > > > > > > -CFLAGS_eal_thread.o += -Wno-return-type
> > > > > >> > > > > > > > +CFLAGS_eal_common_thread.o += -Wno-return-type
> > > > > >> > > > > > > >  CFLAGS_eal_hpet.o += -Wno-return-type
> > > > > >> > > > > > > >  endif
> > > > > >> > > > > > > >
> > > > > >> > > > > > > > diff --git
> a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > >> > > > > > > b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > >> > > > > > > > index 9a03437..5714b8f 100644
> > > > > >> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > >> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > >> > > > > > > > @@ -35,163 +35,11 @@
> > > > > >> > > > > > > >  #include <stdio.h>
> > > > > >> > > > > > > >  #include <stdlib.h>
> > > > > >> > > > > > > >  #include <stdint.h>
> > > > > >> > > > > > > > -#include <unistd.h>
> > > > > >> > > > > > > > -#include <sched.h>
> > > > > >> > > > > > > > -#include <pthread_np.h>
> > > > > >> > > > > > > > -#include <sys/queue.h>
> > > > > >> > > > > > > >  #include <sys/thr.h>
> > > > > >> > > > > > > >
> > > > > >> > > > > > > > -#include <rte_debug.h>
> > > > > >> > > > > > > > -#include <rte_atomic.h>
> > > > > >> > > > > > > > -#include <rte_launch.h>
> > > > > >> > > > > > > > -#include <rte_log.h>
> > > > > >> > > > > > > > -#include <rte_memory.h>
> > > > > >> > > > > > > > -#include <rte_memzone.h>
> > > > > >> > > > > > > > -#include <rte_per_lcore.h>
> > > > > >> > > > > > > > -#include <rte_eal.h>
> > > > > >> > > > > > > > -#include <rte_per_lcore.h>
> > > > > >> > > > > > > > -#include <rte_lcore.h>
> > > > > >> > > > > > > > -
> > > > > >> > > > > > > >  #include "eal_private.h"
> > > > > >> > > > > > > >  #include "eal_thread.h"
> > > > > >> > > > > > > >
> > > > > >> > > > > > > > -RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) =
> > > LCORE_ID_ANY;
> > > > > >> > > > > > > NAK, these are exported symbols, you can't remove
> them
> > > without
> > > > > >> > > going
> > > > > >> > > > > > > through the
> > > > > >> > > > > > > deprecation process.
> > > > > >> > > > > > >
> > > > > >> > > > > > >
> > > > > >> > > > > > They are not removed/deleted, they are moved from
> > > eal_thread.c
> > > > > >> to
> > > > > >> > > > > > eal_common_thread.c file since it is common to both
> Linux
> > > and
> > > > > >> BSD.
> > > > > >> > > > > >
> > > > > >> > > > > Then perhaps you forgot to export the symbol?  Its
> showing
> > > up as
> > > > > >> > > removed
> > > > > >> > > > > on the
> > > > > >> > > > > ABI checker utility.
> > > > > >> > > > >
> > > > > >> > > > > Neil
> > > > > >> > > > >
> > > > > >> > > >
> > > > > >> > > > Can you please show me in the current code where it is
> being
> > > > > >> exported? I
> > > > > >> > > > have only moved definitions to _common_ files, not sure
> why it
> > > > > >> should be
> > > > > >> > > > exported now.  I searched in the current code for
> > > > > >> RTE_DEFINE_PER_LCORE
> > > > > >> > > >
> > > > > >> > > > #home/rkerur/dpdk-tmp/dpdk# grep -ir RTE_DEFINE_PER_LCORE
> *
> > > > > >> > > > app/test/test_per_lcore.c:static
> > > RTE_DEFINE_PER_LCORE(unsigned,
> > > > > >> test) =
> > > > > >> > > > 0x12345678;
> > > > > >> > > >
> > > > > >>
> > > lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > > >> > > > _lcore_id) = LCORE_ID_ANY;
> > > > > >> > > >
> > > > > >>
> > > lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > > >> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > > > > >> > > >
> > > > > >> > >
> > > > > >>
> > >
> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > > > > >> > > > _cpuset);
> > > > > >> > > >
> > > > > >>
> > > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > > >> > > > _lcore_id) = LCORE_ID_ANY;
> > > > > >> > > >
> > > > > >>
> > > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > > >> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > > > > >> > > >
> > > > > >>
> > >
> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > > > > >> > > > _cpuset);
> > > > > >> > > > lib/librte_eal/common/include/rte_per_lcore.h:#define
> > > > > >> > > > RTE_DEFINE_PER_LCORE(type, name)            \
> > > > > >> > > > lib/librte_eal/common/include/rte_eal.h:    static
> > > > > >> > > > RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
> > > > > >> > > >
> > > lib/librte_eal/common/eal_common_errno.c:RTE_DEFINE_PER_LCORE(int,
> > > > > >> > > > _rte_errno);
> > > > > >> > > > lib/librte_eal/common/eal_common_errno.c:    static
> > > > > >> > > > RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
> > > > > >> > > >
> > > > > >> > > >
> > > > > >> > > > > > Thanks
> > > > > >> > > > > > Ravi
> > > > > >> > > > > >
> > > > > >> > > > > > Regards
> > > > > >> > > > > > > Neil
> > > > > >> > > > > > >
> > > > > >> > > > > > >
> > > > > >> > > > >
> > > > > >> > > Its exported in the version map file:
> > > > > >> > >  per_lcore__lcore_id;
> > > > > >> > >
> > > > > >> > >
> > > > > >> > Thanks Neil, I checked and both linux and bsd
> rte_eal_version.map
> > > have
> > > > > >> it.
> > > > > >> > I compared .map file between "changed code" and the original,
> > > they are
> > > > > >> same
> > > > > >> > for both linux and bsd. In fact you had ACK'd v4 version of
> this
> > > patch
> > > > > >> > series and no major changes after that. Please let me know if
> I
> > > missed
> > > > > >> > something.
> > > > > >> >
> > > > > >> I did, and I'm retracting that, because I didn't think to check
> the
> > > ABI
> > > > > >> compatibility on this.  But I ran it through the ABI checking
> > > script
> > > > > >> this and
> > > > > >> this error popped out.  You should run it as well, its in the
> > > scripts
> > > > > >> directory.
> > > > > >>
> > > > > >>
> > > > > >> I see in your first patch you removed it and re-added it in the
> > > common
> > > > > >> section.
> > > > > >> But something about how its building is causing it to not show
> up
> > > as an
> > > > > >> exported
> > > > > >> symbol, which is problematic, as other applications are going to
> > > want
> > > > > >> access to
> > > > > >> it.
> > > > > >>
> > > > > >> It also possible that the ABI checker is throwing a false
> positive,
> > > but
> > > > > >> either
> > > > > >> way, it needs to be looked into prior to moving forward with
> this.
> > > > > >>
> > > > > >>
> > > > > > I did following things.
> > > > > >
> > > > > > Put a tag (v2.0.0-before-common-eal)  before EAL common functions
> > > changes
> > > > > > for commit (3c0c807038ad642f4be7deb9370293c39d12f029 net: remove
> > > unneeded
> > > > > > include)
> > > > > >
> > > > > > Put a tag (v2.0.0-common-eal) after EAL common functions changes
> for
> > > > > > commit (25737e5a7212630a7b5d8ca756860a062f403789 Move common
> > > functions in
> > > > > > eal_pci.c)
> > > > > >
> > > > > > Ran validate-abi against x86_64-native-linuxapp-gcc and
> > > > > >
> > > > > > v2.0.0-rc3 and v2.0.0-before-common-eal, html report for
> > > librte_eal.so
> > > > > > shows removed symbols for "per_lcore__cpuset"
> > > > > >
> > > > > > v2.0.0-rc3 and v2.0.0-common-eal, html report for librte_eal.so
> shows
> > > > > > removed symbols for "per_lcore__cpuset"
> > > > > >
> > > > > > Removed symbol is different from what you have reported and in my
> > > case I
> > > > > > see it even before my commit. If you are interested I can unicast
> > > you html
> > > > > > report file. Please let me know how to proceed.
> > > > > >
> > > > > >
> > > > >
> > > > > I did some experiment and found some interesting things.  I will
> take
> > > eal.c
> > > > > as an example
> > > > >
> > > > > eal.c is split into eal_common_sysfs.c eal_common_mem_cfg.c
> > > > > eal_common_proc_type.c and eal_common_app_usage.c. In
> > > linuxapp/eal/Makefile
> > > > > if I compile new files right after eal.c as shown below
> > > > >
> > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) := eal.c
> > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_sysfs.c
> > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_mem_cfg.c
> > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_proc_type.c
> > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_app_usage.c
> > > > > ...
> > > > >
> > > > > validate-abi results matches baseline. Instead if i place new
> _common_
> > > > > files in common area in linuxapp/eal/Makefile as shown below
> > > > >
> > > > > # from common dir
> > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
> > > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_log.c
> > > > > ...
> > > > >
> > > > > validate-abi reports problem in binary compatibility and source
> > > > > compatibility
> > > > >
> > > > > eal_filesystem.h, librte_eal.so.1
> > > > >  [+] eal_parse_sysfs_value ( char const* filename, unsigned long*
> val )
> > > > >  @@ DPDK_2.0 (2)
> > > > >
> > > > > I believe files in common and linuxapp directory are compiled same
> way
> > > so
> > > > > not sure why placement in makefile makes difference.
> > > > >
> > > > > Could this be false-positive from validate-abi script??
> > > > >
> > > > It could be, yes.  Though I'm more inclined to think that perhaps in
> the
> > > new
> > > > version of the code we're not generating the same dwarf information
> out
> > > of it.
> > > > In fact for some reason, I've checked both the build before and after
> > > your
> > > > patch series, and the exported CFLAGS aren't getting passed to the
> build
> > > > properly, implying that we're not building all the code in the
> validator
> > > with
> > > > the -g flag, which the validator need to function properly.  I'm
> looking
> > > into
> > > > that
> > > > Neil
> > > >
> > > >
> > > Found the problem, I was stupidly reading the report incorrectly.  The
> > > problem
> > > regarding _lcore_id is a source compatibility issue (because the symbol
> > > moved to
> > > a new location), which is irrelevant to us.  It's not in any way a
> binary
> > > compat
> > > problem, which is what we care about.  Sorry for the noise.
> > >
> > > I do still have a few concerns about some changed calling conventions
> with
> > > a few
> > > other functions, which I'll look into on monday.
> > >
> > >
> > Please let me know your inputs on changed calling conventions. Most of
> them
> > can be fixed by re-arranging moved code in _common_ files and order of
> > compilation.
> >
> If moving the order of compilation around fixes the problem, then I am
> reasonably convinced that it is, if not a false positive, a minor issue
> with the
> compiler's DWARF information (The compiler just can't sanely change the
> location
> in which parameters are passed).  If you make those changes, I'll ACK
> them, and
> look into what's going on with the calling conventions
>

Issues like the one shown below are taken care of by reordering the code
compilation.

eal_parse_sysfs_value ( char const* filename, unsigned long* val )

Change
The parameter filename became passed on stack instead of rdi register

Effect
Violation of the calling convention. This may result in crash or incorrect
behavior of applications.

Last one that is left out is in

rte_thread_set_affinity ( rte_cpuset_t* p1 )

Change
The parameter *p1* became passed in *rdi* register instead of stack.

Effect
Violation of the calling convention. This may result in crash or incorrect
behavior of applications.

After checking abi-0.99.pdf (x86-64.org), it looks like for
"rte_thread_set_affinity" the new code is doing the right thing by passing the
parameter in the "rdi" register, since a pointer is classified as "integer_class".
Nothing needs to be fixed here. After you confirm that the warning can be
ignored, I will work on sending a new revision.

Thanks,
Ravi


> Thanks!
> Neil
>
> > Thanks,
> > Ravi
> >
> > Regards
> > > Neil
> > >
> > >
>

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [RFC PATCH] ethdev: remove old flow director API
  2015-04-20 16:45  0%   ` Venky Venkatesan
@ 2015-04-27 16:08  0%     ` Thomas Monjalon
  0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2015-04-27 16:08 UTC (permalink / raw)
  To: Venky Venkatesan, Neil Horman; +Cc: dev

2015-04-20 09:45, Venky Venkatesan:
> On 04/20/2015 09:33 AM, Neil Horman wrote:
> > On Mon, Apr 20, 2015 at 04:11:43PM +0200, Thomas Monjalon wrote:
> >> It's time to remove this old API.
> >> It seems some work is still needed to rely only on eth_ctrl API.
> >> At least ixgbe, i40e and testpmd must be fixed.
> >> Jingjing, do you think it's possible to remove all these structures
> >> from rte_ethdev.h?
> >>
> >> Thanks
> >>
> > NAK.
> >
> > I'm certainly not opposed to removing the API's if they are truly no longer
> > needed.  But they have been codified as part of the ABI, so the deprecation
> > schedule needs to be followed.  Given what you've said above, it seems like that
> > might be worthwhile anyway, as it will provide the needed runway to allow users
> > to convert to the new API.
> >
> > Neil
> +1 NAK. Agree with Neil.

+1 Agree with you :)

The goal of this RFC proposal is to see how to progress on API cleanup.
There are actually 2 parts:
1/ The flow director functions of rte_ethdev.h were only used for enic in
DPDK 2.0. We could set a deprecation notice to remove them in DPDK 2.2.
2/ Some associated structures are also used for rte_eth_conf.
My question was to check how it would be relevant to remove this rte_fdir_conf.

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] Beyond DPDK 2.0
  2015-04-27 13:39  0%                 ` Wiles, Keith
@ 2015-04-27 15:34  0%                   ` Marc Sune
  0 siblings, 0 replies; 200+ results
From: Marc Sune @ 2015-04-27 15:34 UTC (permalink / raw)
  To: Wiles, Keith, Neil Horman; +Cc: dev



On 27/04/15 15:39, Wiles, Keith wrote:
>
> On 4/27/15, 4:52 AM, "Marc Sune" <marc.sune@bisdn.de> wrote:
>
>>
>> On 27/04/15 03:41, Wiles, Keith wrote:
>>> On 4/26/15, 4:56 PM, "Neil Horman" <nhorman@tuxdriver.com> wrote:
>>>
>>>> On Sat, Apr 25, 2015 at 04:08:23PM +0000, Wiles, Keith wrote:
>>>>> On 4/25/15, 8:30 AM, "Marc Sune" <marc.sune@bisdn.de> wrote:
>>>>>
>>>>>> On 24/04/15 19:51, Matthew Hall wrote:
>>>>>>> On Fri, Apr 24, 2015 at 12:39:47PM -0500, Jay Rolette wrote:
>>>>>>>> I can tell you that if DPDK were GPL-based, my company wouldn't be
>>>>>>>> using
>>>>>>>> it. I suspect we wouldn't be the only ones...
>>>>>>>>
>>>>>>>> Jay
>>>>>>> I could second this, from the past employer where I used it. Right
>>>>> now
>>>>>>> I am
>>>>>>> using it in an open source app, I have a bit of GPL here and there
>>>>> but
>>>>>>> I'm
>>>>>>> trying to get rid of it or confine it to separate address spaces,
>>>>> where
>>>>>>> it
>>>>>>> won't impact the core code written around DPDK, as I don't want to
>>>>> cause
>>>>>>> headaches for any downstream users I attract someday.
>>>>>>>
>>>>>>> Hard-core GPL would not be possible for most. LGPL could be
>>>>>>> possible,
>>>>>>> but I
>>>>>>> don't think it could be worth the relicensing headache for that
>>>>>>> small
>>>>>>> change.
>>>>>>>
>>>>>>> Instead we should make the patch process as easy as humanly possible
>>>>> so
>>>>>>> people
>>>>>>> are encouraged to send us the fixes and not cart them around their
>>>>>>> companies
>>>>>>> constantly.
>>>>> +1 and besides the GPL or LGPL ship has sailed IMHO and we can not go
>>>>> back.
>>>> Actually, IANAL, but I think we can.  The BSD license allows us to fork
>>>> and
>>>> relicense the code I think, under GPL or any other license.  I'm not
>>>> advocating
>>>> for that mind you, just suggesting that its possible should it ever
>>>> become
>>>> needed.
>>>>
>>>>>> I agree. My feeling is that as the number of patches in the mailing
>>>>> list
>>>>>> grows, keeping track of them gets more and more complicated.
>>>>>> Patchwork
>>>>>> website was a way to try to address this issue. I think it was an
>>>>>> improvement, but to be honest, patchwork lacks a lot of
>>>>>> functionality,
>>>>>> such as properly tracking multiple versions of the patch (superseding
>>>>>> them automatically), and it lacks some filtering capabilities e.g.
>>>>>> per
>>>>>> user, per tag/label or library, automatically track if it has been
>>>>>> merged, give an overall status of the pending vs merged patches, set
>>>>>> milestones... Is there any alternative tool or improved version for
>>>>> that?
>>>>>
>>>> Agreed, this has come up before, off list unfortunately.  The volume of
>>>> patches
>>>> seems to be increasing at such a rate that a single maintainer has
>>>> difficulty
>>>> keeping up.  I proposed that the workload be split out to multiple
>>>> subtrees,
>>>> with prefixes being added to patch subjects on the list for local
>>>> filtering to
>>>> stem the tide.  Specifically I had proposed that the PMD's be split
>>>> into a
>>>> separate subtree, but that received pushback in favor of having each
>>>> library
>>>> having its own separate subtree, with a pilot program being made out of
>>>> the I40e
>>>> driver (which you might note sends pull requests to the list now).  I'd
>>>> still
>>>> like to see all PMD's come under a single subtree, but thats likely an
>>>> argument
>>>> for later.
>>>>
>>>> That said, Do you think that this patch latency is really a contributor
>>>> to low
>>>> project participation?  It definately a problem, but it seems to me
>>>> that
>>>> this
>>>> sort of issue would lead to people trying to parcitipate, then giving
>>>> up
>>>> (i.e.
>>>> we would see 1-2 emails from an individual, then not see them again).
>>>> I'd need
>>>> to look through the mailing list for such a pattern, but anecdotally
>>>> I've
>>>> not
>>>> seen that happen.  The problem you describe above is definately a
>>>> problem, but
>>>> its one for those individuals who are participating, not for those who
>>>> are
>>>> simply choosing not to.  And I think we need to address both.
>>>>
>>>>> I agree patchwork has some limitation, but I think the biggest issue
>>>>> is
>>>>> keeping up with the patches. Getting patches introduced into the main
>>>>> line
>>>>> is very slow. A patch submitted today may not get applied for weeks or
>>>>> months, then when another person submits a patch he is starting to
>>>>> run a
>>>>> very high risk of having to redo that patch, because a pervious patch
>>>>> makes his fail weeks/months later. I would love to see a better tool
>>>>> then
>>>>> patchwork, but the biggest issue is we have a huge backlog of patches.
>>>>> Personally I am not sure how Thomas or any is able to keep up with the
>>>>> patches.
>>>>>
>>>> This is absolutely a problem.  I'd like to think, more than a tool like
>>>> patchwork, a subtree organization to allow some modicum of parallel
>>>> review and
>>>> integration would really be a benefit here.
>>> Subtrees could work, but the real problem I think is the number of
>>> committers must be higher then one. Something like GitHub (and I assume
>>> Linux Foundation) have a method to add committers to a project. In the
>>> case of GitHub they just have to have a free GitHub account and they can
>>> become committers of the project buying the owner of the project enables
>>> them.
>>>
>>> On GitHub they have personal accounts and organization accounts I know
>>> only about the personal accounts, but they allow for 5 private repos and
>>> any number of public repos. The organization account has a lot of extra
>>> features that seem better for a DPDK community IMO and should be the one
>>> we use if we decide it is the right direction. We can always give it a
>>> shot for while and keep the dpdk.org and use dev@dpdk.org and its repo
>>> mirrored from GitHub as a transition phase. This way we can fall back to
>>> dpdk.org or move one to something else if we like.
>>>
>>> https://help.github.com/categories/organizations/
>>>
>>> The developers could still send patches via email list, but creating a
>>> repo and forking dpdk is easy, then send a pull request.
>> For the github "community" or free service, organization accounts just
>> allow you to set teams, where each time can be assigned to one or more
>> repositories. The differences are summarized here:
>>
>> https://help.github.com/articles/what-s-the-difference-between-user-and-organization-accounts/
>>
>> And the permission schema, per team, is summarized here:
>>
>> https://help.github.com/articles/permission-levels-for-an-organization-repository/
>>
>> Some limitations: i) only if the team has write permissions (IOW push
>> permissions) you can manage issues ii) there cannot be per-branch ACLs.
> I was assuming the organization GitHub is just to allow more then one
> admin/maintainers along with teams if needed. I would assume the repos are
> still public and others are allowed to fork or pull the repos. I think of
> the org version is just extra controls on top of a personal repo like
> design. The org/personal one should appear to the
> non-maintainers/admins/owner as a normal repo on GitHub, correct?

Right

>
> The GitHub organization is built for open-source and you can still have
> private repos, but then you start to have a cost depending on the number
> of private repos you want. If you do not have a lot of private repos then
> you should have no cost (I think). I do not see any reason for private
> repos, but I guest we could have some and we get 5 free and 10 is $25 per
> month.

I don't see the reason either, and I don't know why private repos would 
be useful here.

>>>
>>>>> The other problem I see is how patches are agreed on to be included in
>>>>> the
>>>>> mainline. Today it is just an ACK or a NAK on the mailing list. Then I
>>>>> see
>>>>> what I think to be only a few people ACKing or NAKing patches. This
>>>>> process has a lot of problems from a patch being ignore for some
>>>>> reason
>>>>> or
>>>>> someone having negative feed back on very minor detail or no way to
>>>>> push a
>>>>> patch forward a single NAK or comment.
>>>>>
>>>> So, this is an interesting issue in ideal meritocracies.  Currently
>>>> is/should be
>>>> looking for ACKs/NAK/s from the individuals listed in the MAINTAINER
>>>> files, and
>>>> those people should be the definitive subject matter experts on the
>>>> code
>>>> they
>>>> cover.  As such, I would agrue that they should be entitled to a
>>>> modicum
>>>> of
>>>> stylistic/trivial leeway.  That is to say, if they choose to block a
>>>> patch
>>>> around a very minor detail, then between them changing their position,
>>>> and the
>>>> patch author changing the code, the latter is likely the easier course
>>>> of
>>>> action, especially if the author can't make an argument for their
>>>> position.
>>>> That said, if such patch blockage becomes so egregious that individuals
>>>> stop
>>>> contributing, that needs to be known as well.  If you as a patch
>>>> author:
>>>>
>>>> 1) Have tried to submit patches
>>>> 2) Had them blocked for what you consider trivial reasons
>>>> 3) Plan to not contribute further because of this
>>>> 4) Still rely on the DPDK for your product
>>>>
>>>> Please, say something.  People in charge need to know when they're
>>>> pushing
>>>> contributors away.
>>>>
>>>> FWIW, I've tried to do some correlation between the git history and the
>>>> mailing
>>>> list.  I need to do more searches, but I have a feeling that early on,
>>>> the
>>>> majority of people who stopped contributing, did so because their
>>>> patches
>>>> weren't expressely blocked, but rather because they were simply
>>>> ignored.
>>>> No one
>>>> working on DPDK bothered to review those patches, and so they never got
>>>> merged.
>>>> Hopefully that problem has been addressed somewhat now.
>> I agree 100%
>>>>> I would like to see some type of layering process to allow patches to
>>>>> be
>>>>> applied in a timely manner a few weeks not months or completely
>>>>> ignored.
>>>>> Maybe some type of voting is reasonable, but we need to do something
>>>>> to
>>>>> turn around the patches in clean reasonable manner.
>>>>>
>>>>> Think we need some type of group meeting every week to look at the
>>>>> patches
>>>>> and determining which ones get applied, this gives quick feedback to
>>>>> the
>>>>> submitter as to the status of the patch.
>>>>>
>>>> I think a group meeting is going to be way too much overhead to manage
>>>> properly.
>>>> You'll get different people every week with agenda that may not line up
>>>> with
>>>> code quality, which is really what the review is meant to provide.  I
>>>> think
>>> I was only suggesting the maintainers attend the meeting. Of course they
>>> have to attend or have someone attend for them, just to get the voting
>>> done. If you do not attend then you do not get to vote or something like
>>> that is reasonable. Not that we should try and define the process here.
>>>
>>>> perhaps a better approach would be to require that that code owners
>>>> from
>>>> the
>>>> maintainer file provide and ACK/NAK on their patches within 3-4 days,
>>>> and
>>>> require a corresponding tree maintainer to apply the patch within 7 or
>>>> so.  That
>>>> would cap our patch latency.  Likewise, if a patch slips in creating a
>>>> regression, the author needs to be alerted and given a time window in
>>>> which to
>>>> fix the problem before the offending patch is reverted during the QE
>>>> cycle.
>>>>
>>>>
>>>>>> On the other side, since user questions, community discussions and
>>>>>> development happens in the same mailing list, things get really
>>>>>> complicated, specially for users seeking for help. Even though I
>>>>>> think
>>>>>> the average skills of the users of DPDK is generally higher than in
>>>>>> other software projects, if DPDK wants to attract more users, having
>>>>>> a
>>>>>> better user support is key, IMHO.
>>>>>>
>>>>>> So I would see with good eyes a separation between, at least,
>>>>>> dpdk-user
>>>>>> and dpdk-dev.
>>>> I wouldn't argue with this separation, seems like a reasonable
>>>> approach.
>>>>
>>>>> I do not remember seeing too many users on the list and making a list
>>>>> just
>>>>> for then is OK if everyone is fine with a list that has very few
>>>>> emails.
>>>>>> If the number of patches keeps growing, splitting the "dev" mailing
>>>>>> lists into different categories (eal and common, pmds, higher level
>>>>>> abstractions...) could be an option. However, this last point opens a
>>>>>> lot of questions on how to minimize interference between the
>>>>>> different
>>>>>> parts and API/ABI compatibility during the development.
>>>>> I believe if we just make sure we use tags in the subject line then we
>>>>> can
>>>>> have our email clients do the splitting of the emails instead of
>>>>> adding
>>>>> more emails lists.
>>>>>
>>>> Agreed
>> I think it is a good idea too. Maybe we can standardize some format e.g.
>> [TAG][PATCH vX], or something like that.
>>
>>>>>>> Perhaps it means having some ReviewBoard type of tools, a clone in
>>>>>>> Github or
>>>>>>> Bitbucket where the less hardcore kernel-workflow types could send
>>>>> back
>>>>>>> their
>>>>>>> small bug fixes a bit more easily, this kind of stuff. Google has
>>>>> been
>>>>>>> getting
>>>>>>> good uptake since they moved most of their open source across to
>>>>> Github,
>>>>>>> because the contribution workflow was more convenient than Google
>>>>> Code
>>>>>>> was.
>>>>> I like GitHub it is a much better designed tool then patchwork, plus
>>>>> it
>>>>> could get more eyes as it is very well know to the developer community
>>>>> in
>>>>> general. I feel GitHub has many advantages over the current systems in
>>>>> place but, it does not solve the all patch issues.
>>>>>
>>>> Github is actually a bit irritating for this sort of thing, as it
>>>> presumes a web
>>>> based interface for discussion.  They have some modicum of email
>>>> forwarding
>>>> enabled, but it has never quite worked right, or integrated properly.
>> An alternative to githubs and bitbuckets is a self-hosted forge, like
>> gitlab:
>>
>> https://about.gitlab.com/
>>
>> To be honest, I mostly work on open-source repositories, and in our
>> organization we use only gitlab for private repositories, so I haven't
>> played that much with it. But it seems to do its job and has almost all
>> of the features of the "community" github, if not more. I don't know if
>> you can even integrate it with github's accounts somehow, to prevent to
>> have to register.
>>
>> However, one of the important points of using github/bitbucket is
>> visibility and ease the contribution process. By using an self-hosted
>> solution, even if it is similar to github and well advertised in DPDK's
>> website, you kind of loose part of that advantage.
> I would suggest we use GitHub then picking yet another not as well know
> Git Repo system, if we decide to change.

I agree. I was just pointing out this as an option instead of 
github/bitbucket. Basically to (still) self-host the repository and tools.

>>> Email forwarding has seemed to work for me and in one case it took a bit
>>> to have GitHub stop sending me emails on a repo I did not want anymore
>>> :-)
>>>>> The only way we can get patch issues resolved is to put a bit more
>>>>> process
>>>>> in place.
>>>>>> Although I agree, we have to be careful on how github or bitbucket is
>>>>>> used. Having issues or even (e.g. github) pull requests *in addition*
>>>>> to
>>>>>> the normal contribution workflow can be a nightmare to deal with, in
>>>>>> terms of synchronization and preventing double work. So I guess
>>>>>> setting
>>>>>> up an official github or bitbucket mirror would be fine, via some
>>>>> simple
>>>>>> cronjob, but I guess it would end-up not using PRs or issues in
>>>>>> github
>>>>>> like the Linux kernel does.
>>>> 100% agree, we can't be split about this.  Allowing contributions from
>>>> n
>>>> channels just means most developers will only see/reviews 1/nth of the
>>>> patches
>>>> of interest to them.
>>> If we setup a GitHub or some other site, we would need to make Github
>>> the
>>> primary site to remove this type of problem IMO.
>> You mean changing the workflow from email based to issues and pull-req
>> or github pull req? Do you really think this is possible?
> Yes, I think pull-req is the standard GitHub method as everyone needs a
> repo anyway. If we can figure out how to integrate the email patches that
> would be great.

I think it is quite complicated. It needs to be completely seamless or 
it won't work, and we will have part of the discussions in the mailing 
list, and part in the pull-req issues.

I would think it the other way around => pull requests are "echoed" to 
the mailing list to be discussed there, and always CCed (how) to the 
issue to capture the discussion there too. Not trivial at all.

marc

>>>>>   From what I can tell GitHub seems to be a better solution for a free
>>>>> open
>>>>> environment. Bitbucket I have never used and GitHub seems more popular
>>>>> from one article I read.
>>>>>
>>>>>
>>>>>
>>>>> https://www.google.com/webhp?sourceid=chrome-instant&ion=1&espv=2&ie=UTF-8#q=bitbucket%20vs%20github
>>>>>
>>>>>
>>>>>> Btw, is this github organization already registered by Intel or some
>>>>>> other company of the community?
>>>>>>
>>>>>> https://github.com/dpdk
>>>>>>
>>> I was hoping someone would own up to the GitHub dpdk site.
>> Just wanted to know if this was the case. But, even if that would not be
>> the case, I *guess* that, as it happens with other services like
>> twitter, facebook..., Intel could claim the user, since it has the
>> registered trademark.
>>
>> marc
>>
>>>>>> Marc
>>>>> If we can use the above that would be great, but a name like
>>>>> 'dpdk-community' or something could work too.
>>>>>
>>>>> We can host the web site here and have many sub-projects like
>>>>> Pktgen-DPDK
>>>>> :-) under the same page. Not to say anything bad about our current web
>>>>> pages as I find it difficult to use sometimes and find things like
>>>>> patchwork link. Maintaining a web site is a full time job and GitHub
>>>>> does
>>>>> maintain the site, plus we can collaborate on host web page on the
>>>>> GitHub
>>>>> site easier.
>>>>>
>>>>> Moving to the Linux Foundation is an option as well as it is very well
>>>>> know and has some nice ways to get your project promoted. It does
>>>>> have a
>>>>> few drawbacks in process handling and cost to state a few. The process
>>>>> model is all ready defined, which is good and bad it just depends on
>>>>> your
>>>>> needs IMO.
>>>>>
>>>>> Regards,
>>>>> ++Keith
>>>>>
>>>>>>> Matthew.

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] Beyond DPDK 2.0
  2015-04-27 10:29  0%               ` Neil Horman
@ 2015-04-27 13:50  0%                 ` Wiles, Keith
  0 siblings, 0 replies; 200+ results
From: Wiles, Keith @ 2015-04-27 13:50 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev



On 4/27/15, 5:29 AM, "Neil Horman" <nhorman@tuxdriver.com> wrote:

>On Mon, Apr 27, 2015 at 01:41:11AM +0000, Wiles, Keith wrote:
>> 
>> 
>> On 4/26/15, 4:56 PM, "Neil Horman" <nhorman@tuxdriver.com> wrote:
>> 
>> >On Sat, Apr 25, 2015 at 04:08:23PM +0000, Wiles, Keith wrote:
>> >> 
>> >> 
>> >> On 4/25/15, 8:30 AM, "Marc Sune" <marc.sune@bisdn.de> wrote:
>> >> 
>> >> >
>> >> >
>> >> >On 24/04/15 19:51, Matthew Hall wrote:
>> >> >> On Fri, Apr 24, 2015 at 12:39:47PM -0500, Jay Rolette wrote:
>> >> >>> I can tell you that if DPDK were GPL-based, my company wouldn't
>>be
>> >> >>>using
>> >> >>> it. I suspect we wouldn't be the only ones...
>> >> >>>
>> >> >>> Jay
>> >> >> I could second this, from the past employer where I used it. Right
>> >>now
>> >> >>I am
>> >> >> using it in an open source app, I have a bit of GPL here and there
>> >>but
>> >> >>I'm
>> >> >> trying to get rid of it or confine it to separate address spaces,
>> >>where
>> >> >>it
>> >> >> won't impact the core code written around DPDK, as I don't want to
>> >>cause
>> >> >> headaches for any downstream users I attract someday.
>> >> >>
>> >> >> Hard-core GPL would not be possible for most. LGPL could be
>>possible,
>> >> >>but I
>> >> >> don't think it could be worth the relicensing headache for that
>>small
>> >> >>change.
>> >> >>
>> >> >> Instead we should make the patch process as easy as humanly
>>possible
>> >>so
>> >> >>people
>> >> >> are encouraged to send us the fixes and not cart them around their
>> >> >>companies
>> >> >> constantly.
>> >> 
>> >> +1 and besides the GPL or LGPL ship has sailed IMHO and we can not go
>> >>back.
>> >Actually, IANAL, but I think we can.  The BSD license allows us to fork
>> >and
>> >relicense the code I think, under GPL or any other license.  I'm not
>> >advocating
>> >for that mind you, just suggesting that its possible should it ever
>>become
>> >needed.
>> >
>> >> >
>> >> >I agree. My feeling is that as the number of patches in the mailing
>> >>list
>> >> >grows, keeping track of them gets more and more complicated.
>>Patchwork
>> >> >website was a way to try to address this issue. I think it was an
>> >> >improvement, but to be honest, patchwork lacks a lot of
>>functionality,
>> >> >such as properly tracking multiple versions of the patch
>>(superseding
>> >> >them automatically), and it lacks some filtering capabilities e.g.
>>per
>> >> >user, per tag/label or library, automatically track if it has been
>> >> >merged, give an overall status of the pending vs merged patches, set
>> >> >milestones... Is there any alternative tool or improved version for
>> >>that?
>> >> 
>> >Agreed, this has come up before, off list unfortunately.  The volume of
>> >patches
>> >seems to be increasing at such a rate that a single maintainer has
>> >difficulty
>> >keeping up.  I proposed that the workload be split out to multiple
>> >subtrees,
>> >with prefixes being added to patch subjects on the list for local
>> >filtering to
>> >stem the tide.  Specifically I had proposed that the PMD's be split
>>into a
>> >separate subtree, but that received pushback in favor of having each
>> >library
>> >having its own separate subtree, with a pilot program being made out of
>> >the I40e
>> >driver (which you might note sends pull requests to the list now).  I'd
>> >still
>> >like to see all PMD's come under a single subtree, but thats likely an
>> >argument
>> >for later.
>> >
>> >That said, Do you think that this patch latency is really a contributor
>> >to low
>> >project participation?  It definately a problem, but it seems to me
>>that
>> >this
>> >sort of issue would lead to people trying to parcitipate, then giving
>>up
>> >(i.e.
>> >we would see 1-2 emails from an individual, then not see them again).
>> >I'd need
>> >to look through the mailing list for such a pattern, but anecdotally
>>I've
>> >not
>> >seen that happen.  The problem you describe above is definately a
>> >problem, but
>> >its one for those individuals who are participating, not for those who
>>are
>> >simply choosing not to.  And I think we need to address both.
>> >
>> >> I agree patchwork has some limitation, but I think the biggest issue
>>is
>> >> keeping up with the patches. Getting patches introduced into the main
>> >>line
>> >> is very slow. A patch submitted today may not get applied for weeks
>>or
>> >> months, then when another person submits a patch he is starting to
>>run a
>> >> very high risk of having to redo that patch, because a pervious patch
>> >> makes his fail weeks/months later. I would love to see a better tool
>> >>then
>> >> patchwork, but the biggest issue is we have a huge backlog of
>>patches.
>> >> Personally I am not sure how Thomas or any is able to keep up with
>>the
>> >> patches.
>> >> 
>> >This is absolutely a problem.  I'd like to think, more than a tool like
>> >patchwork, a subtree organization to allow some modicum of parallel
>> >review and
>> >integration would really be a benefit here.
>> Subtrees could work, but the real problem I think is the number of
>> committers must be higher then one. Something like GitHub (and I assume
>> Linux Foundation) have a method to add committers to a project. In the
>> case of GitHub they just have to have a free GitHub account and they can
>> become committers of the project buying the owner of the project enables
>> them.
>> 
>> On GitHub they have personal accounts and organization accounts I know
>> only about the personal accounts, but they allow for 5 private repos and
>> any number of public repos. The organization account has a lot of extra
>> features that seem better for a DPDK community IMO and should be the one
>> we use if we decide it is the right direction. We can always give it a
>> shot for while and keep the dpdk.org and use dev@dpdk.org and its repo
>> mirrored from GitHub as a transition phase. This way we can fall back to
>> dpdk.org or move one to something else if we like.
>> 
>> https://help.github.com/categories/organizations/
>> 
>> The developers could still send patches via email list, but creating a
>> repo and forking dpdk is easy, then send a pull request.
>> 
>I'm not opposed to github per-se, but nothing described above is unique to
>github. Theres no reason we can't allow multiple comitters to the current
>tree
>as hosted on the current server, we just have to configure it as such.
>
>And FWIW, the assumption is that, with multiple subtrees, you implicitly
>have
>multiple comitters, assuming that pull requests from those subtree
>maintainers
>are trusted by the top level tree maintainer.
>
>In fact I feel somewhat better about that model as it provides a nice
>stairstep
>integration path for new features.
>
>Not explicitly opposed to a movement to github, I just feel like it may
>not
>address the problem at hand.

As I see it, your concern is creating multiple repos or splitting up the
current repo, which can be done in a single GitHub org account, and they
all appear on the page. This way we can move the current other repos like
Pktgen to this location and everyone sees all of the repos in a much
easier way IMO. The org account at GitHub gives you the multiple
committers and even teams. I see we only need one team today for the DPDK repo
and then we have something like Pktgen as a different team and so on.
>
>> 
>> >
>> >> The other problem I see is how patches are agreed on to be included
>>in
>> >>the
>> >> mainline. Today it is just an ACK or a NAK on the mailing list. Then
>>I
>> >>see
>> >> what I think to be only a few people ACKing or NAKing patches. This
>> >> process has a lot of problems from a patch being ignore for some
>>reason
>> >>or
>> >> someone having negative feed back on very minor detail or no way to
>> >>push a
>> >> patch forward a single NAK or comment.
>> >> 
>> >
>> >So, this is an interesting issue in ideal meritocracies.  Currently
>> >is/should be
>> >looking for ACKs/NAK/s from the individuals listed in the MAINTAINER
>> >files, and
>> >those people should be the definitive subject matter experts on the
>>code
>> >they
>> >cover.  As such, I would agrue that they should be entitled to a
>>modicum
>> >of
>> >stylistic/trivial leeway.  That is to say, if they choose to block a
>>patch
>> >around a very minor detail, then between them changing their position,
>> >and the
>> >patch author changing the code, the latter is likely the easier course
>>of
>> >action, especially if the author can't make an argument for their
>> >position.
>> >That said, if such patch blockage becomes so egregious that individuals
>> >stop
>> >contributing, that needs to be known as well.  If you as a patch
>>author:
>> >
>> >1) Have tried to submit patches
>> >2) Had them blocked for what you consider trivial reasons
>> >3) Plan to not contribute further because of this
>> >4) Still rely on the DPDK for your product
>> >
>> >Please, say something.  People in charge need to know when they're
>>pushing
>> >contributors away.
>> >
>> >FWIW, I've tried to do some correlation between the git history and the
>> >mailing
>> >list.  I need to do more searches, but I have a feeling that early on,
>>the
>> >majority of people who stopped contributing, did so because their
>>patches
>> >weren't expressely blocked, but rather because they were simply
>>ignored.
>> >No one
>> >working on DPDK bothered to review those patches, and so they never got
>> >merged.
>> >Hopefully that problem has been addressed somewhat now.
>> >
>> >> I would like to see some type of layering process to allow patches
>>to be
>> >> applied in a timely manner a few weeks not months or completely
>>ignored.
>> >> Maybe some type of voting is reasonable, but we need to do something
>>to
>> >> turn around the patches in clean reasonable manner.
>> >> 
>> >> Think we need some type of group meeting every week to look at the
>> >>patches
>> >> and determining which ones get applied, this gives quick feedback to
>>the
>> >> submitter as to the status of the patch.
>> >> 
>> >I think a group meeting is going to be way too much overhead to manage
>> >properly.
>> >You'll get different people every week with agenda that may not line up
>> >with
>> >code quality, which is really what the review is meant to provide.  I
>> >think
>> 
>> I was only suggesting the maintainers attend the meeting. Of course they
>> have to attend or have someone attend for them, just to get the voting
>> done. If you do not attend then you do not get to vote or something like
>> that is reasonable. Not that we should try and define the process here.
>> 
>If you use multiple subtrees, there's no need for a meeting, or any sort of
>defined process for voting, there's only an implicitly defined hierarchy of
>acceptance in bundled changesets.  If a desire of the community is to see
>more
>efficient review and lower changeset acceptance latency, it seems to me
>that a
>weekly meeting of any sort is somewhat anathema to that.

That is fine if you do not want a meeting, but I am just trying to figure out
the best solution to the problems.
>
>> >perhaps a better approach would be to require that that code owners
>>from
>> >the
>> >maintainer file provide and ACK/NAK on their patches within 3-4 days,
>>and
>> >require a corresponding tree maintainer to apply the patch within 7 or
>> >so.  That
>> >would cap our patch latency.  Likewise, if a patch slips in creating a
>> >regression, the author needs to be alerted and given a time window in
>> >which to
>> >fix the problem before the offending patch is reverted during the QE
>> >cycle.
>> >
>> >
>> >> >
>> >> >On the other side, since user questions, community discussions and
>> >> >development happens in the same mailing list, things get really
>> >> >complicated, especially for users seeking help. Even though I
>>think
>> >> >the average skills of the users of DPDK is generally higher than in
>> >> >other software projects, if DPDK wants to attract more users,
>>having a
>> >> >better user support is key, IMHO.
>> >> >
>> >> >So I would see with good eyes a separation between, at least,
>>dpdk-user
>> >> >and dpdk-dev.
>> >> 
>> >I wouldn't argue with this separation, seems like a reasonable
>>approach.
>> >
>> >> I do not remember seeing too many users on the list and making a list
>> >>just
>> >> for them is OK if everyone is fine with a list that has very few
>>emails.
>> >> >
>> >> >If the number of patches keeps growing, splitting the "dev" mailing
>> >> >lists into different categories (eal and common, pmds, higher level
>> >> >abstractions...) could be an option. However, this last point opens
>>a
>> >> >lot of questions on how to minimize interference between the
>>different
>> >> >parts and API/ABI compatibility during the development.
>> >> 
>> >> I believe if we just make sure we use tags in the subject line then
>>we
>> >>can
>> >> have our email clients do the splitting of the emails instead of
>>adding
>> >> more emails lists.
>> >> 
>> >Agreed
>> >
>> >> >
>> >> >>
>> >> >> Perhaps it means having some ReviewBoard type of tools, a clone in
>> >> >>Github or
>> >> >> Bitbucket where the less hardcore kernel-workflow types could send
>> >>back
>> >> >>their
>> >> >> small bug fixes a bit more easily, this kind of stuff. Google has
>> >>been
>> >> >>getting
>> >> >> good uptake since they moved most of their open source across to
>> >>Github,
>> >> >> because the contribution workflow was more convenient than Google
>> >>Code
>> >> >>was.
>> >> 
>> >> I like GitHub; it is a much better designed tool than patchwork, plus
>>it
>> >> could get more eyes as it is very well known to the developer
>>community
>> >>in
>> >> general. I feel GitHub has many advantages over the current systems
>>in
>> >> place, but it does not solve all the patch issues.
>> >> 
>> >Github is actually a bit irritating for this sort of thing, as it
>> >presumes a web
>> >based interface for discussion.  They have some modicum of email
>> >forwarding
>> >enabled, but it has never quite worked right, or integrated properly.
>> 
>> Email forwarding has seemed to work for me and in one case it took a bit
>> to have GitHub stop sending me emails on a repo I did not want anymore
>>:-)
>
>Forwarding works fine, it's responding that doesn't usually work well.
>Emails
>from pull requests and issues are forwarded from a 'do not reply' address
>and
>responding requires that you visit the github page in a browser.  That's
>especially trying with patch review, as there is no real concept of
>patches on a
>list, only pull requests which require that you pull a new branch in and
>review
>it.  Once you have to leave your MUA, your efficiency quickly goes down,
>which
>is what we're trying to avoid here.

OK, I see your point and yes that could be an issue, but not a huge impact
in efficiency IMO. Not everyone and not all of the time will you need to
go to the web site.
>
>
>> >
>> >> The only way we can get patch issues resolved is to put a bit more
>> >>process
>> >> in place.
>> >> >
>> >> >Although I agree, we have to be careful on how github or bitbucket
>>is
>> >> >used. Having issues or even (e.g. github) pull requests *in
>>addition*
>> >>to
>> >> >the normal contribution workflow can be a nightmare to deal with, in
>> >> >terms of synchronization and preventing double work. So I guess
>>setting
>> >> >up an official github or bitbucket mirror would be fine, via some
>> >>simple
>> >> >cronjob, but I guess it would end-up not using PRs or issues in
>>github
>> >> >like the Linux kernel does.
>> >> 
>> >100% agree, we can't be split about this.  Allowing contributions from
>>n
>> >channels just means most developers will only see/review 1/nth of the
>> >patches
>> >of interest to them.
>> 
>> If we setup a GitHub or some other site, we would need to make Github
>>the
>> primary site to remove this type of problem IMO.
>> >
>> >> From what I can tell GitHub seems to be a better solution for a free
>> >>open
>> >> environment. Bitbucket I have never used and GitHub seems more
>>popular
>> >> from one article I read.
>> >> 
>> >> 
>> 
>>>>https://www.google.com/webhp?sourceid=chrome-instant&ion=1&espv=2&ie=UT
>>>>F-
>> >>8#
>> >> q=bitbucket%20vs%20github
>> >> 
>> >> 
>> >> >Btw, is this github organization already registered by Intel or some
>> >> >other company of the community?
>> >> >
>> >> >https://github.com/dpdk
>> >> >
>> 
>> I was hoping someone would own up to the GitHub dpdk site.
>> 
>Hmm, looks almost defunct.  If no one steps up, perhaps reporting abuse
>for
>camping on the name might be worthwhile?  That would at least get their
>attention.

+1
>
>Neil
>

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  2015-04-26  0:09  0%                         ` Ravi Kerur
@ 2015-04-27 13:44  0%                           ` Neil Horman
  2015-04-27 22:39  3%                             ` Ravi Kerur
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-04-27 13:44 UTC (permalink / raw)
  To: Ravi Kerur; +Cc: dev

On Sat, Apr 25, 2015 at 05:09:01PM -0700, Ravi Kerur wrote:
> On Sat, Apr 25, 2015 at 6:02 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > On Sat, Apr 25, 2015 at 08:32:42AM -0400, Neil Horman wrote:
> > > On Fri, Apr 24, 2015 at 06:45:06PM -0700, Ravi Kerur wrote:
> > > > On Fri, Apr 24, 2015 at 2:24 PM, Ravi Kerur <rkerur@gmail.com> wrote:
> > > >
> > > > >
> > > > >
> > > > > On Fri, Apr 24, 2015 at 12:51 PM, Neil Horman <nhorman@tuxdriver.com
> > >
> > > > > wrote:
> > > > >
> > > > >> On Fri, Apr 24, 2015 at 12:21:23PM -0700, Ravi Kerur wrote:
> > > > >> > On Fri, Apr 24, 2015 at 11:53 AM, Neil Horman <
> > nhorman@tuxdriver.com>
> > > > >> wrote:
> > > > >> >
> > > > >> > > On Fri, Apr 24, 2015 at 09:45:24AM -0700, Ravi Kerur wrote:
> > > > >> > > > On Fri, Apr 24, 2015 at 8:22 AM, Neil Horman <
> > nhorman@tuxdriver.com
> > > > >> >
> > > > >> > > wrote:
> > > > >> > > >
> > > > >> > > > > On Fri, Apr 24, 2015 at 08:14:04AM -0700, Ravi Kerur wrote:
> > > > >> > > > > > On Fri, Apr 24, 2015 at 6:51 AM, Neil Horman <
> > > > >> nhorman@tuxdriver.com>
> > > > >> > > > > wrote:
> > > > >> > > > > >
> > > > >> > > > > > > On Thu, Apr 23, 2015 at 02:35:31PM -0700, Ravi Kerur
> > wrote:
> > > > >> > > > > > > > Changes in v7
> > > > >> > > > > > > > Remove _setname_ pthread calls.
> > > > >> > > > > > > > Use rte_gettid() API in RTE_LOG to print thread_id.
> > > > >> > > > > > > >
> > > > >> > > > > > > > Changes in v6
> > > > >> > > > > > > > Remove RTE_EXEC_ENV_BSDAPP from eal_common_thread.c
> > file.
> > > > >> > > > > > > > Add pthread_setname_np/pthread_set_name_np for
> > Linux/FreeBSD
> > > > >> > > > > > > > respectively. Plan to use _getname_ in RTE_LOG when
> > > > >> available.
> > > > >> > > > > > > > Use existing rte_get_systid() in RTE_LOG to print
> > thread_id.
> > > > >> > > > > > > >
> > > > >> > > > > > > > Changes in v5
> > > > >> > > > > > > > Rebase to latest code.
> > > > >> > > > > > > >
> > > > >> > > > > > > > Changes in v4
> > > > >> > > > > > > > None
> > > > >> > > > > > > >
> > > > >> > > > > > > > Changes in v3
> > > > >> > > > > > > > Changed subject to be more explicit on file name
> > inclusion.
> > > > >> > > > > > > >
> > > > >> > > > > > > > Changes in v2
> > > > >> > > > > > > > None
> > > > >> > > > > > > >
> > > > >> > > > > > > > Changes in v1
> > > > >> > > > > > > > eal_thread.c has minor differences between Linux and
> > BSD,
> > > > >> move
> > > > >> > > > > > > > entire file into common directory.
> > > > >> > > > > > > > Use RTE_EXEC_ENV_BSDAPP to differentiate on minor
> > > > >> differences.
> > > > >> > > > > > > > Rename eal_thread.c to eal_common_thread.c
> > > > >> > > > > > > > Makefile changes to reflect file move and name change.
> > > > >> > > > > > > > Fix checkpatch warnings.
> > > > >> > > > > > > >
> > > > >> > > > > > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > > >> > > > > > > > ---
> > > > >> > > > > > > >  lib/librte_eal/bsdapp/eal/Makefile        |   2 +-
> > > > >> > > > > > > >  lib/librte_eal/bsdapp/eal/eal_thread.c    | 152
> > > > >> > > > > > > ------------------------------
> > > > >> > > > > > > >  lib/librte_eal/common/eal_common_thread.c | 147
> > > > >> > > > > > > ++++++++++++++++++++++++++++-
> > > > >> > > > > > > >  lib/librte_eal/linuxapp/eal/eal_thread.c  | 152
> > > > >> > > > > > > +-----------------------------
> > > > >> > > > > > > >  4 files changed, 148 insertions(+), 305 deletions(-)
> > > > >> > > > > > > >
> > > > >> > > > > > > > diff --git a/lib/librte_eal/bsdapp/eal/Makefile
> > > > >> > > > > > > b/lib/librte_eal/bsdapp/eal/Makefile
> > > > >> > > > > > > > index 2357cfa..55971b9 100644
> > > > >> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/Makefile
> > > > >> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/Makefile
> > > > >> > > > > > > > @@ -87,7 +87,7 @@ CFLAGS_eal_common_log.o :=
> > -D_GNU_SOURCE
> > > > >> > > > > > > >  # workaround for a gcc bug with noreturn attribute
> > > > >> > > > > > > >  # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
> > > > >> > > > > > > >  ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
> > > > >> > > > > > > > -CFLAGS_eal_thread.o += -Wno-return-type
> > > > >> > > > > > > > +CFLAGS_eal_common_thread.o += -Wno-return-type
> > > > >> > > > > > > >  CFLAGS_eal_hpet.o += -Wno-return-type
> > > > >> > > > > > > >  endif
> > > > >> > > > > > > >
> > > > >> > > > > > > > diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > >> > > > > > > b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > >> > > > > > > > index 9a03437..5714b8f 100644
> > > > >> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > >> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > >> > > > > > > > @@ -35,163 +35,11 @@
> > > > >> > > > > > > >  #include <stdio.h>
> > > > >> > > > > > > >  #include <stdlib.h>
> > > > >> > > > > > > >  #include <stdint.h>
> > > > >> > > > > > > > -#include <unistd.h>
> > > > >> > > > > > > > -#include <sched.h>
> > > > >> > > > > > > > -#include <pthread_np.h>
> > > > >> > > > > > > > -#include <sys/queue.h>
> > > > >> > > > > > > >  #include <sys/thr.h>
> > > > >> > > > > > > >
> > > > >> > > > > > > > -#include <rte_debug.h>
> > > > >> > > > > > > > -#include <rte_atomic.h>
> > > > >> > > > > > > > -#include <rte_launch.h>
> > > > >> > > > > > > > -#include <rte_log.h>
> > > > >> > > > > > > > -#include <rte_memory.h>
> > > > >> > > > > > > > -#include <rte_memzone.h>
> > > > >> > > > > > > > -#include <rte_per_lcore.h>
> > > > >> > > > > > > > -#include <rte_eal.h>
> > > > >> > > > > > > > -#include <rte_per_lcore.h>
> > > > >> > > > > > > > -#include <rte_lcore.h>
> > > > >> > > > > > > > -
> > > > >> > > > > > > >  #include "eal_private.h"
> > > > >> > > > > > > >  #include "eal_thread.h"
> > > > >> > > > > > > >
> > > > >> > > > > > > > -RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) =
> > LCORE_ID_ANY;
> > > > >> > > > > > > NAK, these are exported symbols, you can't remove them
> > without
> > > > >> > > going
> > > > >> > > > > > > through the
> > > > >> > > > > > > deprecation process.
> > > > >> > > > > > >
> > > > >> > > > > > >
> > > > >> > > > > > They are not removed/deleted, they are moved from
> > eal_thread.c
> > > > >> to
> > > > >> > > > > > eal_common_thread.c file since it is common to both Linux
> > and
> > > > >> BSD.
> > > > >> > > > > >
> > > > >> > > > > Then perhaps you forgot to export the symbol?  Its showing
> > up as
> > > > >> > > removed
> > > > >> > > > > on the
> > > > >> > > > > ABI checker utility.
> > > > >> > > > >
> > > > >> > > > > Neil
> > > > >> > > > >
> > > > >> > > >
> > > > >> > > > Can you please show me in the current code where it is being
> > > > >> exported? I
> > > > >> > > > have only moved definitions to _common_ files, not sure why it
> > > > >> should be
> > > > >> > > > exported now.  I searched in the current code for
> > > > >> RTE_DEFINE_PER_LCORE
> > > > >> > > >
> > > > >> > > > #home/rkerur/dpdk-tmp/dpdk# grep -ir RTE_DEFINE_PER_LCORE *
> > > > >> > > > app/test/test_per_lcore.c:static
> > RTE_DEFINE_PER_LCORE(unsigned,
> > > > >> test) =
> > > > >> > > > 0x12345678;
> > > > >> > > >
> > > > >>
> > lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > >> > > > _lcore_id) = LCORE_ID_ANY;
> > > > >> > > >
> > > > >>
> > lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > >> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > > > >> > > >
> > > > >> > >
> > > > >>
> > lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > > > >> > > > _cpuset);
> > > > >> > > >
> > > > >>
> > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > >> > > > _lcore_id) = LCORE_ID_ANY;
> > > > >> > > >
> > > > >>
> > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > >> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > > > >> > > >
> > > > >>
> > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > > > >> > > > _cpuset);
> > > > >> > > > lib/librte_eal/common/include/rte_per_lcore.h:#define
> > > > >> > > > RTE_DEFINE_PER_LCORE(type, name)            \
> > > > >> > > > lib/librte_eal/common/include/rte_eal.h:    static
> > > > >> > > > RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
> > > > >> > > >
> > lib/librte_eal/common/eal_common_errno.c:RTE_DEFINE_PER_LCORE(int,
> > > > >> > > > _rte_errno);
> > > > >> > > > lib/librte_eal/common/eal_common_errno.c:    static
> > > > >> > > > RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
> > > > >> > > >
> > > > >> > > >
> > > > >> > > > > > Thanks
> > > > >> > > > > > Ravi
> > > > >> > > > > >
> > > > >> > > > > > Regards
> > > > >> > > > > > > Neil
> > > > >> > > > > > >
> > > > >> > > > > > >
> > > > >> > > > >
> > > > >> > > Its exported in the version map file:
> > > > >> > >  per_lcore__lcore_id;
> > > > >> > >
> > > > >> > >
> > > > >> > Thanks Neil, I checked and both linux and bsd rte_eal_version.map
> > have
> > > > >> it.
> > > > >> > I compared .map file between "changed code" and the original,
> > they are
> > > > >> same
> > > > >> > for both linux and bsd. In fact you had ACK'd v4 version of this
> > patch
> > > > >> > series and no major changes after that. Please let me know if I
> > missed
> > > > >> > something.
> > > > >> >
> > > > >> I did, and I'm retracting that, because I didn't think to check the
> > ABI
> > > > >> compatibility on this.  But I ran it through the ABI checking
> > script
> > > > >> this and
> > > > >> this error popped out.  You should run it as well, its in the
> > scripts
> > > > >> directory.
> > > > >>
> > > > >>
> > > > >> I see in your first patch you removed it and re-added it in the
> > common
> > > > >> section.
> > > > >> But something about how its building is causing it to not show up
> > as an
> > > > >> exported
> > > > >> symbol, which is problematic, as other applications are going to
> > want
> > > > >> access to
> > > > >> it.
> > > > >>
> > > > >> It also possible that the ABI checker is throwing a false positive,
> > but
> > > > >> either
> > > > >> way, it needs to be looked into prior to moving forward with this.
> > > > >>
> > > > >>
> > > > > I did following things.
> > > > >
> > > > > Put a tag (v2.0.0-before-common-eal)  before EAL common functions
> > changes
> > > > > for commit (3c0c807038ad642f4be7deb9370293c39d12f029 net: remove
> > unneeded
> > > > > include)
> > > > >
> > > > > Put a tag (v2.0.0-common-eal) after EAL common functions changes for
> > > > > commit (25737e5a7212630a7b5d8ca756860a062f403789 Move common
> > functions in
> > > > > eal_pci.c)
> > > > >
> > > > > Ran validate-abi against x86_64-native-linuxapp-gcc and
> > > > >
> > > > > v2.0.0-rc3 and v2.0.0-before-common-eal, html report for
> > librte_eal.so
> > > > > shows removed symbols for "per_lcore__cpuset"
> > > > >
> > > > > v2.0.0-rc3 and v2.0.0-common-eal, html report for librte_eal.so shows
> > > > > removed symbols for "per_lcore__cpuset"
> > > > >
> > > > > Removed symbol is different from what you have reported and in my
> > case I
> > > > > see it even before my commit. If you are interested I can unicast
> > you html
> > > > > report file. Please let me know how to proceed.
> > > > >
> > > > >
> > > >
> > > > I did some experiment and found some interesting things.  I will take
> > eal.c
> > > > as an example
> > > >
> > > > eal.c is split into eal_common_sysfs.c eal_common_mem_cfg.c
> > > > eal_common_proc_type.c and eal_common_app_usage.c. In
> > linuxapp/eal/Makefile
> > > > if I compile new files right after eal.c as shown below
> > > >
> > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) := eal.c
> > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_sysfs.c
> > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_mem_cfg.c
> > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_proc_type.c
> > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_app_usage.c
> > > > ...
> > > >
> > > > validate-abi results matches baseline. Instead if i place new _common_
> > > > files in common area in linuxapp/eal/Makefile as shown below
> > > >
> > > > # from common dir
> > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
> > > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_log.c
> > > > ...
> > > >
> > > > validate-abi reports problem in binary compatibility and source
> > > > compatibility
> > > >
> > > > eal_filesystem.h, librte_eal.so.1
> > > >  [+] eal_parse_sysfs_value ( char const* filename, unsigned long* val )
> > > >  @@ DPDK_2.0 (2)
> > > >
> > > > I believe files in common and linuxapp directory are compiled same way
> > so
> > > > not sure why placement in makefile makes difference.
> > > >
> > > > Could this be false-positive from validate-abi script??
> > > >
> > > It could be, yes.  Though I'm more inclined to think that perhaps in the
> > new
> > > version of the code we're not generating the same DWARF information out
> > of it.
> > > In fact for some reason, I've checked both the build before and after
> > your
> > > patch series, and the exported CFLAGS aren't getting passed to the build
> > > properly, implying that we're not building all the code in the validator
> > with
> > > the -g flag, which the validator needs to function properly.  I'm looking
> > into
> > > that
> > > Neil
> > >
> > >
> > Found the problem, I was stupidly reading the report incorrectly.  The
> > problem
> > regarding _lcore_id is a source compatibility issue (because the symbol
> > moved to
> > a new location), which is irrelevant to us.  Its not in any way a binary
> > compat
> > problem, which is what we care about.  Sorry for the noise.
> >
> > I do still have a few concerns about some changed calling conventions with
> > a few
> > other functions, which I'll look into on monday.
> >
> >
> Please let me know your inputs on changed calling conventions. Most of them
> can be fixed by re-arranging moved code in _common_ files and order of
> compilation.
> 
If moving the order of compilation around fixes the problem, then I am
reasonably convinced that it is, if not a false positive, a minor issue with the
compiler's DWARF information (The compiler just can't sanely change the location
in which parameters are passed).  If you make those changes, I'll ACK them, and
look into what's going on with the calling conventions.

Thanks!
Neil

> Thanks,
> Ravi
> 
> Regards
> > Neil
> >
> >

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] Beyond DPDK 2.0
  2015-04-27  9:52  0%               ` Marc Sune
@ 2015-04-27 13:39  0%                 ` Wiles, Keith
  2015-04-27 15:34  0%                   ` Marc Sune
  0 siblings, 1 reply; 200+ results
From: Wiles, Keith @ 2015-04-27 13:39 UTC (permalink / raw)
  To: Marc Sune, Neil Horman; +Cc: dev



On 4/27/15, 4:52 AM, "Marc Sune" <marc.sune@bisdn.de> wrote:

>
>
>On 27/04/15 03:41, Wiles, Keith wrote:
>>
>> On 4/26/15, 4:56 PM, "Neil Horman" <nhorman@tuxdriver.com> wrote:
>>
>>> On Sat, Apr 25, 2015 at 04:08:23PM +0000, Wiles, Keith wrote:
>>>>
>>>> On 4/25/15, 8:30 AM, "Marc Sune" <marc.sune@bisdn.de> wrote:
>>>>
>>>>>
>>>>> On 24/04/15 19:51, Matthew Hall wrote:
>>>>>> On Fri, Apr 24, 2015 at 12:39:47PM -0500, Jay Rolette wrote:
>>>>>>> I can tell you that if DPDK were GPL-based, my company wouldn't be
>>>>>>> using
>>>>>>> it. I suspect we wouldn't be the only ones...
>>>>>>>
>>>>>>> Jay
>>>>>> I could second this, from the past employer where I used it. Right
>>>> now
>>>>>> I am
>>>>>> using it in an open source app, I have a bit of GPL here and there
>>>> but
>>>>>> I'm
>>>>>> trying to get rid of it or confine it to separate address spaces,
>>>> where
>>>>>> it
>>>>>> won't impact the core code written around DPDK, as I don't want to
>>>> cause
>>>>>> headaches for any downstream users I attract someday.
>>>>>>
>>>>>> Hard-core GPL would not be possible for most. LGPL could be
>>>>>>possible,
>>>>>> but I
>>>>>> don't think it could be worth the relicensing headache for that
>>>>>>small
>>>>>> change.
>>>>>>
>>>>>> Instead we should make the patch process as easy as humanly possible
>>>> so
>>>>>> people
>>>>>> are encouraged to send us the fixes and not cart them around their
>>>>>> companies
>>>>>> constantly.
>>>> +1 and besides the GPL or LGPL ship has sailed IMHO and we can not go
>>>> back.
>>> Actually, IANAL, but I think we can.  The BSD license allows us to fork
>>> and
>>> relicense the code I think, under GPL or any other license.  I'm not
>>> advocating
>>> for that mind you, just suggesting that its possible should it ever
>>>become
>>> needed.
>>>
>>>>> I agree. My feeling is that as the number of patches in the mailing
>>>> list
>>>>> grows, keeping track of them gets more and more complicated.
>>>>>Patchwork
>>>>> website was a way to try to address this issue. I think it was an
>>>>> improvement, but to be honest, patchwork lacks a lot of
>>>>>functionality,
>>>>> such as properly tracking multiple versions of the patch (superseding
>>>>> them automatically), and it lacks some filtering capabilities e.g.
>>>>>per
>>>>> user, per tag/label or library, automatically track if it has been
>>>>> merged, give an overall status of the pending vs merged patches, set
>>>>> milestones... Is there any alternative tool or improved version for
>>>> that?
>>>>
>>> Agreed, this has come up before, off list unfortunately.  The volume of
>>> patches
>>> seems to be increasing at such a rate that a single maintainer has
>>> difficulty
>>> keeping up.  I proposed that the workload be split out to multiple
>>> subtrees,
>>> with prefixes being added to patch subjects on the list for local
>>> filtering to
>>> stem the tide.  Specifically I had proposed that the PMD's be split
>>>into a
>>> separate subtree, but that received pushback in favor of having each
>>> library
>>> having its own separate subtree, with a pilot program being made out of
>>> the I40e
>>> driver (which you might note sends pull requests to the list now).  I'd
>>> still
>>> like to see all PMD's come under a single subtree, but thats likely an
>>> argument
>>> for later.
>>>
>>> That said, Do you think that this patch latency is really a contributor
>>> to low
>>> project participation?  It definately a problem, but it seems to me
>>>that
>>> this
>>> sort of issue would lead to people trying to participate, then giving
>>>up
>>> (i.e.
>>> we would see 1-2 emails from an individual, then not see them again).
>>> I'd need
>>> to look through the mailing list for such a pattern, but anecdotally
>>>I've
>>> not
>>> seen that happen.  The problem you describe above is definitely a
>>> problem, but
>>> its one for those individuals who are participating, not for those who
>>>are
>>> simply choosing not to.  And I think we need to address both.
>>>
>>>> I agree patchwork has some limitation, but I think the biggest issue
>>>>is
>>>> keeping up with the patches. Getting patches introduced into the main
>>>> line
>>>> is very slow. A patch submitted today may not get applied for weeks or
>>>> months, then when another person submits a patch he is starting to
>>>>run a
>>>> very high risk of having to redo that patch, because a previous patch
>>>> makes his fail weeks/months later. I would love to see a better tool
>>>> than
>>>> patchwork, but the biggest issue is we have a huge backlog of patches.
>>>> Personally I am not sure how Thomas or anyone is able to keep up with the
>>>> patches.
>>>>
>>> This is absolutely a problem.  I'd like to think, more than a tool like
>>> patchwork, a subtree organization to allow some modicum of parallel
>>> review and
>>> integration would really be a benefit here.
>> Subtrees could work, but the real problem I think is the number of
>> committers must be higher than one. Something like GitHub (and I assume
>> Linux Foundation) have a method to add committers to a project. In the
>> case of GitHub they just have to have a free GitHub account and they can
>> become committers of the project buying the owner of the project enables
>> them.
>>
>> On GitHub they have personal accounts and organization accounts I know
>> only about the personal accounts, but they allow for 5 private repos and
>> any number of public repos. The organization account has a lot of extra
>> features that seem better for a DPDK community IMO and should be the one
>> we use if we decide it is the right direction. We can always give it a
>> shot for while and keep the dpdk.org and use dev@dpdk.org and its repo
>> mirrored from GitHub as a transition phase. This way we can fall back to
>> dpdk.org or move on to something else if we like.
>>
>> https://help.github.com/categories/organizations/
>>
>> The developers could still send patches via email list, but creating a
>> repo and forking dpdk is easy, then send a pull request.
>
>For the github "community" or free service, organization accounts just
>allow you to set teams, where each team can be assigned to one or more
>repositories. The differences are summarized here:
>
>https://help.github.com/articles/what-s-the-difference-between-user-and-or
>ganization-accounts/
>
>And the permission schema, per team, is summarized here:
>
>https://help.github.com/articles/permission-levels-for-an-organization-rep
>ository/
>
>Some limitations: i) only if the team has write permissions (IOW push
>permissions) you can manage issues ii) there cannot be per-branch ACLs.

I was assuming the organization GitHub is just to allow more than one
admin/maintainers along with teams if needed. I would assume the repos are
still public and others are allowed to fork or pull the repos. I think of
the org version is just extra controls on top of a personal repo like
design. The org/personal one should appear to the
non-maintainers/admins/owner as a normal repo on GitHub, correct?

The GitHub organization is built for open-source and you can still have
private repos, but then you start to have a cost depending on the number
of private repos you want. If you do not have a lot of private repos then
you should have no cost (I think). I do not see any reason for private
repos, but I guess we could have some and we get 5 free and 10 is $25 per
month.
>
>>
>>
>>>> The other problem I see is how patches are agreed on to be included in
>>>> the
>>>> mainline. Today it is just an ACK or a NAK on the mailing list. Then I
>>>> see
>>>> what I think to be only a few people ACKing or NAKing patches. This
>>>> process has a lot of problems from a patch being ignore for some
>>>>reason
>>>> or
>>>> someone having negative feed back on very minor detail or no way to
>>>> push a
>>>> patch forward a single NAK or comment.
>>>>
>>> So, this is an interesting issue in ideal meritocracies.  Currently
>>> is/should be
>>> looking for ACKs/NAK/s from the individuals listed in the MAINTAINER
>>> files, and
>>> those people should be the definitive subject matter experts on the
>>>code
>>> they
>>> cover.  As such, I would agrue that they should be entitled to a
>>>modicum
>>> of
>>> stylistic/trivial leeway.  That is to say, if they choose to block a
>>>patch
>>> around a very minor detail, then between them changing their position,
>>> and the
>>> patch author changing the code, the latter is likely the easier course
>>>of
>>> action, especially if the author can't make an argument for their
>>> position.
>>> That said, if such patch blockage becomes so egregious that individuals
>>> stop
>>> contributing, that needs to be known as well.  If you as a patch
>>>author:
>>>
>>> 1) Have tried to submit patches
>>> 2) Had them blocked for what you consider trivial reasons
>>> 3) Plan to not contribute further because of this
>>> 4) Still rely on the DPDK for your product
>>>
>>> Please, say something.  People in charge need to know when they're
>>>pushing
>>> contributors away.
>>>
>>> FWIW, I've tried to do some correlation between the git history and the
>>> mailing
>>> list.  I need to do more searches, but I have a feeling that early on,
>>>the
>>> majority of people who stopped contributing, did so because their
>>>patches
>>> weren't expressely blocked, but rather because they were simply
>>>ignored.
>>> No one
>>> working on DPDK bothered to review those patches, and so they never got
>>> merged.
>>> Hopefully that problem has been addressed somewhat now.
>I agree 100%
>>>
>>>> I would like to see some type of layering process to allow patches to
>>>>be
>>>> applied in a timely manner a few weeks not months or completely
>>>>ignored.
>>>> Maybe some type of voting is reasonable, but we need to do something
>>>>to
>>>> turn around the patches in clean reasonable manner.
>>>>
>>>> Think we need some type of group meeting every week to look at the
>>>> patches
>>>> and determining which ones get applied, this gives quick feedback to
>>>>the
>>>> submitter as to the status of the patch.
>>>>
>>> I think a group meeting is going to be way too much overhead to manage
>>> properly.
>>> You'll get different people every week with agenda that may not line up
>>> with
>>> code quality, which is really what the review is meant to provide.  I
>>> think
>> I was only suggesting the maintainers attend the meeting. Of course they
>> have to attend or have someone attend for them, just to get the voting
>> done. If you do not attend then you do not get to vote or something like
>> that is reasonable. Not that we should try and define the process here.
>>
>>> perhaps a better approach would be to require that that code owners
>>>from
>>> the
>>> maintainer file provide and ACK/NAK on their patches within 3-4 days,
>>>and
>>> require a corresponding tree maintainer to apply the patch within 7 or
>>> so.  That
>>> would cap our patch latency.  Likewise, if a patch slips in creating a
>>> regression, the author needs to be alerted and given a time window in
>>> which to
>>> fix the problem before the offending patch is reverted during the QE
>>> cycle.
>>>
>>>
>>>>> On the other side, since user questions, community discussions and
>>>>> development happens in the same mailing list, things get really
>>>>> complicated, specially for users seeking for help. Even though I
>>>>>think
>>>>> the average skills of the users of DPDK is generally higher than in
>>>>> other software projects, if DPDK wants to attract more users, having
>>>>>a
>>>>> better user support is key, IMHO.
>>>>>
>>>>> So I would see with good eyes a separation between, at least,
>>>>>dpdk-user
>>>>> and dpdk-dev.
>>> I wouldn't argue with this separation, seems like a reasonable
>>>approach.
>>>
>>>> I do not remember seeing too many users on the list and making a list
>>>> just
>>>> for then is OK if everyone is fine with a list that has very few
>>>>emails.
>>>>> If the number of patches keeps growing, splitting the "dev" mailing
>>>>> lists into different categories (eal and common, pmds, higher level
>>>>> abstractions...) could be an option. However, this last point opens a
>>>>> lot of questions on how to minimize interference between the
>>>>>different
>>>>> parts and API/ABI compatibility during the development.
>>>> I believe if we just make sure we use tags in the subject line then we
>>>> can
>>>> have our email clients do the splitting of the emails instead of
>>>>adding
>>>> more emails lists.
>>>>
>>> Agreed
>
>I think it is a good idea too. Maybe we can standardize some format e.g.
>[TAG][PATCH vX], or something like that.
>
>>>
>>>>>> Perhaps it means having some ReviewBoard type of tools, a clone in
>>>>>> Github or
>>>>>> Bitbucket where the less hardcore kernel-workflow types could send
>>>> back
>>>>>> their
>>>>>> small bug fixes a bit more easily, this kind of stuff. Google has
>>>> been
>>>>>> getting
>>>>>> good uptake since they moved most of their open source across to
>>>> Github,
>>>>>> because the contribution workflow was more convenient than Google
>>>> Code
>>>>>> was.
>>>> I like GitHub it is a much better designed tool then patchwork, plus
>>>>it
>>>> could get more eyes as it is very well know to the developer community
>>>> in
>>>> general. I feel GitHub has many advantages over the current systems in
>>>> place but, it does not solve the all patch issues.
>>>>
>>> Github is actually a bit irritating for this sort of thing, as it
>>> presumes a web
>>> based interface for discussion.  They have some modicum of email
>>> forwarding
>>> enabled, but it has never quite worked right, or integrated properly.
>
>An alternative to githubs and bitbuckets is a self-hosted forge, like
>gitlab:
>
>https://about.gitlab.com/
>
>To be honest, I mostly work on open-source repositories, and in our
>organization we use only gitlab for private repositories, so I haven't
>played that much with it. But it seems to do its job and has almost all
>of the features of the "community" github, if not more. I don't know if
>you can even integrate it with github's accounts somehow, to prevent to
>have to register.
>
>However, one of the important points of using github/bitbucket is
>visibility and ease the contribution process. By using an self-hosted
>solution, even if it is similar to github and well advertised in DPDK's
>website, you kind of loose part of that advantage.

I would suggest we use GitHub rather than picking yet another, not as well
known, Git repo system, if we decide to change.
>
>> Email forwarding has seemed to work for me and in one case it took a bit
>> to have GitHub stop sending me emails on a repo I did not want anymore
>>:-)
>>>> The only way we can get patch issues resolved is to put a bit more
>>>> process
>>>> in place.
>>>>> Although I agree, we have to be careful on how github or bitbucket is
>>>>> used. Having issues or even (e.g. github) pull requests *in addition*
>>>> to
>>>>> the normal contribution workflow can be a nightmare to deal with, in
>>>>> terms of synchronization and preventing double work. So I guess
>>>>>setting
>>>>> up an official github or bitbucket mirror would be fine, via some
>>>> simple
>>>>> cronjob, but I guess it would end-up not using PRs or issues in
>>>>>github
>>>>> like the Linux kernel does.
>>> 100% agree, we can't be split about this.  Allowing contributions from
>>>n
>>> channels just means most developers will only see/reviews 1/nth of the
>>> patches
>>> of interest to them.
>> If we setup a GitHub or some other site, we would need to make Github
>>the
>> primary site to remove this type of problem IMO.
>
>You mean changing the workflow from email based to issues and pull-req
>or github pull req? Do you really think this is possible?

Yes, I think pull-req is the standard GitHub method as everyone needs a
repo anyway. If we can figure out how to integrate the email patches that
would be great.
>
>>>>  From what I can tell GitHub seems to be a better solution for a free
>>>> open
>>>> environment. Bitbucket I have never used and GitHub seems more popular
>>>> from one article I read.
>>>>
>>>>
>>>> 
>>>>https://www.google.com/webhp?sourceid=chrome-instant&ion=1&espv=2&ie=UT
>>>>F-
>>>> 8#
>>>> q=bitbucket%20vs%20github
>>>>
>>>>
>>>>> Btw, is this github organization already registered by Intel or some
>>>>> other company of the community?
>>>>>
>>>>> https://github.com/dpdk
>>>>>
>> I was hoping someone would own up to the GitHub dpdk site.
>
>Just wanted to know if this was the case. But, even if that would not be
>the case, I *guess* that, as it happens with other services like
>twitter, facebook..., Intel could claim the user, since it has the
>registered trademark.
>
>marc
>
>>
>>>>> Marc
>>>> If we can used the above that would be great, but a name like
>>>> 'dpdk-community' or something could work too.
>>>>
>>>> We can host the web site here and have many sub-projects like
>>>> Pktgen-DPDK
>>>> :-) under the same page. Not to say anything bad about our current web
>>>> pages as I find it difficult to use sometimes and find things like
>>>> patchwork link. Maintaining a web site is a full time job and GitHub
>>>> does
>>>> maintain the site, plus we can collaborate on host web page on the
>>>> GitHub
>>>> site easier.
>>>>
>>>> Moving to the Linux Foundation is an option as well as it is very well
>>>> know and has some nice ways to get your project promoted. It does
>>>>have a
>>>> few drawbacks in process handling and cost to state a few. The process
>>>> model is all ready defined, which is good and bad it just depends on
>>>> your
>>>> needs IMO.
>>>>
>>>> Regards,
>>>> ++Keith
>>>>
>>>>>> Matthew.
>>>>
>


^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] Beyond DPDK 2.0
       [not found]                 ` <D162FA4E.1DED8%keith.wiles@intel.com>
  2015-04-27  9:52  0%               ` Marc Sune
@ 2015-04-27 10:29  0%               ` Neil Horman
  2015-04-27 13:50  0%                 ` Wiles, Keith
  1 sibling, 1 reply; 200+ results
From: Neil Horman @ 2015-04-27 10:29 UTC (permalink / raw)
  To: Wiles, Keith; +Cc: dev

On Mon, Apr 27, 2015 at 01:41:11AM +0000, Wiles, Keith wrote:
> 
> 
> On 4/26/15, 4:56 PM, "Neil Horman" <nhorman@tuxdriver.com> wrote:
> 
> >On Sat, Apr 25, 2015 at 04:08:23PM +0000, Wiles, Keith wrote:
> >> 
> >> 
> >> On 4/25/15, 8:30 AM, "Marc Sune" <marc.sune@bisdn.de> wrote:
> >> 
> >> >
> >> >
> >> >On 24/04/15 19:51, Matthew Hall wrote:
> >> >> On Fri, Apr 24, 2015 at 12:39:47PM -0500, Jay Rolette wrote:
> >> >>> I can tell you that if DPDK were GPL-based, my company wouldn't be
> >> >>>using
> >> >>> it. I suspect we wouldn't be the only ones...
> >> >>>
> >> >>> Jay
> >> >> I could second this, from the past employer where I used it. Right
> >>now
> >> >>I am
> >> >> using it in an open source app, I have a bit of GPL here and there
> >>but
> >> >>I'm
> >> >> trying to get rid of it or confine it to separate address spaces,
> >>where
> >> >>it
> >> >> won't impact the core code written around DPDK, as I don't want to
> >>cause
> >> >> headaches for any downstream users I attract someday.
> >> >>
> >> >> Hard-core GPL would not be possible for most. LGPL could be possible,
> >> >>but I
> >> >> don't think it could be worth the relicensing headache for that small
> >> >>change.
> >> >>
> >> >> Instead we should make the patch process as easy as humanly possible
> >>so
> >> >>people
> >> >> are encouraged to send us the fixes and not cart them around their
> >> >>companies
> >> >> constantly.
> >> 
> >> +1 and besides the GPL or LGPL ship has sailed IMHO and we can not go
> >>back.
> >Actually, IANAL, but I think we can.  The BSD license allows us to fork
> >and
> >relicense the code I think, under GPL or any other license.  I'm not
> >advocating
> >for that mind you, just suggesting that its possible should it ever become
> >needed.
> >
> >> >
> >> >I agree. My feeling is that as the number of patches in the mailing
> >>list
> >> >grows, keeping track of them gets more and more complicated. Patchwork
> >> >website was a way to try to address this issue. I think it was an
> >> >improvement, but to be honest, patchwork lacks a lot of functionality,
> >> >such as properly tracking multiple versions of the patch (superseding
> >> >them automatically), and it lacks some filtering capabilities e.g. per
> >> >user, per tag/label or library, automatically track if it has been
> >> >merged, give an overall status of the pending vs merged patches, set
> >> >milestones... Is there any alternative tool or improved version for
> >>that?
> >> 
> >Agreed, this has come up before, off list unfortunately.  The volume of
> >patches
> >seems to be increasing at such a rate that a single maintainer has
> >difficulty
> >keeping up.  I proposed that the workload be split out to multiple
> >subtrees,
> >with prefixes being added to patch subjects on the list for local
> >filtering to
> >stem the tide.  Specifically I had proposed that the PMD's be split into a
> >separate subtree, but that received pushback in favor of having each
> >library
> >having its own separate subtree, with a pilot program being made out of
> >the I40e
> >driver (which you might note sends pull requests to the list now).  I'd
> >still
> >like to see all PMD's come under a single subtree, but thats likely an
> >argument
> >for later.
> >
> >That said, Do you think that this patch latency is really a contributor
> >to low
> >project participation?  It definately a problem, but it seems to me that
> >this
> >sort of issue would lead to people trying to parcitipate, then giving up
> >(i.e.
> >we would see 1-2 emails from an individual, then not see them again).
> >I'd need
> >to look through the mailing list for such a pattern, but anecdotally I've
> >not
> >seen that happen.  The problem you describe above is definately a
> >problem, but
> >its one for those individuals who are participating, not for those who are
> >simply choosing not to.  And I think we need to address both.
> >
> >> I agree patchwork has some limitation, but I think the biggest issue is
> >> keeping up with the patches. Getting patches introduced into the main
> >>line
> >> is very slow. A patch submitted today may not get applied for weeks or
> >> months, then when another person submits a patch he is starting to run a
> >> very high risk of having to redo that patch, because a pervious patch
> >> makes his fail weeks/months later. I would love to see a better tool
> >>then
> >> patchwork, but the biggest issue is we have a huge backlog of patches.
> >> Personally I am not sure how Thomas or any is able to keep up with the
> >> patches.
> >> 
> >This is absolutely a problem.  I'd like to think, more than a tool like
> >patchwork, a subtree organization to allow some modicum of parallel
> >review and
> >integration would really be a benefit here.
> Subtrees could work, but the real problem I think is the number of
> committers must be higher then one. Something like GitHub (and I assume
> Linux Foundation) have a method to add committers to a project. In the
> case of GitHub they just have to have a free GitHub account and they can
> become committers of the project buying the owner of the project enables
> them.
> 
> On GitHub they have personal accounts and organization accounts I know
> only about the personal accounts, but they allow for 5 private repos and
> any number of public repos. The organization account has a lot of extra
> features that seem better for a DPDK community IMO and should be the one
> we use if we decide it is the right direction. We can always give it a
> shot for while and keep the dpdk.org and use dev@dpdk.org and its repo
> mirrored from GitHub as a transition phase. This way we can fall back to
> dpdk.org or move one to something else if we like.
> 
> https://help.github.com/categories/organizations/
> 
> The developers could still send patches via email list, but creating a
> repo and forking dpdk is easy, then send a pull request.
> 
I'm not opposed to github per se, but nothing described above is unique to
github. There's no reason we can't allow multiple committers to the current tree
as hosted on the current server; we just have to configure it as such.

And FWIW, the assumption is that, with multiple subtrees, you implicitly have
multiple committers, assuming that pull requests from those subtree maintainers
are trusted by the top level tree maintainer.

In fact I feel somewhat better about that model as it provides a nice stairstep
integration path for new features.

Not explicitly opposed to a movement to github, I just feel like it may not
address the problem at hand.

> 
> >
> >> The other problem I see is how patches are agreed on to be included in
> >>the
> >> mainline. Today it is just an ACK or a NAK on the mailing list. Then I
> >>see
> >> what I think to be only a few people ACKing or NAKing patches. This
> >> process has a lot of problems from a patch being ignore for some reason
> >>or
> >> someone having negative feed back on very minor detail or no way to
> >>push a
> >> patch forward a single NAK or comment.
> >> 
> >
> >So, this is an interesting issue in ideal meritocracies.  Currently
> >is/should be
> >looking for ACKs/NAK/s from the individuals listed in the MAINTAINER
> >files, and
> >those people should be the definitive subject matter experts on the code
> >they
> >cover.  As such, I would agrue that they should be entitled to a modicum
> >of
> >stylistic/trivial leeway.  That is to say, if they choose to block a patch
> >around a very minor detail, then between them changing their position,
> >and the
> >patch author changing the code, the latter is likely the easier course of
> >action, especially if the author can't make an argument for their
> >position.
> >That said, if such patch blockage becomes so egregious that individuals
> >stop
> >contributing, that needs to be known as well.  If you as a patch author:
> >
> >1) Have tried to submit patches
> >2) Had them blocked for what you consider trivial reasons
> >3) Plan to not contribute further because of this
> >4) Still rely on the DPDK for your product
> >
> >Please, say something.  People in charge need to know when they're pushing
> >contributors away.
> >
> >FWIW, I've tried to do some correlation between the git history and the
> >mailing
> >list.  I need to do more searches, but I have a feeling that early on, the
> >majority of people who stopped contributing, did so because their patches
> >weren't expressely blocked, but rather because they were simply ignored.
> >No one
> >working on DPDK bothered to review those patches, and so they never got
> >merged.
> >Hopefully that problem has been addressed somewhat now.
> >
> >> I would like to see some type of layering process to allow patches to be
> >> applied in a timely manner a few weeks not months or completely ignored.
> >> Maybe some type of voting is reasonable, but we need to do something to
> >> turn around the patches in clean reasonable manner.
> >> 
> >> Think we need some type of group meeting every week to look at the
> >>patches
> >> and determining which ones get applied, this gives quick feedback to the
> >> submitter as to the status of the patch.
> >> 
> >I think a group meeting is going to be way too much overhead to manage
> >properly.
> >You'll get different people every week with agenda that may not line up
> >with
> >code quality, which is really what the review is meant to provide.  I
> >think
> 
> I was only suggesting the maintainers attend the meeting. Of course they
> have to attend or have someone attend for them, just to get the voting
> done. If you do not attend then you do not get to vote or something like
> that is reasonable. Not that we should try and define the process here.
> 
If you use multiple subtrees, there's no need for a meeting, or any sort of
defined process for voting; there's only an implicitly defined hierarchy of
acceptance in bundled changesets.  If a desire of the community is to see more
efficient review and lower changeset acceptance latency, it seems to me that a
weekly meeting of any sort is somewhat anathema to that.

> >perhaps a better approach would be to require that that code owners from
> >the
> >maintainer file provide and ACK/NAK on their patches within 3-4 days, and
> >require a corresponding tree maintainer to apply the patch within 7 or
> >so.  That
> >would cap our patch latency.  Likewise, if a patch slips in creating a
> >regression, the author needs to be alerted and given a time window in
> >which to
> >fix the problem before the offending patch is reverted during the QE
> >cycle.
> >
> >
> >> >
> >> >On the other side, since user questions, community discussions and
> >> >development happens in the same mailing list, things get really
> >> >complicated, specially for users seeking for help. Even though I think
> >> >the average skills of the users of DPDK is generally higher than in
> >> >other software projects, if DPDK wants to attract more users, having a
> >> >better user support is key, IMHO.
> >> >
> >> >So I would see with good eyes a separation between, at least, dpdk-user
> >> >and dpdk-dev.
> >> 
> >I wouldn't argue with this separation, seems like a reasonable approach.
> >
> >> I do not remember seeing too many users on the list and making a list
> >>just
> >> for then is OK if everyone is fine with a list that has very few emails.
> >> >
> >> >If the number of patches keeps growing, splitting the "dev" mailing
> >> >lists into different categories (eal and common, pmds, higher level
> >> >abstractions...) could be an option. However, this last point opens a
> >> >lot of questions on how to minimize interference between the different
> >> >parts and API/ABI compatibility during the development.
> >> 
> >> I believe if we just make sure we use tags in the subject line then we
> >>can
> >> have our email clients do the splitting of the emails instead of adding
> >> more emails lists.
> >> 
> >Agreed
> >
> >> >
> >> >>
> >> >> Perhaps it means having some ReviewBoard type of tools, a clone in
> >> >>Github or
> >> >> Bitbucket where the less hardcore kernel-workflow types could send
> >>back
> >> >>their
> >> >> small bug fixes a bit more easily, this kind of stuff. Google has
> >>been
> >> >>getting
> >> >> good uptake since they moved most of their open source across to
> >>Github,
> >> >> because the contribution workflow was more convenient than Google
> >>Code
> >> >>was.
> >> 
> >> I like GitHub it is a much better designed tool then patchwork, plus it
> >> could get more eyes as it is very well know to the developer community
> >>in
> >> general. I feel GitHub has many advantages over the current systems in
> >> place but, it does not solve the all patch issues.
> >> 
> >Github is actually a bit irritating for this sort of thing, as it
> >presumes a web
> >based interface for discussion.  They have some modicum of email
> >forwarding
> >enabled, but it has never quite worked right, or integrated properly.
> 
> Email forwarding has seemed to work for me and in one case it took a bit
> to have GitHub stop sending me emails on a repo I did not want anymore :-)

Forwarding works fine, it's responding that doesn't usually work well.  Emails
from pull requests and issues are forwarded from a 'do not reply' address and
responding requires that you visit the github page in a browser.  That's
especially trying with patch review, as there is no real concept of patches on a
list, only pull requests which require that you pull a new branch in and review
it.  Once you have to leave your MUA, your efficiency quickly goes down, which
is what we're trying to avoid here.


> >
> >> The only way we can get patch issues resolved is to put a bit more
> >>process
> >> in place.
> >> >
> >> >Although I agree, we have to be careful on how github or bitbucket is
> >> >used. Having issues or even (e.g. github) pull requests *in addition*
> >>to
> >> >the normal contribution workflow can be a nightmare to deal with, in
> >> >terms of synchronization and preventing double work. So I guess setting
> >> >up an official github or bitbucket mirror would be fine, via some
> >>simple
> >> >cronjob, but I guess it would end-up not using PRs or issues in github
> >> >like the Linux kernel does.
> >> 
> >100% agree, we can't be split about this.  Allowing contributions from n
> >channels just means most developers will only see/reviews 1/nth of the
> >patches
> >of interest to them.
> 
> If we setup a GitHub or some other site, we would need to make Github the
> primary site to remove this type of problem IMO.
> >
> >> From what I can tell GitHub seems to be a better solution for a free
> >>open
> >> environment. Bitbucket I have never used and GitHub seems more popular
> >> from one article I read.
> >> 
> >> 
> >>https://www.google.com/webhp?sourceid=chrome-instant&ion=1&espv=2&ie=UTF-
> >>8#
> >> q=bitbucket%20vs%20github
> >> 
> >> 
> >> >Btw, is this github organization already registered by Intel or some
> >> >other company of the community?
> >> >
> >> >https://github.com/dpdk
> >> >
> 
> I was hoping someone would own up to the GitHub dpdk site.
> 
Hmm, looks almost defunct.  If no one steps up, perhaps reporting abuse for
camping on the name might be worthwhile?  That would at least get their
attention.

Neil

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] Beyond DPDK 2.0
       [not found]                 ` <D162FA4E.1DED8%keith.wiles@intel.com>
@ 2015-04-27  9:52  0%               ` Marc Sune
  2015-04-27 13:39  0%                 ` Wiles, Keith
  2015-04-27 10:29  0%               ` Neil Horman
  1 sibling, 1 reply; 200+ results
From: Marc Sune @ 2015-04-27  9:52 UTC (permalink / raw)
  To: Wiles, Keith, Neil Horman; +Cc: dev



On 27/04/15 03:41, Wiles, Keith wrote:
>
> On 4/26/15, 4:56 PM, "Neil Horman" <nhorman@tuxdriver.com> wrote:
>
>> On Sat, Apr 25, 2015 at 04:08:23PM +0000, Wiles, Keith wrote:
>>>
>>> On 4/25/15, 8:30 AM, "Marc Sune" <marc.sune@bisdn.de> wrote:
>>>
>>>>
>>>> On 24/04/15 19:51, Matthew Hall wrote:
>>>>> On Fri, Apr 24, 2015 at 12:39:47PM -0500, Jay Rolette wrote:
>>>>>> I can tell you that if DPDK were GPL-based, my company wouldn't be
>>>>>> using
>>>>>> it. I suspect we wouldn't be the only ones...
>>>>>>
>>>>>> Jay
>>>>> I could second this, from the past employer where I used it. Right
>>> now
>>>>> I am
>>>>> using it in an open source app, I have a bit of GPL here and there
>>> but
>>>>> I'm
>>>>> trying to get rid of it or confine it to separate address spaces,
>>> where
>>>>> it
>>>>> won't impact the core code written around DPDK, as I don't want to
>>> cause
>>>>> headaches for any downstream users I attract someday.
>>>>>
>>>>> Hard-core GPL would not be possible for most. LGPL could be possible,
>>>>> but I
>>>>> don't think it could be worth the relicensing headache for that small
>>>>> change.
>>>>>
>>>>> Instead we should make the patch process as easy as humanly possible
>>> so
>>>>> people
>>>>> are encouraged to send us the fixes and not cart them around their
>>>>> companies
>>>>> constantly.
>>> +1 and besides the GPL or LGPL ship has sailed IMHO and we can not go
>>> back.
>> Actually, IANAL, but I think we can.  The BSD license allows us to fork
>> and
>> relicense the code I think, under GPL or any other license.  I'm not
>> advocating
>> for that mind you, just suggesting that its possible should it ever become
>> needed.
>>
>>>> I agree. My feeling is that as the number of patches in the mailing
>>> list
>>>> grows, keeping track of them gets more and more complicated. Patchwork
>>>> website was a way to try to address this issue. I think it was an
>>>> improvement, but to be honest, patchwork lacks a lot of functionality,
>>>> such as properly tracking multiple versions of the patch (superseding
>>>> them automatically), and it lacks some filtering capabilities e.g. per
>>>> user, per tag/label or library, automatically track if it has been
>>>> merged, give an overall status of the pending vs merged patches, set
>>>> milestones... Is there any alternative tool or improved version for
>>> that?
>>>
>> Agreed, this has come up before, off list unfortunately.  The volume of
>> patches
>> seems to be increasing at such a rate that a single maintainer has
>> difficulty
>> keeping up.  I proposed that the workload be split out to multiple
>> subtrees,
>> with prefixes being added to patch subjects on the list for local
>> filtering to
>> stem the tide.  Specifically I had proposed that the PMD's be split into a
>> separate subtree, but that received pushback in favor of having each
>> library
>> having its own separate subtree, with a pilot program being made out of
>> the I40e
>> driver (which you might note sends pull requests to the list now).  I'd
>> still
>> like to see all PMD's come under a single subtree, but thats likely an
>> argument
>> for later.
>>
>> That said, Do you think that this patch latency is really a contributor
>> to low
>> project participation?  It definately a problem, but it seems to me that
>> this
>> sort of issue would lead to people trying to parcitipate, then giving up
>> (i.e.
>> we would see 1-2 emails from an individual, then not see them again).
>> I'd need
>> to look through the mailing list for such a pattern, but anecdotally I've
>> not
>> seen that happen.  The problem you describe above is definately a
>> problem, but
>> its one for those individuals who are participating, not for those who are
>> simply choosing not to.  And I think we need to address both.
>>
>>> I agree patchwork has some limitation, but I think the biggest issue is
>>> keeping up with the patches. Getting patches introduced into the main
>>> line
>>> is very slow. A patch submitted today may not get applied for weeks or
>>> months, then when another person submits a patch he is starting to run a
>>> very high risk of having to redo that patch, because a pervious patch
>>> makes his fail weeks/months later. I would love to see a better tool
>>> then
>>> patchwork, but the biggest issue is we have a huge backlog of patches.
>>> Personally I am not sure how Thomas or any is able to keep up with the
>>> patches.
>>>
>> This is absolutely a problem.  I'd like to think, more than a tool like
>> patchwork, a subtree organization to allow some modicum of parallel
>> review and
>> integration would really be a benefit here.
> Subtrees could work, but the real problem I think is the number of
> committers must be higher then one. Something like GitHub (and I assume
> Linux Foundation) have a method to add committers to a project. In the
> case of GitHub they just have to have a free GitHub account and they can
> become committers of the project buying the owner of the project enables
> them.
>
> On GitHub they have personal accounts and organization accounts I know
> only about the personal accounts, but they allow for 5 private repos and
> any number of public repos. The organization account has a lot of extra
> features that seem better for a DPDK community IMO and should be the one
> we use if we decide it is the right direction. We can always give it a
> shot for while and keep the dpdk.org and use dev@dpdk.org and its repo
> mirrored from GitHub as a transition phase. This way we can fall back to
> dpdk.org or move one to something else if we like.
>
> https://help.github.com/categories/organizations/
>
> The developers could still send patches via email list, but creating a
> repo and forking dpdk is easy, then send a pull request.

For the github "community" or free service, organization accounts just 
allow you to set teams, where each team can be assigned to one or more 
repositories. The differences are summarized here:

https://help.github.com/articles/what-s-the-difference-between-user-and-organization-accounts/

And the permission schema, per team, is summarized here:

https://help.github.com/articles/permission-levels-for-an-organization-repository/

Some limitations: i) only if the team has write permissions (IOW push 
permissions) you can manage issues ii) there cannot be per-branch ACLs.

>
>
>>> The other problem I see is how patches are agreed on to be included in
>>> the
>>> mainline. Today it is just an ACK or a NAK on the mailing list. Then I
>>> see
>>> what I think to be only a few people ACKing or NAKing patches. This
>>> process has a lot of problems from a patch being ignore for some reason
>>> or
>>> someone having negative feed back on very minor detail or no way to
>>> push a
>>> patch forward a single NAK or comment.
>>>
>> So, this is an interesting issue in ideal meritocracies.  Currently
>> is/should be
>> looking for ACKs/NAK/s from the individuals listed in the MAINTAINER
>> files, and
>> those people should be the definitive subject matter experts on the code
>> they
>> cover.  As such, I would agrue that they should be entitled to a modicum
>> of
>> stylistic/trivial leeway.  That is to say, if they choose to block a patch
>> around a very minor detail, then between them changing their position,
>> and the
>> patch author changing the code, the latter is likely the easier course of
>> action, especially if the author can't make an argument for their
>> position.
>> That said, if such patch blockage becomes so egregious that individuals
>> stop
>> contributing, that needs to be known as well.  If you as a patch author:
>>
>> 1) Have tried to submit patches
>> 2) Had them blocked for what you consider trivial reasons
>> 3) Plan to not contribute further because of this
>> 4) Still rely on the DPDK for your product
>>
>> Please, say something.  People in charge need to know when they're pushing
>> contributors away.
>>
>> FWIW, I've tried to do some correlation between the git history and the
>> mailing
>> list.  I need to do more searches, but I have a feeling that early on, the
>> majority of people who stopped contributing, did so because their patches
>> weren't expressely blocked, but rather because they were simply ignored.
>> No one
>> working on DPDK bothered to review those patches, and so they never got
>> merged.
>> Hopefully that problem has been addressed somewhat now.
I agree 100%
>>
>>> I would like to see some type of layering process to allow patches to be
>>> applied in a timely manner a few weeks not months or completely ignored.
>>> Maybe some type of voting is reasonable, but we need to do something to
>>> turn around the patches in clean reasonable manner.
>>>
>>> Think we need some type of group meeting every week to look at the
>>> patches
>>> and determining which ones get applied, this gives quick feedback to the
>>> submitter as to the status of the patch.
>>>
>> I think a group meeting is going to be way too much overhead to manage
>> properly.
>> You'll get different people every week with agenda that may not line up
>> with
>> code quality, which is really what the review is meant to provide.  I
>> think
> I was only suggesting the maintainers attend the meeting. Of course they
> have to attend or have someone attend for them, just to get the voting
> done. If you do not attend then you do not get to vote or something like
> that is reasonable. Not that we should try and define the process here.
>
>> perhaps a better approach would be to require that that code owners from
>> the
>> maintainer file provide and ACK/NAK on their patches within 3-4 days, and
>> require a corresponding tree maintainer to apply the patch within 7 or
>> so.  That
>> would cap our patch latency.  Likewise, if a patch slips in creating a
>> regression, the author needs to be alerted and given a time window in
>> which to
>> fix the problem before the offending patch is reverted during the QE
>> cycle.
>>
>>
>>>> On the other side, since user questions, community discussions and
>>>> development happens in the same mailing list, things get really
>>>> complicated, specially for users seeking for help. Even though I think
>>>> the average skills of the users of DPDK is generally higher than in
>>>> other software projects, if DPDK wants to attract more users, having a
>>>> better user support is key, IMHO.
>>>>
>>>> So I would see with good eyes a separation between, at least, dpdk-user
>>>> and dpdk-dev.
>> I wouldn't argue with this separation, seems like a reasonable approach.
>>
>>> I do not remember seeing too many users on the list and making a list
>>> just
>>> for then is OK if everyone is fine with a list that has very few emails.
>>>> If the number of patches keeps growing, splitting the "dev" mailing
>>>> lists into different categories (eal and common, pmds, higher level
>>>> abstractions...) could be an option. However, this last point opens a
>>>> lot of questions on how to minimize interference between the different
>>>> parts and API/ABI compatibility during the development.
>>> I believe if we just make sure we use tags in the subject line then we
>>> can
>>> have our email clients do the splitting of the emails instead of adding
>>> more emails lists.
>>>
>> Agreed

I think it is a good idea too. Maybe we can standardize some format e.g. 
[TAG][PATCH vX], or something like that.

>>
>>>>> Perhaps it means having some ReviewBoard type of tools, a clone in
>>>>> Github or
>>>>> Bitbucket where the less hardcore kernel-workflow types could send
>>> back
>>>>> their
>>>>> small bug fixes a bit more easily, this kind of stuff. Google has
>>> been
>>>>> getting
>>>>> good uptake since they moved most of their open source across to
>>> Github,
>>>>> because the contribution workflow was more convenient than Google
>>> Code
>>>>> was.
>>> I like GitHub it is a much better designed tool then patchwork, plus it
>>> could get more eyes as it is very well know to the developer community
>>> in
>>> general. I feel GitHub has many advantages over the current systems in
>>> place but, it does not solve the all patch issues.
>>>
>> Github is actually a bit irritating for this sort of thing, as it
>> presumes a web
>> based interface for discussion.  They have some modicum of email
>> forwarding
>> enabled, but it has never quite worked right, or integrated properly.

An alternative to githubs and bitbuckets is a self-hosted forge, like 
gitlab:

https://about.gitlab.com/

To be honest, I mostly work on open-source repositories, and in our 
organization we use only gitlab for private repositories, so I haven't 
played that much with it. But it seems to do its job and has almost all 
of the features of the "community" github, if not more. I don't know if 
you can even integrate it with github's accounts somehow, to prevent to 
have to register.

However, one of the important points of using github/bitbucket is 
visibility and easing the contribution process. By using a self-hosted 
solution, even if it is similar to github and well advertised on DPDK's 
website, you kind of lose part of that advantage.

> Email forwarding has seemed to work for me and in one case it took a bit
> to have GitHub stop sending me emails on a repo I did not want anymore :-)
>>> The only way we can get patch issues resolved is to put a bit more
>>> process
>>> in place.
>>>> Although I agree, we have to be careful on how github or bitbucket is
>>>> used. Having issues or even (e.g. github) pull requests *in addition*
>>> to
>>>> the normal contribution workflow can be a nightmare to deal with, in
>>>> terms of synchronization and preventing double work. So I guess setting
>>>> up an official github or bitbucket mirror would be fine, via some
>>> simple
>>>> cronjob, but I guess it would end-up not using PRs or issues in github
>>>> like the Linux kernel does.
>> 100% agree, we can't be split about this.  Allowing contributions from n
>> channels just means most developers will only see/reviews 1/nth of the
>> patches
>> of interest to them.
> If we setup a GitHub or some other site, we would need to make Github the
> primary site to remove this type of problem IMO.

You mean changing the workflow from email based to issues and pull-req 
or github pull req? Do you really think this is possible?

>>>  From what I can tell GitHub seems to be a better solution for a free
>>> open
>>> environment. Bitbucket I have never used and GitHub seems more popular
>>> from one article I read.
>>>
>>>
>>> https://www.google.com/webhp?sourceid=chrome-instant&ion=1&espv=2&ie=UTF-
>>> 8#
>>> q=bitbucket%20vs%20github
>>>
>>>
>>>> Btw, is this github organization already registered by Intel or some
>>>> other company of the community?
>>>>
>>>> https://github.com/dpdk
>>>>
> I was hoping someone would own up to the GitHub dpdk site.

Just wanted to know if this was the case. But, even if that would not be 
the case, I *guess* that, as it happens with other services like 
twitter, facebook..., Intel could claim the user, since it has the 
registered trademark.

marc

>
>>>> Marc
>>> If we can used the above that would be great, but a name like
>>> 'dpdk-community' or something could work too.
>>>
>>> We can host the web site here and have many sub-projects like
>>> Pktgen-DPDK
>>> :-) under the same page. Not to say anything bad about our current web
>>> pages as I find it difficult to use sometimes and find things like
>>> patchwork link. Maintaining a web site is a full time job and GitHub
>>> does
>>> maintain the site, plus we can collaborate on host web page on the
>>> GitHub
>>> site easier.
>>>
>>> Moving to the Linux Foundation is an option as well as it is very well
>>> know and has some nice ways to get your project promoted. It does have a
>>> few drawbacks in process handling and cost to state a few. The process
>>> model is all ready defined, which is good and bad it just depends on
>>> your
>>> needs IMO.
>>>
>>> Regards,
>>> ++Keith
>>>
>>>>> Matthew.
>>>

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] Beyond DPDK 2.0
  2015-04-25 16:08  0%         ` Wiles, Keith
@ 2015-04-26 21:56  0%           ` Neil Horman
       [not found]                 ` <D162FA4E.1DED8%keith.wiles@intel.com>
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-04-26 21:56 UTC (permalink / raw)
  To: Wiles, Keith; +Cc: dev

On Sat, Apr 25, 2015 at 04:08:23PM +0000, Wiles, Keith wrote:
> 
> 
> On 4/25/15, 8:30 AM, "Marc Sune" <marc.sune@bisdn.de> wrote:
> 
> >
> >
> >On 24/04/15 19:51, Matthew Hall wrote:
> >> On Fri, Apr 24, 2015 at 12:39:47PM -0500, Jay Rolette wrote:
> >>> I can tell you that if DPDK were GPL-based, my company wouldn't be
> >>>using
> >>> it. I suspect we wouldn't be the only ones...
> >>>
> >>> Jay
> >> I could second this, from the past employer where I used it. Right now
> >>I am
> >> using it in an open source app, I have a bit of GPL here and there but
> >>I'm
> >> trying to get rid of it or confine it to separate address spaces, where
> >>it
> >> won't impact the core code written around DPDK, as I don't want to cause
> >> headaches for any downstream users I attract someday.
> >>
> >> Hard-core GPL would not be possible for most. LGPL could be possible,
> >>but I
> >> don't think it could be worth the relicensing headache for that small
> >>change.
> >>
> >> Instead we should make the patch process as easy as humanly possible so
> >>people
> >> are encouraged to send us the fixes and not cart them around their
> >>companies
> >> constantly.
> 
> +1 and besides the GPL or LGPL ship has sailed IMHO and we can not go back.
Actually, IANAL, but I think we can.  The BSD license allows us to fork and
relicense the code I think, under GPL or any other license.  I'm not advocating
for that mind you, just suggesting that its possible should it ever become
needed.

> >
> >I agree. My feeling is that as the number of patches in the mailing list
> >grows, keeping track of them gets more and more complicated. Patchwork
> >website was a way to try to address this issue. I think it was an
> >improvement, but to be honest, patchwork lacks a lot of functionality,
> >such as properly tracking multiple versions of the patch (superseding
> >them automatically), and it lacks some filtering capabilities e.g. per
> >user, per tag/label or library, automatically track if it has been
> >merged, give an overall status of the pending vs merged patches, set
> >milestones... Is there any alternative tool or improved version for that?
> 
Agreed, this has come up before, off list unfortunately.  The volume of patches
seems to be increasing at such a rate that a single maintainer has difficulty
keeping up.  I proposed that the workload be split out to multiple subtrees,
with prefixes being added to patch subjects on the list for local filtering to
stem the tide.  Specifically I had proposed that the PMD's be split into a
separate subtree, but that received pushback in favor of having each library
having its own separate subtree, with a pilot program being made out of the I40e
driver (which you might note sends pull requests to the list now).  I'd still
like to see all PMD's come under a single subtree, but thats likely an argument
for later.

That said, do you think that this patch latency is really a contributor to low
project participation?  It's definitely a problem, but it seems to me that this
sort of issue would lead to people trying to participate, then giving up (i.e.
we would see 1-2 emails from an individual, then not see them again).  I'd need
to look through the mailing list for such a pattern, but anecdotally I've not
seen that happen.  The problem you describe above is definitely a problem, but
it's one for those individuals who are participating, not for those who are
simply choosing not to.  And I think we need to address both.

> I agree patchwork has some limitation, but I think the biggest issue is
> keeping up with the patches. Getting patches introduced into the main line
> is very slow. A patch submitted today may not get applied for weeks or
> months, then when another person submits a patch he is starting to run a
> very high risk of having to redo that patch, because a previous patch
> makes his fail weeks/months later. I would love to see a better tool than
> patchwork, but the biggest issue is we have a huge backlog of patches.
> Personally I am not sure how Thomas or any is able to keep up with the
> patches.
> 
This is absolutely a problem.  I'd like to think, more than a tool like
patchwork, a subtree organization to allow some modicum of parallel review and
integration would really be a benefit here.

> The other problem I see is how patches are agreed on to be included in the
> mainline. Today it is just an ACK or a NAK on the mailing list. Then I see
> what I think to be only a few people ACKing or NAKing patches. This
> process has a lot of problems from a patch being ignore for some reason or
> someone having negative feed back on very minor detail or no way to push a
> patch forward a single NAK or comment.
> 

So, this is an interesting issue in ideal meritocracies.  Currently is/should be
looking for ACKs/NAKs from the individuals listed in the MAINTAINER files, and
those people should be the definitive subject matter experts on the code they
cover.  As such, I would argue that they should be entitled to a modicum of
stylistic/trivial leeway.  That is to say, if they choose to block a patch
around a very minor detail, then between them changing their position, and the
patch author changing the code, the latter is likely the easier course of
action, especially if the author can't make an argument for their position.
That said, if such patch blockage becomes so egregious that individuals stop
contributing, that needs to be known as well.  If you as a patch author:

1) Have tried to submit patches 
2) Had them blocked for what you consider trivial reasons
3) Plan to not contribute further because of this
4) Still rely on the DPDK for your product

Please, say something.  People in charge need to know when they're pushing
contributors away.

FWIW, I've tried to do some correlation between the git history and the mailing
list.  I need to do more searches, but I have a feeling that early on, the
majority of people who stopped contributing, did so because their patches
weren't expressly blocked, but rather because they were simply ignored.  No one
working on DPDK bothered to review those patches, and so they never got merged.
Hopefully that problem has been addressed somewhat now.

> I would like to see some type of layering process to allow patches to be
> applied in a timely manner a few weeks not months or completely ignored.
> Maybe some type of voting is reasonable, but we need to do something to
> turn around the patches in clean reasonable manner.
> 
> Think we need some type of group meeting every week to look at the patches
> and determining which ones get applied, this gives quick feedback to the
> submitter as to the status of the patch.
> 
I think a group meeting is going to be way too much overhead to manage properly.
You'll get different people every week with agenda that may not line up with
code quality, which is really what the review is meant to provide.  I think
perhaps a better approach would be to require that code owners from the
maintainer file provide an ACK/NAK on their patches within 3-4 days, and
require a corresponding tree maintainer to apply the patch within 7 or so.  That
would cap our patch latency.  Likewise, if a patch slips in creating a
regression, the author needs to be alerted and given a time window in which to
fix the problem before the offending patch is reverted during the QE cycle.


> >
> >On the other side, since user questions, community discussions and
> >development happens in the same mailing list, things get really
> >complicated, specially for users seeking for help. Even though I think
> >the average skills of the users of DPDK is generally higher than in
> >other software projects, if DPDK wants to attract more users, having a
> >better user support is key, IMHO.
> >
> >So I would see with good eyes a separation between, at least, dpdk-user
> >and dpdk-dev.
> 
I wouldn't argue with this separation, seems like a reasonable approach.

> I do not remember seeing too many users on the list and making a list just
> for then is OK if everyone is fine with a list that has very few emails.
> >
> >If the number of patches keeps growing, splitting the "dev" mailing
> >lists into different categories (eal and common, pmds, higher level
> >abstractions...) could be an option. However, this last point opens a
> >lot of questions on how to minimize interference between the different
> >parts and API/ABI compatibility during the development.
> 
> I believe if we just make sure we use tags in the subject line then we can
> have our email clients do the splitting of the emails instead of adding
> more emails lists.
> 
Agreed

> >
> >>
> >> Perhaps it means having some ReviewBoard type of tools, a clone in
> >>Github or
> >> Bitbucket where the less hardcore kernel-workflow types could send back
> >>their
> >> small bug fixes a bit more easily, this kind of stuff. Google has been
> >>getting
> >> good uptake since they moved most of their open source across to Github,
> >> because the contribution workflow was more convenient than Google Code
> >>was.
> 
> I like GitHub it is a much better designed tool then patchwork, plus it
> could get more eyes as it is very well know to the developer community in
> general. I feel GitHub has many advantages over the current systems in
> place but, it does not solve the all patch issues.
> 
Github is actually a bit irritating for this sort of thing, as it presumes a web
based interface for discussion.  They have some modicum of email forwarding
enabled, but it has never quite worked right, or integrated properly.

> The only way we can get patch issues resolved is to put a bit more process
> in place.
> >
> >Although I agree, we have to be careful on how github or bitbucket is
> >used. Having issues or even (e.g. github) pull requests *in addition* to
> >the normal contribution workflow can be a nightmare to deal with, in
> >terms of synchronization and preventing double work. So I guess setting
> >up an official github or bitbucket mirror would be fine, via some simple
> >cronjob, but I guess it would end-up not using PRs or issues in github
> >like the Linux kernel does.
> 
100% agree, we can't be split about this.  Allowing contributions from n
channels just means most developers will only see/reviews 1/nth of the patches
of interest to them.

> From what I can tell GitHub seems to be a better solution for a free open
> environment. Bitbucket I have never used and GitHub seems more popular
> from one article I read.
> 
> https://www.google.com/webhp?sourceid=chrome-instant&ion=1&espv=2&ie=UTF-8#
> q=bitbucket%20vs%20github
> 
> 
> >Btw, is this github organization already registered by Intel or some
> >other company of the community?
> >
> >https://github.com/dpdk
> >
> >Marc
> 
> If we can used the above that would be great, but a name like
> 'dpdk-community' or something could work too.
> 
> We can host the web site here and have many sub-projects like Pktgen-DPDK
> :-) under the same page. Not to say anything bad about our current web
> pages as I find it difficult to use sometimes and find things like
> patchwork link. Maintaining a web site is a full time job and GitHub does
> maintain the site, plus we can collaborate on host web page on the GitHub
> site easier.
> 
> Moving to the Linux Foundation is an option as well as it is very well
> know and has some nice ways to get your project promoted. It does have a
> few drawbacks in process handling and cost to state a few. The process
> model is all ready defined, which is good and bad it just depends on your
> needs IMO.
> 
> Regards,
> ++Keith
> 
> >
> >>
> >> Matthew.
> >
> 
> 

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  2015-04-25 13:02  0%                       ` Neil Horman
@ 2015-04-26  0:09  0%                         ` Ravi Kerur
  2015-04-27 13:44  0%                           ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Ravi Kerur @ 2015-04-26  0:09 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On Sat, Apr 25, 2015 at 6:02 AM, Neil Horman <nhorman@tuxdriver.com> wrote:

> On Sat, Apr 25, 2015 at 08:32:42AM -0400, Neil Horman wrote:
> > On Fri, Apr 24, 2015 at 06:45:06PM -0700, Ravi Kerur wrote:
> > > On Fri, Apr 24, 2015 at 2:24 PM, Ravi Kerur <rkerur@gmail.com> wrote:
> > >
> > > >
> > > >
> > > > On Fri, Apr 24, 2015 at 12:51 PM, Neil Horman <nhorman@tuxdriver.com
> >
> > > > wrote:
> > > >
> > > >> On Fri, Apr 24, 2015 at 12:21:23PM -0700, Ravi Kerur wrote:
> > > >> > On Fri, Apr 24, 2015 at 11:53 AM, Neil Horman <
> nhorman@tuxdriver.com>
> > > >> wrote:
> > > >> >
> > > >> > > On Fri, Apr 24, 2015 at 09:45:24AM -0700, Ravi Kerur wrote:
> > > >> > > > On Fri, Apr 24, 2015 at 8:22 AM, Neil Horman <
> nhorman@tuxdriver.com
> > > >> >
> > > >> > > wrote:
> > > >> > > >
> > > >> > > > > On Fri, Apr 24, 2015 at 08:14:04AM -0700, Ravi Kerur wrote:
> > > >> > > > > > On Fri, Apr 24, 2015 at 6:51 AM, Neil Horman <
> > > >> nhorman@tuxdriver.com>
> > > >> > > > > wrote:
> > > >> > > > > >
> > > >> > > > > > > On Thu, Apr 23, 2015 at 02:35:31PM -0700, Ravi Kerur
> wrote:
> > > >> > > > > > > > Changes in v7
> > > >> > > > > > > > Remove _setname_ pthread calls.
> > > >> > > > > > > > Use rte_gettid() API in RTE_LOG to print thread_id.
> > > >> > > > > > > >
> > > >> > > > > > > > Changes in v6
> > > >> > > > > > > > Remove RTE_EXEC_ENV_BSDAPP from eal_common_thread.c
> file.
> > > >> > > > > > > > Add pthread_setname_np/pthread_set_name_np for
> Linux/FreeBSD
> > > >> > > > > > > > respectively. Plan to use _getname_ in RTE_LOG when
> > > >> available.
> > > >> > > > > > > > Use existing rte_get_systid() in RTE_LOG to print
> thread_id.
> > > >> > > > > > > >
> > > >> > > > > > > > Changes in v5
> > > >> > > > > > > > Rebase to latest code.
> > > >> > > > > > > >
> > > >> > > > > > > > Changes in v4
> > > >> > > > > > > > None
> > > >> > > > > > > >
> > > >> > > > > > > > Changes in v3
> > > >> > > > > > > > Changed subject to be more explicit on file name
> inclusion.
> > > >> > > > > > > >
> > > >> > > > > > > > Changes in v2
> > > >> > > > > > > > None
> > > >> > > > > > > >
> > > >> > > > > > > > Changes in v1
> > > >> > > > > > > > eal_thread.c has minor differences between Linux and
> BSD,
> > > >> move
> > > >> > > > > > > > entire file into common directory.
> > > >> > > > > > > > Use RTE_EXEC_ENV_BSDAPP to differentiate on minor
> > > >> differences.
> > > >> > > > > > > > Rename eal_thread.c to eal_common_thread.c
> > > >> > > > > > > > Makefile changes to reflect file move and name change.
> > > >> > > > > > > > Fix checkpatch warnings.
> > > >> > > > > > > >
> > > >> > > > > > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > >> > > > > > > > ---
> > > >> > > > > > > >  lib/librte_eal/bsdapp/eal/Makefile        |   2 +-
> > > >> > > > > > > >  lib/librte_eal/bsdapp/eal/eal_thread.c    | 152
> > > >> > > > > > > ------------------------------
> > > >> > > > > > > >  lib/librte_eal/common/eal_common_thread.c | 147
> > > >> > > > > > > ++++++++++++++++++++++++++++-
> > > >> > > > > > > >  lib/librte_eal/linuxapp/eal/eal_thread.c  | 152
> > > >> > > > > > > +-----------------------------
> > > >> > > > > > > >  4 files changed, 148 insertions(+), 305 deletions(-)
> > > >> > > > > > > >
> > > >> > > > > > > > diff --git a/lib/librte_eal/bsdapp/eal/Makefile
> > > >> > > > > > > b/lib/librte_eal/bsdapp/eal/Makefile
> > > >> > > > > > > > index 2357cfa..55971b9 100644
> > > >> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/Makefile
> > > >> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/Makefile
> > > >> > > > > > > > @@ -87,7 +87,7 @@ CFLAGS_eal_common_log.o :=
> -D_GNU_SOURCE
> > > >> > > > > > > >  # workaround for a gcc bug with noreturn attribute
> > > >> > > > > > > >  # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
> > > >> > > > > > > >  ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
> > > >> > > > > > > > -CFLAGS_eal_thread.o += -Wno-return-type
> > > >> > > > > > > > +CFLAGS_eal_common_thread.o += -Wno-return-type
> > > >> > > > > > > >  CFLAGS_eal_hpet.o += -Wno-return-type
> > > >> > > > > > > >  endif
> > > >> > > > > > > >
> > > >> > > > > > > > diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > >> > > > > > > b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > >> > > > > > > > index 9a03437..5714b8f 100644
> > > >> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > >> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > >> > > > > > > > @@ -35,163 +35,11 @@
> > > >> > > > > > > >  #include <stdio.h>
> > > >> > > > > > > >  #include <stdlib.h>
> > > >> > > > > > > >  #include <stdint.h>
> > > >> > > > > > > > -#include <unistd.h>
> > > >> > > > > > > > -#include <sched.h>
> > > >> > > > > > > > -#include <pthread_np.h>
> > > >> > > > > > > > -#include <sys/queue.h>
> > > >> > > > > > > >  #include <sys/thr.h>
> > > >> > > > > > > >
> > > >> > > > > > > > -#include <rte_debug.h>
> > > >> > > > > > > > -#include <rte_atomic.h>
> > > >> > > > > > > > -#include <rte_launch.h>
> > > >> > > > > > > > -#include <rte_log.h>
> > > >> > > > > > > > -#include <rte_memory.h>
> > > >> > > > > > > > -#include <rte_memzone.h>
> > > >> > > > > > > > -#include <rte_per_lcore.h>
> > > >> > > > > > > > -#include <rte_eal.h>
> > > >> > > > > > > > -#include <rte_per_lcore.h>
> > > >> > > > > > > > -#include <rte_lcore.h>
> > > >> > > > > > > > -
> > > >> > > > > > > >  #include "eal_private.h"
> > > >> > > > > > > >  #include "eal_thread.h"
> > > >> > > > > > > >
> > > >> > > > > > > > -RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) =
> LCORE_ID_ANY;
> > > >> > > > > > > NAK, these are exported symbols, you can't remove them
> without
> > > >> > > going
> > > >> > > > > > > through the
> > > >> > > > > > > deprecation process.
> > > >> > > > > > >
> > > >> > > > > > >
> > > >> > > > > > They are not removed/deleted, they are moved from
> eal_thread.c
> > > >> to
> > > >> > > > > > eal_common_thread.c file since it is common to both Linux
> and
> > > >> BSD.
> > > >> > > > > >
> > > >> > > > > Then perhaps you forgot to export the symbol?  Its showing
> up as
> > > >> > > removed
> > > >> > > > > on the
> > > >> > > > > ABI checker utility.
> > > >> > > > >
> > > >> > > > > Neil
> > > >> > > > >
> > > >> > > >
> > > >> > > > Can you please show me in the current code where it is being
> > > >> exported? I
> > > >> > > > have only moved definitions to _common_ files, not sure why it
> > > >> should be
> > > >> > > > exported now.  I searched in the current code for
> > > >> RTE_DEFINE_PER_LCORE
> > > >> > > >
> > > >> > > > #home/rkerur/dpdk-tmp/dpdk# grep -ir RTE_DEFINE_PER_LCORE *
> > > >> > > > app/test/test_per_lcore.c:static
> RTE_DEFINE_PER_LCORE(unsigned,
> > > >> test) =
> > > >> > > > 0x12345678;
> > > >> > > >
> > > >>
> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > >> > > > _lcore_id) = LCORE_ID_ANY;
> > > >> > > >
> > > >>
> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > >> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > > >> > > >
> > > >> > >
> > > >>
> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > > >> > > > _cpuset);
> > > >> > > >
> > > >>
> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > >> > > > _lcore_id) = LCORE_ID_ANY;
> > > >> > > >
> > > >>
> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > >> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > > >> > > >
> > > >>
> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > > >> > > > _cpuset);
> > > >> > > > lib/librte_eal/common/include/rte_per_lcore.h:#define
> > > >> > > > RTE_DEFINE_PER_LCORE(type, name)            \
> > > >> > > > lib/librte_eal/common/include/rte_eal.h:    static
> > > >> > > > RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
> > > >> > > >
> lib/librte_eal/common/eal_common_errno.c:RTE_DEFINE_PER_LCORE(int,
> > > >> > > > _rte_errno);
> > > >> > > > lib/librte_eal/common/eal_common_errno.c:    static
> > > >> > > > RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
> > > >> > > >
> > > >> > > >
> > > >> > > > > > Thanks
> > > >> > > > > > Ravi
> > > >> > > > > >
> > > >> > > > > > Regards
> > > >> > > > > > > Neil
> > > >> > > > > > >
> > > >> > > > > > >
> > > >> > > > >
> > > >> > > Its exported in the version map file:
> > > >> > >  per_lcore__lcore_id;
> > > >> > >
> > > >> > >
> > > >> > Thanks Neil, I checked and both linux and bsd rte_eal_version.map
> have
> > > >> it.
> > > >> > I compared .map file between "changed code" and the original,
> they are
> > > >> same
> > > >> > for both linux and bsd. In fact you had ACK'd v4 version of this
> patch
> > > >> > series and no major changes after that. Please let me know if I
> missed
> > > >> > something.
> > > >> >
> > > >> I did, and I'm retracting that, because I didn't think to check the
> ABI
> > > >> compatibility on this.  But I ran it through the ABI checking
> script
> > > >> this and
> > > >> this error popped out.  You should run it as well, its in the
> scripts
> > > >> directory.
> > > >>
> > > >>
> > > >> I see in your first patch you removed it and re-added it in the
> common
> > > >> section.
> > > >> But something about how its building is causing it to not show up
> as an
> > > >> exported
> > > >> symbol, which is problematic, as other applications are going to
> want
> > > >> access to
> > > >> it.
> > > >>
> > > >> It also possible that the ABI checker is throwing a false positive,
> but
> > > >> either
> > > >> way, it needs to be looked into prior to moving forward with this.
> > > >>
> > > >>
> > > > I did following things.
> > > >
> > > > Put a tag (v2.0.0-before-common-eal)  before EAL common functions
> changes
> > > > for commit (3c0c807038ad642f4be7deb9370293c39d12f029 net: remove
> unneeded
> > > > include)
> > > >
> > > > Put a tag (v2.0.0-common-eal) after EAL common functions changes for
> > > > commit (25737e5a7212630a7b5d8ca756860a062f403789 Move common
> functions in
> > > > eal_pci.c)
> > > >
> > > > Ran validate-abi against x86_64-native-linuxapp-gcc and
> > > >
> > > > v2.0.0-rc3 and v2.0.0-before-common-eal, html report for
> librte_eal.so
> > > > shows removed symbols for "per_lcore__cpuset"
> > > >
> > > > v2.0.0-rc3 and v2.0.0-common-eal, html report for librte_eal.so shows
> > > > removed symbols for "per_lcore__cpuset"
> > > >
> > > > Removed symbol is different from what you have reported and in my
> case I
> > > > see it even before my commit. If you are interested I can unicast
> you html
> > > > report file. Please let me know how to proceed.
> > > >
> > > >
> > >
> > > I did some experiment and found some interesting things.  I will take
> eal.c
> > > as an example
> > >
> > > eal.c is split into eal_common_sysfs.c eal_common_mem_cfg.c
> > > eal_common_proc_type.c and eal_common_app_usage.c. In
> linuxapp/eal/Makefile
> > > if I compile new files right after eal.c as shown below
> > >
> > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) := eal.c
> > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_sysfs.c
> > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_mem_cfg.c
> > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_proc_type.c
> > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_app_usage.c
> > > ...
> > >
> > > validate-abi results matches baseline. Instead if i place new _common_
> > > files in common area in linuxapp/eal/Makefile as shown below
> > >
> > > # from common dir
> > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
> > > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_log.c
> > > ...
> > >
> > > validate-abi reports problem in binary compatibility and source
> > > compatiblity
> > >
> > > eal_filesystem.h, librte_eal.so.1
> > >  [+] eal_parse_sysfs_value ( char const* filename, unsigned long* val )
> > >  @@ DPDK_2.0 (2)
> > >
> > > I believe files in common and linuxapp directory are compiled same way
> so
> > > not sure why placement in makefile makes difference.
> > >
> > > Could this be false-positive from validate-abi script??
> > >
> > It could be, yes.  Though I'm more inclined to think that perhaps in the
> new
> > version of the code we're not generating the same DWARF information out
> of it.
> > In fact for some reason, I've checked both the build before and after
> your
> > patch series, and the exported CFLAGS aren't getting passed to the build
> > properly, implying that we're not building all the code in the validator
> with
> > the -g flag, which the validator need to function properly.  I'm looking
> into
> > that
> > Neil
> >
> >
> Found the problem, I was stupidly reading the report incorrectly.  The
> problem
> regarding _lcore_id is a source compatibility issue (because the symbol
> moved to
> a new location), which is irrelevant to us.  Its not in any way a binary
> compat
> problem, which is what we care about.  Sorry for the noise.
>
> I do still have a few concerns about some changed calling conventions with
> a few
> other functions, which I'll look into on monday.
>
>
Please let me know your inputs on changed calling conventions. Most of them
can be fixed by re-arranging moved code in _common_ files and order of
compilation.

Thanks,
Ravi

Regards
> Neil
>
>

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] Beyond DPDK 2.0
  2015-04-25 13:30  3%       ` Marc Sune
@ 2015-04-25 16:08  0%         ` Wiles, Keith
  2015-04-26 21:56  0%           ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Wiles, Keith @ 2015-04-25 16:08 UTC (permalink / raw)
  To: Marc Sune, dev



On 4/25/15, 8:30 AM, "Marc Sune" <marc.sune@bisdn.de> wrote:

>
>
>On 24/04/15 19:51, Matthew Hall wrote:
>> On Fri, Apr 24, 2015 at 12:39:47PM -0500, Jay Rolette wrote:
>>> I can tell you that if DPDK were GPL-based, my company wouldn't be
>>>using
>>> it. I suspect we wouldn't be the only ones...
>>>
>>> Jay
>> I could second this, from the past employer where I used it. Right now
>>I am
>> using it in an open source app, I have a bit of GPL here and there but
>>I'm
>> trying to get rid of it or confine it to separate address spaces, where
>>it
>> won't impact the core code written around DPDK, as I don't want to cause
>> headaches for any downstream users I attract someday.
>>
>> Hard-core GPL would not be possible for most. LGPL could be possible,
>>but I
>> don't think it could be worth the relicensing headache for that small
>>change.
>>
>> Instead we should make the patch process as easy as humanly possible so
>>people
>> are encouraged to send us the fixes and not cart them around their
>>companies
>> constantly.

+1 and besides the GPL or LGPL ship has sailed IMHO and we can not go back.
>
>I agree. My feeling is that as the number of patches in the mailing list
>grows, keeping track of them gets more and more complicated. Patchwork
>website was a way to try to address this issue. I think it was an
>improvement, but to be honest, patchwork lacks a lot of functionality,
>such as properly tracking multiple versions of the patch (superseding
>them automatically), and it lacks some filtering capabilities e.g. per
>user, per tag/label or library, automatically track if it has been
>merged, give an overall status of the pending vs merged patches, set
>milestones... Is there any alternative tool or improved version for that?

I agree patchwork has some limitations, but I think the biggest issue is
keeping up with the patches. Getting patches introduced into the main line
is very slow. A patch submitted today may not get applied for weeks or
months, then when another person submits a patch he is starting to run a
very high risk of having to redo that patch, because a previous patch
makes his fail weeks/months later. I would love to see a better tool than
patchwork, but the biggest issue is we have a huge backlog of patches.
Personally I am not sure how Thomas or anyone is able to keep up with the
patches.

The other problem I see is how patches are agreed on to be included in the
mainline. Today it is just an ACK or a NAK on the mailing list. Then I see
what I think to be only a few people ACKing or NAKing patches. This
process has a lot of problems, from a patch being ignored for some reason, to
someone having negative feedback on a very minor detail, to there being no way
to push a patch forward after a single NAK or comment.

I would like to see some type of layering process to allow patches to be
applied in a timely manner a few weeks not months or completely ignored.
Maybe some type of voting is reasonable, but we need to do something to
turn around the patches in clean reasonable manner.

Think we need some type of group meeting every week to look at the patches
and determining which ones get applied, this gives quick feedback to the
submitter as to the status of the patch.

>
>On the other side, since user questions, community discussions and
>development happens in the same mailing list, things get really
>complicated, specially for users seeking for help. Even though I think
>the average skills of the users of DPDK is generally higher than in
>other software projects, if DPDK wants to attract more users, having a
>better user support is key, IMHO.
>
>So I would see with good eyes a separation between, at least, dpdk-user
>and dpdk-dev.

I do not remember seeing too many users on the list and making a list just
for them is OK if everyone is fine with a list that has very few emails.
>
>If the number of patches keeps growing, splitting the "dev" mailing
>lists into different categories (eal and common, pmds, higher level
>abstractions...) could be an option. However, this last point opens a
>lot of questions on how to minimize interference between the different
>parts and API/ABI compatibility during the development.

I believe if we just make sure we use tags in the subject line then we can
have our email clients do the splitting of the emails instead of adding
more emails lists.

>
>>
>> Perhaps it means having some ReviewBoard type of tools, a clone in
>>Github or
>> Bitbucket where the less hardcore kernel-workflow types could send back
>>their
>> small bug fixes a bit more easily, this kind of stuff. Google has been
>>getting
>> good uptake since they moved most of their open source across to Github,
>> because the contribution workflow was more convenient than Google Code
>>was.

I like GitHub; it is a much better designed tool than patchwork, plus it
could get more eyes as it is very well known to the developer community in
general. I feel GitHub has many advantages over the current systems in
place, but it does not solve all the patch issues.

The only way we can get patch issues resolved is to put a bit more process
in place.
>
>Although I agree, we have to be careful on how github or bitbucket is
>used. Having issues or even (e.g. github) pull requests *in addition* to
>the normal contribution workflow can be a nightmare to deal with, in
>terms of synchronization and preventing double work. So I guess setting
>up an official github or bitbucket mirror would be fine, via some simple
>cronjob, but I guess it would end-up not using PRs or issues in github
>like the Linux kernel does.

From what I can tell GitHub seems to be a better solution for a free open
environment. Bitbucket I have never used and GitHub seems more popular
from one article I read.

https://www.google.com/webhp?sourceid=chrome-instant&ion=1&espv=2&ie=UTF-8#
q=bitbucket%20vs%20github


>Btw, is this github organization already registered by Intel or some
>other company of the community?
>
>https://github.com/dpdk
>
>Marc

If we can use the above that would be great, but a name like
'dpdk-community' or something could work too.

We can host the web site here and have many sub-projects like Pktgen-DPDK
:-) under the same page. Not to say anything bad about our current web
pages as I find it difficult to use sometimes and find things like
patchwork link. Maintaining a web site is a full time job and GitHub does
maintain the site, plus we can collaborate on host web page on the GitHub
site easier.

Moving to the Linux Foundation is an option as well, as it is very well
known and has some nice ways to get your project promoted. It does have a
few drawbacks in process handling and cost, to name a few. The process
model is already defined, which is good and bad; it just depends on your
needs IMO.

Regards,
++Keith

>
>>
>> Matthew.
>

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] Beyond DPDK 2.0
  @ 2015-04-25 13:30  3%       ` Marc Sune
  2015-04-25 16:08  0%         ` Wiles, Keith
  0 siblings, 1 reply; 200+ results
From: Marc Sune @ 2015-04-25 13:30 UTC (permalink / raw)
  To: dev



On 24/04/15 19:51, Matthew Hall wrote:
> On Fri, Apr 24, 2015 at 12:39:47PM -0500, Jay Rolette wrote:
>> I can tell you that if DPDK were GPL-based, my company wouldn't be using
>> it. I suspect we wouldn't be the only ones...
>>
>> Jay
> I could second this, from the past employer where I used it. Right now I am
> using it in an open source app, I have a bit of GPL here and there but I'm
> trying to get rid of it or confine it to separate address spaces, where it
> won't impact the core code written around DPDK, as I don't want to cause
> headaches for any downstream users I attract someday.
>
> Hard-core GPL would not be possible for most. LGPL could be possible, but I
> don't think it could be worth the relicensing headache for that small change.
>
> Instead we should make the patch process as easy as humanly possible so people
> are encouraged to send us the fixes and not cart them around their companies
> constantly.

I agree. My feeling is that as the number of patches in the mailing list 
grows, keeping track of them gets more and more complicated. Patchwork 
website was a way to try to address this issue. I think it was an 
improvement, but to be honest, patchwork lacks a lot of functionality, 
such as properly tracking multiple versions of the patch (superseding 
them automatically), and it lacks some filtering capabilities e.g. per 
user, per tag/label or library, automatically track if it has been 
merged, give an overall status of the pending vs merged patches, set 
milestones... Is there any alternative tool or improved version for that?

On the other side, since user questions, community discussions and 
development happens in the same mailing list, things get really 
complicated, specially for users seeking for help. Even though I think 
the average skills of the users of DPDK is generally higher than in 
other software projects, if DPDK wants to attract more users, having a 
better user support is key, IMHO.

So I would see with good eyes a separation between, at least, dpdk-user 
and dpdk-dev.

If the number of patches keeps growing, splitting the "dev" mailing 
lists into different categories (eal and common, pmds, higher level 
abstractions...) could be an option. However, this last point opens a 
lot of questions on how to minimize interference between the different 
parts and API/ABI compatibility during the development.

>
> Perhaps it means having some ReviewBoard type of tools, a clone in Github or
> Bitbucket where the less hardcore kernel-workflow types could send back their
> small bug fixes a bit more easily, this kind of stuff. Google has been getting
> good uptake since they moved most of their open source across to Github,
> because the contribution workflow was more convenient than Google Code was.

Although I agree, we have to be careful on how github or bitbucket is 
used. Having issues or even (e.g. github) pull requests *in addition* to 
the normal contribution workflow can be a nightmare to deal with, in 
terms of synchronization and preventing double work. So I guess setting 
up an official github or bitbucket mirror would be fine, via some simple 
cronjob, but I guess it would end-up not using PRs or issues in github 
like the Linux kernel does.

Btw, is this github organization already registered by Intel or some 
other company of the community?

https://github.com/dpdk

Marc

>
> Matthew.

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  2015-04-25 12:32  0%                     ` Neil Horman
@ 2015-04-25 13:02  0%                       ` Neil Horman
  2015-04-26  0:09  0%                         ` Ravi Kerur
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-04-25 13:02 UTC (permalink / raw)
  To: Ravi Kerur; +Cc: dev

On Sat, Apr 25, 2015 at 08:32:42AM -0400, Neil Horman wrote:
> On Fri, Apr 24, 2015 at 06:45:06PM -0700, Ravi Kerur wrote:
> > On Fri, Apr 24, 2015 at 2:24 PM, Ravi Kerur <rkerur@gmail.com> wrote:
> > 
> > >
> > >
> > > On Fri, Apr 24, 2015 at 12:51 PM, Neil Horman <nhorman@tuxdriver.com>
> > > wrote:
> > >
> > >> On Fri, Apr 24, 2015 at 12:21:23PM -0700, Ravi Kerur wrote:
> > >> > On Fri, Apr 24, 2015 at 11:53 AM, Neil Horman <nhorman@tuxdriver.com>
> > >> wrote:
> > >> >
> > >> > > On Fri, Apr 24, 2015 at 09:45:24AM -0700, Ravi Kerur wrote:
> > >> > > > On Fri, Apr 24, 2015 at 8:22 AM, Neil Horman <nhorman@tuxdriver.com
> > >> >
> > >> > > wrote:
> > >> > > >
> > >> > > > > On Fri, Apr 24, 2015 at 08:14:04AM -0700, Ravi Kerur wrote:
> > >> > > > > > On Fri, Apr 24, 2015 at 6:51 AM, Neil Horman <
> > >> nhorman@tuxdriver.com>
> > >> > > > > wrote:
> > >> > > > > >
> > >> > > > > > > On Thu, Apr 23, 2015 at 02:35:31PM -0700, Ravi Kerur wrote:
> > >> > > > > > > > Changes in v7
> > >> > > > > > > > Remove _setname_ pthread calls.
> > >> > > > > > > > Use rte_gettid() API in RTE_LOG to print thread_id.
> > >> > > > > > > >
> > >> > > > > > > > Changes in v6
> > >> > > > > > > > Remove RTE_EXEC_ENV_BSDAPP from eal_common_thread.c file.
> > >> > > > > > > > Add pthread_setname_np/pthread_set_name_np for Linux/FreeBSD
> > >> > > > > > > > respectively. Plan to use _getname_ in RTE_LOG when
> > >> available.
> > >> > > > > > > > Use existing rte_get_systid() in RTE_LOG to print thread_id.
> > >> > > > > > > >
> > >> > > > > > > > Changes in v5
> > >> > > > > > > > Rebase to latest code.
> > >> > > > > > > >
> > >> > > > > > > > Changes in v4
> > >> > > > > > > > None
> > >> > > > > > > >
> > >> > > > > > > > Changes in v3
> > >> > > > > > > > Changed subject to be more explicit on file name inclusion.
> > >> > > > > > > >
> > >> > > > > > > > Changes in v2
> > >> > > > > > > > None
> > >> > > > > > > >
> > >> > > > > > > > Changes in v1
> > >> > > > > > > > eal_thread.c has minor differences between Linux and BSD,
> > >> move
> > >> > > > > > > > entire file into common directory.
> > >> > > > > > > > Use RTE_EXEC_ENV_BSDAPP to differentiate on minor
> > >> differences.
> > >> > > > > > > > Rename eal_thread.c to eal_common_thread.c
> > >> > > > > > > > Makefile changes to reflect file move and name change.
> > >> > > > > > > > Fix checkpatch warnings.
> > >> > > > > > > >
> > >> > > > > > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > >> > > > > > > > ---
> > >> > > > > > > >  lib/librte_eal/bsdapp/eal/Makefile        |   2 +-
> > >> > > > > > > >  lib/librte_eal/bsdapp/eal/eal_thread.c    | 152
> > >> > > > > > > ------------------------------
> > >> > > > > > > >  lib/librte_eal/common/eal_common_thread.c | 147
> > >> > > > > > > ++++++++++++++++++++++++++++-
> > >> > > > > > > >  lib/librte_eal/linuxapp/eal/eal_thread.c  | 152
> > >> > > > > > > +-----------------------------
> > >> > > > > > > >  4 files changed, 148 insertions(+), 305 deletions(-)
> > >> > > > > > > >
> > >> > > > > > > > diff --git a/lib/librte_eal/bsdapp/eal/Makefile
> > >> > > > > > > b/lib/librte_eal/bsdapp/eal/Makefile
> > >> > > > > > > > index 2357cfa..55971b9 100644
> > >> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/Makefile
> > >> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/Makefile
> > >> > > > > > > > @@ -87,7 +87,7 @@ CFLAGS_eal_common_log.o := -D_GNU_SOURCE
> > >> > > > > > > >  # workaround for a gcc bug with noreturn attribute
> > >> > > > > > > >  # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
> > >> > > > > > > >  ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
> > >> > > > > > > > -CFLAGS_eal_thread.o += -Wno-return-type
> > >> > > > > > > > +CFLAGS_eal_common_thread.o += -Wno-return-type
> > >> > > > > > > >  CFLAGS_eal_hpet.o += -Wno-return-type
> > >> > > > > > > >  endif
> > >> > > > > > > >
> > >> > > > > > > > diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > >> > > > > > > b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > >> > > > > > > > index 9a03437..5714b8f 100644
> > >> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > >> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > >> > > > > > > > @@ -35,163 +35,11 @@
> > >> > > > > > > >  #include <stdio.h>
> > >> > > > > > > >  #include <stdlib.h>
> > >> > > > > > > >  #include <stdint.h>
> > >> > > > > > > > -#include <unistd.h>
> > >> > > > > > > > -#include <sched.h>
> > >> > > > > > > > -#include <pthread_np.h>
> > >> > > > > > > > -#include <sys/queue.h>
> > >> > > > > > > >  #include <sys/thr.h>
> > >> > > > > > > >
> > >> > > > > > > > -#include <rte_debug.h>
> > >> > > > > > > > -#include <rte_atomic.h>
> > >> > > > > > > > -#include <rte_launch.h>
> > >> > > > > > > > -#include <rte_log.h>
> > >> > > > > > > > -#include <rte_memory.h>
> > >> > > > > > > > -#include <rte_memzone.h>
> > >> > > > > > > > -#include <rte_per_lcore.h>
> > >> > > > > > > > -#include <rte_eal.h>
> > >> > > > > > > > -#include <rte_per_lcore.h>
> > >> > > > > > > > -#include <rte_lcore.h>
> > >> > > > > > > > -
> > >> > > > > > > >  #include "eal_private.h"
> > >> > > > > > > >  #include "eal_thread.h"
> > >> > > > > > > >
> > >> > > > > > > > -RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
> > >> > > > > > > NAK, these are exported symbols, you can't remove them without
> > >> > > going
> > >> > > > > > > through the
> > >> > > > > > > deprecation process.
> > >> > > > > > >
> > >> > > > > > >
> > >> > > > > > They are not removed/deleted, they are moved from eal_thread.c
> > >> to
> > >> > > > > > eal_common_thread.c file since it is common to both Linux and
> > >> BSD.
> > >> > > > > >
> > >> > > > > Then perhaps you forgot to export the symbol?  Its showing up as
> > >> > > removed
> > >> > > > > on the
> > >> > > > > ABI checker utility.
> > >> > > > >
> > >> > > > > Neil
> > >> > > > >
> > >> > > >
> > >> > > > Can you please show me in the current code where it is being
> > >> exported? I
> > >> > > > have only moved definitions to _common_ files, not sure why it
> > >> should be
> > >> > > > exported now.  I searched in the current code for
> > >> RTE_DEFINE_PER_LCORE
> > >> > > >
> > >> > > > #home/rkerur/dpdk-tmp/dpdk# grep -ir RTE_DEFINE_PER_LCORE *
> > >> > > > app/test/test_per_lcore.c:static RTE_DEFINE_PER_LCORE(unsigned,
> > >> test) =
> > >> > > > 0x12345678;
> > >> > > >
> > >> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > >> > > > _lcore_id) = LCORE_ID_ANY;
> > >> > > >
> > >> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > >> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > >> > > >
> > >> > >
> > >> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > >> > > > _cpuset);
> > >> > > >
> > >> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > >> > > > _lcore_id) = LCORE_ID_ANY;
> > >> > > >
> > >> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > >> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > >> > > >
> > >> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > >> > > > _cpuset);
> > >> > > > lib/librte_eal/common/include/rte_per_lcore.h:#define
> > >> > > > RTE_DEFINE_PER_LCORE(type, name)            \
> > >> > > > lib/librte_eal/common/include/rte_eal.h:    static
> > >> > > > RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
> > >> > > > lib/librte_eal/common/eal_common_errno.c:RTE_DEFINE_PER_LCORE(int,
> > >> > > > _rte_errno);
> > >> > > > lib/librte_eal/common/eal_common_errno.c:    static
> > >> > > > RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
> > >> > > >
> > >> > > >
> > >> > > > > > Thanks
> > >> > > > > > Ravi
> > >> > > > > >
> > >> > > > > > Regards
> > >> > > > > > > Neil
> > >> > > > > > >
> > >> > > > > > >
> > >> > > > >
> > >> > > Its exported in the version map file:
> > >> > >  per_lcore__lcore_id;
> > >> > >
> > >> > >
> > >> > Thanks Neil, I checked and both linux and bsd rte_eal_version.map have
> > >> it.
> > >> > I compared .map file between "changed code" and the original, they are
> > >> same
> > >> > for both linux and bsd. In fact you had ACK'd v4 version of this patch
> > >> > series and no major changes after that. Please let me know if I missed
> > >> > something.
> > >> >
> > >> I did, and I'm retracting that, because I didn't think to check the ABI
> > >> compatibility on this.  But I ran it throught the ABI checking script
> > >> this and
> > >> this error popped out.  You should run it as well, its in the scripts
> > >> directory.
> > >>
> > >>
> > >> I see in your first patch you removed it and re-added it in the common
> > >> section.
> > >> But something about how its building is causing it to not show up as an
> > >> exported
> > >> symbol, which is problematic, as other applications are going to want
> > >> access to
> > >> it.
> > >>
> > >> It also possible that the ABI checker is throwing a false positive, but
> > >> either
> > >> way, it needs to be looked into prior to moving forward with this.
> > >>
> > >>
> > > I did following things.
> > >
> > > Put a tag (v2.0.0-before-common-eal)  before EAL common functions changes
> > > for commit (3c0c807038ad642f4be7deb9370293c39d12f029 net: remove unneeded
> > > include)
> > >
> > > Put a tag (v2.0.0-common-eal) after EAL common functions changes for
> > > commit (25737e5a7212630a7b5d8ca756860a062f403789 Move common functions in
> > > eal_pci.c)
> > >
> > > Ran validate-abi against x86_64-native-linuxapp-gcc and
> > >
> > > v2.0.0-rc3 and v2.0.0-before-common-eal, html report for librte_eal.so
> > > shows removed symbols for "per_lcore__cpuset"
> > >
> > > v2.0.0-rc3 and v2.0.0-common-eal, html report for librte_eal.so shows
> > > removed symbols for "per_lcore__cpuset"
> > >
> > > Removed symbol is different from what you have reported and in my case I
> > > see it even before my commit. If you are interested I can unicast you html
> > > report file. Please let me know how to proceed.
> > >
> > >
> > 
> > I did some experiment and found some interesting things.  I will take eal.c
> > as an example
> > 
> > eal.c is split into eal_common_sysfs.c eal_common_mem_cfg.c
> > eal_common_proc_type.c and eal_common_app_usage.c. In linuxapp/eal/Makefile
> > if I compile new files right after eal.c as shown below
> > 
> > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) := eal.c
> > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_sysfs.c
> > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_mem_cfg.c
> > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_proc_type.c
> > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_app_usage.c
> > ...
> > 
> > validate-abi results matches baseline. Instead if i place new _common_
> > files in common area in linuxapp/eal/Makefile as shown below
> > 
> > # from common dir
> > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
> > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_log.c
> > ...
> > 
> > validate-abi reports problem in binary compatibility and source
> > compatiblity
> > 
> > eal_filesystem.h, librte_eal.so.1
> >  [+] eal_parse_sysfs_value ( char const* filename, unsigned long* val )
> >  @@ DPDK_2.0 (2)
> > 
> > I believe files in common and linuxapp directory are compiled same way so
> > not sure why placement in makefile makes difference.
> > 
> > Could this be false-positive from validate-abi script??
> > 
> It could be, yes.  Though I'm more inclined to think that perhaps in the new
> version of the code we're not generating ithe same dwarf information out of it.
> In fact for some reason, I've checked both the build before and after your
> patch series, and the exported CFLAGS aren't getting passed to the build
> properly, implying that we're not building all the code in the validator with
> the -g flag, which the validator need to function properly.  I'm looking into
> that
> Neil
> 
> 
Found the problem: I was stupidly reading the report incorrectly.  The problem
regarding _lcore_id is a source compatibility issue (because the symbol moved to
a new location), which is irrelevant to us.  It's not in any way a binary
compatibility problem, which is what we care about.  Sorry for the noise.

I do still have a few concerns about some changed calling conventions with a few
other functions, which I'll look into on Monday.

Regards
Neil

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  2015-04-25  1:45  4%                   ` Ravi Kerur
@ 2015-04-25 12:32  0%                     ` Neil Horman
  2015-04-25 13:02  0%                       ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-04-25 12:32 UTC (permalink / raw)
  To: Ravi Kerur; +Cc: dev

On Fri, Apr 24, 2015 at 06:45:06PM -0700, Ravi Kerur wrote:
> On Fri, Apr 24, 2015 at 2:24 PM, Ravi Kerur <rkerur@gmail.com> wrote:
> 
> >
> >
> > On Fri, Apr 24, 2015 at 12:51 PM, Neil Horman <nhorman@tuxdriver.com>
> > wrote:
> >
> >> On Fri, Apr 24, 2015 at 12:21:23PM -0700, Ravi Kerur wrote:
> >> > On Fri, Apr 24, 2015 at 11:53 AM, Neil Horman <nhorman@tuxdriver.com>
> >> wrote:
> >> >
> >> > > On Fri, Apr 24, 2015 at 09:45:24AM -0700, Ravi Kerur wrote:
> >> > > > On Fri, Apr 24, 2015 at 8:22 AM, Neil Horman <nhorman@tuxdriver.com
> >> >
> >> > > wrote:
> >> > > >
> >> > > > > On Fri, Apr 24, 2015 at 08:14:04AM -0700, Ravi Kerur wrote:
> >> > > > > > On Fri, Apr 24, 2015 at 6:51 AM, Neil Horman <
> >> nhorman@tuxdriver.com>
> >> > > > > wrote:
> >> > > > > >
> >> > > > > > > On Thu, Apr 23, 2015 at 02:35:31PM -0700, Ravi Kerur wrote:
> >> > > > > > > > Changes in v7
> >> > > > > > > > Remove _setname_ pthread calls.
> >> > > > > > > > Use rte_gettid() API in RTE_LOG to print thread_id.
> >> > > > > > > >
> >> > > > > > > > Changes in v6
> >> > > > > > > > Remove RTE_EXEC_ENV_BSDAPP from eal_common_thread.c file.
> >> > > > > > > > Add pthread_setname_np/pthread_set_name_np for Linux/FreeBSD
> >> > > > > > > > respectively. Plan to use _getname_ in RTE_LOG when
> >> available.
> >> > > > > > > > Use existing rte_get_systid() in RTE_LOG to print thread_id.
> >> > > > > > > >
> >> > > > > > > > Changes in v5
> >> > > > > > > > Rebase to latest code.
> >> > > > > > > >
> >> > > > > > > > Changes in v4
> >> > > > > > > > None
> >> > > > > > > >
> >> > > > > > > > Changes in v3
> >> > > > > > > > Changed subject to be more explicit on file name inclusion.
> >> > > > > > > >
> >> > > > > > > > Changes in v2
> >> > > > > > > > None
> >> > > > > > > >
> >> > > > > > > > Changes in v1
> >> > > > > > > > eal_thread.c has minor differences between Linux and BSD,
> >> move
> >> > > > > > > > entire file into common directory.
> >> > > > > > > > Use RTE_EXEC_ENV_BSDAPP to differentiate on minor
> >> differences.
> >> > > > > > > > Rename eal_thread.c to eal_common_thread.c
> >> > > > > > > > Makefile changes to reflect file move and name change.
> >> > > > > > > > Fix checkpatch warnings.
> >> > > > > > > >
> >> > > > > > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> >> > > > > > > > ---
> >> > > > > > > >  lib/librte_eal/bsdapp/eal/Makefile        |   2 +-
> >> > > > > > > >  lib/librte_eal/bsdapp/eal/eal_thread.c    | 152
> >> > > > > > > ------------------------------
> >> > > > > > > >  lib/librte_eal/common/eal_common_thread.c | 147
> >> > > > > > > ++++++++++++++++++++++++++++-
> >> > > > > > > >  lib/librte_eal/linuxapp/eal/eal_thread.c  | 152
> >> > > > > > > +-----------------------------
> >> > > > > > > >  4 files changed, 148 insertions(+), 305 deletions(-)
> >> > > > > > > >
> >> > > > > > > > diff --git a/lib/librte_eal/bsdapp/eal/Makefile
> >> > > > > > > b/lib/librte_eal/bsdapp/eal/Makefile
> >> > > > > > > > index 2357cfa..55971b9 100644
> >> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/Makefile
> >> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/Makefile
> >> > > > > > > > @@ -87,7 +87,7 @@ CFLAGS_eal_common_log.o := -D_GNU_SOURCE
> >> > > > > > > >  # workaround for a gcc bug with noreturn attribute
> >> > > > > > > >  # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
> >> > > > > > > >  ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
> >> > > > > > > > -CFLAGS_eal_thread.o += -Wno-return-type
> >> > > > > > > > +CFLAGS_eal_common_thread.o += -Wno-return-type
> >> > > > > > > >  CFLAGS_eal_hpet.o += -Wno-return-type
> >> > > > > > > >  endif
> >> > > > > > > >
> >> > > > > > > > diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c
> >> > > > > > > b/lib/librte_eal/bsdapp/eal/eal_thread.c
> >> > > > > > > > index 9a03437..5714b8f 100644
> >> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
> >> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
> >> > > > > > > > @@ -35,163 +35,11 @@
> >> > > > > > > >  #include <stdio.h>
> >> > > > > > > >  #include <stdlib.h>
> >> > > > > > > >  #include <stdint.h>
> >> > > > > > > > -#include <unistd.h>
> >> > > > > > > > -#include <sched.h>
> >> > > > > > > > -#include <pthread_np.h>
> >> > > > > > > > -#include <sys/queue.h>
> >> > > > > > > >  #include <sys/thr.h>
> >> > > > > > > >
> >> > > > > > > > -#include <rte_debug.h>
> >> > > > > > > > -#include <rte_atomic.h>
> >> > > > > > > > -#include <rte_launch.h>
> >> > > > > > > > -#include <rte_log.h>
> >> > > > > > > > -#include <rte_memory.h>
> >> > > > > > > > -#include <rte_memzone.h>
> >> > > > > > > > -#include <rte_per_lcore.h>
> >> > > > > > > > -#include <rte_eal.h>
> >> > > > > > > > -#include <rte_per_lcore.h>
> >> > > > > > > > -#include <rte_lcore.h>
> >> > > > > > > > -
> >> > > > > > > >  #include "eal_private.h"
> >> > > > > > > >  #include "eal_thread.h"
> >> > > > > > > >
> >> > > > > > > > -RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
> >> > > > > > > NAK, these are exported symbols, you can't remove them without
> >> > > going
> >> > > > > > > through the
> >> > > > > > > deprecation process.
> >> > > > > > >
> >> > > > > > >
> >> > > > > > They are not removed/deleted, they are moved from eal_thread.c
> >> to
> >> > > > > > eal_common_thread.c file since it is common to both Linux and
> >> BSD.
> >> > > > > >
> >> > > > > Then perhaps you forgot to export the symbol?  Its showing up as
> >> > > removed
> >> > > > > on the
> >> > > > > ABI checker utility.
> >> > > > >
> >> > > > > Neil
> >> > > > >
> >> > > >
> >> > > > Can you please show me in the current code where it is being
> >> exported? I
> >> > > > have only moved definitions to _common_ files, not sure why it
> >> should be
> >> > > > exported now.  I searched in the current code for
> >> RTE_DEFINE_PER_LCORE
> >> > > >
> >> > > > #home/rkerur/dpdk-tmp/dpdk# grep -ir RTE_DEFINE_PER_LCORE *
> >> > > > app/test/test_per_lcore.c:static RTE_DEFINE_PER_LCORE(unsigned,
> >> test) =
> >> > > > 0x12345678;
> >> > > >
> >> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> >> > > > _lcore_id) = LCORE_ID_ANY;
> >> > > >
> >> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> >> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> >> > > >
> >> > >
> >> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> >> > > > _cpuset);
> >> > > >
> >> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> >> > > > _lcore_id) = LCORE_ID_ANY;
> >> > > >
> >> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> >> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> >> > > >
> >> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> >> > > > _cpuset);
> >> > > > lib/librte_eal/common/include/rte_per_lcore.h:#define
> >> > > > RTE_DEFINE_PER_LCORE(type, name)            \
> >> > > > lib/librte_eal/common/include/rte_eal.h:    static
> >> > > > RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
> >> > > > lib/librte_eal/common/eal_common_errno.c:RTE_DEFINE_PER_LCORE(int,
> >> > > > _rte_errno);
> >> > > > lib/librte_eal/common/eal_common_errno.c:    static
> >> > > > RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
> >> > > >
> >> > > >
> >> > > > > > Thanks
> >> > > > > > Ravi
> >> > > > > >
> >> > > > > > Regards
> >> > > > > > > Neil
> >> > > > > > >
> >> > > > > > >
> >> > > > >
> >> > > Its exported in the version map file:
> >> > >  per_lcore__lcore_id;
> >> > >
> >> > >
> >> > Thanks Neil, I checked and both linux and bsd rte_eal_version.map have
> >> it.
> >> > I compared .map file between "changed code" and the original, they are
> >> same
> >> > for both linux and bsd. In fact you had ACK'd v4 version of this patch
> >> > series and no major changes after that. Please let me know if I missed
> >> > something.
> >> >
> >> I did, and I'm retracting that, because I didn't think to check the ABI
> >> compatibility on this.  But I ran it throught the ABI checking script
> >> this and
> >> this error popped out.  You should run it as well, its in the scripts
> >> directory.
> >>
> >>
> >> I see in your first patch you removed it and re-added it in the common
> >> section.
> >> But something about how its building is causing it to not show up as an
> >> exported
> >> symbol, which is problematic, as other applications are going to want
> >> access to
> >> it.
> >>
> >> It also possible that the ABI checker is throwing a false positive, but
> >> either
> >> way, it needs to be looked into prior to moving forward with this.
> >>
> >>
> > I did following things.
> >
> > Put a tag (v2.0.0-before-common-eal)  before EAL common functions changes
> > for commit (3c0c807038ad642f4be7deb9370293c39d12f029 net: remove unneeded
> > include)
> >
> > Put a tag (v2.0.0-common-eal) after EAL common functions changes for
> > commit (25737e5a7212630a7b5d8ca756860a062f403789 Move common functions in
> > eal_pci.c)
> >
> > Ran validate-abi against x86_64-native-linuxapp-gcc and
> >
> > v2.0.0-rc3 and v2.0.0-before-common-eal, html report for librte_eal.so
> > shows removed symbols for "per_lcore__cpuset"
> >
> > v2.0.0-rc3 and v2.0.0-common-eal, html report for librte_eal.so shows
> > removed symbols for "per_lcore__cpuset"
> >
> > Removed symbol is different from what you have reported and in my case I
> > see it even before my commit. If you are interested I can unicast you html
> > report file. Please let me know how to proceed.
> >
> >
> 
> I did some experiment and found some interesting things.  I will take eal.c
> as an example
> 
> eal.c is split into eal_common_sysfs.c eal_common_mem_cfg.c
> eal_common_proc_type.c and eal_common_app_usage.c. In linuxapp/eal/Makefile
> if I compile new files right after eal.c as shown below
> 
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) := eal.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_sysfs.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_mem_cfg.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_proc_type.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_app_usage.c
> ...
> 
> validate-abi results matches baseline. Instead if i place new _common_
> files in common area in linuxapp/eal/Makefile as shown below
> 
> # from common dir
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_log.c
> ...
> 
> validate-abi reports problem in binary compatibility and source
> compatiblity
> 
> eal_filesystem.h, librte_eal.so.1
>  [+] eal_parse_sysfs_value ( char const* filename, unsigned long* val )
>  @@ DPDK_2.0 (2)
> 
> I believe files in common and linuxapp directory are compiled same way so
> not sure why placement in makefile makes difference.
> 
> Could this be false-positive from validate-abi script??
> 
It could be, yes.  Though I'm more inclined to think that perhaps in the new
version of the code we're not generating the same DWARF information out of it.
In fact, for some reason, I've checked both the build before and after your
patch series, and the exported CFLAGS aren't getting passed to the build
properly, implying that we're not building all the code in the validator with
the -g flag, which the validator needs to function properly.  I'm looking into
that.
Neil

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  2015-04-24 21:24  3%                 ` Ravi Kerur
@ 2015-04-25  1:45  4%                   ` Ravi Kerur
  2015-04-25 12:32  0%                     ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Ravi Kerur @ 2015-04-25  1:45 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On Fri, Apr 24, 2015 at 2:24 PM, Ravi Kerur <rkerur@gmail.com> wrote:

>
>
> On Fri, Apr 24, 2015 at 12:51 PM, Neil Horman <nhorman@tuxdriver.com>
> wrote:
>
>> On Fri, Apr 24, 2015 at 12:21:23PM -0700, Ravi Kerur wrote:
>> > On Fri, Apr 24, 2015 at 11:53 AM, Neil Horman <nhorman@tuxdriver.com>
>> wrote:
>> >
>> > > On Fri, Apr 24, 2015 at 09:45:24AM -0700, Ravi Kerur wrote:
>> > > > On Fri, Apr 24, 2015 at 8:22 AM, Neil Horman <nhorman@tuxdriver.com
>> >
>> > > wrote:
>> > > >
>> > > > > On Fri, Apr 24, 2015 at 08:14:04AM -0700, Ravi Kerur wrote:
>> > > > > > On Fri, Apr 24, 2015 at 6:51 AM, Neil Horman <
>> nhorman@tuxdriver.com>
>> > > > > wrote:
>> > > > > >
>> > > > > > > On Thu, Apr 23, 2015 at 02:35:31PM -0700, Ravi Kerur wrote:
>> > > > > > > > Changes in v7
>> > > > > > > > Remove _setname_ pthread calls.
>> > > > > > > > Use rte_gettid() API in RTE_LOG to print thread_id.
>> > > > > > > >
>> > > > > > > > Changes in v6
>> > > > > > > > Remove RTE_EXEC_ENV_BSDAPP from eal_common_thread.c file.
>> > > > > > > > Add pthread_setname_np/pthread_set_name_np for Linux/FreeBSD
>> > > > > > > > respectively. Plan to use _getname_ in RTE_LOG when
>> available.
>> > > > > > > > Use existing rte_get_systid() in RTE_LOG to print thread_id.
>> > > > > > > >
>> > > > > > > > Changes in v5
>> > > > > > > > Rebase to latest code.
>> > > > > > > >
>> > > > > > > > Changes in v4
>> > > > > > > > None
>> > > > > > > >
>> > > > > > > > Changes in v3
>> > > > > > > > Changed subject to be more explicit on file name inclusion.
>> > > > > > > >
>> > > > > > > > Changes in v2
>> > > > > > > > None
>> > > > > > > >
>> > > > > > > > Changes in v1
>> > > > > > > > eal_thread.c has minor differences between Linux and BSD,
>> move
>> > > > > > > > entire file into common directory.
>> > > > > > > > Use RTE_EXEC_ENV_BSDAPP to differentiate on minor
>> differences.
>> > > > > > > > Rename eal_thread.c to eal_common_thread.c
>> > > > > > > > Makefile changes to reflect file move and name change.
>> > > > > > > > Fix checkpatch warnings.
>> > > > > > > >
>> > > > > > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
>> > > > > > > > ---
>> > > > > > > >  lib/librte_eal/bsdapp/eal/Makefile        |   2 +-
>> > > > > > > >  lib/librte_eal/bsdapp/eal/eal_thread.c    | 152
>> > > > > > > ------------------------------
>> > > > > > > >  lib/librte_eal/common/eal_common_thread.c | 147
>> > > > > > > ++++++++++++++++++++++++++++-
>> > > > > > > >  lib/librte_eal/linuxapp/eal/eal_thread.c  | 152
>> > > > > > > +-----------------------------
>> > > > > > > >  4 files changed, 148 insertions(+), 305 deletions(-)
>> > > > > > > >
>> > > > > > > > diff --git a/lib/librte_eal/bsdapp/eal/Makefile
>> > > > > > > b/lib/librte_eal/bsdapp/eal/Makefile
>> > > > > > > > index 2357cfa..55971b9 100644
>> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/Makefile
>> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/Makefile
>> > > > > > > > @@ -87,7 +87,7 @@ CFLAGS_eal_common_log.o := -D_GNU_SOURCE
>> > > > > > > >  # workaround for a gcc bug with noreturn attribute
>> > > > > > > >  # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
>> > > > > > > >  ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
>> > > > > > > > -CFLAGS_eal_thread.o += -Wno-return-type
>> > > > > > > > +CFLAGS_eal_common_thread.o += -Wno-return-type
>> > > > > > > >  CFLAGS_eal_hpet.o += -Wno-return-type
>> > > > > > > >  endif
>> > > > > > > >
>> > > > > > > > diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c
>> > > > > > > b/lib/librte_eal/bsdapp/eal/eal_thread.c
>> > > > > > > > index 9a03437..5714b8f 100644
>> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
>> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
>> > > > > > > > @@ -35,163 +35,11 @@
>> > > > > > > >  #include <stdio.h>
>> > > > > > > >  #include <stdlib.h>
>> > > > > > > >  #include <stdint.h>
>> > > > > > > > -#include <unistd.h>
>> > > > > > > > -#include <sched.h>
>> > > > > > > > -#include <pthread_np.h>
>> > > > > > > > -#include <sys/queue.h>
>> > > > > > > >  #include <sys/thr.h>
>> > > > > > > >
>> > > > > > > > -#include <rte_debug.h>
>> > > > > > > > -#include <rte_atomic.h>
>> > > > > > > > -#include <rte_launch.h>
>> > > > > > > > -#include <rte_log.h>
>> > > > > > > > -#include <rte_memory.h>
>> > > > > > > > -#include <rte_memzone.h>
>> > > > > > > > -#include <rte_per_lcore.h>
>> > > > > > > > -#include <rte_eal.h>
>> > > > > > > > -#include <rte_per_lcore.h>
>> > > > > > > > -#include <rte_lcore.h>
>> > > > > > > > -
>> > > > > > > >  #include "eal_private.h"
>> > > > > > > >  #include "eal_thread.h"
>> > > > > > > >
>> > > > > > > > -RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
>> > > > > > > NAK, these are exported symbols, you can't remove them without
>> > > going
>> > > > > > > through the
>> > > > > > > deprecation process.
>> > > > > > >
>> > > > > > >
>> > > > > > They are not removed/deleted, they are moved from eal_thread.c
>> to
>> > > > > > eal_common_thread.c file since it is common to both Linux and
>> BSD.
>> > > > > >
>> > > > > Then perhaps you forgot to export the symbol?  Its showing up as
>> > > removed
>> > > > > on the
>> > > > > ABI checker utility.
>> > > > >
>> > > > > Neil
>> > > > >
>> > > >
>> > > > Can you please show me in the current code where it is being
>> exported? I
>> > > > have only moved definitions to _common_ files, not sure why it
>> should be
>> > > > exported now.  I searched in the current code for
>> RTE_DEFINE_PER_LCORE
>> > > >
>> > > > #home/rkerur/dpdk-tmp/dpdk# grep -ir RTE_DEFINE_PER_LCORE *
>> > > > app/test/test_per_lcore.c:static RTE_DEFINE_PER_LCORE(unsigned,
>> test) =
>> > > > 0x12345678;
>> > > >
>> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
>> > > > _lcore_id) = LCORE_ID_ANY;
>> > > >
>> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
>> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
>> > > >
>> > >
>> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
>> > > > _cpuset);
>> > > >
>> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
>> > > > _lcore_id) = LCORE_ID_ANY;
>> > > >
>> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
>> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
>> > > >
>> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
>> > > > _cpuset);
>> > > > lib/librte_eal/common/include/rte_per_lcore.h:#define
>> > > > RTE_DEFINE_PER_LCORE(type, name)            \
>> > > > lib/librte_eal/common/include/rte_eal.h:    static
>> > > > RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
>> > > > lib/librte_eal/common/eal_common_errno.c:RTE_DEFINE_PER_LCORE(int,
>> > > > _rte_errno);
>> > > > lib/librte_eal/common/eal_common_errno.c:    static
>> > > > RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
>> > > >
>> > > >
>> > > > > > Thanks
>> > > > > > Ravi
>> > > > > >
>> > > > > > Regards
>> > > > > > > Neil
>> > > > > > >
>> > > > > > >
>> > > > >
>> > > Its exported in the version map file:
>> > >  per_lcore__lcore_id;
>> > >
>> > >
>> > Thanks Neil, I checked and both linux and bsd rte_eal_version.map have
>> it.
>> > I compared .map file between "changed code" and the original, they are
>> same
>> > for both linux and bsd. In fact you had ACK'd v4 version of this patch
>> > series and no major changes after that. Please let me know if I missed
>> > something.
>> >
>> I did, and I'm retracting that, because I didn't think to check the ABI
>> compatibility on this.  But I ran it throught the ABI checking script
>> this and
>> this error popped out.  You should run it as well, its in the scripts
>> directory.
>>
>>
>> I see in your first patch you removed it and re-added it in the common
>> section.
>> But something about how its building is causing it to not show up as an
>> exported
>> symbol, which is problematic, as other applications are going to want
>> access to
>> it.
>>
>> It also possible that the ABI checker is throwing a false positive, but
>> either
>> way, it needs to be looked into prior to moving forward with this.
>>
>>
> I did following things.
>
> Put a tag (v2.0.0-before-common-eal)  before EAL common functions changes
> for commit (3c0c807038ad642f4be7deb9370293c39d12f029 net: remove unneeded
> include)
>
> Put a tag (v2.0.0-common-eal) after EAL common functions changes for
> commit (25737e5a7212630a7b5d8ca756860a062f403789 Move common functions in
> eal_pci.c)
>
> Ran validate-abi against x86_64-native-linuxapp-gcc and
>
> v2.0.0-rc3 and v2.0.0-before-common-eal, html report for librte_eal.so
> shows removed symbols for "per_lcore__cpuset"
>
> v2.0.0-rc3 and v2.0.0-common-eal, html report for librte_eal.so shows
> removed symbols for "per_lcore__cpuset"
>
> Removed symbol is different from what you have reported and in my case I
> see it even before my commit. If you are interested I can unicast you html
> report file. Please let me know how to proceed.
>
>

I did some experiments and found some interesting things.  I will take eal.c
as an example.

eal.c is split into eal_common_sysfs.c eal_common_mem_cfg.c
eal_common_proc_type.c and eal_common_app_usage.c. In linuxapp/eal/Makefile
if I compile new files right after eal.c as shown below

SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) := eal.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_sysfs.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_mem_cfg.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_proc_type.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_app_usage.c
...

validate-abi results match the baseline. If I instead place the new _common_
files in the common area in linuxapp/eal/Makefile as shown below

# from common dir
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_log.c
...

validate-abi reports problems in binary compatibility and source
compatibility

eal_filesystem.h, librte_eal.so.1
 [+] eal_parse_sysfs_value ( char const* filename, unsigned long* val )
 @@ DPDK_2.0 (2)

I believe files in the common and linuxapp directories are compiled the same
way, so I'm not sure why placement in the Makefile makes a difference.

Could this be a false positive from the validate-abi script?

> Neil
>>
>> > dpdk-common-eal-f-4 has all the common code movement changes
>> > dpdk-tmp is original code
>> >
>> > #/home/rkerur/dpdk-common-eal-f-4/dpdk# diff
>> > lib/librte_eal/linuxapp/eal/rte_eal_version.map
>> >
>> /home/rkerur/dpdk-tmp/dpdk/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>> >
>> > <No difference>
>> >
>> > #/home/rkerur/dpdk-common-eal-f-4/dpdk# diff
>> > lib/librte_eal/bsdapp/eal/rte_eal_version.map
>> > /home/rkerur/dpdk-tmp/dpdk/lib/librte_eal/bsdapp/eal/rte_eal_version.map
>> >
>> > <No difference>
>> >
>> > Thanks,
>> > Ravi
>> >
>> > Neil
>> > >
>> > >
>>
>
>

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  2015-04-24 19:51  4%               ` Neil Horman
@ 2015-04-24 21:24  3%                 ` Ravi Kerur
  2015-04-25  1:45  4%                   ` Ravi Kerur
  0 siblings, 1 reply; 200+ results
From: Ravi Kerur @ 2015-04-24 21:24 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On Fri, Apr 24, 2015 at 12:51 PM, Neil Horman <nhorman@tuxdriver.com> wrote:

> On Fri, Apr 24, 2015 at 12:21:23PM -0700, Ravi Kerur wrote:
> > On Fri, Apr 24, 2015 at 11:53 AM, Neil Horman <nhorman@tuxdriver.com>
> wrote:
> >
> > > On Fri, Apr 24, 2015 at 09:45:24AM -0700, Ravi Kerur wrote:
> > > > On Fri, Apr 24, 2015 at 8:22 AM, Neil Horman <nhorman@tuxdriver.com>
> > > wrote:
> > > >
> > > > > On Fri, Apr 24, 2015 at 08:14:04AM -0700, Ravi Kerur wrote:
> > > > > > On Fri, Apr 24, 2015 at 6:51 AM, Neil Horman <
> nhorman@tuxdriver.com>
> > > > > wrote:
> > > > > >
> > > > > > > On Thu, Apr 23, 2015 at 02:35:31PM -0700, Ravi Kerur wrote:
> > > > > > > > Changes in v7
> > > > > > > > Remove _setname_ pthread calls.
> > > > > > > > Use rte_gettid() API in RTE_LOG to print thread_id.
> > > > > > > >
> > > > > > > > Changes in v6
> > > > > > > > Remove RTE_EXEC_ENV_BSDAPP from eal_common_thread.c file.
> > > > > > > > Add pthread_setname_np/pthread_set_name_np for Linux/FreeBSD
> > > > > > > > respectively. Plan to use _getname_ in RTE_LOG when
> available.
> > > > > > > > Use existing rte_get_systid() in RTE_LOG to print thread_id.
> > > > > > > >
> > > > > > > > Changes in v5
> > > > > > > > Rebase to latest code.
> > > > > > > >
> > > > > > > > Changes in v4
> > > > > > > > None
> > > > > > > >
> > > > > > > > Changes in v3
> > > > > > > > Changed subject to be more explicit on file name inclusion.
> > > > > > > >
> > > > > > > > Changes in v2
> > > > > > > > None
> > > > > > > >
> > > > > > > > Changes in v1
> > > > > > > > eal_thread.c has minor differences between Linux and BSD,
> move
> > > > > > > > entire file into common directory.
> > > > > > > > Use RTE_EXEC_ENV_BSDAPP to differentiate on minor
> differences.
> > > > > > > > Rename eal_thread.c to eal_common_thread.c
> > > > > > > > Makefile changes to reflect file move and name change.
> > > > > > > > Fix checkpatch warnings.
> > > > > > > >
> > > > > > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > > > > > > ---
> > > > > > > >  lib/librte_eal/bsdapp/eal/Makefile        |   2 +-
> > > > > > > >  lib/librte_eal/bsdapp/eal/eal_thread.c    | 152
> > > > > > > ------------------------------
> > > > > > > >  lib/librte_eal/common/eal_common_thread.c | 147
> > > > > > > ++++++++++++++++++++++++++++-
> > > > > > > >  lib/librte_eal/linuxapp/eal/eal_thread.c  | 152
> > > > > > > +-----------------------------
> > > > > > > >  4 files changed, 148 insertions(+), 305 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > > b/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > > > index 2357cfa..55971b9 100644
> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > > > @@ -87,7 +87,7 @@ CFLAGS_eal_common_log.o := -D_GNU_SOURCE
> > > > > > > >  # workaround for a gcc bug with noreturn attribute
> > > > > > > >  # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
> > > > > > > >  ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
> > > > > > > > -CFLAGS_eal_thread.o += -Wno-return-type
> > > > > > > > +CFLAGS_eal_common_thread.o += -Wno-return-type
> > > > > > > >  CFLAGS_eal_hpet.o += -Wno-return-type
> > > > > > > >  endif
> > > > > > > >
> > > > > > > > diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > > b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > > > index 9a03437..5714b8f 100644
> > > > > > > > --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > > > +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > > > @@ -35,163 +35,11 @@
> > > > > > > >  #include <stdio.h>
> > > > > > > >  #include <stdlib.h>
> > > > > > > >  #include <stdint.h>
> > > > > > > > -#include <unistd.h>
> > > > > > > > -#include <sched.h>
> > > > > > > > -#include <pthread_np.h>
> > > > > > > > -#include <sys/queue.h>
> > > > > > > >  #include <sys/thr.h>
> > > > > > > >
> > > > > > > > -#include <rte_debug.h>
> > > > > > > > -#include <rte_atomic.h>
> > > > > > > > -#include <rte_launch.h>
> > > > > > > > -#include <rte_log.h>
> > > > > > > > -#include <rte_memory.h>
> > > > > > > > -#include <rte_memzone.h>
> > > > > > > > -#include <rte_per_lcore.h>
> > > > > > > > -#include <rte_eal.h>
> > > > > > > > -#include <rte_per_lcore.h>
> > > > > > > > -#include <rte_lcore.h>
> > > > > > > > -
> > > > > > > >  #include "eal_private.h"
> > > > > > > >  #include "eal_thread.h"
> > > > > > > >
> > > > > > > > -RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
> > > > > > > NAK, these are exported symbols, you can't remove them without
> > > going
> > > > > > > through the
> > > > > > > deprecation process.
> > > > > > >
> > > > > > >
> > > > > > They are not removed/deleted, they are moved from eal_thread.c to
> > > > > > eal_common_thread.c file since it is common to both Linux and
> BSD.
> > > > > >
> > > > > Then perhaps you forgot to export the symbol?  Its showing up as
> > > removed
> > > > > on the
> > > > > ABI checker utility.
> > > > >
> > > > > Neil
> > > > >
> > > >
> > > > Can you please show me in the current code where it is being
> exported? I
> > > > have only moved definitions to _common_ files, not sure why it
> should be
> > > > exported now.  I searched in the current code for
> RTE_DEFINE_PER_LCORE
> > > >
> > > > #home/rkerur/dpdk-tmp/dpdk# grep -ir RTE_DEFINE_PER_LCORE *
> > > > app/test/test_per_lcore.c:static RTE_DEFINE_PER_LCORE(unsigned,
> test) =
> > > > 0x12345678;
> > > >
> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > _lcore_id) = LCORE_ID_ANY;
> > > >
> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > > >
> > >
> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > > > _cpuset);
> > > > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > _lcore_id) = LCORE_ID_ANY;
> > > > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > > >
> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > > > _cpuset);
> > > > lib/librte_eal/common/include/rte_per_lcore.h:#define
> > > > RTE_DEFINE_PER_LCORE(type, name)            \
> > > > lib/librte_eal/common/include/rte_eal.h:    static
> > > > RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
> > > > lib/librte_eal/common/eal_common_errno.c:RTE_DEFINE_PER_LCORE(int,
> > > > _rte_errno);
> > > > lib/librte_eal/common/eal_common_errno.c:    static
> > > > RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
> > > >
> > > >
> > > > > > Thanks
> > > > > > Ravi
> > > > > >
> > > > > > Regards
> > > > > > > Neil
> > > > > > >
> > > > > > >
> > > > >
> > > Its exported in the version map file:
> > >  per_lcore__lcore_id;
> > >
> > >
> > Thanks Neil, I checked and both linux and bsd rte_eal_version.map have
> it.
> > I compared .map file between "changed code" and the original, they are
> same
> > for both linux and bsd. In fact you had ACK'd v4 version of this patch
> > series and no major changes after that. Please let me know if I missed
> > something.
> >
> I did, and I'm retracting that, because I didn't think to check the ABI
> compatibility on this.  But I ran it throught the ABI checking script this
> and
> this error popped out.  You should run it as well, its in the scripts
> directory.
>
>
> I see in your first patch you removed it and re-added it in the common
> section.
> But something about how its building is causing it to not show up as an
> exported
> symbol, which is problematic, as other applications are going to want
> access to
> it.
>
> It also possible that the ABI checker is throwing a false positive, but
> either
> way, it needs to be looked into prior to moving forward with this.
>
>
I did the following things.

Put a tag (v2.0.0-before-common-eal)  before EAL common functions changes
for commit (3c0c807038ad642f4be7deb9370293c39d12f029 net: remove unneeded
include)

Put a tag (v2.0.0-common-eal) after EAL common functions changes for commit
(25737e5a7212630a7b5d8ca756860a062f403789 Move common functions in
eal_pci.c)

Ran validate-abi against x86_64-native-linuxapp-gcc and

v2.0.0-rc3 and v2.0.0-before-common-eal, html report for librte_eal.so
shows removed symbols for "per_lcore__cpuset"

v2.0.0-rc3 and v2.0.0-common-eal, html report for librte_eal.so shows
removed symbols for "per_lcore__cpuset"

The removed symbol is different from what you have reported, and in my case I
see it even before my commit. If you are interested I can unicast you the html
report file. Please let me know how to proceed.


> Neil
>
> > dpdk-common-eal-f-4 has all the common code movement changes
> > dpdk-tmp is original code
> >
> > #/home/rkerur/dpdk-common-eal-f-4/dpdk# diff
> > lib/librte_eal/linuxapp/eal/rte_eal_version.map
> >
> /home/rkerur/dpdk-tmp/dpdk/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> >
> > <No difference>
> >
> > #/home/rkerur/dpdk-common-eal-f-4/dpdk# diff
> > lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > /home/rkerur/dpdk-tmp/dpdk/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> >
> > <No difference>
> >
> > Thanks,
> > Ravi
> >
> > Neil
> > >
> > >
>

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  2015-04-24 19:21  0%             ` Ravi Kerur
@ 2015-04-24 19:51  4%               ` Neil Horman
  2015-04-24 21:24  3%                 ` Ravi Kerur
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-04-24 19:51 UTC (permalink / raw)
  To: Ravi Kerur; +Cc: dev

On Fri, Apr 24, 2015 at 12:21:23PM -0700, Ravi Kerur wrote:
> On Fri, Apr 24, 2015 at 11:53 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > On Fri, Apr 24, 2015 at 09:45:24AM -0700, Ravi Kerur wrote:
> > > On Fri, Apr 24, 2015 at 8:22 AM, Neil Horman <nhorman@tuxdriver.com>
> > wrote:
> > >
> > > > On Fri, Apr 24, 2015 at 08:14:04AM -0700, Ravi Kerur wrote:
> > > > > On Fri, Apr 24, 2015 at 6:51 AM, Neil Horman <nhorman@tuxdriver.com>
> > > > wrote:
> > > > >
> > > > > > On Thu, Apr 23, 2015 at 02:35:31PM -0700, Ravi Kerur wrote:
> > > > > > > Changes in v7
> > > > > > > Remove _setname_ pthread calls.
> > > > > > > Use rte_gettid() API in RTE_LOG to print thread_id.
> > > > > > >
> > > > > > > Changes in v6
> > > > > > > Remove RTE_EXEC_ENV_BSDAPP from eal_common_thread.c file.
> > > > > > > Add pthread_setname_np/pthread_set_name_np for Linux/FreeBSD
> > > > > > > respectively. Plan to use _getname_ in RTE_LOG when available.
> > > > > > > Use existing rte_get_systid() in RTE_LOG to print thread_id.
> > > > > > >
> > > > > > > Changes in v5
> > > > > > > Rebase to latest code.
> > > > > > >
> > > > > > > Changes in v4
> > > > > > > None
> > > > > > >
> > > > > > > Changes in v3
> > > > > > > Changed subject to be more explicit on file name inclusion.
> > > > > > >
> > > > > > > Changes in v2
> > > > > > > None
> > > > > > >
> > > > > > > Changes in v1
> > > > > > > eal_thread.c has minor differences between Linux and BSD, move
> > > > > > > entire file into common directory.
> > > > > > > Use RTE_EXEC_ENV_BSDAPP to differentiate on minor differences.
> > > > > > > Rename eal_thread.c to eal_common_thread.c
> > > > > > > Makefile changes to reflect file move and name change.
> > > > > > > Fix checkpatch warnings.
> > > > > > >
> > > > > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > > > > > ---
> > > > > > >  lib/librte_eal/bsdapp/eal/Makefile        |   2 +-
> > > > > > >  lib/librte_eal/bsdapp/eal/eal_thread.c    | 152
> > > > > > ------------------------------
> > > > > > >  lib/librte_eal/common/eal_common_thread.c | 147
> > > > > > ++++++++++++++++++++++++++++-
> > > > > > >  lib/librte_eal/linuxapp/eal/eal_thread.c  | 152
> > > > > > +-----------------------------
> > > > > > >  4 files changed, 148 insertions(+), 305 deletions(-)
> > > > > > >
> > > > > > > diff --git a/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > b/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > > index 2357cfa..55971b9 100644
> > > > > > > --- a/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > > +++ b/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > > @@ -87,7 +87,7 @@ CFLAGS_eal_common_log.o := -D_GNU_SOURCE
> > > > > > >  # workaround for a gcc bug with noreturn attribute
> > > > > > >  # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
> > > > > > >  ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
> > > > > > > -CFLAGS_eal_thread.o += -Wno-return-type
> > > > > > > +CFLAGS_eal_common_thread.o += -Wno-return-type
> > > > > > >  CFLAGS_eal_hpet.o += -Wno-return-type
> > > > > > >  endif
> > > > > > >
> > > > > > > diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > > index 9a03437..5714b8f 100644
> > > > > > > --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > > +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > > @@ -35,163 +35,11 @@
> > > > > > >  #include <stdio.h>
> > > > > > >  #include <stdlib.h>
> > > > > > >  #include <stdint.h>
> > > > > > > -#include <unistd.h>
> > > > > > > -#include <sched.h>
> > > > > > > -#include <pthread_np.h>
> > > > > > > -#include <sys/queue.h>
> > > > > > >  #include <sys/thr.h>
> > > > > > >
> > > > > > > -#include <rte_debug.h>
> > > > > > > -#include <rte_atomic.h>
> > > > > > > -#include <rte_launch.h>
> > > > > > > -#include <rte_log.h>
> > > > > > > -#include <rte_memory.h>
> > > > > > > -#include <rte_memzone.h>
> > > > > > > -#include <rte_per_lcore.h>
> > > > > > > -#include <rte_eal.h>
> > > > > > > -#include <rte_per_lcore.h>
> > > > > > > -#include <rte_lcore.h>
> > > > > > > -
> > > > > > >  #include "eal_private.h"
> > > > > > >  #include "eal_thread.h"
> > > > > > >
> > > > > > > -RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
> > > > > > NAK, these are exported symbols, you can't remove them without
> > going
> > > > > > through the
> > > > > > deprecation process.
> > > > > >
> > > > > >
> > > > > They are not removed/deleted, they are moved from eal_thread.c to
> > > > > eal_common_thread.c file since it is common to both Linux and BSD.
> > > > >
> > > > Then perhaps you forgot to export the symbol?  Its showing up as
> > removed
> > > > on the
> > > > ABI checker utility.
> > > >
> > > > Neil
> > > >
> > >
> > > Can you please show me in the current code where it is being exported? I
> > > have only moved definitions to _common_ files, not sure why it should be
> > > exported now.  I searched in the current code for RTE_DEFINE_PER_LCORE
> > >
> > > #home/rkerur/dpdk-tmp/dpdk# grep -ir RTE_DEFINE_PER_LCORE *
> > > app/test/test_per_lcore.c:static RTE_DEFINE_PER_LCORE(unsigned, test) =
> > > 0x12345678;
> > > lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > _lcore_id) = LCORE_ID_ANY;
> > > lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > >
> > lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > > _cpuset);
> > > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > _lcore_id) = LCORE_ID_ANY;
> > > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > > _cpuset);
> > > lib/librte_eal/common/include/rte_per_lcore.h:#define
> > > RTE_DEFINE_PER_LCORE(type, name)            \
> > > lib/librte_eal/common/include/rte_eal.h:    static
> > > RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
> > > lib/librte_eal/common/eal_common_errno.c:RTE_DEFINE_PER_LCORE(int,
> > > _rte_errno);
> > > lib/librte_eal/common/eal_common_errno.c:    static
> > > RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
> > >
> > >
> > > > > Thanks
> > > > > Ravi
> > > > >
> > > > > Regards
> > > > > > Neil
> > > > > >
> > > > > >
> > > >
> > Its exported in the version map file:
> >  per_lcore__lcore_id;
> >
> >
> Thanks Neil, I checked and both linux and bsd rte_eal_version.map have it.
> I compared .map file between "changed code" and the original, they are same
> for both linux and bsd. In fact you had ACK'd v4 version of this patch
> series and no major changes after that. Please let me know if I missed
> something.
> 
I did, and I'm retracting that, because I didn't think to check the ABI
compatibility on this.  But I ran it through the ABI checking script and
this error popped out.  You should run it as well; it's in the scripts directory.


I see in your first patch you removed it and re-added it in the common section.
But something about how it's building is causing it to not show up as an exported
symbol, which is problematic, as other applications are going to want access to
it.

It's also possible that the ABI checker is throwing a false positive, but either
way, it needs to be looked into prior to moving forward with this.

Neil

> dpdk-common-eal-f-4 has all the common code movement changes
> dpdk-tmp is original code
> 
> #/home/rkerur/dpdk-common-eal-f-4/dpdk# diff
> lib/librte_eal/linuxapp/eal/rte_eal_version.map
> /home/rkerur/dpdk-tmp/dpdk/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> 
> <No difference>
> 
> #/home/rkerur/dpdk-common-eal-f-4/dpdk# diff
> lib/librte_eal/bsdapp/eal/rte_eal_version.map
> /home/rkerur/dpdk-tmp/dpdk/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> 
> <No difference>
> 
> Thanks,
> Ravi
> 
> Neil
> >
> >

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  2015-04-24 18:53  0%           ` Neil Horman
@ 2015-04-24 19:21  0%             ` Ravi Kerur
  2015-04-24 19:51  4%               ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Ravi Kerur @ 2015-04-24 19:21 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On Fri, Apr 24, 2015 at 11:53 AM, Neil Horman <nhorman@tuxdriver.com> wrote:

> On Fri, Apr 24, 2015 at 09:45:24AM -0700, Ravi Kerur wrote:
> > On Fri, Apr 24, 2015 at 8:22 AM, Neil Horman <nhorman@tuxdriver.com>
> wrote:
> >
> > > On Fri, Apr 24, 2015 at 08:14:04AM -0700, Ravi Kerur wrote:
> > > > On Fri, Apr 24, 2015 at 6:51 AM, Neil Horman <nhorman@tuxdriver.com>
> > > wrote:
> > > >
> > > > > On Thu, Apr 23, 2015 at 02:35:31PM -0700, Ravi Kerur wrote:
> > > > > > Changes in v7
> > > > > > Remove _setname_ pthread calls.
> > > > > > Use rte_gettid() API in RTE_LOG to print thread_id.
> > > > > >
> > > > > > Changes in v6
> > > > > > Remove RTE_EXEC_ENV_BSDAPP from eal_common_thread.c file.
> > > > > > Add pthread_setname_np/pthread_set_name_np for Linux/FreeBSD
> > > > > > respectively. Plan to use _getname_ in RTE_LOG when available.
> > > > > > Use existing rte_get_systid() in RTE_LOG to print thread_id.
> > > > > >
> > > > > > Changes in v5
> > > > > > Rebase to latest code.
> > > > > >
> > > > > > Changes in v4
> > > > > > None
> > > > > >
> > > > > > Changes in v3
> > > > > > Changed subject to be more explicit on file name inclusion.
> > > > > >
> > > > > > Changes in v2
> > > > > > None
> > > > > >
> > > > > > Changes in v1
> > > > > > eal_thread.c has minor differences between Linux and BSD, move
> > > > > > entire file into common directory.
> > > > > > Use RTE_EXEC_ENV_BSDAPP to differentiate on minor differences.
> > > > > > Rename eal_thread.c to eal_common_thread.c
> > > > > > Makefile changes to reflect file move and name change.
> > > > > > Fix checkpatch warnings.
> > > > > >
> > > > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > > > > ---
> > > > > >  lib/librte_eal/bsdapp/eal/Makefile        |   2 +-
> > > > > >  lib/librte_eal/bsdapp/eal/eal_thread.c    | 152
> > > > > ------------------------------
> > > > > >  lib/librte_eal/common/eal_common_thread.c | 147
> > > > > ++++++++++++++++++++++++++++-
> > > > > >  lib/librte_eal/linuxapp/eal/eal_thread.c  | 152
> > > > > +-----------------------------
> > > > > >  4 files changed, 148 insertions(+), 305 deletions(-)
> > > > > >
> > > > > > diff --git a/lib/librte_eal/bsdapp/eal/Makefile
> > > > > b/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > index 2357cfa..55971b9 100644
> > > > > > --- a/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > +++ b/lib/librte_eal/bsdapp/eal/Makefile
> > > > > > @@ -87,7 +87,7 @@ CFLAGS_eal_common_log.o := -D_GNU_SOURCE
> > > > > >  # workaround for a gcc bug with noreturn attribute
> > > > > >  # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
> > > > > >  ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
> > > > > > -CFLAGS_eal_thread.o += -Wno-return-type
> > > > > > +CFLAGS_eal_common_thread.o += -Wno-return-type
> > > > > >  CFLAGS_eal_hpet.o += -Wno-return-type
> > > > > >  endif
> > > > > >
> > > > > > diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > index 9a03437..5714b8f 100644
> > > > > > --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > > @@ -35,163 +35,11 @@
> > > > > >  #include <stdio.h>
> > > > > >  #include <stdlib.h>
> > > > > >  #include <stdint.h>
> > > > > > -#include <unistd.h>
> > > > > > -#include <sched.h>
> > > > > > -#include <pthread_np.h>
> > > > > > -#include <sys/queue.h>
> > > > > >  #include <sys/thr.h>
> > > > > >
> > > > > > -#include <rte_debug.h>
> > > > > > -#include <rte_atomic.h>
> > > > > > -#include <rte_launch.h>
> > > > > > -#include <rte_log.h>
> > > > > > -#include <rte_memory.h>
> > > > > > -#include <rte_memzone.h>
> > > > > > -#include <rte_per_lcore.h>
> > > > > > -#include <rte_eal.h>
> > > > > > -#include <rte_per_lcore.h>
> > > > > > -#include <rte_lcore.h>
> > > > > > -
> > > > > >  #include "eal_private.h"
> > > > > >  #include "eal_thread.h"
> > > > > >
> > > > > > -RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
> > > > > NAK, these are exported symbols, you can't remove them without
> going
> > > > > through the
> > > > > deprecation process.
> > > > >
> > > > >
> > > > They are not removed/deleted, they are moved from eal_thread.c to
> > > > eal_common_thread.c file since it is common to both Linux and BSD.
> > > >
> > > Then perhaps you forgot to export the symbol?  Its showing up as
> removed
> > > on the
> > > ABI checker utility.
> > >
> > > Neil
> > >
> >
> > Can you please show me in the current code where it is being exported? I
> > have only moved definitions to _common_ files, not sure why it should be
> > exported now.  I searched in the current code for RTE_DEFINE_PER_LCORE
> >
> > #home/rkerur/dpdk-tmp/dpdk# grep -ir RTE_DEFINE_PER_LCORE *
> > app/test/test_per_lcore.c:static RTE_DEFINE_PER_LCORE(unsigned, test) =
> > 0x12345678;
> > lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > _lcore_id) = LCORE_ID_ANY;
> > lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > _socket_id) = (unsigned)SOCKET_ID_ANY;
> >
> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > _cpuset);
> > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > _lcore_id) = LCORE_ID_ANY;
> > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> > _socket_id) = (unsigned)SOCKET_ID_ANY;
> > lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> > _cpuset);
> > lib/librte_eal/common/include/rte_per_lcore.h:#define
> > RTE_DEFINE_PER_LCORE(type, name)            \
> > lib/librte_eal/common/include/rte_eal.h:    static
> > RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
> > lib/librte_eal/common/eal_common_errno.c:RTE_DEFINE_PER_LCORE(int,
> > _rte_errno);
> > lib/librte_eal/common/eal_common_errno.c:    static
> > RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
> >
> >
> > > > Thanks
> > > > Ravi
> > > >
> > > > Regards
> > > > > Neil
> > > > >
> > > > >
> > >
> Its exported in the version map file:
>  per_lcore__lcore_id;
>
>
Thanks Neil, I checked and both linux and bsd rte_eal_version.map have it.
I compared .map file between "changed code" and the original, they are same
for both linux and bsd. In fact you had ACK'd v4 version of this patch
series and no major changes after that. Please let me know if I missed
something.

dpdk-common-eal-f-4 has all the common code movement changes
dpdk-tmp is original code

#/home/rkerur/dpdk-common-eal-f-4/dpdk# diff
lib/librte_eal/linuxapp/eal/rte_eal_version.map
/home/rkerur/dpdk-tmp/dpdk/lib/librte_eal/linuxapp/eal/rte_eal_version.map

<No difference>

#/home/rkerur/dpdk-common-eal-f-4/dpdk# diff
lib/librte_eal/bsdapp/eal/rte_eal_version.map
/home/rkerur/dpdk-tmp/dpdk/lib/librte_eal/bsdapp/eal/rte_eal_version.map

<No difference>

Thanks,
Ravi

Neil
>
>

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  2015-04-24 16:45  0%         ` Ravi Kerur
@ 2015-04-24 18:53  0%           ` Neil Horman
  2015-04-24 19:21  0%             ` Ravi Kerur
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-04-24 18:53 UTC (permalink / raw)
  To: Ravi Kerur; +Cc: dev

On Fri, Apr 24, 2015 at 09:45:24AM -0700, Ravi Kerur wrote:
> On Fri, Apr 24, 2015 at 8:22 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > On Fri, Apr 24, 2015 at 08:14:04AM -0700, Ravi Kerur wrote:
> > > On Fri, Apr 24, 2015 at 6:51 AM, Neil Horman <nhorman@tuxdriver.com>
> > wrote:
> > >
> > > > On Thu, Apr 23, 2015 at 02:35:31PM -0700, Ravi Kerur wrote:
> > > > > Changes in v7
> > > > > Remove _setname_ pthread calls.
> > > > > Use rte_gettid() API in RTE_LOG to print thread_id.
> > > > >
> > > > > Changes in v6
> > > > > Remove RTE_EXEC_ENV_BSDAPP from eal_common_thread.c file.
> > > > > Add pthread_setname_np/pthread_set_name_np for Linux/FreeBSD
> > > > > respectively. Plan to use _getname_ in RTE_LOG when available.
> > > > > Use existing rte_get_systid() in RTE_LOG to print thread_id.
> > > > >
> > > > > Changes in v5
> > > > > Rebase to latest code.
> > > > >
> > > > > Changes in v4
> > > > > None
> > > > >
> > > > > Changes in v3
> > > > > Changed subject to be more explicit on file name inclusion.
> > > > >
> > > > > Changes in v2
> > > > > None
> > > > >
> > > > > Changes in v1
> > > > > eal_thread.c has minor differences between Linux and BSD, move
> > > > > entire file into common directory.
> > > > > Use RTE_EXEC_ENV_BSDAPP to differentiate on minor differences.
> > > > > Rename eal_thread.c to eal_common_thread.c
> > > > > Makefile changes to reflect file move and name change.
> > > > > Fix checkpatch warnings.
> > > > >
> > > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > > > ---
> > > > >  lib/librte_eal/bsdapp/eal/Makefile        |   2 +-
> > > > >  lib/librte_eal/bsdapp/eal/eal_thread.c    | 152
> > > > ------------------------------
> > > > >  lib/librte_eal/common/eal_common_thread.c | 147
> > > > ++++++++++++++++++++++++++++-
> > > > >  lib/librte_eal/linuxapp/eal/eal_thread.c  | 152
> > > > +-----------------------------
> > > > >  4 files changed, 148 insertions(+), 305 deletions(-)
> > > > >
> > > > > diff --git a/lib/librte_eal/bsdapp/eal/Makefile
> > > > b/lib/librte_eal/bsdapp/eal/Makefile
> > > > > index 2357cfa..55971b9 100644
> > > > > --- a/lib/librte_eal/bsdapp/eal/Makefile
> > > > > +++ b/lib/librte_eal/bsdapp/eal/Makefile
> > > > > @@ -87,7 +87,7 @@ CFLAGS_eal_common_log.o := -D_GNU_SOURCE
> > > > >  # workaround for a gcc bug with noreturn attribute
> > > > >  # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
> > > > >  ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
> > > > > -CFLAGS_eal_thread.o += -Wno-return-type
> > > > > +CFLAGS_eal_common_thread.o += -Wno-return-type
> > > > >  CFLAGS_eal_hpet.o += -Wno-return-type
> > > > >  endif
> > > > >
> > > > > diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > index 9a03437..5714b8f 100644
> > > > > --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > > @@ -35,163 +35,11 @@
> > > > >  #include <stdio.h>
> > > > >  #include <stdlib.h>
> > > > >  #include <stdint.h>
> > > > > -#include <unistd.h>
> > > > > -#include <sched.h>
> > > > > -#include <pthread_np.h>
> > > > > -#include <sys/queue.h>
> > > > >  #include <sys/thr.h>
> > > > >
> > > > > -#include <rte_debug.h>
> > > > > -#include <rte_atomic.h>
> > > > > -#include <rte_launch.h>
> > > > > -#include <rte_log.h>
> > > > > -#include <rte_memory.h>
> > > > > -#include <rte_memzone.h>
> > > > > -#include <rte_per_lcore.h>
> > > > > -#include <rte_eal.h>
> > > > > -#include <rte_per_lcore.h>
> > > > > -#include <rte_lcore.h>
> > > > > -
> > > > >  #include "eal_private.h"
> > > > >  #include "eal_thread.h"
> > > > >
> > > > > -RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
> > > > NAK, these are exported symbols, you can't remove them without going
> > > > through the
> > > > deprecation process.
> > > >
> > > >
> > > They are not removed/deleted, they are moved from eal_thread.c to
> > > eal_common_thread.c file since it is common to both Linux and BSD.
> > >
> > Then perhaps you forgot to export the symbol?  Its showing up as removed
> > on the
> > ABI checker utility.
> >
> > Neil
> >
> 
> Can you please show me in the current code where it is being exported? I
> have only moved definitions to _common_ files, not sure why it should be
> exported now.  I searched in the current code for RTE_DEFINE_PER_LCORE
> 
> #home/rkerur/dpdk-tmp/dpdk# grep -ir RTE_DEFINE_PER_LCORE *
> app/test/test_per_lcore.c:static RTE_DEFINE_PER_LCORE(unsigned, test) =
> 0x12345678;
> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> _lcore_id) = LCORE_ID_ANY;
> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> _socket_id) = (unsigned)SOCKET_ID_ANY;
> lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> _cpuset);
> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> _lcore_id) = LCORE_ID_ANY;
> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
> _socket_id) = (unsigned)SOCKET_ID_ANY;
> lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
> _cpuset);
> lib/librte_eal/common/include/rte_per_lcore.h:#define
> RTE_DEFINE_PER_LCORE(type, name)            \
> lib/librte_eal/common/include/rte_eal.h:    static
> RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
> lib/librte_eal/common/eal_common_errno.c:RTE_DEFINE_PER_LCORE(int,
> _rte_errno);
> lib/librte_eal/common/eal_common_errno.c:    static
> RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
> 
> 
> > > Thanks
> > > Ravi
> > >
> > > Regards
> > > > Neil
> > > >
> > > >
> >
It's exported in the version map file:
 per_lcore__lcore_id;

Neil

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  2015-04-24 15:22  3%       ` Neil Horman
@ 2015-04-24 16:45  0%         ` Ravi Kerur
  2015-04-24 18:53  0%           ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Ravi Kerur @ 2015-04-24 16:45 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On Fri, Apr 24, 2015 at 8:22 AM, Neil Horman <nhorman@tuxdriver.com> wrote:

> On Fri, Apr 24, 2015 at 08:14:04AM -0700, Ravi Kerur wrote:
> > On Fri, Apr 24, 2015 at 6:51 AM, Neil Horman <nhorman@tuxdriver.com>
> wrote:
> >
> > > On Thu, Apr 23, 2015 at 02:35:31PM -0700, Ravi Kerur wrote:
> > > > Changes in v7
> > > > Remove _setname_ pthread calls.
> > > > Use rte_gettid() API in RTE_LOG to print thread_id.
> > > >
> > > > Changes in v6
> > > > Remove RTE_EXEC_ENV_BSDAPP from eal_common_thread.c file.
> > > > Add pthread_setname_np/pthread_set_name_np for Linux/FreeBSD
> > > > respectively. Plan to use _getname_ in RTE_LOG when available.
> > > > Use existing rte_get_systid() in RTE_LOG to print thread_id.
> > > >
> > > > Changes in v5
> > > > Rebase to latest code.
> > > >
> > > > Changes in v4
> > > > None
> > > >
> > > > Changes in v3
> > > > Changed subject to be more explicit on file name inclusion.
> > > >
> > > > Changes in v2
> > > > None
> > > >
> > > > Changes in v1
> > > > eal_thread.c has minor differences between Linux and BSD, move
> > > > entire file into common directory.
> > > > Use RTE_EXEC_ENV_BSDAPP to differentiate on minor differences.
> > > > Rename eal_thread.c to eal_common_thread.c
> > > > Makefile changes to reflect file move and name change.
> > > > Fix checkpatch warnings.
> > > >
> > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > > ---
> > > >  lib/librte_eal/bsdapp/eal/Makefile        |   2 +-
> > > >  lib/librte_eal/bsdapp/eal/eal_thread.c    | 152
> > > ------------------------------
> > > >  lib/librte_eal/common/eal_common_thread.c | 147
> > > ++++++++++++++++++++++++++++-
> > > >  lib/librte_eal/linuxapp/eal/eal_thread.c  | 152
> > > +-----------------------------
> > > >  4 files changed, 148 insertions(+), 305 deletions(-)
> > > >
> > > > diff --git a/lib/librte_eal/bsdapp/eal/Makefile
> > > b/lib/librte_eal/bsdapp/eal/Makefile
> > > > index 2357cfa..55971b9 100644
> > > > --- a/lib/librte_eal/bsdapp/eal/Makefile
> > > > +++ b/lib/librte_eal/bsdapp/eal/Makefile
> > > > @@ -87,7 +87,7 @@ CFLAGS_eal_common_log.o := -D_GNU_SOURCE
> > > >  # workaround for a gcc bug with noreturn attribute
> > > >  # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
> > > >  ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
> > > > -CFLAGS_eal_thread.o += -Wno-return-type
> > > > +CFLAGS_eal_common_thread.o += -Wno-return-type
> > > >  CFLAGS_eal_hpet.o += -Wno-return-type
> > > >  endif
> > > >
> > > > diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > index 9a03437..5714b8f 100644
> > > > --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > > @@ -35,163 +35,11 @@
> > > >  #include <stdio.h>
> > > >  #include <stdlib.h>
> > > >  #include <stdint.h>
> > > > -#include <unistd.h>
> > > > -#include <sched.h>
> > > > -#include <pthread_np.h>
> > > > -#include <sys/queue.h>
> > > >  #include <sys/thr.h>
> > > >
> > > > -#include <rte_debug.h>
> > > > -#include <rte_atomic.h>
> > > > -#include <rte_launch.h>
> > > > -#include <rte_log.h>
> > > > -#include <rte_memory.h>
> > > > -#include <rte_memzone.h>
> > > > -#include <rte_per_lcore.h>
> > > > -#include <rte_eal.h>
> > > > -#include <rte_per_lcore.h>
> > > > -#include <rte_lcore.h>
> > > > -
> > > >  #include "eal_private.h"
> > > >  #include "eal_thread.h"
> > > >
> > > > -RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
> > > NAK, these are exported symbols, you can't remove them without going
> > > through the
> > > deprecation process.
> > >
> > >
> > They are not removed/deleted, they are moved from eal_thread.c to
> > eal_common_thread.c file since it is common to both Linux and BSD.
> >
> Then perhaps you forgot to export the symbol?  It's showing up as removed
> on the
> ABI checker utility.
>
> Neil
>

Can you please show me in the current code where it is being exported? I
have only moved definitions to _common_ files, not sure why it should be
exported now.  I searched in the current code for RTE_DEFINE_PER_LCORE

#home/rkerur/dpdk-tmp/dpdk# grep -ir RTE_DEFINE_PER_LCORE *
app/test/test_per_lcore.c:static RTE_DEFINE_PER_LCORE(unsigned, test) =
0x12345678;
lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
_lcore_id) = LCORE_ID_ANY;
lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
_socket_id) = (unsigned)SOCKET_ID_ANY;
lib/librte_eal/linuxapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
_cpuset);
lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
_lcore_id) = LCORE_ID_ANY;
lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(unsigned,
_socket_id) = (unsigned)SOCKET_ID_ANY;
lib/librte_eal/bsdapp/eal/eal_thread.c:RTE_DEFINE_PER_LCORE(rte_cpuset_t,
_cpuset);
lib/librte_eal/common/include/rte_per_lcore.h:#define
RTE_DEFINE_PER_LCORE(type, name)            \
lib/librte_eal/common/include/rte_eal.h:    static
RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
lib/librte_eal/common/eal_common_errno.c:RTE_DEFINE_PER_LCORE(int,
_rte_errno);
lib/librte_eal/common/eal_common_errno.c:    static
RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);


> > Thanks
> > Ravi
> >
> > Regards
> > > Neil
> > >
> > >
>

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c
  @ 2015-04-24 15:22  3%       ` Neil Horman
  2015-04-24 16:45  0%         ` Ravi Kerur
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-04-24 15:22 UTC (permalink / raw)
  To: Ravi Kerur; +Cc: dev

On Fri, Apr 24, 2015 at 08:14:04AM -0700, Ravi Kerur wrote:
> On Fri, Apr 24, 2015 at 6:51 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > On Thu, Apr 23, 2015 at 02:35:31PM -0700, Ravi Kerur wrote:
> > > Changes in v7
> > > Remove _setname_ pthread calls.
> > > Use rte_gettid() API in RTE_LOG to print thread_id.
> > >
> > > Changes in v6
> > > Remove RTE_EXEC_ENV_BSDAPP from eal_common_thread.c file.
> > > Add pthread_setname_np/pthread_set_name_np for Linux/FreeBSD
> > > respectively. Plan to use _getname_ in RTE_LOG when available.
> > > Use existing rte_get_systid() in RTE_LOG to print thread_id.
> > >
> > > Changes in v5
> > > Rebase to latest code.
> > >
> > > Changes in v4
> > > None
> > >
> > > Changes in v3
> > > Changed subject to be more explicit on file name inclusion.
> > >
> > > Changes in v2
> > > None
> > >
> > > Changes in v1
> > > eal_thread.c has minor differences between Linux and BSD, move
> > > entire file into common directory.
> > > Use RTE_EXEC_ENV_BSDAPP to differentiate on minor differences.
> > > Rename eal_thread.c to eal_common_thread.c
> > > Makefile changes to reflect file move and name change.
> > > Fix checkpatch warnings.
> > >
> > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > ---
> > >  lib/librte_eal/bsdapp/eal/Makefile        |   2 +-
> > >  lib/librte_eal/bsdapp/eal/eal_thread.c    | 152
> > ------------------------------
> > >  lib/librte_eal/common/eal_common_thread.c | 147
> > ++++++++++++++++++++++++++++-
> > >  lib/librte_eal/linuxapp/eal/eal_thread.c  | 152
> > +-----------------------------
> > >  4 files changed, 148 insertions(+), 305 deletions(-)
> > >
> > > diff --git a/lib/librte_eal/bsdapp/eal/Makefile
> > b/lib/librte_eal/bsdapp/eal/Makefile
> > > index 2357cfa..55971b9 100644
> > > --- a/lib/librte_eal/bsdapp/eal/Makefile
> > > +++ b/lib/librte_eal/bsdapp/eal/Makefile
> > > @@ -87,7 +87,7 @@ CFLAGS_eal_common_log.o := -D_GNU_SOURCE
> > >  # workaround for a gcc bug with noreturn attribute
> > >  # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
> > >  ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
> > > -CFLAGS_eal_thread.o += -Wno-return-type
> > > +CFLAGS_eal_common_thread.o += -Wno-return-type
> > >  CFLAGS_eal_hpet.o += -Wno-return-type
> > >  endif
> > >
> > > diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > index 9a03437..5714b8f 100644
> > > --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
> > > @@ -35,163 +35,11 @@
> > >  #include <stdio.h>
> > >  #include <stdlib.h>
> > >  #include <stdint.h>
> > > -#include <unistd.h>
> > > -#include <sched.h>
> > > -#include <pthread_np.h>
> > > -#include <sys/queue.h>
> > >  #include <sys/thr.h>
> > >
> > > -#include <rte_debug.h>
> > > -#include <rte_atomic.h>
> > > -#include <rte_launch.h>
> > > -#include <rte_log.h>
> > > -#include <rte_memory.h>
> > > -#include <rte_memzone.h>
> > > -#include <rte_per_lcore.h>
> > > -#include <rte_eal.h>
> > > -#include <rte_per_lcore.h>
> > > -#include <rte_lcore.h>
> > > -
> > >  #include "eal_private.h"
> > >  #include "eal_thread.h"
> > >
> > > -RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
> > NAK, these are exported symbols, you can't remove them without going
> > through the
> > deprecation process.
> >
> >
> They are not removed/deleted, they are moved from eal_thread.c to
> eal_common_thread.c file since it is common to both Linux and BSD.
> 
Then perhaps you forgot to export the symbol?  It's showing up as removed on the
ABI checker utility.

Neil

> Thanks
> Ravi
> 
> Regards
> > Neil
> >
> >

^ permalink raw reply	[relevance 3%]

* [dpdk-dev] [PATCH] doc: fixed spellings and typos
@ 2015-04-24 12:58  8% John McNamara
  0 siblings, 0 replies; 200+ results
From: John McNamara @ 2015-04-24 12:58 UTC (permalink / raw)
  To: dev

Fixed several typos and spelling errors in guide docs.

Signed-off-by: John McNamara <john.mcnamara@intel.com>
---
 doc/guides/freebsd_gsg/build_sample_apps.rst       |  2 +-
 doc/guides/linux_gsg/build_dpdk.rst                |  2 +-
 doc/guides/linux_gsg/build_sample_apps.rst         |  2 +-
 doc/guides/linux_gsg/quick_start.rst               |  4 +--
 doc/guides/linux_gsg/sys_reqs.rst                  |  8 +++---
 doc/guides/nics/intel_vf.rst                       |  4 +--
 doc/guides/nics/mlx4.rst                           |  2 +-
 doc/guides/nics/pcap_ring.rst                      |  4 +--
 doc/guides/prog_guide/dev_kit_build_system.rst     |  2 +-
 doc/guides/prog_guide/dev_kit_root_make_help.rst   | 17 ++++++++++--
 doc/guides/prog_guide/env_abstraction_layer.rst    |  8 +++---
 doc/guides/prog_guide/index.rst                    |  4 +--
 .../prog_guide/ip_fragment_reassembly_lib.rst      | 16 ++++++------
 doc/guides/prog_guide/ivshmem_lib.rst              |  6 ++---
 doc/guides/prog_guide/kernel_nic_interface.rst     |  2 +-
 .../prog_guide/link_bonding_poll_mode_drv_lib.rst  |  6 ++---
 doc/guides/prog_guide/mbuf_lib.rst                 |  6 ++---
 .../prog_guide/packet_classif_access_ctrl.rst      | 12 ++++-----
 doc/guides/prog_guide/packet_framework.rst         | 22 ++++++++--------
 doc/guides/prog_guide/qos_framework.rst            |  4 +--
 doc/guides/prog_guide/source_org.rst               |  2 +-
 doc/guides/prog_guide/timer_lib.rst                |  2 +-
 doc/guides/prog_guide/vhost_lib.rst                |  6 ++---
 doc/guides/prog_guide/writing_efficient_code.rst   |  2 +-
 doc/guides/rel_notes/abi.rst                       |  4 +--
 doc/guides/rel_notes/faq.rst                       |  2 +-
 doc/guides/rel_notes/known_issues.rst              | 16 ++++++------
 doc/guides/rel_notes/resolved_issues.rst           | 30 +++++++++++-----------
 doc/guides/rel_notes/supported_features.rst        | 28 ++++++++++----------
 doc/guides/rel_notes/updating_apps.rst             |  4 +--
 doc/guides/sample_app_ug/dist_app.rst              |  2 +-
 doc/guides/sample_app_ug/ip_frag.rst               |  2 +-
 doc/guides/sample_app_ug/ip_reassembly.rst         |  2 +-
 doc/guides/sample_app_ug/ipv4_multicast.rst        |  4 +--
 doc/guides/sample_app_ug/l2_forward_job_stats.rst  |  8 +++---
 doc/guides/sample_app_ug/l3_forward_power_man.rst  |  2 +-
 doc/guides/sample_app_ug/load_balancer.rst         |  2 +-
 doc/guides/sample_app_ug/multi_process.rst         |  4 +--
 doc/guides/sample_app_ug/netmap_compatibility.rst  |  2 +-
 doc/guides/sample_app_ug/packet_ordering.rst       |  4 +--
 doc/guides/sample_app_ug/quota_watermark.rst       |  4 +--
 doc/guides/sample_app_ug/test_pipeline.rst         |  8 +++---
 doc/guides/sample_app_ug/timer.rst                 |  2 +-
 doc/guides/sample_app_ug/vhost.rst                 | 12 ++++-----
 doc/guides/sample_app_ug/vm_power_management.rst   |  6 ++---
 doc/guides/sample_app_ug/vmdq_dcb_forwarding.rst   |  4 +--
 doc/guides/testpmd_app_ug/intro.rst                |  4 +--
 doc/guides/testpmd_app_ug/testpmd_funcs.rst        |  4 +--
 48 files changed, 159 insertions(+), 146 deletions(-)

diff --git a/doc/guides/freebsd_gsg/build_sample_apps.rst b/doc/guides/freebsd_gsg/build_sample_apps.rst
index e198c6a..acd0311 100644
--- a/doc/guides/freebsd_gsg/build_sample_apps.rst
+++ b/doc/guides/freebsd_gsg/build_sample_apps.rst
@@ -136,7 +136,7 @@ The EAL options for FreeBSD* are as follows:
     (multiple -b options are allowed).
 
 *   --use-device
-    : use the specified ethernet device(s) only.  Use comma-separate
+    : use the specified Ethernet device(s) only.  Use comma-separate
     <[domain:]bus:devid.func> values. Cannot be used with -b option.
 
 *   -r NUM
diff --git a/doc/guides/linux_gsg/build_dpdk.rst b/doc/guides/linux_gsg/build_dpdk.rst
index 5f0f3ae..e3a0b46 100644
--- a/doc/guides/linux_gsg/build_dpdk.rst
+++ b/doc/guides/linux_gsg/build_dpdk.rst
@@ -166,7 +166,7 @@ Loading Modules to Enable Userspace IO for DPDK
 -----------------------------------------------
 
 To run any DPDK application, a suitable uio module can be loaded into the running kernel.
-In many cases, the standard uio_pci_generic module included in the linux kernel
+In many cases, the standard uio_pci_generic module included in the Linux kernel
 can provide the uio capability. This module can be loaded using the command
 
 .. code-block:: console
diff --git a/doc/guides/linux_gsg/build_sample_apps.rst b/doc/guides/linux_gsg/build_sample_apps.rst
index 1abe99c..e0de2f5 100644
--- a/doc/guides/linux_gsg/build_sample_apps.rst
+++ b/doc/guides/linux_gsg/build_sample_apps.rst
@@ -119,7 +119,7 @@ The EAL options are as follows:
 
 *   -b <domain:bus:devid.func>: blacklisting of ports; prevent EAL from using specified PCI device (multiple -b options are allowed)
 
-*   --use-device: use the specified ethernet device(s) only. Use comma-separate <[domain:]bus:devid.func> values. Cannot be used with -b option
+*   --use-device: use the specified Ethernet device(s) only. Use comma-separate <[domain:]bus:devid.func> values. Cannot be used with -b option
 
 *   --socket-mem: Memory to allocate from hugepages on specific sockets
 
diff --git a/doc/guides/linux_gsg/quick_start.rst b/doc/guides/linux_gsg/quick_start.rst
index a1dd3ee..b07fc87 100644
--- a/doc/guides/linux_gsg/quick_start.rst
+++ b/doc/guides/linux_gsg/quick_start.rst
@@ -226,7 +226,7 @@ The following selection demonstrates the starting of the DPDK UIO driver.
     Loading DPDK UIO module
 
 The following selection demonstrates the creation of hugepages in a NUMA system.
-1024 2 Mbyte pages are assigned to each node.
+1024 2 MByte pages are assigned to each node.
 The result is that the application should use -m 4096 for starting the application to access both memory areas
 (this is done automatically if the -m option is not provided).
 
@@ -239,7 +239,7 @@ The result is that the application should use -m 4096 for starting the applicati
     Option: 15
 
     Removing currently reserved hugepages
-    nmounting /mnt/huge and removing directory
+    Unmounting /mnt/huge and removing directory
     Input the number of 2MB pages for each node
     Example: to have 128MB of hugepages available per node,
     enter '64' to reserve 64 * 2MB pages on each node
diff --git a/doc/guides/linux_gsg/sys_reqs.rst b/doc/guides/linux_gsg/sys_reqs.rst
index 7cc214f..ebed418 100644
--- a/doc/guides/linux_gsg/sys_reqs.rst
+++ b/doc/guides/linux_gsg/sys_reqs.rst
@@ -61,7 +61,7 @@ Compilation of the DPDK
 
 *   coreutils:  cmp, sed, grep, arch
 
-*   gcc: versions 4.5.x or later is recommended for i686/x86_64. versions 4.8.x or later is recommanded
+*   gcc: versions 4.5.x or later is recommended for i686/x86_64. versions 4.8.x or later is recommended
     for ppc_64 and x86_x32 ABI. On some distributions, some specific compiler flags and linker flags are enabled by
     default and affect performance (- fstack-protector, for example). Please refer to the documentation
     of your distribution and to gcc -dumpspecs.
@@ -81,7 +81,7 @@ Compilation of the DPDK
 .. note::
 
     x86_x32 ABI is currently supported with distribution packages only on Ubuntu
-    higher than 13.10 or recent debian distribution. The only supported  compiler is gcc 4.8+.
+    higher than 13.10 or recent Debian distribution. The only supported  compiler is gcc 4.8+.
 
 .. note::
 
@@ -121,7 +121,7 @@ System Software
 
 For details of the patches needed to use the DPDK with earlier kernel versions,
 see the DPDK FAQ included in the *DPDK Release Notes*.
-Note also that Redhat* Linux* 6.2 and 6.3 uses a 2.6.32 kernel that already has all the necessary patches applied.
+Note also that Red Hat* Linux* 6.2 and 6.3 uses a 2.6.32 kernel that already has all the necessary patches applied.
 
 *   glibc >= 2.7 (for features related to cpuset)
 
@@ -139,7 +139,7 @@ Note also that Redhat* Linux* 6.2 and 6.3 uses a 2.6.32 kernel that already has
 
 *   Kernel configuration
 
-    In the Fedora* OS and other common distributions, such as Ubuntu*, or RedHat Enterprise Linux*,
+    In the Fedora* OS and other common distributions, such as Ubuntu*, or Red Hat Enterprise Linux*,
     the vendor supplied kernel configurations can be used to run most DPDK applications.
 
     For other kernel builds, options which should be enabled for DPDK include:
diff --git a/doc/guides/nics/intel_vf.rst b/doc/guides/nics/intel_vf.rst
index e773627..eeca973 100644
--- a/doc/guides/nics/intel_vf.rst
+++ b/doc/guides/nics/intel_vf.rst
@@ -55,7 +55,7 @@ Therefore, a NIC is logically distributed among multiple virtual machines (as sh
 while still having global data in common to share with the Physical Function and other Virtual Functions.
 The DPDK fm10kvf, i40evf, igbvf or ixgbevf as a Poll Mode Driver (PMD) serves for the Intel® 82576 Gigabit Ethernet Controller,
 Intel® Ethernet Controller I350 family, Intel® 82599 10 Gigabit Ethernet Controller NIC,
-Intel® Fortville 10/40 Gigabit Ethernet Controller NIC's virtual PCI function,or PCIE host-interface of the Intel Ethernet Switch
+Intel® Fortville 10/40 Gigabit Ethernet Controller NIC's virtual PCI function, or PCIe host-interface of the Intel Ethernet Switch
 FM10000 Series.
 Meanwhile the DPDK Poll Mode Driver (PMD) also supports "Physical Function" of such NIC's on the host.
 
@@ -536,7 +536,7 @@ The setup procedure is as follows:
 
     Run the DPDK l2fwd sample application in the Guest OS with Hugepages enabled.
     For the expected benchmark performance, you must pin the cores from the Guest OS to the Host OS (taskset can be used to do this) and
-    you must also look at the PCI Bus layout on the board to ensure you are not running the traffic over the QPI Inteface.
+    you must also look at the PCI Bus layout on the board to ensure you are not running the traffic over the QPI Interface.
 
 .. note::
 
diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst
index 1216d95..ac2dd56 100644
--- a/doc/guides/nics/mlx4.rst
+++ b/doc/guides/nics/mlx4.rst
@@ -239,7 +239,7 @@ Getting Mellanox OFED
 ~~~~~~~~~~~~~~~~~~~~~
 
 While these libraries and kernel modules are available on OpenFabrics
-Aliance's `website <https://www.openfabrics.org/>`_ and provided by package
+Alliance's `website <https://www.openfabrics.org/>`_ and provided by package
 managers on most distributions, this PMD requires Ethernet extensions that
 may not be supported at the moment (this is a work in progress).
 
diff --git a/doc/guides/nics/pcap_ring.rst b/doc/guides/nics/pcap_ring.rst
index 702da89..5d65dc6 100644
--- a/doc/guides/nics/pcap_ring.rst
+++ b/doc/guides/nics/pcap_ring.rst
@@ -50,7 +50,7 @@ the DPDK also includes two pure-software PMDs. These two drivers are:
 Using the Drivers from the EAL Command Line
 -------------------------------------------
 
-For ease of use, the DPDK EAL also has been extended to allow pseudo-ethernet devices,
+For ease of use, the DPDK EAL also has been extended to allow pseudo-Ethernet devices,
 using one or more of these drivers,
 to be created at application startup time during EAL initialization.
 
@@ -226,7 +226,7 @@ and use these as a source of packet input to the application.
 Usage Examples
 ^^^^^^^^^^^^^^
 
-To create two pseudo-ethernet ports where all traffic sent to a port is looped back
+To create two pseudo-Ethernet ports where all traffic sent to a port is looped back
 for reception on the same port (error handling omitted for clarity):
 
 .. code-block:: c
diff --git a/doc/guides/prog_guide/dev_kit_build_system.rst b/doc/guides/prog_guide/dev_kit_build_system.rst
index cf5c96f..5bfef58 100644
--- a/doc/guides/prog_guide/dev_kit_build_system.rst
+++ b/doc/guides/prog_guide/dev_kit_build_system.rst
@@ -317,7 +317,7 @@ Useful Variables Provided by the Build System
     When compiling an external application, the variable points to the root of external application sources.
 
 *   RTE_OUTPUT: The path to which output files are written.
-    Typically, it is $(RTE_SRCDIR)/build, but it can be overriden by the O= option in the make command line.
+    Typically, it is $(RTE_SRCDIR)/build, but it can be overridden by the O= option in the make command line.
 
 *   RTE_TARGET: A string identifying the target for which we are building.
     The format is arch-machine-execenv-toolchain.
diff --git a/doc/guides/prog_guide/dev_kit_root_make_help.rst b/doc/guides/prog_guide/dev_kit_root_make_help.rst
index 4f30192..333b007 100644
--- a/doc/guides/prog_guide/dev_kit_root_make_help.rst
+++ b/doc/guides/prog_guide/dev_kit_root_make_help.rst
@@ -154,9 +154,22 @@ Test Targets
 Documentation Targets
 ---------------------
 
-*   doxydoc
+*   doc
+
+    Generate the Doxygen documentation (API, html and pdf).
+
+*   doc-api-html
+
+    Generate the Doxygen API documentation in html.
+
+*   doc-guides-html
+
+    Generate the guides documentation in html.
+
+*   doc-guides-pdf
+
+    Generate the guides documentation in pdf.
 
-    Generate the Doxygen documentation (pdf only).
 
 Deps Targets
 ------------
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index 06289ed..4ecbe6a 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -224,7 +224,7 @@ However, alternately it is possible to utilize the idle cycles available to take
 the full capability of the CPU.
 
 By taking advantage of cgroup, the CPU utilization quota can be simply assigned.
-This gives another way to improve the CPU efficienct, however, there is a prerequisite;
+This gives another way to improve the CPU efficiency, however, there is a prerequisite;
 DPDK must handle the context switching between multiple pthreads per core.
 
 For further flexibility, it is useful to set pthread affinity not only to a CPU but to a CPU set.
@@ -284,7 +284,7 @@ Those TLS include *_cpuset* and *_socket_id*:
 
 *	*_cpuset* stores the CPUs bitmap to which the pthread is affinitized.
 
-*	*_socket_id* stores the NUMA node of the CPU set. If the CPUs in CPU set belong to different NUMA node, the *_socket_id* will be set to SOCKTE_ID_ANY.
+*	*_socket_id* stores the NUMA node of the CPU set. If the CPUs in CPU set belong to different NUMA node, the *_socket_id* will be set to SOCKET_ID_ANY.
 
 
 .. _known_issue_label:
@@ -302,7 +302,7 @@ Known Issues
 + rte_ring
 
   rte_ring supports multi-producer enqueue and multi-consumer dequeue.
-  However, it is non-preemptive, this has a knock on effect of making rte_mempool non-preemtable.
+  However, it is non-preemptive, this has a knock on effect of making rte_mempool non-preemptable.
 
   .. note::
 
@@ -329,7 +329,7 @@ Known Issues
   ``RTE_RING_PAUSE_REP_COUNT`` is defined for rte_ring to reduce contention. It's mainly for case 2, a yield is issued after number of times pause repeat.
 
   It adds a sched_yield() syscall if the thread spins for too long while waiting on the other thread to finish its operations on the ring.
-  This gives the pre-empted thread a chance to proceed and finish with the ring enqueue/dequeue operation.
+  This gives the preempted thread a chance to proceed and finish with the ring enqueue/dequeue operation.
 
 + rte_timer
 
diff --git a/doc/guides/prog_guide/index.rst b/doc/guides/prog_guide/index.rst
index a9966a0..57d516a 100644
--- a/doc/guides/prog_guide/index.rst
+++ b/doc/guides/prog_guide/index.rst
@@ -98,7 +98,7 @@ Programmer's Guide
 
 :ref:`Figure 9. An mbuf with Three Segments <pg_figure_9>`
 
-:ref:`Figure 16. Memory Sharing inthe Intel® DPDK Multi-process Sample Application <pg_figure_16>`
+:ref:`Figure 16. Memory Sharing in the Intel® DPDK Multi-process Sample Application <pg_figure_16>`
 
 :ref:`Figure 17. Components of an Intel® DPDK KNI Application <pg_figure_17>`
 
@@ -194,7 +194,7 @@ Programmer's Guide
 
 :ref:`Table 22. Configuration parameters common for all hash table types <pg_table_22>`
 
-:ref:`Table 23. Configuration parameters specific to extendible bucket hash table <pg_table_23>`
+:ref:`Table 23. Configuration parameters specific to extendable bucket hash table <pg_table_23>`
 
 :ref:`Table 24. Configuration parameters specific to pre-computed key signature hash table <pg_table_24>`
 
diff --git a/doc/guides/prog_guide/ip_fragment_reassembly_lib.rst b/doc/guides/prog_guide/ip_fragment_reassembly_lib.rst
index 7d6bdaa..1d3d4ac 100644
--- a/doc/guides/prog_guide/ip_fragment_reassembly_lib.rst
+++ b/doc/guides/prog_guide/ip_fragment_reassembly_lib.rst
@@ -36,10 +36,10 @@ The IP Fragmentation and Reassembly Library implements IPv4 and IPv6 packet frag
 Packet fragmentation
 --------------------
 
-Packet fragmentation routines devide input packet into number of fragments.
+Packet fragmentation routines divide input packet into number of fragments.
 Both rte_ipv4_fragment_packet() and rte_ipv6_fragment_packet() functions assume that input mbuf data
 points to the start of the IP header of the packet (i.e. L2 header is already stripped out).
-To avoid copying fo the actual packet's data zero-copy technique is used (rte_pktmbuf_attach).
+To avoid copying of the actual packet's data zero-copy technique is used (rte_pktmbuf_attach).
 For each fragment two new mbufs are created:
 
 *   Direct mbuf -- mbuf that will contain L3 header of the new fragment.
@@ -50,7 +50,7 @@ For each fragment two new mbufs are created:
 Then L3 header is copied from the original mbuf into the 'direct' mbuf and updated to reflect new fragmented status.
 Note that for IPv4, header checksum is not recalculated and is set to zero.
 
-Finally 'direct' and 'indirect' mbufs for each fragnemt are linked together via mbuf's next filed to compose a packet for the new fragment.
+Finally 'direct' and 'indirect' mbufs for each fragment are linked together via mbuf's next field to compose a packet for the new fragment.
 
 The caller has an ability to explicitly specify which mempools should be used to allocate 'direct' and 'indirect' mbufs from.
 
@@ -66,9 +66,9 @@ Fragment table maintains information about already received fragments of the pac
 
 Each IP packet is uniquely identified by triple <Source IP address>, <Destination IP address>, <ID>.
 
-Note that all update/lookup operations on Fragmen Table are not thread safe.
+Note that all update/lookup operations on Fragment Table are not thread safe.
 So if different execution contexts (threads/processes) will access the same table simultaneously,
-then some exernal syncing mechanism have to be provided.
+then some external syncing mechanism has to be provided.
 
 Each table entry can hold information about packets consisting of up to RTE_LIBRTE_IP_FRAG_MAX (by default: 4) fragments.
 
@@ -80,11 +80,11 @@ Code example, that demonstrates creation of a new Fragment table:
     bucket_num = max_flow_num + max_flow_num / 4;
     frag_tbl = rte_ip_frag_table_create(max_flow_num, bucket_entries, max_flow_num, frag_cycles, socket_id);
 
-Internally Fragmen table is a simple hash table.
+Internally Fragment table is a simple hash table.
 The basic idea is to use two hash functions and <bucket_entries> \* associativity.
 This provides 2 \* <bucket_entries> possible locations in the hash table for each key.
 When the collision occurs and all 2 \* <bucket_entries> are occupied,
-instead of resinserting existing keys into alternative locations, ip_frag_tbl_add() just returns a faiure.
+instead of reinserting existing keys into alternative locations, ip_frag_tbl_add() just returns a failure.
 
 Also, entries that resides in the table longer then <max_cycles> are considered as invalid,
 and could be removed/replaced by the new ones.
@@ -120,7 +120,7 @@ These functions are responsible for:
 
     b) If no, then return a NULL to the caller.
 
-If at any stage of packet processing an error is envountered
+If at any stage of packet processing an error is encountered
 (e.g: can't insert new entry into the Fragment Table, or invalid/timed-out fragment),
 then the function will free all associated with the packet fragments,
 mark the table entry as invalid and return NULL to the caller.
diff --git a/doc/guides/prog_guide/ivshmem_lib.rst b/doc/guides/prog_guide/ivshmem_lib.rst
index c76d2b3..75175fa 100644
--- a/doc/guides/prog_guide/ivshmem_lib.rst
+++ b/doc/guides/prog_guide/ivshmem_lib.rst
@@ -32,7 +32,7 @@ IVSHMEM Library
 ===============
 
 The DPDK IVSHMEM library facilitates fast zero-copy data sharing among virtual machines
-(host-to-guest or guest-to-guest) by means of QEUMU's IVSHMEM mechanism.
+(host-to-guest or guest-to-guest) by means of QEMU's IVSHMEM mechanism.
 
 The library works by providing a command line for QEMU to map several hugepages into a single IVSHMEM device.
 For the guest to know what is inside any given IVSHMEM device
@@ -107,7 +107,7 @@ Best Practices for Writing IVSHMEM Applications
 -----------------------------------------------
 
 When considering the use of IVSHMEM for sharing memory, security implications need to be carefully evaluated.
-IVSHMEM is not suitable for untrusted guests, as IVSHMEM is essentially a window into the host processs memory.
+IVSHMEM is not suitable for untrusted guests, as IVSHMEM is essentially a window into the host process memory.
 This also has implications for the multiple VM scenarios.
 While the IVSHMEM library tries to share as little memory as possible,
 it is quite probable that data designated for one VM might also be present in an IVSMHMEM device designated for another VM.
@@ -137,7 +137,7 @@ For performance reasons,
 it is best to pin host processes and QEMU processes to different cores so that they do not interfere with each other.
 If NUMA support is enabled, it is also desirable to keep host process' hugepage memory and QEMU process on the same NUMA node.
 
-For the best performance across all NUMA nodes, each QUEMU core should be pinned to host CPU core on the appropriate NUMA node.
+For the best performance across all NUMA nodes, each QEMU core should be pinned to host CPU core on the appropriate NUMA node.
 QEMU's virtual NUMA nodes should also be set up to correspond to physical NUMA nodes.
 More on how to set up DPDK and QEMU NUMA support can be found in *DPDK Getting Started Guide* and
 `QEMU documentation <http://qemu.weilnetz.de/qemu-doc.html>`_ respectively.
diff --git a/doc/guides/prog_guide/kernel_nic_interface.rst b/doc/guides/prog_guide/kernel_nic_interface.rst
index bac2215..bab376a 100644
--- a/doc/guides/prog_guide/kernel_nic_interface.rst
+++ b/doc/guides/prog_guide/kernel_nic_interface.rst
@@ -267,7 +267,7 @@ Then, using the qemu-kvm command with the -netdev option to assign such raw sock
 
 .. note::
 
-    The key word tap must exist as qemu-kvm now only supports vhost with a tap beckend, so here we cheat qemu-kvm by an existing fd.
+    The key word tap must exist as qemu-kvm now only supports vhost with a tap backend, so here we cheat qemu-kvm by an existing fd.
 
 Compatibility Configure Option
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst b/doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst
index 24a1a36..ae9b516 100644
--- a/doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst
+++ b/doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst
@@ -1,5 +1,5 @@
 ..  BSD LICENSE
-    Copyright(c) 2010-2014 ntel Corporation. All rights reserved.
+    Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
     All rights reserved.
 
     Redistribution and use in source and binary forms, with or without
@@ -87,7 +87,7 @@ Currently the Link Bonding PMD library supports 4 modes of operation:
     This mode provides transmit load balancing (based on the selected
     transmission policy) and fault tolerance. The default policy (layer2) uses
     a simple calculation based on the packet flow source and destination MAC
-    addresses aswell as the number of active slaves available to the bonded
+    addresses as well as the number of active slaves available to the bonded
     device to classify the packet to a specific slave to transmit on. Alternate
     transmission policies supported are layer 2+3, this takes the IP source and
     destination addresses into the calculation of the transmit slave port and
@@ -95,7 +95,7 @@ Currently the Link Bonding PMD library supports 4 modes of operation:
     destination addresses as well as the TCP/UDP source and destination port.
 
 .. note::
-    The colouring differences of the packets are used to identify different flow
+    The coloring differences of the packets are used to identify different flow
     classification calculated by the selected transmit policy
 
 
diff --git a/doc/guides/prog_guide/mbuf_lib.rst b/doc/guides/prog_guide/mbuf_lib.rst
index 8f546e0..338f7da 100644
--- a/doc/guides/prog_guide/mbuf_lib.rst
+++ b/doc/guides/prog_guide/mbuf_lib.rst
@@ -167,7 +167,7 @@ a vxlan-encapsulated tcp packet:
     mb->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CSUM
     set out_ip checksum to 0 in the packet
 
-  This is supported on hardwares advertising DEV_TX_OFFLOAD_IPV4_CKSUM.
+  This is supported on hardware advertising DEV_TX_OFFLOAD_IPV4_CKSUM.
 
 - calculate checksum of out_ip and out_udp::
 
@@ -177,7 +177,7 @@ a vxlan-encapsulated tcp packet:
     set out_ip checksum to 0 in the packet
     set out_udp checksum to pseudo header using rte_ipv4_phdr_cksum()
 
-   This is supported on hardwares advertising DEV_TX_OFFLOAD_IPV4_CKSUM
+   This is supported on hardware advertising DEV_TX_OFFLOAD_IPV4_CKSUM
    and DEV_TX_OFFLOAD_UDP_CKSUM.
 
 - calculate checksum of in_ip::
@@ -188,7 +188,7 @@ a vxlan-encapsulated tcp packet:
     set in_ip checksum to 0 in the packet
 
   This is similar to case 1), but l2_len is different. It is supported
-  on hardwares advertising DEV_TX_OFFLOAD_IPV4_CKSUM.
+  on hardware advertising DEV_TX_OFFLOAD_IPV4_CKSUM.
   Note that it can only work if outer L4 checksum is 0.
 
 - calculate checksum of in_ip and in_tcp::
diff --git a/doc/guides/prog_guide/packet_classif_access_ctrl.rst b/doc/guides/prog_guide/packet_classif_access_ctrl.rst
index 210b020..a9a5815 100644
--- a/doc/guides/prog_guide/packet_classif_access_ctrl.rst
+++ b/doc/guides/prog_guide/packet_classif_access_ctrl.rst
@@ -281,14 +281,14 @@ for each of them.
 Depending on the rule-set, it might reduce RT memory requirements but might
 increase classification time.
 There is a possibility at build-time to specify maximum memory limit for internal RT structures for given AC context.
-It could be done via **max_size** field of the **rte_acl_config** strucure.
+It could be done via **max_size** field of the **rte_acl_config** structure.
 Setting it to the value greater than zero, instructs rte_acl_build() to:
 
-*   attempt to minimise number of tries in the RT table, but
+*   attempt to minimize number of tries in the RT table, but
 *   make sure that size of RT table wouldn't exceed given value.
 
-Setting it to zero makes rte_acl_build() to use the default behaviour:
-try to minimise size of the RT structures, but doesn't expose any hard limit on it.
+Setting it to zero makes rte_acl_build() use the default behavior:
+try to minimize size of the RT structures, but doesn't expose any hard limit on it.
 
 That gives the user the ability to decisions about performance/space trade-off.
 For example:
@@ -304,12 +304,12 @@ For example:
      * populated with rules AC context and cfg filled properly.
      */
 
-     /* try to build AC context, with RT strcutures less then 8MB. */
+     /* try to build AC context, with RT structures less than 8MB. */
      cfg.max_size = 0x800000;
      ret = rte_acl_build(acx, &cfg);
 
      /*
-      * RT strcutures can't fit into 8MB for given context.
+      * RT structures can't fit into 8MB for given context.
       * Try to build without exposing any hard limit.
       */
      if (ret == -ERANGE) {
diff --git a/doc/guides/prog_guide/packet_framework.rst b/doc/guides/prog_guide/packet_framework.rst
index 8e8e32f..2056c4f 100644
--- a/doc/guides/prog_guide/packet_framework.rst
+++ b/doc/guides/prog_guide/packet_framework.rst
@@ -330,7 +330,7 @@ so the key search can be narrowed down from the full set of keys currently in th
 to just the set of keys currently in the identified table bucket.
 
 The performance of the hash table lookup operation is greatly improved,
-provided that the table keys are evenly distributed amongst the hash table buckets,
+provided that the table keys are evenly distributed among the hash table buckets,
 which can be achieved by using a hash function with uniform distribution.
 The rule to map a key to its bucket can simply be to use the key signature (modulo the number of table buckets) as the table bucket ID:
 
@@ -439,7 +439,7 @@ The possible options are:
     When a key needs to be picked and dropped, the first candidate for drop, i.e. the current LRU key, is always picked.
     The LRU logic requires maintaining specific data structures per each bucket.
 
-#.  **Extendible Bucket Hash Table.**
+#.  **Extendable Bucket Hash Table.**
     The bucket is extended with space for 4 more keys.
     This is done by allocating additional memory at table initialization time,
     which is used to create a pool of free keys (the size of this pool is configurable and always a multiple of 4).
@@ -449,11 +449,11 @@ The possible options are:
     when the key to be deleted is the only key that was used within its group of 4 keys at that time.
     On key lookup operation, if the current bucket is in extended state and a match is not found in the first group of 4 keys,
     the search continues beyond the first group of 4 keys, potentially until all keys in this bucket are examined.
-    The extendible bucket logic requires maintaining specific data structures per table and per each bucket.
+    The extendable bucket logic requires maintaining specific data structures per table and per each bucket.
 
 .. _pg_table_23:
 
-**Table 23 Configuration Parameters Specific to Extendible Bucket Hash Table**
+**Table 23 Configuration Parameters Specific to Extendable Bucket Hash Table**
 
 +---+---------------------------+--------------------------------------------------+
 | # | Parameter                 | Details                                          |
@@ -576,7 +576,7 @@ either with pre-computed signature or "do-sig").
 |   |                         |                              |                           |                               |
 +---+-------------------------+------------------------------+---------------------------+-------------------------------+
 | 2 | Bucket extensions array | n_buckets_ext (configurable) | 32                        | This array is only created    |
-|   |                         |                              |                           | for extendible bucket tables. |
+|   |                         |                              |                           | for extendable bucket tables. |
 |   |                         |                              |                           |                               |
 +---+-------------------------+------------------------------+---------------------------+-------------------------------+
 | 3 | Key array               | n_keys                       | key_size (configurable)   | Keys added to the hash table. |
@@ -601,7 +601,7 @@ either with pre-computed signature or "do-sig").
 |   |                  |                    | Entry 0 stores the index (0 .. 3) of the MRU key, while entry 3  |
 |   |                  |                    | stores the index of the LRU key.                                 |
 |   |                  |                    |                                                                  |
-|   |                  |                    | For extendible bucket tables, this field represents the next     |
+|   |                  |                    | For extendable bucket tables, this field represents the next     |
 |   |                  |                    | pointer (i.e. the pointer to the next group of 4 keys linked to  |
 |   |                  |                    | the current bucket). The next pointer is not NULL if the bucket  |
 |   |                  |                    | is currently extended or NULL otherwise.                         |
@@ -867,7 +867,7 @@ Figure 37, Figure 38, Table 30 and 31 detail the main data structures used to im
 |   |                         |                              |                      |                                    |
 +---+-------------------------+------------------------------+----------------------+------------------------------------+
 | 2 | Bucket extensions array | n_buckets_ext (configurable) | *8-byte key size:*   | This array is only created for     |
-|   |                         |                              |                      | extendible bucket tables.          |
+|   |                         |                              |                      | extendable bucket tables.          |
 |   |                         |                              |                      |                                    |
 |   |                         |                              | 64 + 4 x entry_size  |                                    |
 |   |                         |                              |                      |                                    |
@@ -888,7 +888,7 @@ Figure 37, Figure 38, Table 30 and 31 detail the main data structures used to im
 +===+===============+====================+===============================================================================+
 | 1 | Valid         | 8                  | Bit X (X = 0 .. 3) is set to 1 if key X is valid or to 0 otherwise.           |
 |   |               |                    |                                                                               |
-|   |               |                    | Bit 4 is only used for extendible bucket tables to help with the              |
+|   |               |                    | Bit 4 is only used for extendable bucket tables to help with the              |
 |   |               |                    | implementation of the branchless logic. In this case, bit 4 is set to 1 if    |
 |   |               |                    | next pointer is valid (not NULL) or to 0 otherwise.                           |
 |   |               |                    |                                                                               |
@@ -897,7 +897,7 @@ Figure 37, Figure 38, Table 30 and 31 detail the main data structures used to im
 |   |               |                    | stored as array of 4 entries of 2 bytes each. Entry 0 stores the index        |
 |   |               |                    | (0 .. 3) of the MRU key, while entry 3 stores the index of the LRU key.       |
 |   |               |                    |                                                                               |
-|   |               |                    | For extendible bucket tables, this field represents the next pointer (i.e.    |
+|   |               |                    | For extendable bucket tables, this field represents the next pointer (i.e.    |
 |   |               |                    | the pointer to the next group of 4 keys linked to the current bucket). The    |
 |   |               |                    | next pointer is not NULL if the bucket is currently extended or NULL          |
 |   |               |                    | otherwise.                                                                    |
@@ -962,7 +962,7 @@ Additional notes:
 #.  The pipelined version of the bucket search algorithm is executed only if there are at least 5 packets in the burst of input packets.
     If there are less than 5 packets in the burst of input packets, a non-optimized implementation of the bucket search algorithm is executed.
 
-#.  For extendible bucket hash tables only,
+#.  For extendable bucket hash tables only,
     once the pipelined version of the bucket search algorithm has been executed for all the packets in the burst of input packets,
     the non-optimized implementation of the bucket search algorithm is also executed for any packets that did not produce a lookup hit,
     but have the bucket in extended state.
@@ -1148,7 +1148,7 @@ Mechanisms to share the same table between multiple threads:
     The threads performing table entry add/delete operations send table update requests to the reader (typically through message passing queues),
     which does the actual table updates and then sends the response back to the request initiator.
 
-#.  **Single writer thread performing table entry add/delete operations and multiple reader threads that performtable lookup operations with read-only access to the table entries.**
+#.  **Single writer thread performing table entry add/delete operations and multiple reader threads that perform table lookup operations with read-only access to the table entries.**
     The reader threads use the main table copy while the writer is updating the mirror copy.
     Once the writer update is done, the writer can signal to the readers and busy wait until all readers swaps between the mirror copy (which now becomes the main copy) and
     the mirror copy (which now becomes the main copy).
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index b609841..98d8714 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -881,7 +881,7 @@ The evolution of the WRR design solution from simple to complex is shown in Tabl
 |   |            |                 |             | introducing a cost per byte that is different for each   |
 |   |            |                 |             | queue. Queues with lower weights have a higher cost per  |
 |   |            |                 |             | byte. This way, it is still meaningful to compare the    |
-|   |            |                 |             | consumption amongst different queues in order to select  |
+|   |            |                 |             | consumption among different queues in order to select    |
 |   |            |                 |             | the next queue.                                          |
 |   |            |                 |             |                                                          |
 |   |            |                 |             | w(i) = Weight of queue #i                                |
@@ -984,7 +984,7 @@ with the third approach selected for implementation.
 +=====+===========================+=========================================================================+
 | 1   | Don't care                | First come, first served.                                               |
 |     |                           |                                                                         |
-|     |                           | This approach is not fair amongst subport member pipes, as pipes that   |
+|     |                           | This approach is not fair among subport member pipes, as pipes that     |
 |     |                           | are served first will use up as much bandwidth for TC X as they need,   |
 |     |                           | while pipes that are served later will receive poor service due to      |
 |     |                           | bandwidth for TC X at the subport level being scarce.                   |
diff --git a/doc/guides/prog_guide/source_org.rst b/doc/guides/prog_guide/source_org.rst
index 061f107..1bce0b8 100644
--- a/doc/guides/prog_guide/source_org.rst
+++ b/doc/guides/prog_guide/source_org.rst
@@ -114,7 +114,7 @@ The examples directory contains sample applications that show how libraries can
     examples
     +-- cmdline            # Example of using cmdline library
     +-- dpdk_qat           # Example showing integration with Intel QuickAssist
-    +-- exception_path     # Sending packets to and from Linux ethernet device (TAP)
+    +-- exception_path     # Sending packets to and from Linux Ethernet device (TAP)
     +-- helloworld         # Helloworld basic example
     +-- ip_reassembly      # Example showing IP Reassembly
     +-- ip_fragmentation   # Example showing IPv4 Fragmentation
diff --git a/doc/guides/prog_guide/timer_lib.rst b/doc/guides/prog_guide/timer_lib.rst
index 7baf034..f437417 100644
--- a/doc/guides/prog_guide/timer_lib.rst
+++ b/doc/guides/prog_guide/timer_lib.rst
@@ -85,7 +85,7 @@ the expiry time of the first list entry is maintained within the per-core timer
 On 64-bit platforms, this value can be checked without the need to take a lock on the overall structure.
 (Since expiry times are maintained as 64-bit values,
 a check on the value cannot be done on 32-bit platforms without using either a compare-and-swap (CAS) instruction or using a lock,
-so this additional check is skipped in favour of checking as normal once the lock has been taken.)
+so this additional check is skipped in favor of checking as normal once the lock has been taken.)
 On both 64-bit and 32-bit platforms,
 a call to rte_timer_manage() returns without taking a lock in the case where the timer list for the calling core is empty.
 
diff --git a/doc/guides/prog_guide/vhost_lib.rst b/doc/guides/prog_guide/vhost_lib.rst
index a52fa50..48e1fff 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -46,7 +46,7 @@ Vhost API Overview
       rte_vhost_driver_register registers the vhost driver into the system.
       For vhost-cuse, character device file will be created under the /dev directory.
       Character device name is specified as the parameter.
-      For vhost-user, a unix domain socket server will be created with the parameter as
+      For vhost-user, a Unix domain socket server will be created with the parameter as
       the local socket path.
 
 *   Vhost session start
@@ -102,7 +102,7 @@ When the release call is released, vhost will destroy the device.
 
 Vhost user implementation
 ~~~~~~~~~~~~~~~~~~~~~~~~~
-When vSwitch registers a vhost driver, it will create a unix domain socket server
+When vSwitch registers a vhost driver, it will create a Unix domain socket server
 into the system. This server will listen for a connection and process the vhost message from
 QEMU simulator.
 
@@ -110,7 +110,7 @@ When there is a new socket connection, it means a new virtio device has been cre
 the guest virtual machine, and the vhost driver will create a vhost device for this virtio device.
 
 For messages with a file descriptor, the file descriptor could be directly used in the vhost
-process as it is already installed by unix domain socket.
+process as it is already installed by Unix domain socket.
 
  * VHOST_SET_MEM_TABLE
  * VHOST_SET_VRING_KICK
diff --git a/doc/guides/prog_guide/writing_efficient_code.rst b/doc/guides/prog_guide/writing_efficient_code.rst
index 9a7b31b..613db88 100644
--- a/doc/guides/prog_guide/writing_efficient_code.rst
+++ b/doc/guides/prog_guide/writing_efficient_code.rst
@@ -215,7 +215,7 @@ Setting the Target CPU Type
 
 The DPDK supports CPU microarchitecture-specific optimizations by means of CONFIG_RTE_MACHINE option
 in the DPDK configuration file.
-The degree of optimization depends on the compiler's ability to optimize for a specitic microarchitecture,
+The degree of optimization depends on the compiler's ability to optimize for a specific microarchitecture,
 therefore it is preferable to use the latest compiler versions whenever possible.
 
 If the compiler version does not support the specific feature set (for example, the Intel® AVX instruction set),
diff --git a/doc/guides/rel_notes/abi.rst b/doc/guides/rel_notes/abi.rst
index bd25485..f00a6ee 100644
--- a/doc/guides/rel_notes/abi.rst
+++ b/doc/guides/rel_notes/abi.rst
@@ -17,7 +17,7 @@ Some ABI changes may be too significant to reasonably maintain multiple
 versions of.  In those events ABI's may be updated without backward
 compatibility provided.  The requirements for doing so are:
 
-#. At least 3 acknoweldgements of the need on the dpdk.org
+#. At least 3 acknowledgments of the need on the dpdk.org
 #. A full deprecation cycle must be made to offer downstream consumers sufficient warning of the change.  E.g. if dpdk 2.0 is under development when the change is proposed, a deprecation notice must be added to this file, and released with dpdk 2.0.  Then the change may be incorporated for dpdk 2.1
 #. The LIBABIVER variable in the makefile(s) where the ABI changes are incorporated must be incremented in parallel with the ABI changes themselves
 
@@ -25,7 +25,7 @@ Note that the above process for ABI deprecation should not be undertaken
 lightly.  ABI stability is extremely important for downstream consumers of the
 DPDK, especially when distributed in shared object form.  Every effort should be
 made to preserve ABI whenever possible.  For instance, reorganizing public
-structure field for astetic or readability purposes should be avoided as it will
+structure field for aesthetic or readability purposes should be avoided as it will
 cause ABI breakage.  Only significant (e.g. performance) reasons should be seen
 as cause to alter ABI.
 
diff --git a/doc/guides/rel_notes/faq.rst b/doc/guides/rel_notes/faq.rst
index 14b1167..d87230a 100644
--- a/doc/guides/rel_notes/faq.rst
+++ b/doc/guides/rel_notes/faq.rst
@@ -201,7 +201,7 @@ What is the purpose of setting iommu=pt?
 ----------------------------------------
 DPDK uses a 1:1 mapping and does not support IOMMU. IOMMU allows for simpler VM physical address translation.
 The second role of IOMMU is to allow protection from unwanted memory access by an unsafe device that has DMA privileges.
-Unfortunately, the protection comes with an extremely high perfomance cost for high speed NICs.
+Unfortunately, the protection comes with an extremely high performance cost for high speed NICs.
 
 iommu=pt disables IOMMU support for the hypervisor.
 
diff --git a/doc/guides/rel_notes/known_issues.rst b/doc/guides/rel_notes/known_issues.rst
index a94b6aa..7b62085 100644
--- a/doc/guides/rel_notes/known_issues.rst
+++ b/doc/guides/rel_notes/known_issues.rst
@@ -432,7 +432,7 @@ Some hardware off-load functions are not supported by the VF Driver
 |                                |                                                                                      |
 +--------------------------------+--------------------------------------------------------------------------------------+
 | Implication                    | Any configuration for these items in the VF register will be ignored. The behavior   |
-|                                | is dependant on the current PF setting.                                              |
+|                                | is dependent on the current PF setting.                                              |
 |                                |                                                                                      |
 +--------------------------------+--------------------------------------------------------------------------------------+
 | Resolution/ Workaround         | For the PF (Physical Function) status on which the VF driver depends, there is an    |
@@ -683,8 +683,8 @@ Binding PCI devices to igb_uio fails on Linux* kernel 3.9 when more than one dev
 |                                |                                                                                      |
 +--------------------------------+--------------------------------------------------------------------------------------+
 
-GCC might generate Intel® AVX instructions forprocessors without Intel® AVX support
------------------------------------------------------------------------------------
+GCC might generate Intel® AVX instructions for processors without Intel® AVX support
+------------------------------------------------------------------------------------
 
 +--------------------------------+--------------------------------------------------------------------------------------+
 | Title                          | Gcc might generate Intel® AVX instructions for processors without Intel® AVX support |
@@ -749,11 +749,11 @@ Ethertype filter could receive other packets (non-assigned) in Niantic
 |                                |                                                                                      |
 +--------------------------------+--------------------------------------------------------------------------------------+
 
-Cannot set link speed on Intel® 40G ethernet controller
+Cannot set link speed on Intel® 40G Ethernet controller
 -------------------------------------------------------
 
 +--------------------------------+--------------------------------------------------------------------------------------+
-| Title                          | Cannot set link speed on Intel® 40G ethernet controller                              |
+| Title                          | Cannot set link speed on Intel® 40G Ethernet controller                              |
 |                                |                                                                                      |
 +================================+======================================================================================+
 | Reference #                    | IXA00386379                                                                          |
@@ -764,7 +764,7 @@ Cannot set link speed on Intel® 40G ethernet controller
 |                                | It cannot set the link to specific speed.                                            |
 |                                |                                                                                      |
 +--------------------------------+--------------------------------------------------------------------------------------+
-| Implication                    | The link speed cannot be changed forcedly, though it can be configured by            |
+| Implication                    | The link speed cannot be changed forcibly, though it can be configured by            |
 |                                | application.                                                                         |
 |                                |                                                                                      |
 +--------------------------------+--------------------------------------------------------------------------------------+
@@ -778,11 +778,11 @@ Cannot set link speed on Intel® 40G ethernet controller
 |                                |                                                                                      |
 +--------------------------------+--------------------------------------------------------------------------------------+
 
-Stopping the port does not down the link on Intel® 40G ethernet controller
+Stopping the port does not down the link on Intel® 40G Ethernet controller
 --------------------------------------------------------------------------
 
 +--------------------------------+--------------------------------------------------------------------------------------+
-| Title                          | Stopping the port does not down the link on Intel® 40G ethernet controller           |
+| Title                          | Stopping the port does not down the link on Intel® 40G Ethernet controller           |
 |                                |                                                                                      |
 +================================+======================================================================================+
 | Reference #                    | IXA00386380                                                                          |
diff --git a/doc/guides/rel_notes/resolved_issues.rst b/doc/guides/rel_notes/resolved_issues.rst
index 5fb973f..8d6bbfa 100644
--- a/doc/guides/rel_notes/resolved_issues.rst
+++ b/doc/guides/rel_notes/resolved_issues.rst
@@ -84,7 +84,7 @@ Vhost-xen cannot detect Domain U application exit on Xen version 4.0.1
 |                                |                                                                                      |
 +--------------------------------+--------------------------------------------------------------------------------------+
 | Description                    | When using DPDK applications on Xen 4.0.1, e.g. TestPMD Sample Application,          |
-|                                | on killing the application (e.g. killall testmd) vhost-switch cannot detect          |
+|                                | on killing the application (e.g. killall testpmd) vhost-switch cannot detect         |
 |                                | the domain U exited and does not free the Virtio device.                             |
 |                                |                                                                                      |
 +--------------------------------+--------------------------------------------------------------------------------------+
@@ -211,11 +211,11 @@ KNI does not provide Ethtool support for all NICs supported by the Poll-Mode Dri
 | Title                           | KNI does not provide ethtool support for all NICs supported by the Poll Mode Drivers  |
 |                                 |                                                                                       |
 +=================================+=======================================================================================+
-| Refererence #                   | IXA00383835                                                                           |
+| Reference #                     | IXA00383835                                                                           |
 |                                 |                                                                                       |
 +---------------------------------+---------------------------------------------------------------------------------------+
-| Description                     | To support ethtool functionality using the KNI, the KNI libray includes seperate      |
-|                                 | driver code based off the Linux kernel drivers, because this driver code is seperate  |
+| Description                     | To support ethtool functionality using the KNI, the KNI library includes separate     |
+|                                 | driver code based off the Linux kernel drivers, because this driver code is separate  |
 |                                 | from the poll-mode drivers, the set of supported NICs for these two components may    |
 |                                 | differ.                                                                               |
 |                                 |                                                                                       |
@@ -247,7 +247,7 @@ Linux IPv4 forwarding is not stable with vhost-switch on high packet rate
 | Title                           | Linux IPv4 forwarding is not stable with vhost-switch on high packet rate.            |
 |                                 |                                                                                       |
 +=================================+=======================================================================================+
-| Refererence #                   | IXA00384430                                                                           |
+| Reference #                     | IXA00384430                                                                           |
 |                                 |                                                                                       |
 +---------------------------------+---------------------------------------------------------------------------------------+
 | Description                     | Linux IPv4 forwarding is not stable in Guest when Tx traffic is high from traffic     |
@@ -261,7 +261,7 @@ Linux IPv4 forwarding is not stable with vhost-switch on high packet rate
 | Resolution/Workaround           | N/A                                                                                   |
 |                                 |                                                                                       |
 +---------------------------------+---------------------------------------------------------------------------------------+
-| AffectedEnvironment/Platform    | All                                                                                   |
+| Affected Environment/Platform   | All                                                                                   |
 |                                 |                                                                                       |
 +---------------------------------+---------------------------------------------------------------------------------------+
 | Driver/Module                   | Sample application                                                                    |
@@ -431,7 +431,7 @@ Initialization failure with Intel® Ethernet Controller X540-T2
 |                                 |                                                                                       |
 +---------------------------------+---------------------------------------------------------------------------------------+
 | Description                     | This device causes a failure during initialization when the software tries to read    |
-|                                 | the part number from the device EEPROM.                                               |
+|                                 | the part number from the device EEPROM.                                               |
 |                                 |                                                                                       |
 +---------------------------------+---------------------------------------------------------------------------------------+
 | Implication                     | Device cannot be used.                                                                |
@@ -998,7 +998,7 @@ No traffic through bridge when using exception_path sample application
 |                                 | not forwarded by the bridge.                                                          |
 |                                 |                                                                                       |
 +---------------------------------+---------------------------------------------------------------------------------------+
-| Implication                     | The sample application does not work as described in its sample application quide.    |
+| Implication                     | The sample application does not work as described in its sample application guide.    |
 |                                 |                                                                                       |
 +---------------------------------+---------------------------------------------------------------------------------------+
 | Resolution/Workaround           | If you cannot get packets though the bridge, it might be because IP packet filtering  |
@@ -1105,8 +1105,8 @@ When running multi-process applications, “rte_malloc” functions cannot be us
 |                                 |                                                                                       |
 +---------------------------------+---------------------------------------------------------------------------------------+
 
-Configuring maximum packet length for IGB with VLAN enabled may not take intoaccount the length of VLAN tag
------------------------------------------------------------------------------------------------------------
+Configuring maximum packet length for IGB with VLAN enabled may not take into account the length of VLAN tag
+------------------------------------------------------------------------------------------------------------
 
 +---------------------------------+---------------------------------------------------------------------------------------+
 | Title                           | Configuring maximum packet length for IGB with VLAN enabled may not take into account |
@@ -1177,7 +1177,7 @@ EAL can silently reserve less memory than requested
 | Implication                     | The application fails to start.                                                       |
 |                                 |                                                                                       |
 +---------------------------------+---------------------------------------------------------------------------------------+
-| Resolution                      | EAL will detect if this condition occurs and will give anappropriate error message    |
+| Resolution                      | EAL will detect if this condition occurs and will give an appropriate error message   |
 |                                 | describing steps to fix the problem.                                                  |
 |                                 |                                                                                       |
 +---------------------------------+---------------------------------------------------------------------------------------+
@@ -1366,17 +1366,17 @@ Packet reception issues when virtualization is enabled
 
 
 
-Double VLAN does not work on Intel® 40GbE ethernet contoller
-------------------------------------------------------------
+Double VLAN does not work on Intel® 40GbE Ethernet controller
+-------------------------------------------------------------
 
 +---------------------------------+---------------------------------------------------------------------------------------+
-| Title                           | Double VLAN does not work on Intel® 40GbE ethernet controller                         |
+| Title                           | Double VLAN does not work on Intel® 40GbE Ethernet controller                         |
 |                                 |                                                                                       |
 +=================================+=======================================================================================+
 | Reference #                     | IXA00369908                                                                           |
 |                                 |                                                                                       |
 +---------------------------------+---------------------------------------------------------------------------------------+
-| Description                     | On Intel® 40 GbE ethernet controller double VLAN does not work.                       |
+| Description                     | On Intel® 40 GbE Ethernet controller double VLAN does not work.                       |
 |                                 | This was confirmed as a Firmware issue which will be fixed in later versions of       |
 |                                 | firmware.                                                                             |
 +---------------------------------+---------------------------------------------------------------------------------------+
diff --git a/doc/guides/rel_notes/supported_features.rst b/doc/guides/rel_notes/supported_features.rst
index c908877..1102b66 100644
--- a/doc/guides/rel_notes/supported_features.rst
+++ b/doc/guides/rel_notes/supported_features.rst
@@ -170,14 +170,14 @@ Supported Features
 *   Support for multiple instances of the Intel®  DPDK
 
 *   Support for Intel®  82574L Gigabit Ethernet Controller - Intel®  Gigabit CT Desktop Adapter
-    (previously code named “Hartwell”)
+    (previously code named "Hartwell")
 
-*   Support for Intel® Ethernet Controller I210 (previously code named “Springville”)
+*   Support for Intel® Ethernet Controller I210 (previously code named "Springville")
 
 *   Early access support for the Quad-port Intel®  Ethernet Server Adapter X520-4 and X520-DA2
-    (code named “Spring Fountain”)
+    (code named "Spring Fountain")
 
-*   Support for Intel®  X710/XL710 40 Gigabit Ethernet Controller (code named “Fortville”)
+*   Support for Intel®  X710/XL710 40 Gigabit Ethernet Controller (code named "Fortville")
 
 *   Core components:
 
@@ -223,16 +223,16 @@ Supported Features
 
 *   IGB Poll Mode Driver - 1 GbE Controllers (librte_pmd_e1000)
 
-    *   Support for Intel® 82576 Gigabit Ethernet Controller (previously code named “Kawela”)
+    *   Support for Intel® 82576 Gigabit Ethernet Controller (previously code named "Kawela")
 
-    *   Support for Intel® 82580 Gigabit Ethernet Controller (previously code named “Barton Hills”)
+    *   Support for Intel® 82580 Gigabit Ethernet Controller (previously code named "Barton Hills")
 
-    *   Support for Intel®  I350 Gigabit Ethernet Controller (previously code named “Powerville”)
+    *   Support for Intel®  I350 Gigabit Ethernet Controller (previously code named "Powerville")
 
     *   Support for Intel® 82574L Gigabit Ethernet Controller - Intel® Gigabit CT Desktop Adapter
-        (previously code named “Hartwell”)
+        (previously code named "Hartwell")
 
-    *   Support for Intel® Ethernet Controller I210 (previously code named “Springville”)
+    *   Support for Intel® Ethernet Controller I210 (previously code named "Springville")
 
     *   Support for L2 Ethertype filters, SYN filters, 2-tuple filters and Flex filters for 82580 and i350
 
@@ -240,11 +240,11 @@ Supported Features
 
 *   Poll Mode Driver - 10 GbE Controllers (librte_pmd_ixgbe)
 
-    *   Support for Intel® 82599 10 Gigabit Ethernet Controller (previously code named “Niantic”)
+    *   Support for Intel® 82599 10 Gigabit Ethernet Controller (previously code named "Niantic")
 
-    *   Support for Intel® Ethernet Server Adapter X520-T2 (previously code named “Iron Pond”)
+    *   Support for Intel® Ethernet Server Adapter X520-T2 (previously code named "Iron Pond")
 
-    *   Support for Intel® Ethernet Controller X540-T2 (previously code named “Twin Pond”)
+    *   Support for Intel® Ethernet Controller X540-T2 (previously code named "Twin Pond")
 
     *   Support for Virtual Machine Device Queues (VMDq) and Data Center Bridging (DCB) to divide
         incoming traffic into 128 RX queues. DCB is also supported for transmitting packets.
@@ -351,7 +351,7 @@ Supported Features
 *   Improvements to SR-IOV switch configurability on the Intel® 82599 Ethernet Controllers in
     a virtualized environment.
 
-*   An API for L2 Ethernet Address “whitelist” filtering
+*   An API for L2 Ethernet Address "whitelist" filtering
 
 *   An API for resetting statistics counters
 
@@ -363,7 +363,7 @@ Supported Features
 
 *   Support for zero-copy Multicast
 
-*   New APIs to allow the “blacklisting” of specific NIC ports.
+*   New APIs to allow the "blacklisting" of specific NIC ports.
 
 *   Header files for common protocols (IP, SCTP, TCP, UDP)
 
diff --git a/doc/guides/rel_notes/updating_apps.rst b/doc/guides/rel_notes/updating_apps.rst
index 4dbf268..9467c4d 100644
--- a/doc/guides/rel_notes/updating_apps.rst
+++ b/doc/guides/rel_notes/updating_apps.rst
@@ -71,7 +71,7 @@ Intel® DPDK 1.2 to Intel® DPDK 1.3
 
 Note the following difference between releases 1.2 and 1.3:
 
-*   In release 1.3, the Intel® DPDK supports two different 1 GBe drivers: igb and em.
+*   In release 1.3, the Intel® DPDK supports two different 1 GbE drivers: igb and em.
     Both of them are located in the same library: lib_pmd_e1000.a.
     Therefore, the name of the library to link with for the igb PMD has changed from librte_pmd_igb.a to librte_pmd_e1000.a.
 
@@ -109,7 +109,7 @@ Note the following difference between release 1.1 and release 1.2:
 
 *   The method used for managing mbufs on the NIC TX rings for the 10 GbE driver has been modified to improve performance.
     As a result, different parameter values should be passed to the rte_eth_tx_queue_setup() function.
-    The recommended default values are to have tx_thresh.tx_wt hresh, tx_free_thresh,
+    The recommended default values are to have tx_thresh.tx_wthresh, tx_free_thresh,
     as well as the new parameter tx_rs_thresh (all in the struct rte_eth_txconf datatype) set to zero.
     See the "Configuration of Transmit and Receive Queues" section in the *Intel® DPDK Programmer's Guide* for more details.
 
diff --git a/doc/guides/sample_app_ug/dist_app.rst b/doc/guides/sample_app_ug/dist_app.rst
index bcff0dd..56195bb 100644
--- a/doc/guides/sample_app_ug/dist_app.rst
+++ b/doc/guides/sample_app_ug/dist_app.rst
@@ -104,7 +104,7 @@ Explanation
 -----------
 
 The distributor application consists of three types of threads: a receive
-thread (lcore_rx()), a set of worker threads(locre_worker())
+thread (lcore_rx()), a set of worker threads (lcore_worker())
 and a transmit thread(lcore_tx()). How these threads work together is shown
 in Fig2 below. The main() function launches  threads of these three types.
 Each thread has a while loop which will be doing processing and which is
diff --git a/doc/guides/sample_app_ug/ip_frag.rst b/doc/guides/sample_app_ug/ip_frag.rst
index 815cb4a..0c18fff 100644
--- a/doc/guides/sample_app_ug/ip_frag.rst
+++ b/doc/guides/sample_app_ug/ip_frag.rst
@@ -40,7 +40,7 @@ Overview
 
 The application demonstrates the use of zero-copy buffers for packet fragmentation.
 The initialization and run-time paths are very similar to those of the L2 forwarding application
-(see Chapter 9 "L2 Forwarding Simple Application (in Real and Virtualised Environments)" for more information).
+(see Chapter 9 "L2 Forwarding Simple Application (in Real and Virtualized Environments)" for more information).
 This guide highlights the differences between the two applications.
 
 There are three key differences from the L2 Forwarding sample application:
diff --git a/doc/guides/sample_app_ug/ip_reassembly.rst b/doc/guides/sample_app_ug/ip_reassembly.rst
index 6c500c0..050802a 100644
--- a/doc/guides/sample_app_ug/ip_reassembly.rst
+++ b/doc/guides/sample_app_ug/ip_reassembly.rst
@@ -96,7 +96,7 @@ where:
 
 *   --flowttl=TTL[(s|ms)]: determines maximum Time To Live for fragmented packet.
     If all fragments of the packet wouldn't appear within given time-out,
-    then they are consirdered as invalid and will be dropped.
+    then they are considered as invalid and will be dropped.
     Valid range is 1ms - 3600s. Default value: 1s.
 
 To run the example in linuxapp environment with 2 lcores (2,4) over 2 ports(0,2) with 1 RX queue per lcore:
diff --git a/doc/guides/sample_app_ug/ipv4_multicast.rst b/doc/guides/sample_app_ug/ipv4_multicast.rst
index 2020c4b..5e27041 100644
--- a/doc/guides/sample_app_ug/ipv4_multicast.rst
+++ b/doc/guides/sample_app_ug/ipv4_multicast.rst
@@ -232,12 +232,12 @@ Thereafter, a destination Ethernet address is constructed:
 
 .. code-block:: c
 
-    /* construct destination ethernet address */
+    /* construct destination Ethernet address */
 
     dst_eth_addr = ETHER_ADDR_FOR_IPV4_MCAST(dest_addr);
 
 Since Ethernet addresses are also part of the multicast process, each outgoing packet carries the same destination Ethernet address.
-The destination Ethernet address is constructed from the lower 23 bits of the multicast group ORed
+The destination Ethernet address is constructed from the lower 23 bits of the multicast group OR-ed
 with the Ethernet address 01:00:5e:00:00:00, as per RFC 1112:
 
 .. code-block:: c
diff --git a/doc/guides/sample_app_ug/l2_forward_job_stats.rst b/doc/guides/sample_app_ug/l2_forward_job_stats.rst
index 54d25cb..10dfecb 100644
--- a/doc/guides/sample_app_ug/l2_forward_job_stats.rst
+++ b/doc/guides/sample_app_ug/l2_forward_job_stats.rst
@@ -459,15 +459,15 @@ In the l2fwd_main_loop() function three loops are placed.
         rte_pause();
     }
 
-First inifnite for loop is to minimize impact of stats reading. Lock is only locked/unlocked when asked.
+First infinite for loop is to minimize impact of stats reading. Lock is only locked/unlocked when asked.
 
 Second inner while loop do the whole jobs management. When any job is ready, the use rte_timer_manage() is used to call the job handler.
 In this place functions l2fwd_fwd_job() and l2fwd_flush_job() are called when needed.
 Then rte_jobstats_context_finish() is called to mark loop end - no other jobs are ready to execute. By this time stats are ready to be read
 and if stats_read_pending is set, loop breaks allowing stats to be read.
 
-Third do-while loop is the idle job (idle stats counter). Its only purpose is moniting if any job is ready or stats job read is pending
-for this lcore. Statistics from this part of code is considered as the headroom available fo additional processing.
+Third do-while loop is the idle job (idle stats counter). Its only purpose is monitoring if any job is ready or stats job read is pending
+for this lcore. Statistics from this part of code are considered as the headroom available for additional processing.
 
 Receive, Process and Transmit Packets
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -595,7 +595,7 @@ If the table is full, the whole packets table is transmitted using the l2fwd_sen
     }
 
 To ensure that no packets remain in the tables, the flush job exists. The l2fwd_flush_job()
-is called periodicaly to for each lcore draining TX queue of each port.
+is called periodically for each lcore, draining the TX queue of each port.
 This technique introduces some latency when there are not many packets to send,
 however it improves performance:
 
diff --git a/doc/guides/sample_app_ug/l3_forward_power_man.rst b/doc/guides/sample_app_ug/l3_forward_power_man.rst
index 20e4be7..39c2ea5 100644
--- a/doc/guides/sample_app_ug/l3_forward_power_man.rst
+++ b/doc/guides/sample_app_ug/l3_forward_power_man.rst
@@ -341,7 +341,7 @@ to generate hints based on recent network load trends.
                      */
                     rte_delay_us(lcore_idle_hint);
                 else
-                    /* long sleep force runing thread to suspend */
+                    /* long sleep force running thread to suspend */
                     usleep(lcore_idle_hint);
 
                stats[lcore_id].sleep_time += lcore_idle_hint;
diff --git a/doc/guides/sample_app_ug/load_balancer.rst b/doc/guides/sample_app_ug/load_balancer.rst
index 6237633..3b32bdc 100644
--- a/doc/guides/sample_app_ug/load_balancer.rst
+++ b/doc/guides/sample_app_ug/load_balancer.rst
@@ -220,7 +220,7 @@ The application has built-in performance enhancements for the NUMA case:
 #.  Memory for the NIC RX or TX rings is allocated on the same socket with the lcore handling the respective ring.
 
 In the case where multiple CPU sockets are used in the system,
-it is recommended to enable at least one lcore to fulfil the I/O role for the NIC ports that
+it is recommended to enable at least one lcore to fulfill the I/O role for the NIC ports that
 are directly attached to that CPU socket through the PCI Express* bus.
 It is always recommended to handle the packet I/O with lcores from the same CPU socket as the NICs.
 
diff --git a/doc/guides/sample_app_ug/multi_process.rst b/doc/guides/sample_app_ug/multi_process.rst
index 7ca71ca..9ed450b 100644
--- a/doc/guides/sample_app_ug/multi_process.rst
+++ b/doc/guides/sample_app_ug/multi_process.rst
@@ -269,7 +269,7 @@ therefore will be accessible by the secondary process as it initializes.
     for(i = 0; i < num_ports; i++){
         if(proc_type == RTE_PROC_PRIMARY)
             if (smp_port_init(ports[i], mp, (uint16_t)num_procs) < 0)
-                rte_exit(EXIT_FAILURE, "Error initialising ports\n");
+                rte_exit(EXIT_FAILURE, "Error initializing ports\n");
     }
 
 In the secondary instance, rather than initializing the network ports, the port information exported by the primary process is used,
@@ -569,7 +569,7 @@ and the master needs to see the update and print them out.
 So, it needs to allocate a heap buffer using rte_zmalloc.
 In addition, if the -f option is specified,
 an array is needed to store the allocated core ID for the floating process so that the master can return it
-after a slave has exited accidently.
+after a slave has exited accidentally.
 
 .. code-block:: c
 
diff --git a/doc/guides/sample_app_ug/netmap_compatibility.rst b/doc/guides/sample_app_ug/netmap_compatibility.rst
index f333f25..d86b3e3 100644
--- a/doc/guides/sample_app_ug/netmap_compatibility.rst
+++ b/doc/guides/sample_app_ug/netmap_compatibility.rst
@@ -114,7 +114,7 @@ namely rte_netmap_init() and rte_netmap_init_port().
 These two initialization functions take compat_netmap specific data structures as parameters:
 struct rte_netmap_conf and struct rte_netmap_port_conf.
 Those structures' fields are Netmap related and are self-explanatory for developers familiar with Netmap.
-They are definedin $RTE_SDK/examples/netmap_compat/ lib/compat_netmap.h.
+They are defined in $RTE_SDK/examples/netmap_compat/lib/compat_netmap.h.
 
 The bridge application is an example largely based on the bridge example shipped with the Netmap distribution.
 It shows how a minimal Netmap application with minimal and straightforward source code changes can be run on top of the DPDK.
diff --git a/doc/guides/sample_app_ug/packet_ordering.rst b/doc/guides/sample_app_ug/packet_ordering.rst
index 481f1b7..ef85150 100644
--- a/doc/guides/sample_app_ug/packet_ordering.rst
+++ b/doc/guides/sample_app_ug/packet_ordering.rst
@@ -46,7 +46,7 @@ The application uses at least three CPU cores:
   Currently it modifies the output port of the packet for configurations with
   more than one port enabled.
 
-* TX Core (slave core) receives traffic from Woker cores through software queues,
+* TX Core (slave core) receives traffic from Worker cores through software queues,
   inserts out-of-order packets into reorder buffer, extracts ordered packets
   from the reorder buffer and sends them to the NIC ports for transmission.
 
@@ -94,7 +94,7 @@ The first CPU core in the core mask is the master core and would be assigned to
 RX core, the last to TX core and the rest to Worker cores.
 
 The PORTMASK parameter must contain either 1 or even enabled port numbers.
-When setting more than 1 port, traffic would be forwarderd in pairs.
+When setting more than 1 port, traffic would be forwarded in pairs.
 For example, if we enable 4 ports, traffic from port 0 to 1 and from 1 to 0,
 then the other pair from 2 to 3 and from 3 to 2, having [0,1] and [2,3] pairs.
 
diff --git a/doc/guides/sample_app_ug/quota_watermark.rst b/doc/guides/sample_app_ug/quota_watermark.rst
index e091ad9..4274223 100644
--- a/doc/guides/sample_app_ug/quota_watermark.rst
+++ b/doc/guides/sample_app_ug/quota_watermark.rst
@@ -209,7 +209,7 @@ Then, a call to init_dpdk(), defined in init.c, is made to initialize the poll m
             rte_exit(EXIT_FAILURE, "rte_eal_pci_probe(): error %d\n", ret);
 
         if (rte_eth_dev_count() < 2)
-            rte_exit(EXIT_FAILURE, "Not enough ethernet port available\n");
+            rte_exit(EXIT_FAILURE, "Not enough Ethernet port available\n");
     }
 
 To fully understand this code, it is recommended to study the chapters that relate to the *Poll Mode Driver*
@@ -492,7 +492,7 @@ low_watermark from the rte_memzone previously created by qw.
 
         qw_memzone = rte_memzone_lookup(QUOTA_WATERMARK_MEMZONE_NAME);
         if (qw_memzone == NULL)
-            rte_exit(EXIT_FAILURE, "Could't find memzone\n");
+            rte_exit(EXIT_FAILURE, "Couldn't find memzone\n");
 
         quota = qw_memzone->addr;
 
diff --git a/doc/guides/sample_app_ug/test_pipeline.rst b/doc/guides/sample_app_ug/test_pipeline.rst
index 0432942..46aa6d5 100644
--- a/doc/guides/sample_app_ug/test_pipeline.rst
+++ b/doc/guides/sample_app_ug/test_pipeline.rst
@@ -112,7 +112,7 @@ For hash tables, the following parameters can be selected:
     The available options are 8, 16 and 32 bytes;
 
 *   **Table type (e.g. hash-spec-16-ext or hash-spec-16-lru).**
-    The available options are ext (extendible bucket) or lru (least recently used).
+    The available options are ext (extendable bucket) or lru (least recently used).
 
 .. _table_3:
 
@@ -152,7 +152,7 @@ For hash tables, the following parameters can be selected:
 |       |                        |                                                          | [destination IPv4 address, 4 bytes of 0]              |
 |       |                        |                                                          |                                                       |
 +-------+------------------------+----------------------------------------------------------+-------------------------------------------------------+
-| 4     | hash-[spec]-8-ext      | Extendible bucket hash table with 8-byte key size        | Same as hash-[spec]-8-lru table entries, above.       |
+| 4     | hash-[spec]-8-ext      | Extendable bucket hash table with 8-byte key size        | Same as hash-[spec]-8-lru table entries, above.       |
 |       |                        | and 16 million entries.                                  |                                                       |
 |       |                        |                                                          |                                                       |
 +-------+------------------------+----------------------------------------------------------+-------------------------------------------------------+
@@ -175,7 +175,7 @@ For hash tables, the following parameters can be selected:
 |       |                        |                                                          | [destination IPv4 address, 12 bytes of 0]             |
 |       |                        |                                                          |                                                       |
 +-------+------------------------+----------------------------------------------------------+-------------------------------------------------------+
-| 6     | hash-[spec]-16-ext     | Extendible bucket hash table with 16-byte key size       | Same as hash-[spec]-16-lru table entries, above.      |
+| 6     | hash-[spec]-16-ext     | Extendable bucket hash table with 16-byte key size       | Same as hash-[spec]-16-lru table entries, above.      |
 |       |                        | and 16 million entries.                                  |                                                       |
 |       |                        |                                                          |                                                       |
 +-------+------------------------+----------------------------------------------------------+-------------------------------------------------------+
@@ -198,7 +198,7 @@ For hash tables, the following parameters can be selected:
 |       |                        |                                                          | [destination IPv4 address, 28 bytes of 0]             |
 |       |                        |                                                          |                                                       |
 +-------+------------------------+----------------------------------------------------------+-------------------------------------------------------+
-| 8     | hash-[spec]-32-ext     | Extendible bucket hash table with 32-byte key size       | Same as hash-[spec]-32-lru table entries, above.      |
+| 8     | hash-[spec]-32-ext     | Extendable bucket hash table with 32-byte key size       | Same as hash-[spec]-32-lru table entries, above.      |
 |       |                        | and 16 million entries.                                  |                                                       |
 |       |                        |                                                          |                                                       |
 +-------+------------------------+----------------------------------------------------------+-------------------------------------------------------+
diff --git a/doc/guides/sample_app_ug/timer.rst b/doc/guides/sample_app_ug/timer.rst
index d7f17f5..ee0a732 100644
--- a/doc/guides/sample_app_ug/timer.rst
+++ b/doc/guides/sample_app_ug/timer.rst
@@ -114,7 +114,7 @@ The main loop is very simple in this example:
         /*
          *   Call the timer handler on each core: as we don't
          *   need a very precise timer, so only call
-         *   rte_timer_manage() every ~10ms (at 2 Ghz). In a real
+         *   rte_timer_manage() every ~10ms (at 2 GHz). In a real
          *   application, this will enhance performances as
          *   reading the HPET timer is not efficient.
         */
diff --git a/doc/guides/sample_app_ug/vhost.rst b/doc/guides/sample_app_ug/vhost.rst
index df8cd8c..ca9390d 100644
--- a/doc/guides/sample_app_ug/vhost.rst
+++ b/doc/guides/sample_app_ug/vhost.rst
@@ -37,7 +37,7 @@ The vhost sample application demonstrates integration of the Data Plane Developm
 with the Linux* KVM hypervisor by implementing the vhost-net offload API.
 The sample application performs simple packet switching between virtual machines based on Media Access Control
 (MAC) address or Virtual Local Area Network (VLAN) tag.
-The splitting of ethernet traffic from an external switch is performed in hardware by the Virtual Machine Device Queues
+The splitting of Ethernet traffic from an external switch is performed in hardware by the Virtual Machine Device Queues
 (VMDQ) and Data Center Bridging (DCB) features of the Intel® 82599 10 Gigabit Ethernet Controller.
 
 Background
@@ -90,7 +90,7 @@ Sample Code Overview
 The DPDK vhost-net sample code demonstrates KVM (QEMU) offloading the servicing of a Virtual Machine's (VM's)
 virtio-net devices to a DPDK-based application in place of the kernel's vhost-net module.
 
-The DPDK vhost-net sample code is based on vhost library. Vhost library is developed for user space ethernet switch to
+The DPDK vhost-net sample code is based on vhost library. Vhost library is developed for user space Ethernet switch to
 easily integrate with vhost functionality.
 
 The vhost library implements the following features:
@@ -110,7 +110,7 @@ socket messages. Most of the messages share the same handler routine.
 .. note::
     **Any vhost cuse specific requirement in the following sections will be emphasized**.
 
-Two impelmentations are turned on and off statically through configure file. Only one implementation could be turned on. They don't co-exist in current implementation.
+Two implementations are turned on and off statically through configure file. Only one implementation could be turned on. They don't co-exist in current implementation.
 
 The vhost sample code application is a simple packet switching application with the following feature:
 
@@ -158,7 +158,7 @@ Installing Packages on the Host(vhost cuse required)
 
 The vhost cuse code uses the following packages; fuse, fuse-devel, and kernel-modules-extra.
 The vhost user code don't rely on those modules as eventfds are already installed into vhost process through
-unix domain socket.
+Unix domain socket.
 
 #.  Install Fuse Development Libraries and headers:
 
@@ -491,7 +491,7 @@ This option is disabled by default.
 
 **RX descriptor number.**
 The RX descriptor number option specify the Ethernet RX descriptor number,
-Linux legacy virtio-net has different behaviour in how to use the vring descriptor from DPDK based virtio-net PMD,
+Linux legacy virtio-net has different behavior in how to use the vring descriptor from DPDK based virtio-net PMD,
 the former likely allocate half for virtio header, another half for frame buffer,
 while the latter allocate all for frame buffer,
 this lead to different number for available frame buffer in vring,
@@ -502,7 +502,7 @@ So it is valid only in zero copy mode is enabled. The value is 32 by default.
 
     user@target:~$ ./build/app/vhost-switch -c f -n 4 --huge-dir /mnt/huge -- --zero-copy 1 --rx-desc-num [0, n]
 
-**TX descriptornumber.**
+**TX descriptor number.**
 The TX descriptor number option specify the Ethernet TX descriptor number, it is valid only in zero copy mode is enabled.
 The value is 64 by default.
 
diff --git a/doc/guides/sample_app_ug/vm_power_management.rst b/doc/guides/sample_app_ug/vm_power_management.rst
index 2a923d8..dd6e1e8 100644
--- a/doc/guides/sample_app_ug/vm_power_management.rst
+++ b/doc/guides/sample_app_ug/vm_power_management.rst
@@ -139,7 +139,7 @@ Host Operating System
 The Host OS must also have the *apci_cpufreq* module installed, in some cases
 the *intel_pstate* driver may be the default Power Management environment.
 To enable *acpi_cpufreq* and disable *intel_pstate*, add the following
-to the grub linux command line:
+to the grub Linux command line:
 
 .. code-block:: console
 
@@ -220,7 +220,7 @@ on cores 0 & 1 on a system with 4 memory channels:
 
  ./build/vm_power_mgr -c 0x3 -n 4
 
-After successful initialisation the user is presented with VM Power Manager CLI:
+After successful initialization the user is presented with VM Power Manager CLI:
 
 .. code-block:: console
 
@@ -343,7 +343,7 @@ for example to run on cores 0,1,2,3 on a system with 4 memory channels:
  ./build/guest_vm_power_mgr -c 0xf -n 4
 
 
-After successful initialisation the user is presented with VM Power Manager Guest CLI:
+After successful initialization the user is presented with VM Power Manager Guest CLI:
 
 .. code-block:: console
 
diff --git a/doc/guides/sample_app_ug/vmdq_dcb_forwarding.rst b/doc/guides/sample_app_ug/vmdq_dcb_forwarding.rst
index e5d34e1..9fc1fd5 100644
--- a/doc/guides/sample_app_ug/vmdq_dcb_forwarding.rst
+++ b/doc/guides/sample_app_ug/vmdq_dcb_forwarding.rst
@@ -142,7 +142,7 @@ a default structure is provided for VMDQ and DCB configuration to be filled in l
 
 .. code-block:: c
 
-    /* empty vmdq+dcb configuration structure. Filled in programatically */
+    /* empty vmdq+dcb configuration structure. Filled in programmatically */
 
     static const struct rte_eth_conf vmdq_dcb_conf_default = {
         .rxmode = {
@@ -228,7 +228,7 @@ so the pools parameter in the rte_eth_vmdq_dcb_conf structure is specified as a
 Once the network port has been initialized using the correct VMDQ and DCB values,
 the initialization of the port's RX and TX hardware rings is performed similarly to that
 in the L2 Forwarding sample application.
-See Chapter 9, "L2 Forwarding Sample Aplication (in Real and Virtualized Environments)" for more information.
+See Chapter 9, "L2 Forwarding Sample Application (in Real and Virtualized Environments)" for more information.
 
 Statistics Display
 ~~~~~~~~~~~~~~~~~~
diff --git a/doc/guides/testpmd_app_ug/intro.rst b/doc/guides/testpmd_app_ug/intro.rst
index d9d17dc..ccf57ed 100644
--- a/doc/guides/testpmd_app_ug/intro.rst
+++ b/doc/guides/testpmd_app_ug/intro.rst
@@ -37,8 +37,8 @@ The testpmd application can be used to test the DPDK in a packet forwarding mode
 and also to access NIC hardware features such as Flow Director.
 It also serves as a example of how to build a more fully-featured application using the DPDK SDK.
 
-DocumentationRoadmap
---------------------
+Documentation Roadmap
+---------------------
 
 The following is a list of DPDK documents in the suggested reading order:
 
diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
index a08327b..761172e 100644
--- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
+++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
@@ -510,7 +510,7 @@ tx_vlan set (vlan_id) (port_id)
 tx_vlan set pvid
 ~~~~~~~~~~~~~~~~
 
-Set port based hardware insertion of VLAN ID in pacekts sent on a port:
+Set port based hardware insertion of VLAN ID in packets sent on a port:
 
 tx_vlan set pvid (port_id) (vlan_id) (on|off)
 
@@ -1214,7 +1214,7 @@ For example, set a Link Bonding device (port 10) to use a balance policy of laye
 set bonding mon_period
 ~~~~~~~~~~~~~~~~~~~~~~
 
-Set the link status monitoring polling period in milliseconds for a bonding devicie.
+Set the link status monitoring polling period in milliseconds for a bonding device.
 
 This adds support for PMD slave devices which do not support link status interrupts.
 When the mon_period is set to a value greater than 0 then all PMD's which do not support
-- 
1.8.1.4

^ permalink raw reply	[relevance 8%]

* Re: [dpdk-dev] [PATCH v3 3/4] bond mode 4: allow external state machine
  @ 2015-04-22 19:51  3%   ` Neil Horman
  0 siblings, 0 replies; 200+ results
From: Neil Horman @ 2015-04-22 19:51 UTC (permalink / raw)
  To: Eric Kinzie; +Cc: dev, Eric Kinzie

On Wed, Apr 22, 2015 at 09:27:42AM -0700, Eric Kinzie wrote:
> From: Eric Kinzie <ekinzie@brocade.com>
> 
>   Provide functions to allow an external 802.3ad state machine to transmit
>   and receive LACPDUs and to set the collection/distribution flags on
>   slave interfaces.
> 
> Signed-off-by: Eric Kinzie <ehkinzie@gmail.com>
> ---
>  lib/librte_pmd_bond/rte_eth_bond_8023ad.c         |  173 +++++++++++++++++++++
>  lib/librte_pmd_bond/rte_eth_bond_8023ad.h         |   44 ++++++
>  lib/librte_pmd_bond/rte_eth_bond_8023ad_private.h |    2 +
>  lib/librte_pmd_bond/rte_eth_bond_version.map      |    3 +
>  4 files changed, 222 insertions(+)
> 
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
> index 1009d5b..326e899 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
> +++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
> @@ -42,6 +42,8 @@
>  
>  #include "rte_eth_bond_private.h"
>  
> +static void bond_mode_8023ad_ext_periodic_cb(void *arg);
> +
>  #ifdef RTE_LIBRTE_BOND_DEBUG_8023AD
>  #define MODE4_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \
>  			bond_dbg_get_time_diff_ms(), slave_id, \
> @@ -1014,6 +1016,7 @@ bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
>  	conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks;
>  	conf->update_timeout_ms = mode4->update_timeout_us / 1000;
>  	conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks;
> +	conf->slowrx_cb = mode4->slowrx_cb;
>  }
>  
>  void
> @@ -1035,8 +1038,11 @@ bond_mode_8023ad_setup(struct rte_eth_dev *dev,
>  		conf->tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS;
>  		conf->rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS;
>  		conf->update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS;
> +		conf->slowrx_cb = NULL;
>  	}
>  
> +	bond_mode_8023ad_stop(dev);
> +
>  	mode4->fast_periodic_timeout = conf->fast_periodic_ms * ms_ticks;
>  	mode4->slow_periodic_timeout = conf->slow_periodic_ms * ms_ticks;
>  	mode4->short_timeout = conf->short_timeout_ms * ms_ticks;
> @@ -1045,6 +1051,10 @@ bond_mode_8023ad_setup(struct rte_eth_dev *dev,
>  	mode4->tx_period_timeout = conf->tx_period_ms * ms_ticks;
>  	mode4->rx_marker_timeout = conf->rx_marker_period_ms * ms_ticks;
>  	mode4->update_timeout_us = conf->update_timeout_ms * 1000;
> +	mode4->slowrx_cb = conf->slowrx_cb;
> +
> +	if (dev->data->dev_started)
> +		bond_mode_8023ad_start(dev);
>  }
>  
>  int
> @@ -1062,6 +1072,13 @@ bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev)
>  int
>  bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
>  {
> +	struct bond_dev_private *internals = bond_dev->data->dev_private;
> +	struct mode8023ad_private *mode4 = &internals->mode4;
> +
> +	if (mode4->slowrx_cb)
> +		return rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000,
> +			&bond_mode_8023ad_ext_periodic_cb, bond_dev);
> +
>  	return rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000,
>  			&bond_mode_8023ad_periodic_cb, bond_dev);
>  }
> @@ -1069,6 +1086,13 @@ bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
>  void
>  bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
>  {
> +	struct bond_dev_private *internals = bond_dev->data->dev_private;
> +	struct mode8023ad_private *mode4 = &internals->mode4;
> +
> +	if (mode4->slowrx_cb) {
> +		rte_eal_alarm_cancel(&bond_mode_8023ad_ext_periodic_cb, bond_dev);
> +		return;
> +	}
>  	rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev);
>  }
>  
> @@ -1215,3 +1239,152 @@ rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id,
>  	info->agg_port_id = port->aggregator_port_id;
>  	return 0;
>  }
> +
> +int
> +rte_eth_bond_8023ad_ext_collect(uint8_t port_id, uint8_t slave_id, int enabled)
> +{
> +	struct rte_eth_dev *bond_dev;
> +	struct bond_dev_private *internals;
> +	struct mode8023ad_private *mode4;
> +	struct port *port;
> +
> +	if (rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
> +		return -EINVAL;
> +
> +	bond_dev = &rte_eth_devices[port_id];
> +
> +	if (!bond_dev->data->dev_started)
> +		return -EINVAL;
> +
> +	internals = bond_dev->data->dev_private;
> +	if (find_slave_by_id(internals->active_slaves,
> +			internals->active_slave_count, slave_id) ==
> +				internals->active_slave_count)
> +		return -EINVAL;
> +
> +	mode4 = &internals->mode4;
> +	if (mode4->slowrx_cb == NULL)
> +		return -EINVAL;
> +
> +	port = &mode_8023ad_ports[slave_id];
> +
> +	if (enabled)
> +		ACTOR_STATE_SET(port, COLLECTING);
> +	else
> +		ACTOR_STATE_CLR(port, COLLECTING);
> +
> +	return 0;
> +}
> +
> +int
> +rte_eth_bond_8023ad_ext_distrib(uint8_t port_id, uint8_t slave_id, int enabled)
> +{
> +	struct rte_eth_dev *bond_dev;
> +	struct bond_dev_private *internals;
> +	struct mode8023ad_private *mode4;
> +	struct port *port;
> +
> +	if (rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
> +		return -EINVAL;
> +
> +	bond_dev = &rte_eth_devices[port_id];
> +
> +	if (!bond_dev->data->dev_started)
> +		return -EINVAL;
> +
> +	internals = bond_dev->data->dev_private;
> +	if (find_slave_by_id(internals->active_slaves,
> +			internals->active_slave_count, slave_id) ==
> +				internals->active_slave_count)
> +		return -EINVAL;
> +
> +	mode4 = &internals->mode4;
> +	if (mode4->slowrx_cb == NULL)
> +		return -EINVAL;
> +
> +	port = &mode_8023ad_ports[slave_id];
> +
> +	if (enabled)
> +		ACTOR_STATE_SET(port, DISTRIBUTING);
> +	else
> +		ACTOR_STATE_CLR(port, DISTRIBUTING);
> +
> +	return 0;
> +}
> +
> +int
> +rte_eth_bond_8023ad_ext_slowtx(uint8_t port_id, uint8_t slave_id,
> +		struct rte_mbuf *lacp_pkt)
> +{
> +	struct rte_eth_dev *bond_dev;
> +	struct bond_dev_private *internals;
> +	struct mode8023ad_private *mode4;
> +	struct port *port;
> +
> +	if (rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
> +		return -EINVAL;
> +
> +	bond_dev = &rte_eth_devices[port_id];
> +
> +	if (!bond_dev->data->dev_started)
> +		return -EINVAL;
> +
> +	internals = bond_dev->data->dev_private;
> +	if (find_slave_by_id(internals->active_slaves,
> +			internals->active_slave_count, slave_id) ==
> +				internals->active_slave_count)
> +		return -EINVAL;
> +
> +	mode4 = &internals->mode4;
> +	if (mode4->slowrx_cb == NULL)
> +		return -EINVAL;
> +
> +	port = &mode_8023ad_ports[slave_id];
> +
> +	if (rte_pktmbuf_pkt_len(lacp_pkt) < sizeof(struct lacpdu_header))
> +		return -EINVAL;
> +
> +	struct lacpdu_header *lacp;
> +
> +	/* only enqueue LACPDUs */
> +	lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
> +	if (lacp->lacpdu.subtype != SLOW_SUBTYPE_LACP)
> +		return -EINVAL;
> +
> +	MODE4_DEBUG("sending LACP frame\n");
> +
> +	return rte_ring_enqueue(port->tx_ring, lacp_pkt);
> +}
> +
> +static void
> +bond_mode_8023ad_ext_periodic_cb(void *arg)
> +{
> +	struct rte_eth_dev *bond_dev = arg;
> +	struct bond_dev_private *internals = bond_dev->data->dev_private;
> +	struct mode8023ad_private *mode4 = &internals->mode4;
> +	struct port *port;
> +	void *pkt = NULL;
> +	uint16_t i, slave_id;
> +
> +	for (i = 0; i < internals->active_slave_count; i++) {
> +		slave_id = internals->active_slaves[i];
> +		port = &mode_8023ad_ports[slave_id];
> +
> +		if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) {
> +			struct rte_mbuf *lacp_pkt = pkt;
> +			struct lacpdu_header *lacp;
> +
> +			lacp = rte_pktmbuf_mtod(lacp_pkt,
> +						struct lacpdu_header *);
> +			RTE_VERIFY(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
> +
> +			/* This is LACP frame so pass it to rx callback.
> +			 * Callback is responsible for freeing mbuf.
> +			 */
> +			mode4->slowrx_cb(slave_id, lacp_pkt);
> +		}
> +	}
> +
> +	rte_eal_alarm_set(internals->mode4.update_timeout_us,
> +			bond_mode_8023ad_ext_periodic_cb, arg);
> +}
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.h b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h
> index ebd0e93..8cfa3d3 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_8023ad.h
> +++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h
> @@ -64,6 +64,8 @@ extern "C" {
>  #define MARKER_TLV_TYPE_INFO                0x01
>  #define MARKER_TLV_TYPE_RESP                0x02
>  
> +typedef void (*rte_eth_bond_8023ad_ext_slowrx_fn)(uint8_t slave_id, struct rte_mbuf *lacp_pkt);
> +
>  enum rte_bond_8023ad_selection {
>  	UNSELECTED,
>  	STANDBY,
> @@ -157,6 +159,7 @@ struct rte_eth_bond_8023ad_conf {
>  	uint32_t tx_period_ms;
>  	uint32_t rx_marker_period_ms;
>  	uint32_t update_timeout_ms;
> +	rte_eth_bond_8023ad_ext_slowrx_fn slowrx_cb;
>  };
>  

This is dangerous.  While it doesn't break ABI, it does create a situation in
which you will be prone to writing to invalid memory. By the examples that you
provide in the test application, you declare rte_eth_bond_8023ad_conf statically
within an application, so if someone is using the DPDK built as a shared
library, they may have less allocated space in the .data section than you think
with this new extended structure.  As a result in that situation you will read
from and write to unallocated memory when you access slowrx_cb.  I recommend
that you version the setup routine, so that the older version is modified to not
touch slowrx_cb.

Neil

^ permalink raw reply	[relevance 3%]

* [dpdk-dev] [PATCH v4 3/7] hv: add basic vmbus support
  @ 2015-04-21 17:32  1% ` Stephen Hemminger
  0 siblings, 0 replies; 200+ results
From: Stephen Hemminger @ 2015-04-21 17:32 UTC (permalink / raw)
  To: alexmay; +Cc: dev

The hyper-v device driver forces the base EAL code to change
to support multiple bus types. This is done by changing the pci_device
in ether driver to a generic union.

As much as possible this is done in a backwards source compatible
way. It will break ABI for device drivers.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/librte_eal/common/Makefile             |   2 +-
 lib/librte_eal/common/eal_common_options.c |   5 +
 lib/librte_eal/common/eal_internal_cfg.h   |   1 +
 lib/librte_eal/common/eal_options.h        |   2 +
 lib/librte_eal/common/eal_private.h        |  10 +
 lib/librte_eal/common/include/rte_vmbus.h  | 159 +++++++
 lib/librte_eal/linuxapp/eal/Makefile       |   3 +
 lib/librte_eal/linuxapp/eal/eal.c          |  11 +
 lib/librte_eal/linuxapp/eal/eal_vmbus.c    | 641 +++++++++++++++++++++++++++++
 lib/librte_ether/rte_ethdev.c              | 128 +++++-
 lib/librte_ether/rte_ethdev.h              |  15 +-
 11 files changed, 968 insertions(+), 9 deletions(-)
 create mode 100644 lib/librte_eal/common/include/rte_vmbus.h
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_vmbus.c

diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index 3ea3bbf..202485e 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -33,7 +33,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 
 INC := rte_branch_prediction.h rte_common.h
 INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
-INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
+INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h rte_vmbus.h
 INC += rte_pci_dev_ids.h rte_per_lcore.h rte_random.h
 INC += rte_rwlock.h rte_tailq.h rte_interrupts.h rte_alarm.h
 INC += rte_string_fns.h rte_version.h
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 8fcb1ab..76a3394 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -80,6 +80,7 @@ eal_long_options[] = {
 	{OPT_NO_HPET,           0, NULL, OPT_NO_HPET_NUM          },
 	{OPT_NO_HUGE,           0, NULL, OPT_NO_HUGE_NUM          },
 	{OPT_NO_PCI,            0, NULL, OPT_NO_PCI_NUM           },
+	{OPT_NO_VMBUS,		0, NULL, OPT_NO_VMBUS_NUM	  },
 	{OPT_NO_SHCONF,         0, NULL, OPT_NO_SHCONF_NUM        },
 	{OPT_PCI_BLACKLIST,     1, NULL, OPT_PCI_BLACKLIST_NUM    },
 	{OPT_PCI_WHITELIST,     1, NULL, OPT_PCI_WHITELIST_NUM    },
@@ -726,6 +727,10 @@ eal_parse_common_option(int opt, const char *optarg,
 		conf->no_pci = 1;
 		break;
 
+	case OPT_NO_VMBUS_NUM:
+		conf->no_vmbus = 1;
+		break;
+
 	case OPT_NO_HPET_NUM:
 		conf->no_hpet = 1;
 		break;
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index e2ecb0d..0e7de34 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -66,6 +66,7 @@ struct internal_config {
 	volatile unsigned no_hugetlbfs;   /**< true to disable hugetlbfs */
 	volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
 	volatile unsigned no_pci;         /**< true to disable PCI */
+	volatile unsigned no_vmbus;	  /**< true to disable VMBUS */
 	volatile unsigned no_hpet;        /**< true to disable HPET */
 	volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
 										* instead of native TSC */
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index f6714d9..54f03dc 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -67,6 +67,8 @@ enum {
 	OPT_NO_HUGE_NUM,
 #define OPT_NO_PCI            "no-pci"
 	OPT_NO_PCI_NUM,
+#define OPT_NO_VMBUS          "no-vmbus"
+	OPT_NO_VMBUS_NUM,
 #define OPT_NO_SHCONF         "no-shconf"
 	OPT_NO_SHCONF_NUM,
 #define OPT_SOCKET_MEM        "socket-mem"
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 4acf5a0..039e9f3 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -180,6 +180,16 @@ int rte_eal_pci_close_one_driver(struct rte_pci_driver *dr,
 		struct rte_pci_device *dev);
 
 /**
+ * VMBUS related functions and structures
+ */
+int rte_eal_vmbus_init(void);
+
+struct rte_vmbus_driver;
+struct rte_vmbus_device;
+
+int rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
+		struct rte_vmbus_device *dev);
+/**
  * Init tail queues for non-EAL library structures. This is to allow
  * the rings, mempools, etc. lists to be shared among multiple processes
  *
diff --git a/lib/librte_eal/common/include/rte_vmbus.h b/lib/librte_eal/common/include/rte_vmbus.h
new file mode 100644
index 0000000..e632572
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_vmbus.h
@@ -0,0 +1,159 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _RTE_VMBUS_H_
+#define _RTE_VMBUS_H_
+
+/**
+ * @file
+ *
+ * RTE VMBUS Interface
+ */
+
+#include <sys/queue.h>
+
+/** Pathname of VMBUS devices directory. */
+#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices"
+
+/** Formatting string for VMBUS device identifier: Ex: vmbus_0_9 */
+#define VMBUS_PRI_FMT "vmbus_0_%u"
+
+#define VMBUS_ID_ANY 0xFFFF
+
+#define VMBUS_NETWORK_DEVICE "{f8615163-df3e-46c5-913f-f2d2f965ed0e}"
+
+/** Maximum number of VMBUS resources. */
+#define VMBUS_MAX_RESOURCE 7
+
+/**
+ * A structure describing an ID for a VMBUS driver. Each driver provides a
+ * table of these IDs for each device that it supports.
+ */
+struct rte_vmbus_id {
+	uint16_t device_id;           /**< VMBUS Device ID */
+	uint16_t sysfs_num;           /**< vmbus_0_X */
+};
+
+/**
+ * A structure describing a VMBUS memory resource.
+ */
+struct rte_vmbus_resource {
+	uint64_t phys_addr;   /**< Physical address, 0 if no resource. */
+	uint64_t len;         /**< Length of the resource. */
+	void *addr;           /**< Virtual address, NULL when not mapped. */
+};
+
+/**
+ * A structure describing a VMBUS device.
+ */
+struct rte_vmbus_device {
+	TAILQ_ENTRY(rte_vmbus_device) next;     /**< Next probed VMBUS device. */
+	struct rte_vmbus_id id;                 /**< VMBUS ID. */
+	const struct rte_vmbus_driver *driver;  /**< Associated driver */
+	int numa_node;                          /**< NUMA node connection */
+	unsigned int blacklisted:1;             /**< Device is blacklisted */
+	struct rte_vmbus_resource mem_resource[VMBUS_MAX_RESOURCE];   /**< VMBUS Memory Resource */
+	uint32_t vmbus_monitor_id;              /**< VMBus monitor ID for device */
+	int uio_fd;                             /** UIO device file descriptor */
+};
+
+/** Macro used to help building up tables of device IDs */
+#define RTE_VMBUS_DEVICE(dev)          \
+	.device_id = (dev)
+
+struct rte_vmbus_driver;
+
+/**
+ * Initialisation function for the driver called during VMBUS probing.
+ */
+typedef int (vmbus_devinit_t)(struct rte_vmbus_driver *, struct rte_vmbus_device *);
+
+/**
+ * Uninitialisation function for the driver called during hotplugging.
+ */
+typedef int (vmbus_devuninit_t)(struct rte_vmbus_device *);
+
+/**
+ * A structure describing a VMBUS driver.
+ */
+struct rte_vmbus_driver {
+	TAILQ_ENTRY(rte_vmbus_driver) next;     /**< Next in list. */
+	const char *name;                       /**< Driver name. */
+	vmbus_devinit_t *devinit;               /**< Device init. function. */
+	vmbus_devuninit_t *devuninit;           /**< Device uninit function. */
+	const struct rte_vmbus_id *id_table;    /**< ID table, NULL terminated. */
+	uint32_t drv_flags;                     /**< Flags controlling handling of device. */
+	const char *module_name;		/**< Associated kernel module */
+};
+
+/**
+ * Probe the VMBUS device for registered drivers.
+ *
+ * Scan the content of the vmbus, and call the probe() function for
+ * all registered drivers that have a matching entry in its id_table
+ * for discovered devices.
+ *
+ * @return
+ *   - 0 on success.
+ *   - Negative on error.
+ */
+int rte_eal_vmbus_probe(void);
+
+/**
+ * Dump the content of the vmbus.
+ */
+void rte_eal_vmbus_dump(void);
+
+/**
+ * Register a VMBUS driver.
+ *
+ * @param driver
+ *   A pointer to a rte_vmbus_driver structure describing the driver
+ *   to be registered.
+ */
+void rte_eal_vmbus_register(struct rte_vmbus_driver *driver);
+
+/**
+ * Unregister a VMBUS driver.
+ *
+ * @param driver
+ *   A pointer to a rte_vmbus_driver structure describing the driver
+ *   to be unregistered.
+ */
+void rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver);
+
+int vmbus_uio_map_resource(struct rte_vmbus_device *dev);
+
+#endif /* _RTE_VMBUS_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 01f7b70..acd5127 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -74,6 +74,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_alarm.c
 ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_ivshmem.c
 endif
+ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_vmbus.c
+endif
 
 # from common dir
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index bd770cf..86d0e31 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -70,6 +70,7 @@
 #include <rte_cpuflags.h>
 #include <rte_interrupts.h>
 #include <rte_pci.h>
+#include <rte_vmbus.h>
 #include <rte_devargs.h>
 #include <rte_common.h>
 #include <rte_version.h>
@@ -796,6 +797,11 @@ rte_eal_init(int argc, char **argv)
 
 	rte_eal_mcfg_complete();
 
+#ifdef RTE_LIBRTE_HV_PMD
+	if (rte_eal_vmbus_init() < 0)
+		RTE_LOG(ERR, EAL, "Cannot init VMBUS\n");
+#endif
+
 	TAILQ_FOREACH(solib, &solib_list, next) {
 		RTE_LOG(INFO, EAL, "open shared lib %s\n", solib->name);
 		solib->lib_handle = dlopen(solib->name, RTLD_NOW);
@@ -845,6 +851,11 @@ rte_eal_init(int argc, char **argv)
 	if (rte_eal_pci_probe())
 		rte_panic("Cannot probe PCI\n");
 
+#ifdef RTE_LIBRTE_HV_PMD
+	if (rte_eal_vmbus_probe() < 0)
+		rte_panic("Cannot probe VMBUS\n");
+#endif
+
 	return fctret;
 }
 
diff --git a/lib/librte_eal/linuxapp/eal/eal_vmbus.c b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
new file mode 100644
index 0000000..165edd6
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
@@ -0,0 +1,641 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <string.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_vmbus.h>
+#include <rte_common.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_malloc.h>
+
+#include "eal_filesystem.h"
+#include "eal_private.h"
+
+#define PROC_MODULES "/proc/modules"
+#define VMBUS_DRV_PATH "/sys/bus/vmbus/drivers/%s"
+
+TAILQ_HEAD(vmbus_device_list, rte_vmbus_device); /**< VMBUS devices in D-linked Q. */
+TAILQ_HEAD(vmbus_driver_list, rte_vmbus_driver); /**< VMBUS drivers in D-linked Q. */
+
+static struct vmbus_driver_list vmbus_driver_list =
+	TAILQ_HEAD_INITIALIZER(vmbus_driver_list);
+static struct vmbus_device_list vmbus_device_list =
+	TAILQ_HEAD_INITIALIZER(vmbus_device_list);
+
+/* Description of one UIO memory map of a vmbus device. */
+struct uio_map {
+	/* address the map was mmap()ed at, plus the sysfs offset */
+	void *addr;
+	/* offset read from sysfs; replaced by the mmap() file offset once mapped */
+	uint64_t offset;
+	/* length of the map, read from the sysfs "size" attribute */
+	uint64_t size;
+	/* value of the sysfs "addr" attribute (physical address) */
+	uint64_t phaddr;
+};
+
+/*
+ * For multi-process we need to reproduce all vmbus mappings in secondary
+ * processes, so save them in a tailq.
+ */
+struct uio_resource {
+	/* list linkage */
+	TAILQ_ENTRY(uio_resource) next;
+
+	/* copy of the id of the vmbus device the maps belong to */
+	struct rte_vmbus_id vmbus_addr;
+	/* path of the UIO character device (/dev/uioX) */
+	char path[PATH_MAX];
+	/* number of valid entries in maps[] */
+	size_t nb_maps;
+	struct uio_map maps[VMBUS_MAX_RESOURCE];
+};
+
+/*
+ * Read a single integer from a sysfs attribute file.
+ * This differs from the generic EAL helper in that the result is kept
+ * as a 64-bit value.  The numeric base is auto-detected by strtoull()
+ * and the number must be directly followed by a newline.
+ * Returns 0 on success, -1 if the file cannot be opened, read or parsed.
+ */
+static int
+vmbus_parse_sysfs_value(const char *filename, uint64_t *val)
+{
+	char line[BUFSIZ];
+	char *stop = NULL;
+	FILE *fp = fopen(filename, "r");
+
+	if (fp == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+				__func__, filename);
+		return -1;
+	}
+
+	if (fgets(line, sizeof(line), fp) == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+				__func__, filename);
+		fclose(fp);
+		return -1;
+	}
+	/* the stream is not needed any more once the line is buffered */
+	fclose(fp);
+
+	*val = strtoull(line, &stop, 0);
+	if (line[0] == '\0' || stop == NULL || *stop != '\n') {
+		RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
+				__func__, filename);
+		return -1;
+	}
+	return 0;
+}
+
+#define OFF_MAX              ((uint64_t)(off_t)-1)
+/*
+ * Read the description of the UIO maps found below "<devname>/maps".
+ *
+ * For map0, map1, ... the sysfs attributes "offset", "size" and "addr" are
+ * parsed and stored in maps[].  Scanning stops at the first map directory
+ * that does not exist or after nb_maps entries.
+ *
+ * Returns the number of maps described, or -1 if an attribute cannot be
+ * parsed or a value exceeds the range checks below.
+ */
+static ssize_t
+vmbus_uio_get_mappings(const char *devname, struct uio_map maps[], size_t nb_maps)
+{
+	size_t i;
+	char dirname[PATH_MAX];
+	char filename[PATH_MAX];
+	uint64_t offset, size;
+
+	for (i = 0; i != nb_maps; i++) {
+
+		/* check if map directory exists */
+		snprintf(dirname, sizeof(dirname),
+				"%s/maps/map%zu", devname, i);
+
+		RTE_LOG(DEBUG, EAL, "Scanning maps in %s\n", (char *)dirname);
+
+		/* a missing directory marks the end of the map list */
+		if (access(dirname, F_OK) != 0)
+			break;
+
+		/* get mapping offset */
+		snprintf(filename, sizeof(filename),
+				"%s/offset", dirname);
+		if (vmbus_parse_sysfs_value(filename, &offset) < 0) {
+			RTE_LOG(ERR, EAL,
+					"%s(): cannot parse offset of %s\n",
+					__func__, dirname);
+			return -1;
+		}
+
+		/* get mapping size */
+		snprintf(filename, sizeof(filename),
+				"%s/size", dirname);
+		if (vmbus_parse_sysfs_value(filename, &size) < 0) {
+			RTE_LOG(ERR, EAL,
+					"%s(): cannot parse size of %s\n",
+					__func__, dirname);
+			return -1;
+		}
+
+		/* get mapping physical address */
+		snprintf(filename, sizeof(filename),
+				"%s/addr", dirname);
+		if (vmbus_parse_sysfs_value(filename, &maps[i].phaddr) < 0) {
+			RTE_LOG(ERR, EAL,
+					"%s(): cannot parse addr of %s\n",
+					__func__, dirname);
+			return -1;
+		}
+
+		/* reject values that do not fit the types used for mmap() */
+		if ((offset > OFF_MAX) || (size > SIZE_MAX)) {
+			RTE_LOG(ERR, EAL,
+					"%s(): offset/size exceed system max value\n",
+					__func__);
+			return -1;
+		}
+
+		maps[i].offset = offset;
+		maps[i].size = size;
+	}
+	/* i is the number of entries filled in */
+	return i;
+}
+
+/* maximum time to wait that /dev/uioX appears */
+#define UIO_DEV_WAIT_TIMEOUT 3 /* seconds */
+
+/*
+ * Map a particular resource from a file.
+ *
+ * The UIO device devname is opened on first use and the descriptor is
+ * cached in dev->uio_fd so that later maps of the same device reuse it.
+ * In non-primary processes the descriptor is closed again after mapping.
+ *
+ * requested_addr may be NULL to let the kernel choose the address; when
+ * it is not NULL the mapping must end up exactly there.
+ *
+ * Returns the mapped address, or NULL on failure.
+ */
+static void *
+vmbus_map_resource(struct rte_vmbus_device *dev, void *requested_addr,
+		const char *devname, off_t offset, size_t size)
+{
+	int fd;
+	void *mapaddr;
+
+	/* reuse the descriptor cached by a previous call, if any */
+	if (dev->uio_fd <= 0)
+		fd = open(devname, O_RDWR);
+	else
+		fd = dev->uio_fd;
+
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+				devname, strerror(errno));
+		return NULL;
+	}
+
+	dev->uio_fd = fd;
+	/* Map the memory resource of device */
+	mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
+			MAP_SHARED, fd, offset);
+	if (mapaddr == MAP_FAILED ||
+			(requested_addr != NULL && mapaddr != requested_addr)) {
+		RTE_LOG(ERR, EAL,
+			"%s(): cannot mmap(%s(%d), %p, 0x%lx, 0x%lx):"
+			" %s (%p)\n", __func__, devname, fd, requested_addr,
+			(unsigned long)size, (unsigned long)offset,
+			strerror(errno), mapaddr);
+		/* do not leave a mapping at an unexpected address behind */
+		if (mapaddr != MAP_FAILED)
+			munmap(mapaddr, size);
+		close(fd);
+		/* forget the descriptor so it is not reused after close() */
+		dev->uio_fd = -1;
+		return NULL;
+	}
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		close(fd);
+		/* the next map of this device must reopen the UIO device */
+		dev->uio_fd = -1;
+	}
+
+	RTE_LOG(DEBUG, EAL, "  VMBUS memory mapped at %p\n", mapaddr);
+
+	return mapaddr;
+}
+
+/*
+ * Map the resources of a vmbus device in virtual memory.
+ *
+ * The UIO device bound to the vmbus device is looked up in sysfs, its map
+ * descriptions are read and every map is mmap()ed through /dev/uioX.  The
+ * resulting addresses are recorded in dev->mem_resource[].
+ *
+ * Returns 0 on success and a negative value if the device is not managed
+ * by UIO or if a map cannot be described or mapped.
+ */
+int
+vmbus_uio_map_resource(struct rte_vmbus_device *dev)
+{
+	int i;
+	struct dirent *e;
+	DIR *dir;
+	char dirname[PATH_MAX];
+	char dirname2[PATH_MAX];
+	char devname[PATH_MAX]; /* contains the /dev/uioX */
+	void *mapaddr;
+	unsigned uio_num = 0;
+	uint64_t phaddr;
+	uint64_t offset;
+	uint64_t pagesz;
+	ssize_t nb_maps;
+	struct rte_vmbus_id *loc = &dev->id;
+	struct uio_resource *uio_res;
+	struct uio_map *maps;
+
+	/* depending on kernel version, uio can be located in uio/uioX
+	 * or uio:uioX */
+	snprintf(dirname, sizeof(dirname),
+			"/sys/bus/vmbus/devices/" VMBUS_PRI_FMT "/uio", loc->sysfs_num);
+
+	dir = opendir(dirname);
+	if (dir == NULL) {
+		/* retry with the parent directory */
+		snprintf(dirname, sizeof(dirname),
+				"/sys/bus/vmbus/devices/" VMBUS_PRI_FMT, loc->sysfs_num);
+		dir = opendir(dirname);
+
+		if (dir == NULL) {
+			RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname);
+			return -1;
+		}
+	}
+
+	/* take the first file starting with "uio" */
+	while ((e = readdir(dir)) != NULL) {
+		/* format could be uio%d ...*/
+		int shortprefix_len = sizeof("uio") - 1;
+		/* ... or uio:uio%d */
+		int longprefix_len = sizeof("uio:uio") - 1;
+		char *endptr;
+
+		if (strncmp(e->d_name, "uio", 3) != 0)
+			continue;
+
+		/* first try uio%d; at least one digit must follow the prefix */
+		errno = 0;
+		uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10);
+		if (errno == 0 && endptr != (e->d_name + shortprefix_len)) {
+			snprintf(dirname2, sizeof(dirname2),
+					"%s/uio%u", dirname, uio_num);
+			break;
+		}
+
+		/* then try uio:uio%d, but never parse past a shorter name */
+		if (strncmp(e->d_name, "uio:uio", longprefix_len) != 0)
+			continue;
+		errno = 0;
+		uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10);
+		if (errno == 0 && endptr != (e->d_name + longprefix_len)) {
+			snprintf(dirname2, sizeof(dirname2),
+					"%s/uio:uio%u", dirname, uio_num);
+			break;
+		}
+	}
+	closedir(dir);
+
+	/* No uio resource found */
+	if (e == NULL) {
+		RTE_LOG(WARNING, EAL,
+			VMBUS_PRI_FMT" not managed by UIO driver, skipping\n",
+			loc->sysfs_num);
+		return -1;
+	}
+
+	/* allocate the mapping details for secondary processes*/
+	uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
+	if (uio_res == NULL) {
+		RTE_LOG(ERR, EAL,
+				"%s(): cannot store uio mmap details\n", __func__);
+		return -1;
+	}
+
+	snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
+	snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
+	memcpy(&uio_res->vmbus_addr, &dev->id, sizeof(uio_res->vmbus_addr));
+
+	/* collect info about device mappings */
+	nb_maps = vmbus_uio_get_mappings(dirname2, uio_res->maps,
+			sizeof(uio_res->maps) / sizeof(uio_res->maps[0]));
+	if (nb_maps < 0) {
+		rte_free(uio_res);
+		return nb_maps;
+	}
+
+	RTE_LOG(DEBUG, EAL, "Found %d memory maps for device "VMBUS_PRI_FMT"\n",
+			(int)nb_maps, loc->sysfs_num);
+
+	uio_res->nb_maps = nb_maps;
+
+	pagesz = sysconf(_SC_PAGESIZE);
+
+	maps = uio_res->maps;
+	for (i = 0; i != VMBUS_MAX_RESOURCE; i++) {
+		/* entries that were not filled in are still zeroed */
+		phaddr = maps[i].phaddr;
+		if (phaddr == 0)
+			continue;
+
+		RTE_LOG(DEBUG, EAL, "	mem_map%d: addr=0x%lx len = %lu\n",
+				i,
+				maps[i].phaddr,
+				maps[i].size);
+
+		if (i != nb_maps) {
+			/* UIO selects map N with a file offset of N pages */
+			offset = i * pagesz;
+			mapaddr = vmbus_map_resource(dev, NULL, devname, (off_t)offset,
+					(size_t)maps[i].size);
+			if (mapaddr == NULL) {
+				rte_free(uio_res);
+				return -1;
+			}
+
+			/* Important: offset for mapping can be non-zero, pad the addr */
+			mapaddr = ((char *)mapaddr + maps[i].offset);
+			maps[i].addr = mapaddr;
+			maps[i].offset = offset;
+			dev->mem_resource[i].addr = mapaddr;
+			dev->mem_resource[i].phys_addr = phaddr;
+			dev->mem_resource[i].len = maps[i].size;
+		}
+	}
+
+	/* NOTE(review): uio_res is not linked into any list in this function,
+	 * so the saved details cannot be found again afterwards - confirm
+	 * where they are meant to be stored for secondary processes. */
+	return 0;
+}
+
+/*
+ * Ordering predicate for two VMBUS device ids: non-zero when the
+ * device_id of the first one is greater than that of the second one.
+ */
+static int
+vmbus_compare(struct rte_vmbus_id *id, struct rte_vmbus_id *id2)
+{
+	if (id->device_id > id2->device_id)
+		return 1;
+	return 0;
+}
+
+/*
+ * Scan one vmbus sysfs entry, and fill the devices list from it.
+ *
+ * name is the directory name of the entry below SYSFS_VMBUS_DEVICES.
+ * Devices whose class_id does not start with VMBUS_NETWORK_DEVICE are
+ * skipped.  Returns 0 when the device was added or skipped, -1 on error.
+ */
+static int
+vmbus_scan_one(const char *name)
+{
+	char filename[PATH_MAX];
+	char buf[BUFSIZ];
+	char dirname[PATH_MAX];
+	unsigned long tmp;
+	unsigned int sysfs_num;
+	struct rte_vmbus_device *dev;
+	FILE *f;
+
+	dev = rte_zmalloc("vmbus_device", sizeof(*dev), 0);
+	if (dev == NULL)
+		return -1;
+
+	snprintf(dirname, sizeof(dirname), "%s/%s",
+		 SYSFS_VMBUS_DEVICES, name);
+
+	/* parse directory name in sysfs.  this does not always reflect
+	 * the device id read below.
+	 */
+	if (sscanf(name, VMBUS_PRI_FMT, &sysfs_num) != 1) {
+		RTE_LOG(ERR, EAL, "Unable to parse vmbus sysfs name\n");
+		rte_free(dev);
+		return -1;
+	}
+	dev->id.sysfs_num = sysfs_num;
+
+	/* get device id */
+	snprintf(filename, sizeof(filename), "%s/id", dirname);
+	if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+		rte_free(dev);
+		return -1;
+	}
+	dev->id.device_id = (uint16_t)tmp;
+
+	/* get monitor id */
+	snprintf(filename, sizeof(filename), "%s/monitor_id", dirname);
+	if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+		rte_free(dev);
+		return -1;
+	}
+	dev->vmbus_monitor_id = tmp;
+
+	/* compare class_id of device with {f8615163-df3e-46c5-913f-f2d2f965ed0e} */
+	snprintf(filename, sizeof(filename), "%s/class_id", dirname);
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+				__func__, filename);
+		rte_free(dev);
+		return -1;
+	}
+	if (fgets(buf, sizeof(buf), f) == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+				__func__, filename);
+		fclose(f);
+		rte_free(dev);
+		return -1;
+	}
+	fclose(f);
+
+	/* only the prefix is compared; buf still holds whatever followed it */
+	if (strncmp(buf, VMBUS_NETWORK_DEVICE, strlen(VMBUS_NETWORK_DEVICE))) {
+		RTE_LOG(DEBUG, EAL, "%s(): skip vmbus_0_%u with class_id = %s",
+				__func__, dev->id.sysfs_num, buf);
+		rte_free(dev);
+		return 0;
+	}
+
+	/* device is valid, add in list (sorted) */
+	RTE_LOG(DEBUG, EAL, "Adding vmbus device %d\n", dev->id.device_id);
+	if (!TAILQ_EMPTY(&vmbus_device_list)) {
+		struct rte_vmbus_device *dev2 = NULL;
+
+		/* insert before the first device that is not smaller */
+		TAILQ_FOREACH(dev2, &vmbus_device_list, next) {
+			if (vmbus_compare(&dev->id, &dev2->id))
+				continue;
+
+			TAILQ_INSERT_BEFORE(dev2, dev, next);
+			return 0;
+		}
+	}
+
+	/* empty list, or the new device sorts after every existing one */
+	TAILQ_INSERT_TAIL(&vmbus_device_list, dev, next);
+
+	return 0;
+}
+
+/*
+ * Check that a sysfs directory name looks like a vmbus device entry,
+ * i.e. that its last '_' is followed by a decimal number and nothing
+ * else (the expected format is 'vmbus_0_%d').
+ *
+ * buf is the NUL-terminated entry name and bufsize the size of the buffer
+ * holding it.  Returns 0 if the name matches, -1 otherwise.
+ */
+static int
+check_vmbus_device(const char *buf, int bufsize)
+{
+	char *n, *buf_copy, *endp;
+	unsigned long err;
+	int saved_errno;
+	int bad;
+
+	/* the format is 'vmbus_0_%d' */
+	n = strrchr(buf, '_');
+	if (n == NULL)
+		return -1;
+	n++;
+	buf_copy = strndup(n, bufsize);
+	if (buf_copy == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): failed to strndup: %s\n",
+				__func__, strerror(errno));
+		return -1;
+	}
+
+	/* strtoul() only sets errno on failure, so clear any stale value */
+	errno = 0;
+	err = strtoul(buf_copy, &endp, 10);
+	saved_errno = errno;
+
+	/* endp points into buf_copy: evaluate it before the copy is freed;
+	 * a name without any digit after the '_' is rejected as well */
+	bad = (endp == buf_copy) || (*endp != '\0') ||
+		(err == ULONG_MAX && saved_errno == ERANGE);
+	free(buf_copy);
+
+	if (bad) {
+		RTE_LOG(ERR, EAL, "%s(): can't parse devid: %s\n",
+				__func__, strerror(saved_errno));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Walk the vmbus devices directory in sysfs and hand every entry that
+ * looks like a vmbus device to vmbus_scan_one().
+ * Returns 0 on success (including when the directory does not exist),
+ * -1 if the directory cannot be opened or an entry cannot be scanned.
+ */
+static int
+vmbus_scan(void)
+{
+	struct dirent *entry;
+	int ret = 0;
+	DIR *sysdir = opendir(SYSFS_VMBUS_DEVICES);
+
+	if (sysdir == NULL) {
+		/* a missing directory is not treated as an error */
+		if (errno == ENOENT)
+			return 0;
+
+		RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
+			__func__, strerror(errno));
+		return -1;
+	}
+
+	for (entry = readdir(sysdir); entry != NULL; entry = readdir(sysdir)) {
+		/* ignore every entry whose name starts with a dot */
+		if (entry->d_name[0] == '.')
+			continue;
+
+		if (check_vmbus_device(entry->d_name, sizeof(entry->d_name)) != 0)
+			continue;
+
+		if (vmbus_scan_one(entry->d_name) < 0) {
+			ret = -1;
+			break;
+		}
+	}
+
+	closedir(sysdir);
+	return ret;
+}
+
+/*
+ * Init the VMBUS EAL subsystem: scan the bus unless VMBUS has been
+ * disabled in the internal configuration.
+ * Returns 0 on success or when VMBUS is disabled, -1 if the scan fails.
+ */
+int rte_eal_vmbus_init(void)
+{
+	/* the scan is not attempted at all when VMBUS is disabled */
+	if (!internal_config.no_vmbus && vmbus_scan() < 0) {
+		RTE_LOG(ERR, EAL, "%s(): Cannot scan vmbus\n", __func__);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Below is PROBE part of eal_vmbus library */
+
+/*
+ * Call the devinit() function of the driver for a device.
+ *
+ * The device id is not compared with the entries of the id_table: as soon
+ * as the table has one entry whose device_id is not VMBUS_ID_ANY the
+ * device is mapped and handed to the driver.
+ *
+ * Returns the devinit() result, 0 without initialization if the device is
+ * blacklisted, -1 if the resources cannot be mapped, and 1 if the
+ * id_table has no usable entry.
+ */
+int
+rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
+		struct rte_vmbus_device *dev)
+{
+	const struct rte_vmbus_id *id_table;
+
+	/* every path through the body returns, so at most one entry is used */
+	for (id_table = dr->id_table; id_table->device_id != VMBUS_ID_ANY; id_table++) {
+		const struct rte_vmbus_id *loc = &dev->id;
+
+		RTE_LOG(DEBUG, EAL, "VMBUS device "VMBUS_PRI_FMT"\n",
+				loc->sysfs_num);
+		RTE_LOG(DEBUG, EAL, "  probe driver: %s\n", dr->name);
+
+		/* no initialization when blacklisted, return without error */
+		if (dev->blacklisted) {
+			RTE_LOG(DEBUG, EAL, "  Device is blacklisted, not initializing\n");
+			return 0;
+		}
+
+		/* map the resources */
+		if (vmbus_uio_map_resource(dev) < 0)
+			return -1;
+
+		/* reference driver structure */
+		dev->driver = dr;
+
+		/* call the driver devinit() function */
+		return dr->devinit(dr, dev);
+	}
+
+	/* return positive value if driver is not found */
+	return 1;
+}
+
+/*
+ * Call the devinit() function of the registered drivers for the vmbus
+ * device, stopping at the first driver that either probes the device
+ * successfully or fails with an error.
+ * Returns 0 if a driver was found and probed, -1 if probing failed or no
+ * driver is found for this class of vmbus device.
+ * The present assumption is that we have drivers only for vmbus network
+ * devices. That's why we don't check driver's id_table now.
+ */
+static int
+vmbus_probe_all_drivers(struct rte_vmbus_device *dev)
+{
+	struct rte_vmbus_driver *dr = NULL;
+	int ret;
+
+	TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
+		ret = rte_eal_vmbus_probe_one_driver(dr, dev);
+		if (ret < 0) {
+			/* negative value is an error */
+			RTE_LOG(ERR, EAL, "Failed to probe driver %s\n", dr->name);
+			break;
+		}
+		if (ret > 0) {
+			/* positive value means driver not found */
+			RTE_LOG(DEBUG, EAL, "Driver %s not found\n", dr->name);
+			continue;
+		}
+
+		RTE_LOG(DEBUG, EAL, "OK. Driver was found and probed.\n");
+		return 0;
+	}
+	return -1;
+}
+
+
+/*
+ * Go through the vmbus devices found by the scan and try the registered
+ * drivers on each of them.  The result of probing an individual device
+ * is not propagated: the function always returns 0.
+ */
+int
+rte_eal_vmbus_probe(void)
+{
+	struct rte_vmbus_device *vdev = NULL;
+
+	TAILQ_FOREACH(vdev, &vmbus_device_list, next) {
+		RTE_LOG(DEBUG, EAL, "Probing driver for device %d ...\n",
+				vdev->id.device_id);
+		/* a device without a working driver is not fatal here */
+		(void)vmbus_probe_all_drivers(vdev);
+	}
+
+	return 0;
+}
+
+/* register vmbus driver: append it to the list walked when probing */
+void
+rte_eal_vmbus_register(struct rte_vmbus_driver *driver)
+{
+	TAILQ_INSERT_TAIL(&vmbus_driver_list, driver, next);
+}
+
+/* unregister vmbus driver: remove it from the list of registered drivers */
+void
+rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver)
+{
+	TAILQ_REMOVE(&vmbus_driver_list, driver, next);
+}
+
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 9577d17..9093966 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -379,6 +379,98 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev)
 	return 0;
 }
 
+#ifdef RTE_LIBRTE_HV_PMD
+/*
+ * devinit() callback installed for VMBUS ethernet drivers.
+ *
+ * Allocates an ethdev named "<device_id>_<sysfs_num>", allocates the
+ * driver private data in the primary process, and invokes the PMD
+ * eth_dev_init() function.
+ *
+ * Returns 0 on success, -ENOMEM if no ethdev can be allocated, or the
+ * value returned by eth_dev_init() when it fails.
+ */
+static int
+rte_vmbus_dev_init(struct rte_vmbus_driver *vmbus_drv,
+		   struct rte_vmbus_device *vmbus_dev)
+{
+	/* the cast treats the vmbus driver as the enclosing eth_driver */
+	struct eth_driver  *eth_drv = (struct eth_driver *)vmbus_drv;
+	struct rte_eth_dev *eth_dev;
+	char ethdev_name[RTE_ETH_NAME_MAX_LEN];
+	int diag;
+
+	snprintf(ethdev_name, RTE_ETH_NAME_MAX_LEN, "%u_%u",
+		 vmbus_dev->id.device_id, vmbus_dev->id.sysfs_num);
+
+	eth_dev = rte_eth_dev_allocate(ethdev_name, RTE_ETH_DEV_PCI);
+	if (eth_dev == NULL)
+		return -ENOMEM;
+
+	/* the private data is only allocated by the primary process */
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
+				  eth_drv->dev_private_size,
+				  RTE_CACHE_LINE_SIZE);
+		if (eth_dev->data->dev_private == NULL)
+			rte_panic("Cannot allocate memzone for private port data\n");
+	}
+	eth_dev->vmbus_dev = vmbus_dev;
+	eth_dev->driver = eth_drv;
+	eth_dev->data->rx_mbuf_alloc_failed = 0;
+
+	/* init user callbacks */
+	TAILQ_INIT(&(eth_dev->link_intr_cbs));
+
+	/*
+	 * Set the default maximum frame size.
+	 */
+	eth_dev->data->mtu = ETHER_MTU;
+
+	/* Invoke PMD device initialization function */
+	diag = (*eth_drv->eth_dev_init)(eth_dev);
+	if (diag == 0)
+		return 0;
+
+	/* initialization failed: undo the allocation done above */
+	PMD_DEBUG_TRACE("driver %s: eth_dev_init(device_id=0x%x)"
+			" failed\n", vmbus_drv->name,
+			(unsigned) vmbus_dev->id.device_id);
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		rte_free(eth_dev->data->dev_private);
+	nb_ports--;
+	return diag;
+}
+
+
+/*
+ * devuninit() callback installed for VMBUS ethernet drivers.
+ *
+ * Looks up the ethdev named "<device_id>_<sysfs_num>", invokes the PMD
+ * eth_dev_uninit() function if there is one, and releases the port.
+ *
+ * Returns 0 on success, -EINVAL if vmbus_dev is NULL, -ENODEV if no
+ * matching ethdev is allocated, or the non-zero value returned by
+ * eth_dev_uninit().
+ */
+static int
+rte_vmbus_dev_uninit(struct rte_vmbus_device *vmbus_dev)
+{
+	const struct eth_driver *eth_drv;
+	struct rte_eth_dev *eth_dev;
+	char ethdev_name[RTE_ETH_NAME_MAX_LEN];
+	int ret;
+
+	if (vmbus_dev == NULL)
+		return -EINVAL;
+
+	/* same name as the one built when the device was initialized */
+	snprintf(ethdev_name, RTE_ETH_NAME_MAX_LEN, "%u_%u",
+		 vmbus_dev->id.device_id, vmbus_dev->id.sysfs_num);
+
+	eth_dev = rte_eth_dev_allocated(ethdev_name);
+	if (eth_dev == NULL)
+		return -ENODEV;
+
+	eth_drv = (const struct eth_driver *)vmbus_dev->driver;
+
+	/* Invoke PMD device uninit function */
+	if (*eth_drv->eth_dev_uninit) {
+		ret = (*eth_drv->eth_dev_uninit)(eth_dev);
+		if (ret)
+			return ret;
+	}
+
+	/* free ether device */
+	rte_eth_dev_release_port(eth_dev);
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		rte_free(eth_dev->data->dev_private);
+
+	/* pci_dev shares its storage with vmbus_dev in struct rte_eth_dev */
+	eth_dev->pci_dev = NULL;
+	eth_dev->driver = NULL;
+	eth_dev->data = NULL;
+
+	return 0;
+}
+#endif
+
 /**
  * Register an Ethernet [Poll Mode] driver.
  *
@@ -396,9 +488,22 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev)
 void
 rte_eth_driver_register(struct eth_driver *eth_drv)
 {
-	eth_drv->pci_drv.devinit = rte_eth_dev_init;
-	eth_drv->pci_drv.devuninit = rte_eth_dev_uninit;
-	rte_eal_pci_register(&eth_drv->pci_drv);
+	switch (eth_drv->bus_type) {
+	case RTE_BUS_PCI:
+		eth_drv->pci_drv.devinit = rte_eth_dev_init;
+		eth_drv->pci_drv.devuninit = rte_eth_dev_uninit;
+		rte_eal_pci_register(&eth_drv->pci_drv);
+		break;
+#ifdef RTE_LIBRTE_HV_PMD
+	case RTE_BUS_VMBUS:
+		eth_drv->vmbus_drv.devinit = rte_vmbus_dev_init;
+		eth_drv->vmbus_drv.devuninit = rte_vmbus_dev_uninit;
+		rte_eal_vmbus_register(&eth_drv->vmbus_drv);
+		break;
+#endif
+	default:
+		rte_panic("unknown bus type %u\n", eth_drv->bus_type);
+	}
 }
 
 static int
@@ -1351,6 +1456,9 @@ rte_eth_has_link_state(uint8_t port_id)
 	}
 	dev = &rte_eth_devices[port_id];
 
+	if (dev->driver->bus_type != RTE_BUS_PCI)
+		return 0;
+
 	return (dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) != 0;
 }
 
@@ -1901,9 +2009,17 @@ rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info)
 
 	FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get);
 	(*dev->dev_ops->dev_infos_get)(dev, dev_info);
-	dev_info->pci_dev = dev->pci_dev;
-	if (dev->driver)
-		dev_info->driver_name = dev->driver->pci_drv.name;
+
+	if (dev->driver) {
+		switch (dev->driver->bus_type) {
+		case RTE_BUS_PCI:
+			dev_info->driver_name = dev->driver->pci_drv.name;
+			dev_info->pci_dev = dev->pci_dev;
+			break;
+		case RTE_BUS_VMBUS:
+			dev_info->driver_name = dev->driver->vmbus_drv.name;
+		}
+	}
 }
 
 void
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 991023b..9e08f3e 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -178,6 +178,7 @@ extern "C" {
 #include <rte_log.h>
 #include <rte_interrupts.h>
 #include <rte_pci.h>
+#include <rte_vmbus.h>
 #include <rte_dev.h>
 #include <rte_devargs.h>
 #include <rte_mbuf.h>
@@ -1477,7 +1478,10 @@ struct rte_eth_dev {
 	struct rte_eth_dev_data *data;  /**< Pointer to device data */
 	const struct eth_driver *driver;/**< Driver for this device */
 	const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
-	struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
+	union {
+		struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
+		struct rte_vmbus_device *vmbus_dev; /**< VMBUS info. supplied by probing */
+	};
 	/** User application callbacks for NIC interrupts */
 	struct rte_eth_dev_cb_list link_intr_cbs;
 	/**
@@ -1696,7 +1700,14 @@ typedef int (*eth_dev_uninit_t)(struct rte_eth_dev *eth_dev);
  * - The size of the private data to allocate for each matching device.
  */
 struct eth_driver {
-	struct rte_pci_driver pci_drv;    /**< The PMD is also a PCI driver. */
+	union {
+		struct rte_pci_driver pci_drv;    /**< The PMD is also a PCI driver. */
+		struct rte_vmbus_driver vmbus_drv;/**< The PMD is also a VMBUS drv. */
+	};
+	enum {
+		RTE_BUS_PCI=0,
+		RTE_BUS_VMBUS
+	} bus_type;			  /**< Device bus type. */
 	eth_dev_init_t eth_dev_init;      /**< Device init function. */
 	eth_dev_uninit_t eth_dev_uninit;  /**< Device uninit function. */
 	unsigned int dev_private_size;    /**< Size of device private data. */
-- 
2.1.4

^ permalink raw reply	[relevance 1%]

* Re: [dpdk-dev] [PATCH 3/7] hv: add basic vmbus support
  2015-04-20 21:54  2% ` [dpdk-dev] [PATCH 3/7] hv: add basic vmbus support Stephen Hemminger
@ 2015-04-21 12:23  0%   ` Neil Horman
  0 siblings, 0 replies; 200+ results
From: Neil Horman @ 2015-04-21 12:23 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, alexmay

On Mon, Apr 20, 2015 at 02:54:10PM -0700, Stephen Hemminger wrote:
> The hyper-v device driver forces the base EAL code to change
> to support multiple bus types. This is done by changing the pci_device
> in ether driver to a generic union.
> 
> As much as possible this is done in a backwards source-compatible
> way. It will break ABI for device drivers.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>  lib/librte_eal/common/Makefile             |   2 +-
>  lib/librte_eal/common/eal_common_options.c |   5 ++
>  lib/librte_eal/common/eal_internal_cfg.h   |   1 +
>  lib/librte_eal/common/eal_options.h        |   2 +
>  lib/librte_eal/common/eal_private.h        |  10 +++
>  lib/librte_eal/linuxapp/eal/Makefile       |   3 +
>  lib/librte_eal/linuxapp/eal/eal.c          |  11 +++
>  lib/librte_ether/rte_ethdev.c              | 128 +++++++++++++++++++++++++++--
>  lib/librte_ether/rte_ethdev.h              |  15 +++-
>  9 files changed, 168 insertions(+), 9 deletions(-)
> 
> diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
> index 3ea3bbf..202485e 100644
> --- a/lib/librte_eal/common/Makefile
> +++ b/lib/librte_eal/common/Makefile
> @@ -33,7 +33,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
>  
>  INC := rte_branch_prediction.h rte_common.h
>  INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
> -INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
> +INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h rte_vmbus.h
rte_vmbus.h isn't included in this patch set and breaks the build

Neil

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v5 00/12] mbuf: enhancements of mbuf clones
  @ 2015-04-21 11:50  3%   ` Neil Horman
  0 siblings, 0 replies; 200+ results
From: Neil Horman @ 2015-04-21 11:50 UTC (permalink / raw)
  To: Olivier Matz; +Cc: dev

On Tue, Apr 21, 2015 at 11:55:10AM +0200, Olivier Matz wrote:
> The first objective of this series is to fix the support of indirect
> mbufs when the application reserves a private area in mbufs. It also
> removes the limitation that rte_pktmbuf_clone() is only allowed on
> direct (non-cloned) mbufs. The series also contains some enhancements
> and fixes in the mbuf area that makes the implementation of the
> last patches easier.
> 
> Changes in v5:
> - update rte_mbuf_version.map to fix compilation with shared libraries
> 
> Changes in v4:
> - do not add a priv_size in mbuf structure, having a proper accessor
>   to read it from the pool private area is clearer
> - prepend some reworks in the mbuf area to simplify the implementation
>   (fix mbuf initialization by not using a hardcoded mbuf size, add
>   accessors for mbuf pool private area, add a helper to create a
>   mbuf pool)
> 
> Changes in v3:
> - a mbuf can now attach to another one that have a different private
>   size. In this case, the m->priv_size corresponds to the size of the
>   private area of the direct mbuf.
> - add comments to reflect these changes
> - minor style modifications
> 
> Changes in v2:
> - do not change the use of MBUF_EXT_MEM() in vhost
> - change rte_mbuf_from_baddr() to rte_mbuf_from_indirect(), removing
>   one parameter
> - fix and rework rte_pktmbuf_detach()
> - move m->priv_size in second mbuf cache line
> - fix mbuf free in test error case
> 
> Olivier Matz (12):
>   mbuf: fix mbuf data room size calculation rte_pktmbuf_pool_init
>   examples: always initialize mbuf_pool private area
>   mbuf: add accessors to get data room size and private size
>   mbuf: fix rte_pktmbuf_init when mbuf private size is not zero
>   testpmd: use standard functions to initialize mbufs and mbuf pool
>   mbuf: introduce a new helper to create a mbuf pool
>   apps: use rte_pktmbuf_pool_create to create mbuf pools
>   mbuf: fix clone support when application uses private mbuf data
>   mbuf: allow to clone an indirect mbuf
>   test/mbuf: rename mc variable in m
>   test/mbuf: enhance mbuf refcnt test
>   test/mbuf: verify that cloning a clone works properly
> 
>  app/test-pipeline/init.c                           |  15 +-
>  app/test-pmd/testpmd.c                             |  78 +--------
>  app/test/test_distributor.c                        |  10 +-
>  app/test/test_distributor_perf.c                   |  10 +-
>  app/test/test_kni.c                                |  16 +-
>  app/test/test_link_bonding.c                       |  10 +-
>  app/test/test_link_bonding_mode4.c                 |  12 +-
>  app/test/test_mbuf.c                               | 110 +++++++++---
>  app/test/test_pmd_perf.c                           |  11 +-
>  app/test/test_pmd_ring.c                           |  10 +-
>  app/test/test_reorder.c                            |  10 +-
>  app/test/test_sched.c                              |  16 +-
>  app/test/test_table.c                              |   9 +-
>  app/test/test_table.h                              |   3 +-
>  doc/guides/rel_notes/updating_apps.rst             |  16 ++
>  examples/bond/main.c                               |  10 +-
>  examples/distributor/main.c                        |  11 +-
>  examples/dpdk_qat/main.c                           |  10 +-
>  examples/exception_path/main.c                     |  14 +-
>  examples/ip_fragmentation/main.c                   |  18 +-
>  examples/ip_pipeline/init.c                        |  28 +--
>  examples/ipv4_multicast/main.c                     |  21 +--
>  examples/kni/main.c                                |  12 +-
>  examples/l2fwd-ivshmem/host/host.c                 |  10 +-
>  examples/l2fwd-jobstats/main.c                     |  10 +-
>  examples/l2fwd/main.c                              |  11 +-
>  examples/l3fwd-acl/main.c                          |  11 +-
>  examples/l3fwd-power/main.c                        |  11 +-
>  examples/l3fwd-vf/main.c                           |  12 +-
>  examples/l3fwd/main.c                              |  10 +-
>  examples/link_status_interrupt/main.c              |  10 +-
>  examples/load_balancer/init.c                      |  12 +-
>  examples/load_balancer/main.h                      |   4 +-
>  .../client_server_mp/mp_server/init.c              |  10 +-
>  examples/multi_process/symmetric_mp/main.c         |  10 +-
>  examples/netmap_compat/bridge/bridge.c             |  12 +-
>  examples/packet_ordering/main.c                    |  11 +-
>  examples/qos_meter/main.c                          |   7 +-
>  examples/qos_sched/init.c                          |  10 +-
>  examples/qos_sched/main.h                          |   2 +-
>  examples/quota_watermark/include/conf.h            |   2 +-
>  examples/quota_watermark/qw/main.c                 |   7 +-
>  examples/rxtx_callbacks/main.c                     |  11 +-
>  examples/skeleton/basicfwd.c                       |  13 +-
>  examples/vhost/main.c                              |  31 ++--
>  examples/vhost_xen/main.c                          |  11 +-
>  examples/vmdq/main.c                               |  11 +-
>  examples/vmdq_dcb/main.c                           |  10 +-
>  lib/librte_ether/rte_ethdev.c                      |   4 +-
>  lib/librte_mbuf/rte_mbuf.c                         |  63 +++++--
>  lib/librte_mbuf/rte_mbuf.h                         | 189 ++++++++++++++++-----
>  lib/librte_mbuf/rte_mbuf_version.map               |   8 +
>  lib/librte_pmd_af_packet/rte_eth_af_packet.c       |   6 +-
>  lib/librte_pmd_bond/rte_eth_bond_alb.c             |  16 +-
>  lib/librte_pmd_e1000/em_rxtx.c                     |   5 +-
>  lib/librte_pmd_e1000/igb_rxtx.c                    |  12 +-
>  lib/librte_pmd_fm10k/fm10k_ethdev.c                |   6 +-
>  lib/librte_pmd_i40e/i40e_ethdev_vf.c               |   6 +-
>  lib/librte_pmd_i40e/i40e_rxtx.c                    |  15 +-
>  lib/librte_pmd_ixgbe/ixgbe_rxtx.c                  |  12 +-
>  lib/librte_pmd_pcap/rte_eth_pcap.c                 |   5 +-
>  lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c              |   7 +-
>  62 files changed, 507 insertions(+), 566 deletions(-)
> 
> -- 
> 2.1.4
> 
> 


Applies, builds, maintains ABI.  Seems to work in test app cases.
Series
Acked-by: Neil Horman <nhorman@tuxdriver.com>

^ permalink raw reply	[relevance 3%]

* [dpdk-dev] [PATCH 3/7] hv: add basic vmbus support
  @ 2015-04-20 21:54  2% ` Stephen Hemminger
  2015-04-21 12:23  0%   ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Stephen Hemminger @ 2015-04-20 21:54 UTC (permalink / raw)
  To: alexmay; +Cc: dev

The hyper-v device driver forces the base EAL code to change
to support multiple bus types. This is done by changing the pci_device
in ether driver to a generic union.

As much as possible this is done in a backwards source-compatible
way. It will break ABI for device drivers.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/librte_eal/common/Makefile             |   2 +-
 lib/librte_eal/common/eal_common_options.c |   5 ++
 lib/librte_eal/common/eal_internal_cfg.h   |   1 +
 lib/librte_eal/common/eal_options.h        |   2 +
 lib/librte_eal/common/eal_private.h        |  10 +++
 lib/librte_eal/linuxapp/eal/Makefile       |   3 +
 lib/librte_eal/linuxapp/eal/eal.c          |  11 +++
 lib/librte_ether/rte_ethdev.c              | 128 +++++++++++++++++++++++++++--
 lib/librte_ether/rte_ethdev.h              |  15 +++-
 9 files changed, 168 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index 3ea3bbf..202485e 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -33,7 +33,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 
 INC := rte_branch_prediction.h rte_common.h
 INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
-INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
+INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h rte_vmbus.h
 INC += rte_pci_dev_ids.h rte_per_lcore.h rte_random.h
 INC += rte_rwlock.h rte_tailq.h rte_interrupts.h rte_alarm.h
 INC += rte_string_fns.h rte_version.h
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 8fcb1ab..76a3394 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -80,6 +80,7 @@ eal_long_options[] = {
 	{OPT_NO_HPET,           0, NULL, OPT_NO_HPET_NUM          },
 	{OPT_NO_HUGE,           0, NULL, OPT_NO_HUGE_NUM          },
 	{OPT_NO_PCI,            0, NULL, OPT_NO_PCI_NUM           },
+	{OPT_NO_VMBUS,		0, NULL, OPT_NO_VMBUS_NUM	  },
 	{OPT_NO_SHCONF,         0, NULL, OPT_NO_SHCONF_NUM        },
 	{OPT_PCI_BLACKLIST,     1, NULL, OPT_PCI_BLACKLIST_NUM    },
 	{OPT_PCI_WHITELIST,     1, NULL, OPT_PCI_WHITELIST_NUM    },
@@ -726,6 +727,10 @@ eal_parse_common_option(int opt, const char *optarg,
 		conf->no_pci = 1;
 		break;
 
+	case OPT_NO_VMBUS_NUM:
+		conf->no_vmbus = 1;
+		break;
+
 	case OPT_NO_HPET_NUM:
 		conf->no_hpet = 1;
 		break;
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index e2ecb0d..0e7de34 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -66,6 +66,7 @@ struct internal_config {
 	volatile unsigned no_hugetlbfs;   /**< true to disable hugetlbfs */
 	volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
 	volatile unsigned no_pci;         /**< true to disable PCI */
+	volatile unsigned no_vmbus;	  /**< true to disable VMBUS */
 	volatile unsigned no_hpet;        /**< true to disable HPET */
 	volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
 										* instead of native TSC */
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index f6714d9..54f03dc 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -67,6 +67,8 @@ enum {
 	OPT_NO_HUGE_NUM,
 #define OPT_NO_PCI            "no-pci"
 	OPT_NO_PCI_NUM,
+#define OPT_NO_VMBUS          "no-vmbus"
+	OPT_NO_VMBUS_NUM,
 #define OPT_NO_SHCONF         "no-shconf"
 	OPT_NO_SHCONF_NUM,
 #define OPT_SOCKET_MEM        "socket-mem"
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 4acf5a0..039e9f3 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -180,6 +180,16 @@ int rte_eal_pci_close_one_driver(struct rte_pci_driver *dr,
 		struct rte_pci_device *dev);
 
 /**
+ * VMBUS related functions and structures
+ */
+int rte_eal_vmbus_init(void);
+
+struct rte_vmbus_driver;
+struct rte_vmbus_device;
+
+int rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
+		struct rte_vmbus_device *dev);
+/**
  * Init tail queues for non-EAL library structures. This is to allow
  * the rings, mempools, etc. lists to be shared among multiple processes
  *
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 01f7b70..acd5127 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -74,6 +74,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_alarm.c
 ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_ivshmem.c
 endif
+ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_vmbus.c
+endif
 
 # from common dir
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index bd770cf..86d0e31 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -70,6 +70,7 @@
 #include <rte_cpuflags.h>
 #include <rte_interrupts.h>
 #include <rte_pci.h>
+#include <rte_vmbus.h>
 #include <rte_devargs.h>
 #include <rte_common.h>
 #include <rte_version.h>
@@ -796,6 +797,11 @@ rte_eal_init(int argc, char **argv)
 
 	rte_eal_mcfg_complete();
 
+#ifdef RTE_LIBRTE_HV_PMD
+	if (rte_eal_vmbus_init() < 0)
+		RTE_LOG(ERR, EAL, "Cannot init VMBUS\n");
+#endif
+
 	TAILQ_FOREACH(solib, &solib_list, next) {
 		RTE_LOG(INFO, EAL, "open shared lib %s\n", solib->name);
 		solib->lib_handle = dlopen(solib->name, RTLD_NOW);
@@ -845,6 +851,11 @@ rte_eal_init(int argc, char **argv)
 	if (rte_eal_pci_probe())
 		rte_panic("Cannot probe PCI\n");
 
+#ifdef RTE_LIBRTE_HV_PMD
+	if (rte_eal_vmbus_probe() < 0)
+		rte_panic("Cannot probe VMBUS\n");
+#endif
+
 	return fctret;
 }
 
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 9577d17..9093966 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -379,6 +379,98 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev)
 	return 0;
 }
 
+#ifdef RTE_LIBRTE_HV_PMD
+static int
+rte_vmbus_dev_init(struct rte_vmbus_driver *vmbus_drv,
+		   struct rte_vmbus_device *vmbus_dev)
+{
+	struct eth_driver  *eth_drv = (struct eth_driver *)vmbus_drv;
+	struct rte_eth_dev *eth_dev;
+	char ethdev_name[RTE_ETH_NAME_MAX_LEN];
+	int diag;
+
+	snprintf(ethdev_name, RTE_ETH_NAME_MAX_LEN, "%u_%u",
+		 vmbus_dev->id.device_id, vmbus_dev->id.sysfs_num);
+
+	eth_dev = rte_eth_dev_allocate(ethdev_name, RTE_ETH_DEV_PCI);
+	if (eth_dev == NULL)
+		return -ENOMEM;
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
+				  eth_drv->dev_private_size,
+				  RTE_CACHE_LINE_SIZE);
+		if (eth_dev->data->dev_private == NULL)
+			rte_panic("Cannot allocate memzone for private port data\n");
+	}
+	eth_dev->vmbus_dev = vmbus_dev;
+	eth_dev->driver = eth_drv;
+	eth_dev->data->rx_mbuf_alloc_failed = 0;
+
+	/* init user callbacks */
+	TAILQ_INIT(&(eth_dev->link_intr_cbs));
+
+	/*
+	 * Set the default maximum frame size.
+	 */
+	eth_dev->data->mtu = ETHER_MTU;
+
+	/* Invoke PMD device initialization function */
+	diag = (*eth_drv->eth_dev_init)(eth_dev);
+	if (diag == 0)
+		return 0;
+
+	PMD_DEBUG_TRACE("driver %s: eth_dev_init(device_id=0x%x)"
+			" failed\n", vmbus_drv->name,
+			(unsigned) vmbus_dev->id.device_id);
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		rte_free(eth_dev->data->dev_private);
+	nb_ports--;
+	return diag;
+}
+
+
+static int
+rte_vmbus_dev_uninit(struct rte_vmbus_device *vmbus_dev)
+{
+	const struct eth_driver *eth_drv;
+	struct rte_eth_dev *eth_dev;
+	char ethdev_name[RTE_ETH_NAME_MAX_LEN];
+	int ret;
+
+	if (vmbus_dev == NULL)
+		return -EINVAL;
+
+	snprintf(ethdev_name, RTE_ETH_NAME_MAX_LEN, "%u_%u",
+		 vmbus_dev->id.device_id, vmbus_dev->id.sysfs_num);
+
+	eth_dev = rte_eth_dev_allocated(ethdev_name);
+	if (eth_dev == NULL)
+		return -ENODEV;
+
+	eth_drv = (const struct eth_driver *)vmbus_dev->driver;
+
+	/* Invoke PMD device uninit function */
+	if (*eth_drv->eth_dev_uninit) {
+		ret = (*eth_drv->eth_dev_uninit)(eth_dev);
+		if (ret)
+			return ret;
+	}
+
+	/* free ether device */
+	rte_eth_dev_release_port(eth_dev);
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		rte_free(eth_dev->data->dev_private);
+
+	eth_dev->pci_dev = NULL;
+	eth_dev->driver = NULL;
+	eth_dev->data = NULL;
+
+	return 0;
+}
+#endif
+
 /**
  * Register an Ethernet [Poll Mode] driver.
  *
@@ -396,9 +488,22 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev)
 void
 rte_eth_driver_register(struct eth_driver *eth_drv)
 {
-	eth_drv->pci_drv.devinit = rte_eth_dev_init;
-	eth_drv->pci_drv.devuninit = rte_eth_dev_uninit;
-	rte_eal_pci_register(&eth_drv->pci_drv);
+	switch (eth_drv->bus_type) {
+	case RTE_BUS_PCI:
+		eth_drv->pci_drv.devinit = rte_eth_dev_init;
+		eth_drv->pci_drv.devuninit = rte_eth_dev_uninit;
+		rte_eal_pci_register(&eth_drv->pci_drv);
+		break;
+#ifdef RTE_LIBRTE_HV_PMD
+	case RTE_BUS_VMBUS:
+		eth_drv->vmbus_drv.devinit = rte_vmbus_dev_init;
+		eth_drv->vmbus_drv.devuninit = rte_vmbus_dev_uninit;
+		rte_eal_vmbus_register(&eth_drv->vmbus_drv);
+		break;
+#endif
+	default:
+		rte_panic("unknown bus type %u\n", eth_drv->bus_type);
+	}
 }
 
 static int
@@ -1351,6 +1456,9 @@ rte_eth_has_link_state(uint8_t port_id)
 	}
 	dev = &rte_eth_devices[port_id];
 
+	if (dev->driver->bus_type != RTE_BUS_PCI)
+		return 0;
+
 	return (dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) != 0;
 }
 
@@ -1901,9 +2009,17 @@ rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info)
 
 	FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get);
 	(*dev->dev_ops->dev_infos_get)(dev, dev_info);
-	dev_info->pci_dev = dev->pci_dev;
-	if (dev->driver)
-		dev_info->driver_name = dev->driver->pci_drv.name;
+
+	if (dev->driver) {
+		switch (dev->driver->bus_type) {
+		case RTE_BUS_PCI:
+			dev_info->driver_name = dev->driver->pci_drv.name;
+			dev_info->pci_dev = dev->pci_dev;
+			break;
+		case RTE_BUS_VMBUS:
+			dev_info->driver_name = dev->driver->vmbus_drv.name;
+		}
+	}
 }
 
 void
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 991023b..9e08f3e 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -178,6 +178,7 @@ extern "C" {
 #include <rte_log.h>
 #include <rte_interrupts.h>
 #include <rte_pci.h>
+#include <rte_vmbus.h>
 #include <rte_dev.h>
 #include <rte_devargs.h>
 #include <rte_mbuf.h>
@@ -1477,7 +1478,10 @@ struct rte_eth_dev {
 	struct rte_eth_dev_data *data;  /**< Pointer to device data */
 	const struct eth_driver *driver;/**< Driver for this device */
 	const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
-	struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
+	union {
+		struct rte_pci_device *pci_dev; /**< PCI info. supplied by probig */
+		struct rte_vmbus_device *vmbus_dev; /**< VMBUS info. supplied by probing */
+	};
 	/** User application callbacks for NIC interrupts */
 	struct rte_eth_dev_cb_list link_intr_cbs;
 	/**
@@ -1696,7 +1700,14 @@ typedef int (*eth_dev_uninit_t)(struct rte_eth_dev *eth_dev);
  * - The size of the private data to allocate for each matching device.
  */
 struct eth_driver {
-	struct rte_pci_driver pci_drv;    /**< The PMD is also a PCI driver. */
+	union {
+		struct rte_pci_driver pci_drv;    /**< The PMD is also a PCI driver. */
+		struct rte_vmbus_driver vmbus_drv;/**< The PMD is also a VMBUS drv. */
+	};
+	enum {
+		RTE_BUS_PCI=0,
+		RTE_BUS_VMBUS
+	} bus_type;			  /**< Device bus type. */
 	eth_dev_init_t eth_dev_init;      /**< Device init function. */
 	eth_dev_uninit_t eth_dev_uninit;  /**< Device uninit function. */
 	unsigned int dev_private_size;    /**< Size of device private data. */
-- 
2.1.4

^ permalink raw reply	[relevance 2%]

* Re: [dpdk-dev] [RFC PATCH] ethdev: remove old flow director API
  2015-04-20 16:33  3% ` Neil Horman
@ 2015-04-20 16:45  0%   ` Venky Venkatesan
  2015-04-27 16:08  0%     ` Thomas Monjalon
  0 siblings, 1 reply; 200+ results
From: Venky Venkatesan @ 2015-04-20 16:45 UTC (permalink / raw)
  To: dev



On 04/20/2015 09:33 AM, Neil Horman wrote:
> On Mon, Apr 20, 2015 at 04:11:43PM +0200, Thomas Monjalon wrote:
>> It's time to remove this old API.
>> It seems some work is still needed to rely only on eth_ctrl API.
>> At least ixgbe, i40e and testpmd must be fixed.
>> Jingjing, do you think it's possible to remove all these structures
>> from rte_ethdev.h?
>>
>> Thanks
>>
> NAK.
>
> I'm certainly not opposed to removing the API's if they are truly no longer
> needed.  But they have been codified as part of the ABI, so the deprecation
> schedule needs to be followed.  Given what you've said above, it seems like that
> might be worthwhile anyway, as it will provide the needed runway to allow users
> to convert to the new API.
>
> Neil
+1 NAK. Agree with Neil.

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [RFC PATCH] ethdev: remove old flow director API
  @ 2015-04-20 16:33  3% ` Neil Horman
  2015-04-20 16:45  0%   ` Venky Venkatesan
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-04-20 16:33 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

On Mon, Apr 20, 2015 at 04:11:43PM +0200, Thomas Monjalon wrote:
> It's time to remove this old API.
> It seems some work is still needed to rely only on eth_ctrl API.
> At least ixgbe, i40e and testpmd must be fixed.
> Jingjing, do you think it's possible to remove all these structures
> from rte_ethdev.h?
> 
> Thanks
> 
NAK.

I'm certainly not opposed to removing the API's if they are truly no longer
needed.  But they have been codified as part of the ABI, so the deprecation
schedule needs to be followed.  Given what you've said above, it seems like that
might be worthwhile anyway, as it will provide the needed runway to allow users
to convert to the new API.

Neil

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] tools brainstorming
  @ 2015-04-13 15:02  2%   ` Neil Horman
  0 siblings, 0 replies; 200+ results
From: Neil Horman @ 2015-04-13 15:02 UTC (permalink / raw)
  To: Butler, Siobhan A; +Cc: dev

On Wed, Apr 08, 2015 at 10:43:53AM +0000, Butler, Siobhan A wrote:
> Hi all,
> To add to the tools brainstorming - I propose we use the following Coding Standards as the basis of guidelines on coding style going forward.
> The style outlined below is in alignment with the current convention used for the majority of the project.
> Any thoughts/suggestions or feedback welcome.
> Thanks
> Siobhan :)
> <siobhan.a.butler@intel.com>
> 
> 
> 
> Coding Style
> ~~~~~~~~~~
> 
> Description
> -----------
> 
> This document specifies the preferred style for source files in the DPDK source tree. 
> It is based on the Linux Kernel coding guidelines and the FreeBSD 7.2 Kernel Developer's Manual (see man style(9)), 
> but was heavily modified for the needs of the DPDK. Many of the style rules are implicit in the examples. 
> Be careful to check the examples before assuming that style is silent on an issue. 
> 
> General Guidelines
> ------------------
> 
> The rules and guidelines given in this document cannot cover every situation, so the following general guidelines should be used as a fallback: 
> The code style should be consistent within each individual file, and within each file in a given directory or module - in the case of creating new files 
> The primary reason for coding standards is to increase code readability and comprehensibility, therefore always use whatever option will make the code easiest to read. 
> 
> The following more specific recommendations apply to all sections, both for C and assembly code: 
> Line length is recommended to be not more than 80 characters, including comments. [Tab stop size should be assumed to be at least 4-characters wide] 
> Indentation should be to no more than 3 levels deep. 
> NOTE The above are recommendations, and not hard limits. However, it is expected that the recommendations should be followed in all but the rarest situations. 
> C Comment Style
> 
> Usual Comments
> --------------
> 
> These comments should be used in normal cases. To document a public API, a doxygen-like format must be used: refer to Doxygen Documentation. 
>  /*
>   * VERY important single-line comments look like this.
>   */
>  
>  /* Most single-line comments look like this. */
>  
>  /*
>   * Multi-line comments look like this.  Make them real sentences. Fill
>   * them so they look like real paragraphs.
>   */
> 
> License Header
> --------------
> 
> Each file should begin with a special comment tag which will contain the appropriate copyright and license for the file (Generally BSD License). 
> After any copyright header, a blank line should be left before any other contents, e.g. include statements in a C file. 
> 
> C Preprocessor Directives
> -------------------------
> 
> Header Includes
> 
> In DPDK sources, the include files should be ordered as following: 
>  libc includes (system includes first) 
>  DPDK EAL includes 
>  DPDK misc libraries includes 
>  application-specific includes 
> 
> Example: 
>  #include <stdio.h>
>  #include <stdlib.h>
>  
>  #include <rte_eal.h>
>  
>  #include <rte_ring.h>
>  #include <rte_mempool.h>
>  
>  #include "application.h"
It doesn't really matter to me, but for the sake of consistency, it might be
worthwhile mandating search path includes only (< >), and adding a -I . to the
CFLAGS in the Makefile.  That way a grep for "*.*<.*>" returns all your include
files.

> 
> 
> Global pathnames are defined in <paths.h>. Pathnames local to the program go in "pathnames.h" in the local directory. 
>  #include <paths.h>

This is a design issue, not a coding style issue.  Where an application chooses
to put its pathnames is its business, and not something we should codify here.
Also, paths.h doesn't exist, so it should probably not be referenced here.

> 
> 
> Leave another blank line before the user include files. 
>  #include "pathnames.h"         /* Local includes in double quotes. */
> 
> NOTE Please avoid, as much as possible, including headers from other headers file. Doing so should be properly explained and justified. 
> Headers should be protected against multiple inclusion with the usual: 

Are you sure you want to do that?
[nhorman@hmsreliant dpdk]$ find . -name '*.h' | xargs grep include | wc -l
1300

What would the justification be?  It's common practice to do this, so I'm not
sure why you would discourage it.

>  #ifndef _FILE_H_
>  #define _FILE_H_
>  
>  /* Code */
>  
>  #endif /* _FILE_H_ */
> 
> 
> Macros
> 
> Do not ``#define`` or declare names in the implementation namespace except for implementing application interfaces. 
> 

I'm not sure I understand what this means.  Can you clarify the intent here?

> The names of ``unsafe`` macros (ones that have side effects), and the names of macros for manifest constants, are all in uppercase. 
> 
> The expansions of expression-like macros are either a single token or have outer parentheses. If a macro is an inline expansion of a function, 
> the function name is all in lowercase and the macro has the same name all in uppercase. Right-justify the backslashes; 
> it makes it easier to read. If the macro encapsulates a compound statement, enclose it in a do loop, so that it can be used safely in if statements. 
> Any final statement-terminating semicolon should be supplied by the macro invocation rather than the macro, to make parsing easier for pretty-printers and editors. 
>  #define MACRO(x, y) do {                                        \
>          variable = (x) + (y);                                   \
>          (y) += 2;                                               \
>  }while (0)
> 
> NOTE Wherever possible, enums and typedefs should be preferred to macros, since they provide additional degrees 
> of type-safety and can allow compilers to emit extra warnings about unsafe code. 
> 
> Conditional Compilation
> -----------------------
> 
> When code is conditionally compiled using #ifdef or #if, a comment may be added following the matching #endif or #else to 
> permit the reader to easily discern where conditionally compiled code regions end. This comment should be used only for 
> (subjectively) long regions, regions greater than 20 lines, or where a series of nested #ifdef 's may be confusing to the reader. 
> Exceptions may be made for cases where code is conditionally not compiled for the purposes of lint(1), even though the uncompiled 
> region may be small. The comment should be separated from the #endif or #else by a single space. For short conditionally compiled regions, 
> a closing comment should not be used. 
> 
> The comment for #endif should match the expression used in the corresponding #if or #ifdef. The comment for #else and #elif 
> should match the inverse of the expression(s) used in the preceding #if and/or #elif statements. In the comments, 
> the subexpression defined(FOO) is abbreviated as FOO. For the purposes of comments, #ifndef FOO is treated as #if !defined(FOO). 
>  #ifdef KTRACE
>  #include <sys/ktrace.h>
>  #endif
>  
>  #ifdef COMPAT_43
>  /* A large region here, or other conditional code. */
>  #else /* !COMPAT_43 */
>  /* Or here. */
>  #endif /* COMPAT_43 */
>  
>  #ifndef COMPAT_43
>  /* Yet another large region here, or other conditional code. */
>  #else /* COMPAT_43 */
>  /* Or here. */
>  #endif /* !COMPAT_43 */
> 
> NOTE Conditional compilation should be used only when absolutely necessary, as it increases the number of target binaries that need to be built and tested. 
> C Types
> 
> Integers
> 
> For fixed/minimum-size integer values, the project uses the form uintXX_t (from stdint.h) instead of older BSD-style integer identifiers of the form u_intXX_t. 
> 
> Enumerations
> ------------
> 
> Enumeration values are all uppercase. 
>  enum enumtype { ONE, TWO } et;
> 
> 
> Bitfields
> ---------
> 
> The developer should group bitfields that are included in the same integer, as follows: 
>  struct grehdr {
>    uint16_t rec:3,
>        srr:1,
>        seq:1,
>        key:1,
>        routing:1,
>        csum:1,
>        version:3,
>        reserved:4,
>        ack:1;
>  /* ... */
>  }
> 
> 
> Variable Declarations
> ---------------------
> 
> In declarations, do not put any whitespace between asterisks and adjacent tokens, except for tokens that are identifiers related to types. 
> (These identifiers are the names of basic types, type qualifiers, and typedef-names other than the one being declared.) 
> Separate these identifiers from asterisks using a single space. 
> 
> Structure Declarations
> 
> When declaring variables in structures, declare them sorted by use, then by size (largest to smallest), and then in alphabetical order. 
> Alignment constraints may override the previous rules. The first category normally does not apply, but there are exceptions. 
> Each structure element gets its own line. Try to make the structure readable by aligning the member names using spaces as shown below. 
> Names following extremely long types, which therefore cannot be easily aligned with the rest, should be separated by a single space. 
>  struct foo {
>          struct foo      *next;          /* List of active foo. */
>          struct mumble   amumble;        /* Comment for mumble. */
>          int             bar;            /* Try to align the comments. */
>          struct verylongtypename *baz;   /* Won't fit with other members */
>  };
> 

This is going to cause conflicts with ABI preservation.  While it's fine to do
when creating a new structure, you need to be very careful about shuffling
structure members around on public facing structures. Recommend not mandating
this.  The other option is to allow this, but start converting public facing
APIs to use opaque types with get/set routines, so that library internals can
codify the offsets to member structures appropriately.


> 
> Major structures should be declared at the top of the file in which they are used, or in separate header files if they are used 
> in multiple source files. Use of the structures should be by separate declarations and should be extern if they are declared in a header file. 
> 
> Queues
> 
> Use queue(3) macros rather than rolling your own lists, whenever possible. Thus, the previous example would be better written: 
>  #include <sys/queue.h>
>  
>  struct foo {
>          LIST_ENTRY(foo) link;      /* Use queue macros for foo lists. */
>          struct mumble   amumble;   /* Comment for mumble. */
>          int             bar;       /* Try to align the comments. */
>          struct verylongtypename *baz;   /* Won't fit with other members */
>  };
>  LIST_HEAD(, foo) foohead;          /* Head of global foo list. */
> 
> 
> DPDK also provides an optimized way to store elements in lockless rings. This should be used in all data-path code, when there are several 
> consumer and/or producers to avoid locking for concurrent access. 
> 
You probably want to reference the api directly in some way here, so people can
go look up how to do that.

> Typedefs
> 
> Avoid using typedefs for structure types. For example, use: 
>  struct my_struct_type {
>  /* ... */
>  };
>  
>  struct my_struct_type my_var;
> 
> 
> rather than: 
>  typedef struct my_struct_type {
>  /* ... */
>  } my_struct_type;
>  
>  my_struct_type my_var
> 
> 
> Typedefs are problematic because they do not properly hide their underlying type; for example, you need to know if the typedef is 
> the structure itself, as shown above, or a pointer to the structure.
This isn't really true.  If you make the structure opaque, so that it references
an externally declared structure, then it's just a handle, and can be used without
type knowledge (assuming the appropriate API is built for it).

> In addition, they must be declared exactly once, whereas an 
> incomplete structure type can be mentioned as many times as necessary. Typedefs are difficult to use in stand-alone header files. 
> The header that defines the typedef must be included before the header that uses it,
This is an excellent reason to allow header include chains.

> or by the header that uses it (which causes namespace pollution), 
> or there must be a back-door mechanism for obtaining the typedef. 
> NOTE #defines used instead of typedefs also are problematic (since they do not propagate the pointer type correctly due to direct text replacement). 
> For example, ``#define pint int *`` does not work as expected, while ``typedef int *pint`` does work. As stated when discussing macros, typedefs 
> should be preferred to macros in cases like this. 
> When convention requires a typedef; make its name match the struct tag. Avoid typedefs ending in ``_t``, except as specified in Standard C or by POSIX. 
>  /* Make the structure name match the typedef. */
>  typedef struct bar {
>          int     level;
>  } BAR;
>  typedef int             foo;            /* This is foo. */
>  typedef const long      baz;            /* This is baz. */
> 

So, I'd suggest removing the explanation here.  The rule above seems reasonably
clear (don't typedef structures), but the reasoning sort of devolves into a
discussion on why typedefs and macros are hard (but still sometimes necessary
as referenced above).  I think it would be enough to say "DPDK prefers that
structures be used to codify complex data types as a matter of style".  It's
really the reason to do so.

Note also, in mandating this, you are hindering the development of API's that
use opaque data types.  You can still do it of course, but you have to be sure
to define your data types as anonymous structures:
extern struct foo;
rather than new types.  Not a big deal, but something to be aware of.

> 
> C Function Definition, Declaration and Use
> 
> Prototypes
> 
> It is recommended, but not required that all functions are prototyped somewhere. 

IIRC it is actually required at the moment because public functions with no
prototypes generate warnings (-Wmissing-prototypes is currently implied in one of
the gcc warning options).

> 
> Any function prototypes for private functions (that is, functions not used elsewhere) go at the top of the first source module. Functions 
> local to one source module should be declared static. 
> 
> Functions used from other parts of code (external API) must be prototyped in the relevant include file. 
> Function prototypes should be listed in a logical order, preferably alphabetical unless there is a compelling reason to use a different ordering. 
> 
> Functions that are used locally in more than one module go into a separate header file, for example, "extern.h". 
> 
> Do not use the ``__P`` macro. 
> 
Just out of curiosity, has this been a problem?  __P was introduced in c89, and
I don't think I've seen it in code since C99 was released.  I don't mind
including it, but I'm curious to know the history here.

> Functions that are part of an external API should be documented using Doxygen-like comments above declarations. See the Doxygen documentation topic for details. 
> 
> Associate names with parameter types, for example: 
>  void function(int fd);
> 
> 
> Short function prototypes should be contained on a single line. Longer prototypes, e.g. those with many parameters, 
> can be split across multiple lines. Multi-line prototypes should use space-indentation to enable function parameters to line up: 
>  static char *function1(int _arg, const char *_arg2, 
>                       struct foo *_arg3,
>                       struct bar *_arg4,
>                       struct baz *_arg5);

2 things:

1) Clarify the meaning of space indentation.  Is it ok to use tabs and spaces
for alignment, or are only spaces allowed (note the code currently uses the
former).

2) You say function parameters should "line up", but in the example you give,
they don't.  What I think you want is:
static char *function1(int _arg, const char *_arg2,
		       struct foo *_arg3,
		       struct bar *_arg4,
		       struct baz *_arg5);

>  static void usage(void);
> 
> 
> Definitions
> -----------
> 
> The function type should be on a line by itself preceding the function. The opening brace of the function body should be on a line by itself. 
>  static char *
>  function(int a1, int a2, float fl, int a4)
>  {
> 
The example immediately above this section doesn't follow this convention.  You
should fix that.

> 
> Do not declare functions inside other functions. ANSI C states that such declarations have file scope regardless of the nesting of the declaration. 
> Hiding file declarations in what appears to be a local scope is undesirable and will elicit complaints from a good compiler. 
> 
> Old-style (K&R) function declaration should not be used, use ANSI function declarations instead as shown below. Long argument lists 
> should be wrapped as described above in the function prototypes section. 
>  /*
>   * All major routines should have a comment briefly describing what
>   * they do. The comment before the "main" routine should describe
>   * what the program does.
>   */
>  int
>  main(int argc, char *argv[])
>  {
>          char *ep;
>          long num;
>          int ch;
> 
> 
> C Command Line Parsing
> ----------------------
> 
> For consistency, getopt(3) should be used to parse options. Options should be sorted in the getopt(3) call and the switch statement, 
> unless parts of the switch cascade. Elements in a switch statement that cascade should have a FALLTHROUGH comment. 
> Numerical arguments should be checked for accuracy. Code that cannot be reached should have a NOTREACHED comment. 
>  while ((ch = getopt(argc, argv, "abNn:")) != -1)
>          switch (ch) {         /* Indent the switch. */
>          case 'a':             /* Don't indent the case. */
>                  aflag = 1;    /* Indent case body one tab. */
>                  /* FALLTHROUGH */
>          case 'b':
>                  bflag = 1;
>                  break;
>          case 'N':
>                  Nflag = 1;
>                  break;
>          case 'n':
>                  num = strtol(optarg, &ep, 10);
>                  if (num <= 0 || *ep != '\0') {
>                          warnx("illegal number, -n argument -- %s",
>                                optarg);
>                          usage();
>                  }
>                  break;
>          case '?':
>          default:
>                  usage();
>                  /* NOTREACHED */
>          }
>  argc -= optind;
>  argv += optind;
> 

I'm not sure we need this section.  I understand we have lots of examples that
use getopt, but by and large this addresses application coding, which is
somewhat outside of the purview of the DPDK.  I'm wholly supportive of some
style guidelines regarding switch statements mind you, but I'm not sure we need
to mandate the usage of getopt.

> 
> 
> 
> 
> C Indentation
> -------------
> 
> Control Statements and Loops
> 
> Include a space after keywords (if, while, for, return, switch). Do not use braces (``{`` and ``}``) for control statements with zero or just a 
> single statement, unless that statement is more than a single line in which case the braces are permitted. Forever loops are done with for statements, not while statements. 
>  for (p = buf; *p != '\0'; ++p)
>          ;       /* nothing */
>  for (;;)
>          stmt;
>  for (;;) {
>          z = a + really + long + statement + that + needs +
>                  two + lines + gets + indented + on + the + 
>                  second + and + subsequent + lines;
>  }
>  for (;;) {
>          if (cond)
>                  stmt;
>  }
>  if (val != NULL)
>          val = realloc(val, newsize);
> 
> 
> Parts of a for loop may be left empty. It is recommended that you do not put declarations inside blocks unless the routine is unusually complicated. 
>  for (; cnt < 15; cnt++) {
>          stmt1;
>          stmt2;
>  }
> 
> 
> Indentation is a hard tab, that is, a tab character, not a sequence of spaces. 
> NOTE General rule in DPDK, use tabs for indentation, spaces for alignment. 
> If you have to wrap a long statement, put the operator at the end of the line, and indent again. For control statements (if, while, etc.), 
> it is recommended that the next line be indented by two tabs, rather than one, to prevent confusion as to whether the second line of the 
> control statement forms part of the statement body or not. For non-control statements, this issue does not apply, so they can be indented 
> by a single tab. However, a two-tab indent is recommended in this case also to keep consistency across all statement types. 
>  while (really_long_variable_name_1 == really_long_variable_name_2 &&
>      var3 == var4){
>      x = y + z;      /* control stmt body lines up with second line of */
>      a = b + c;      /* control statement itself if single indent used */
>  }
>  
>  if (really_long_variable_name_1 == really_long_variable_name_2 &&
>          var3 == var4){  /* two tabs used */
>      x = y + z;          /* statement body no longer lines up */
>      a = b + c;
>  }
>  
>  z = a + really + long + statement + that + needs +
>          two + lines + gets + indented + on + the + 
>          second + and + subsequent + lines;
> 
> 
> Do not add whitespace at the end of a line. 
> 
> Closing and opening braces go on the same line as the else keyword. Braces that are not necessary should be left out. 
>  if (test)
>          stmt;
>  else if (bar) {
>          stmt;
>          stmt;
>  } else
>          stmt;
> 
> 
> Function Calls
> --------------
> 
> Do not use spaces after function names. Commas should have a space after them. No spaces after ``(`` or ``[`` or preceding the ``]`` or ``)`` characters. 
>  error = function(a1, a2);
>  if (error != 0)
>          exit(error);
> 
> 
> Operators
> ---------
> 
> Unary operators do not require spaces, binary operators do. Do not use parentheses unless they are required for precedence or unless the 
> statement is confusing without them. Remember that other people may be more easily confused than you. 
> 

I recommend changing this such that all logical operators require spaces around
them.  It simplifies the rules when writing code.  Also, it appears from some
quick grepping that always including space around unary or binary logical
operations is the de-facto rule.

> Exit
> 
> Exits should be 0 on success, or 1 on failure. 
>          exit(0);        /*
>                           * Avoid obvious comments such as
>                           * "Exit 0 on success."
>                           */
>  }
> 
This again codifies application behavior, not coding style.  I don't think we
need to mandate that as we have no purview over application behavior (example
applications being the exception here of course).


> 
> Local Variables
> ---------------
> 
> When declaring variables in functions, declare them sorted by size, then in alphabetical order. Multiple variables per line are OK. 
> If a line overflows reuse the type keyword. 
> 
> Be careful to not obfuscate the code by initializing variables in the declarations, only the last variable on a line should be initialized. 
> If multiple variables are to be initialised when defined, put one per line. Do not use function calls in initializers. 
>  int i = 0, j = 0, k = 0;  /* bad, too many initializer */
>  
>  char a = 0;        /* OK, one variable per line with initializer */
>  char b = 0;
>  
>  float x, y = 0.0;  /* OK, only last variable has initializer */
> 
> 
> Casts and sizeof
> 
> Casts and sizeof statements are not followed by a space. Always write sizeof statements with parenthesis. 
> The redundant parenthesis rules do not apply to sizeof(var) instances. 
> 
What redundant parenthesis rules?  I presume you are referring to the implied
practice of not including parentheses in operations that don't change the order
of operations?  If so, you probably want to state that clearly earlier so readers
have something to reference here.


> C Style and Conventions
> 
> NULL Pointers
> 
> NULL is the preferred null pointer constant. Use NULL instead of ``(type *)0`` or ``(type *)NULL`` in contexts where the compiler knows the type, 
> for example, in assignments. Use ``(type *)NULL`` in other contexts, in particular for all function args. 
> (Casting is essential for variadic args and is necessary for other args if the function prototype might not be in scope.) Test pointers against NULL, for example, use:: 
>  (p = f()) == NULL
> 
> 
> not:: 
>  !(p = f())
> 
> 
> Do not use ! for tests unless it is a boolean, for example, use:: 
>  if (*p == '\0')
> 
> 
> not:: 
>  if (!*p)
> 
> 
> Return Value
> ------------
> 
> If possible, functions should return 0 on success and a negative value on error. The negative value should be ``-errno`` if relevant, for example, ``-EINVAL``. 
> 
> Routines returning ``void *`` should not have their return values cast to any pointer type. 
> (Typecasting can prevent the compiler from warning about missing prototypes as any implicit definition of a function returns int - which, unlike "void *" needs a typecast to assign to a pointer variable.) 
> NOTE The above rule applies to malloc, as well as to DPDK functions. 
> Values in return statements should be enclosed in parentheses. 
> 
> Logging and Errors
> ------------------
> 
> In the DPDK environment, use the logging interface provided:: 
>  #define RTE_LOGTYPE_TESTAPP1 RTE_LOGTYPE_USER1
>  #define RTE_LOGTYPE_TESTAPP2 RTE_LOGTYPE_USER2
>  
>  /* enable these logs type */
>  rte_set_log_type(RTE_LOGTYPE_TESTAPP1, 1);
>  rte_set_log_type(RTE_LOGTYPE_TESTAPP2, 1);
>  
>  /* log in debug level */
>  rte_set_log_level(RTE_LOG_DEBUG);
>  RTE_LOG(DEBUG, TESTAPP1, "this is is a debug level message\n");
>  RTE_LOG(INFO, TESTAPP1, "this is is a info level message\n");
>  RTE_LOG(WARNING, TESTAPP1, "this is is a warning level message\n");
>  
>  /* log in info level */
>  rte_set_log_level(RTE_LOG_INFO);
>  RTE_LOG(DEBUG, TESTAPP2, "debug level message (not displayed)\n");
> 

While I'm not opposed at all to mandating a Log mechanism, it seems to me that
this is a review issue, not a coding style issue.  It will also be almost
impossible to codify this requirement in a tool (how will a tool know when you
are trying to do logging, but aren't using the above log macros)?

> 
> In a userland program that is not a DPDK application, use err(3) or warn(3). Do not create your own variant. 
>          if ((four = malloc(sizeof(struct foo))) == NULL)
>                  err(1, (char *)NULL);
>          if ((six = (int *)overflow()) == NULL)
>                  errx(1, "number overflowed");
>          return (eight);
>  }
> 
Not within our purview, don't include this.  If it's not part of the DPDK,
we don't get to mandate style on it.

> 
> Variable Arguments List
> -----------------------
> 
> Variable numbers of arguments should look like this: 
>  #include <stdarg.h>
>  
>  void
>  vaf(const char *fmt, ...)
>  {
>          va_list ap;
>  
>          va_start(ap, fmt);
>          STUFF;
>          va_end(ap);
>          /* No return needed for void functions. */
>  }
>  
>  static void
>  usage()
>  {
>          /* Insert an empty line if the function has no local variables. */
> 
> 
This isn't a coding style issue, this is just how variable arguments work in C.
I think you can remove this.

> Printf
> ------
> 
> Use printf(3), not fputs(3), puts(3), putchar(3) or whatever. It is faster and usually cleaner, and helps to avoid unnecessary bugs. However, be aware of format string bugs:: 
>  int
>  main(int argc, char **argv)
>  {
>          if(argc != 2)
>              exit(1);
>          printf(argv[1]); /* bad ! */
>          printf("%s", argv[1]); /* correct */
> 
> 

Again, not a coding style issue.  There may be perfectly good reasons to use
putchar/putc/puts; I don't think our coding style guideline needs to restrict
their usage.  Let developers review the appropriateness of the calls used in the
code.

> Usage
> -----
> 
> Usage statements should look like the manual pages SYNOPSIS. The usage statement should be structured in the following order: 
> 1. Options without operands come first, in alphabetical order, inside a single set of brackets (``[`` and ``]``). 
> 2. Options with operands come next, also in alphabetical order, with each option and its argument inside its own pair of brackets. 
> 3. Required arguments (if any) are next, listed in the order they should be specified on the command line. 
> 4. Finally, any optional arguments, listed in the order they should be specified, and all inside brackets. 
> 
> A bar (`|') separates ``either-or`` options/arguments, and multiple options/arguments, which are specified together, are placed in a single set of brackets. 
>  "usage: f [-aDde] [-b b_arg] [-m m_arg] req1 req2 [opt1 [opt2]]\n"
>  "usage: f [-a | -b] [-c [-dEe] [-n number]]\n"
>  
>  (void)fprintf(stderr, "usage: f [-ab]\n");
>          exit(1);
>  }
> 
> 
> Note that the manual page options description should list the options in pure alphabetical order. That is, without regard to 
> whether an option takes arguments or not. The alphabetical ordering should take into account the case ordering shown above. 
> 
> Branch Prediction
> -----------------
> 
> When a test is done in a critical zone (called often or in a data path) use the ``likely()`` and ``unlikely()`` macros. They are expanded 
> as a compiler builtin and allow the developer to indicate if the branch is likely to be taken or not. Example: 
>  #include <rte_branch_prediction.h>
>  if (likely(x > 1))
>    do_stuff();
> 

Don't need this.  Developers know when to use proper branch prediction.
Mandating it can lead to inappropriate guesses, especially if you codify this
check in a tool that mandates all conditionals be branch predicted.


The remainder of the document looks fine
Neil

^ permalink raw reply	[relevance 2%]

* [dpdk-dev] [dpdk-announce] DPDK 2.0.0 released
@ 2015-04-03 21:12  4% Thomas Monjalon
  0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2015-04-03 21:12 UTC (permalink / raw)
  To: announce

A new DPDK release can be downloaded here:
	http://dpdk.org/browse/dpdk/tag/?id=v2.0.0

During this cycle, 23 maintainers for 48 areas of interest have been set up:
	http://dpdk.org/browse/dpdk/tree/MAINTAINERS
A dozen areas are still orphans.

Changelog (main changes since 1.8.0)
	- enhancements:
		* ABI versioning
		* x32 ABI
		* non-eal thread supports
		* multi-pthread per core
		* enable big contigmem blocks in BSD
		* port hotplug
		* jobstats library
		* reorder library
		* memcpy optimization
		* acl for AVX2
		* crc hash arch-independent
		* uio_pci_generic support
		* kni optimizations
		* vhost-user support
		* virtio (link, vlan, mac, port IO, perf)
		* ixgbevf RSS
		* i40e hash filtering
		* i40e nvgre offloading
		* i40e TSO
		* fm10k driver
		* mlx4 driver
		* bonding mode 4 tests
		* bonding mode 6
		* Rx/Tx callbacks
		* unified flow types
		* remove old filtering API (flow director, flex, ethertype, syn, ntuple)
		* remove static tailqs from EAL
		* remove device arguments limit
		* add indirect attached mbuf flag
		* use default port configuration in testpmd
		* tunnel offloading in testpmd
		* PDF doc output
		* NICs guide
	- fixes for:
		* build
		* memory leaks
		* big endian
		* devargs
		* kvargs
		* cmdline
		* timer
		* lpm
		* pipeline
		* bonding
		* vhost
		* virtio
		* pcap
		* igb
		* ixgbe
		* i40e
		* enic
		* testpmd

Statistics for this release cycle:
	530 patches from 60 authors
	513 files changed, 44567 insertions(+), 11269 deletions(-)
Although it is not visible in these numbers, reviewing process is at least
as important as patch authoring.
Thank you everyone!

It is the first release based on deadlines, and is delivered almost on time.
Some features were submitted too late or still need some work to reach a
consensus. They were deferred and will come back for the new release cycle.
In order to avoid trashing some work because of design issues, it is
recommended to submit design ideas early with a bit of code, e.g. the API.
Then the roadmap will be fed to show the approved ideas.

New features for 2.1 cycle must be submitted before June.
If not properly reviewed and acked on June 26th, feature will be deferred.

For now, enjoy this Easter egg release!

^ permalink raw reply	[relevance 4%]

* [dpdk-dev] [PATCHv2] doc: remove duplicate in release nots new features
@ 2015-04-03  9:51  6% Siobhan Butler
  0 siblings, 0 replies; 200+ results
From: Siobhan Butler @ 2015-04-03  9:51 UTC (permalink / raw)
  To: dev

- Remove duplicate entry
- Added remaining features to list of new features

Signed-off-by: Siobhan Butler <siobhan.a.butler@intel.com>
---
 doc/guides/rel_notes/new_features.rst | 74 +++++++++++++++++++++++++----------
 1 file changed, 54 insertions(+), 20 deletions(-)

diff --git a/doc/guides/rel_notes/new_features.rst b/doc/guides/rel_notes/new_features.rst
index e3edec4..ab308a4 100644
--- a/doc/guides/rel_notes/new_features.rst
+++ b/doc/guides/rel_notes/new_features.rst
@@ -32,6 +32,24 @@ New Features
 ============
 *   Poll-mode driver support for an early release of the PCIE host interface of the Intel(R) Ethernet Switch FM10000.
 
+    *   Basic Rx/Tx functions for PF/VF
+
+    *   Interrupt handling support for PF/VF
+
+    *   Per queue start/stop functions for PF/VF
+
+    *   Support Mailbox handling between PF/VF and PF/Switch Manager
+
+    *   Receive Side Scaling (RSS) for PF/VF
+
+    *   Scatter receive function for PF/VF
+
+    *   Reta update/query for PF/VF
+
+    *   VLAN filter set for PF
+
+    *   Link status query for PF/VF
+
 .. note:: The software is intended to run on pre-release hardware and may contain unknown or unresolved defects or
           issues related to functionality and performance.
           The poll mode driver is also pre-release and will be updated to a released version post hardware and base driver release.
@@ -45,6 +63,42 @@ New Features
 
     *   Support for slaves devices which do not support link status change interrupts in the link bonding library via a link status polling mechanism.
 
+*   PCI Hotplug with NULL PMD sample application
+
+*   ABI versioning
+
+*   x32 ABI
+
+*   Non-EAL Thread Support
+
+*   Multi-pthread Support
+
+*   Re-order Library
+
+*   ACL for AVX2
+
+*   Architecture Independant CRC Hash
+
+*   uio_pci_generic Support
+
+*   KNI Optimizations
+
+*   Vhost-user support
+
+*   Virtio (link, vlan, mac, port IO, perf)
+
+*   IXGBE-VF RSS
+
+*   RX/TX Callbacks
+
+*   Unified Flow Types
+
+*   Indirect Attached MBUF Flag
+
+*   Use default port configuration in TestPMD
+
+*   Tunnel offloading in TestPMD
+
 *   Poll Mode Driver - 40 GbE Controllers (librte_pmd_i40e)
 
     *   Support for Flow Director
@@ -67,24 +121,4 @@ New Features
 
 *   Job Stats library and Sample Application.
 
-*   Poll Mode Driver - PCIE host-interface of Intel Ethernet Switch FM10000 Series (librte_pmd_fm10k)
-
-    *   Basic Rx/Tx functions for PF/VF
-
-    *   Interrupt handling support for PF/VF
-
-    *   Per queue start/stop functions for PF/VF
-
-    *   Support Mailbox handling between PF/VF and PF/Switch Manager
-
-    *   Receive Side Scaling (RSS) for PF/VF
-
-    *   Scatter receive function for PF/VF
-
-    *   Reta update/query for PF/VF
-
-    *   VLAN filter set for PF
-
-    *   Link status query for PF/VF
-
 For further features supported in this release, see Chapter 3 Supported Features.
-- 
1.8.3.1

^ permalink raw reply	[relevance 6%]

* Re: [dpdk-dev] [PATCH] ethdev: additional parameter in RX callback
  2015-03-23 16:00  3%                     ` Neil Horman
@ 2015-03-30 19:52  0%                       ` Thomas Monjalon
  0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2015-03-30 19:52 UTC (permalink / raw)
  To: Mcnamara, John; +Cc: dev

2015-03-23 12:00, Neil Horman:
> On Mon, Mar 23, 2015 at 04:16:36PM +0100, Thomas Monjalon wrote:
> > I think John is saying that the API of rte_eth_rx_burst() already includes
> > the nb_pkts parameter. So it's natural to push it to the callback.
> > I also think Neil is saying that this parameter is useless in the callback
> > and in rte_eth_rx_burst() if the array was null terminated.
> > In any case, having a mix (null termination + parameter in rte_eth_rx_burst)
> > is not acceptable.
> > Moreover, I wonder how efficient are the compiler optimizations in each loop
> > case (index and null termination).
> > 
> > As the API was using an integer count, my opinion is to keep it and push it to
> > the callback for 2.0.
> > If null termination is validated to be better, it could be a later rework.
> > 
> 
> I'm fine with this if thats the consensus, I'm more interested in making sure we
> think about these problems in such a way that we're not just running from ABI
> issues, because we're eventually going to have to deal with them
> Neil

Acked-by: Thomas Monjalon <thomas.monjalon@6wind.com>

Applied, thanks

^ permalink raw reply	[relevance 0%]

* [dpdk-dev] [PATCH v2 3/7] hv: add basic vmbus support
  @ 2015-03-25 18:11  1% ` Stephen Hemminger
  0 siblings, 0 replies; 200+ results
From: Stephen Hemminger @ 2015-03-25 18:11 UTC (permalink / raw)
  To: simonxiaolinux, alexmay, kys; +Cc: dev

From: Stephen Hemminger <stephen@networkplumber.org>

The hyper-v device driver forces the base EAL code to change
to support multiple bus types. This is done by changing the pci_device
in ether driver to a generic union.

As much as possible this is done in a backwards source compatible
way. It will break ABI for device drivers.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/librte_eal/common/Makefile             |   2 +-
 lib/librte_eal/common/eal_common_options.c |   5 +
 lib/librte_eal/common/eal_internal_cfg.h   |   1 +
 lib/librte_eal/common/eal_options.h        |   2 +
 lib/librte_eal/common/eal_private.h        |  10 +
 lib/librte_eal/common/include/rte_vmbus.h  | 153 +++++++
 lib/librte_eal/linuxapp/eal/Makefile       |   3 +
 lib/librte_eal/linuxapp/eal/eal.c          |  11 +
 lib/librte_eal/linuxapp/eal/eal_vmbus.c    | 639 +++++++++++++++++++++++++++++
 lib/librte_ether/rte_ethdev.c              |  84 +++-
 lib/librte_ether/rte_ethdev.h              |  10 +-
 lib/librte_pmd_fm10k/fm10k_ethdev.c        |   2 +-
 12 files changed, 915 insertions(+), 7 deletions(-)
 create mode 100644 lib/librte_eal/common/include/rte_vmbus.h
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_vmbus.c

diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index 3ea3bbf..202485e 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -33,7 +33,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 
 INC := rte_branch_prediction.h rte_common.h
 INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
-INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
+INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h rte_vmbus.h
 INC += rte_pci_dev_ids.h rte_per_lcore.h rte_random.h
 INC += rte_rwlock.h rte_tailq.h rte_interrupts.h rte_alarm.h
 INC += rte_string_fns.h rte_version.h
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 8fcb1ab..76a3394 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -80,6 +80,7 @@ eal_long_options[] = {
 	{OPT_NO_HPET,           0, NULL, OPT_NO_HPET_NUM          },
 	{OPT_NO_HUGE,           0, NULL, OPT_NO_HUGE_NUM          },
 	{OPT_NO_PCI,            0, NULL, OPT_NO_PCI_NUM           },
+	{OPT_NO_VMBUS,		0, NULL, OPT_NO_VMBUS_NUM	  },
 	{OPT_NO_SHCONF,         0, NULL, OPT_NO_SHCONF_NUM        },
 	{OPT_PCI_BLACKLIST,     1, NULL, OPT_PCI_BLACKLIST_NUM    },
 	{OPT_PCI_WHITELIST,     1, NULL, OPT_PCI_WHITELIST_NUM    },
@@ -726,6 +727,10 @@ eal_parse_common_option(int opt, const char *optarg,
 		conf->no_pci = 1;
 		break;
 
+	case OPT_NO_VMBUS_NUM:
+		conf->no_vmbus = 1;
+		break;
+
 	case OPT_NO_HPET_NUM:
 		conf->no_hpet = 1;
 		break;
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index e2ecb0d..0e7de34 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -66,6 +66,7 @@ struct internal_config {
 	volatile unsigned no_hugetlbfs;   /**< true to disable hugetlbfs */
 	volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
 	volatile unsigned no_pci;         /**< true to disable PCI */
+	volatile unsigned no_vmbus;	  /**< true to disable VMBUS */
 	volatile unsigned no_hpet;        /**< true to disable HPET */
 	volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
 										* instead of native TSC */
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index f6714d9..54f03dc 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -67,6 +67,8 @@ enum {
 	OPT_NO_HUGE_NUM,
 #define OPT_NO_PCI            "no-pci"
 	OPT_NO_PCI_NUM,
+#define OPT_NO_VMBUS          "no-vmbus"
+	OPT_NO_VMBUS_NUM,
 #define OPT_NO_SHCONF         "no-shconf"
 	OPT_NO_SHCONF_NUM,
 #define OPT_SOCKET_MEM        "socket-mem"
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 4acf5a0..039e9f3 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -180,6 +180,16 @@ int rte_eal_pci_close_one_driver(struct rte_pci_driver *dr,
 		struct rte_pci_device *dev);
 
 /**
+ * VMBUS related functions and structures
+ */
+int rte_eal_vmbus_init(void);
+
+struct rte_vmbus_driver;
+struct rte_vmbus_device;
+
+int rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
+		struct rte_vmbus_device *dev);
+/**
  * Init tail queues for non-EAL library structures. This is to allow
  * the rings, mempools, etc. lists to be shared among multiple processes
  *
diff --git a/lib/librte_eal/common/include/rte_vmbus.h b/lib/librte_eal/common/include/rte_vmbus.h
new file mode 100644
index 0000000..2742cb1
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_vmbus.h
@@ -0,0 +1,153 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _RTE_VMBUS_H_
+#define _RTE_VMBUS_H_
+
+/**
+ * @file
+ *
+ * RTE VMBUS Interface
+ */
+
+#include <sys/queue.h>
+
+/** Pathname of VMBUS devices directory. */
+#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices"
+
+/** Formatting string for VMBUS device identifier: Ex: vmbus_0_9 */
+#define VMBUS_PRI_FMT "vmbus_0_%u"
+
+#define VMBUS_ID_ANY 0xFFFF
+
+#define VMBUS_NETWORK_DEVICE "{f8615163-df3e-46c5-913f-f2d2f965ed0e}"
+
+/** Maximum number of VMBUS resources. */
+#define VMBUS_MAX_RESOURCE 7
+
+/**
+ * A structure describing an ID for a VMBUS driver. Each driver provides a
+ * table of these IDs for each device that it supports.
+ */
+struct rte_vmbus_id {
+	uint16_t device_id;           /**< VMBUS Device ID */
+	uint16_t sysfs_num;           /**< vmbus_0_X */
+};
+
+/**
+ * A structure describing a VMBUS memory resource.
+ */
+struct rte_vmbus_resource {
+	uint64_t phys_addr;   /**< Physical address, 0 if no resource. */
+	uint64_t len;         /**< Length of the resource. */
+	void *addr;           /**< Virtual address, NULL when not mapped. */
+};
+
+/**
+ * A structure describing a VMBUS device.
+ */
+struct rte_vmbus_device {
+	TAILQ_ENTRY(rte_vmbus_device) next;     /**< Next probed VMBUS device. */
+	struct rte_vmbus_id id;                 /**< VMBUS ID. */
+	const struct rte_vmbus_driver *driver;  /**< Associated driver */
+	int numa_node;                          /**< NUMA node connection */
+	unsigned int blacklisted:1;             /**< Device is blacklisted */
+	struct rte_vmbus_resource mem_resource[VMBUS_MAX_RESOURCE];   /**< VMBUS Memory Resource */
+	uint32_t vmbus_monitor_id;              /**< VMBus monitor ID for device */
+	int uio_fd;                             /** UIO device file descriptor */
+};
+
+/** Macro used to help building up tables of device IDs */
+#define RTE_VMBUS_DEVICE(dev)          \
+	.device_id = (dev)
+
+struct rte_vmbus_driver;
+
+/**
+ * Initialisation function for the driver called during VMBUS probing.
+ */
+typedef int (vmbus_devinit_t)(struct rte_vmbus_driver *, struct rte_vmbus_device *);
+
+/**
+ * A structure describing a VMBUS driver.
+ */
+struct rte_vmbus_driver {
+	TAILQ_ENTRY(rte_vmbus_driver) next;     /**< Next in list. */
+	const char *name;                       /**< Driver name. */
+	vmbus_devinit_t *devinit;               /**< Device init. function. */
+	struct rte_vmbus_id *id_table;          /**< ID table, NULL terminated. */
+	uint32_t drv_flags;                     /**< Flags contolling handling of device. */
+	const char *module_name;		/**< Associated kernel module */
+};
+
+/**
+ * Probe the VMBUS device for registered drivers.
+ *
+ * Scan the content of the vmbus, and call the probe() function for
+ * all registered drivers that have a matching entry in its id_table
+ * for discovered devices.
+ *
+ * @return
+ *   - 0 on success.
+ *   - Negative on error.
+ */
+int rte_eal_vmbus_probe(void);
+
+/**
+ * Dump the content of the vmbus.
+ */
+void rte_eal_vmbus_dump(void);
+
+/**
+ * Register a VMBUS driver.
+ *
+ * @param driver
+ *   A pointer to a rte_vmbus_driver structure describing the driver
+ *   to be registered.
+ */
+void rte_eal_vmbus_register(struct rte_vmbus_driver *driver);
+
+/**
+ * Unregister a VMBUS driver.
+ *
+ * @param driver
+ *   A pointer to a rte_vmbus_driver structure describing the driver
+ *   to be unregistered.
+ */
+void rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver);
+
+int vmbus_uio_map_resource(struct rte_vmbus_device *dev);
+
+#endif /* _RTE_VMBUS_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 01f7b70..acd5127 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -74,6 +74,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_alarm.c
 ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_ivshmem.c
 endif
+ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_vmbus.c
+endif
 
 # from common dir
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index bd770cf..3305b2d 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -70,6 +70,7 @@
 #include <rte_cpuflags.h>
 #include <rte_interrupts.h>
 #include <rte_pci.h>
+#include <rte_vmbus.h>
 #include <rte_devargs.h>
 #include <rte_common.h>
 #include <rte_version.h>
@@ -758,6 +759,11 @@ rte_eal_init(int argc, char **argv)
 	if (rte_eal_pci_init() < 0)
 		rte_panic("Cannot init PCI\n");
 
+#ifdef RTE_LIBRTE_HV_PMD
+	if (rte_eal_vmbus_init() < 0)
+		RTE_LOG(ERR, EAL, "Cannot init VMBUS\n");
+#endif
+
 #ifdef RTE_LIBRTE_IVSHMEM
 	if (rte_eal_ivshmem_init() < 0)
 		rte_panic("Cannot init IVSHMEM\n");
@@ -845,6 +851,11 @@ rte_eal_init(int argc, char **argv)
 	if (rte_eal_pci_probe())
 		rte_panic("Cannot probe PCI\n");
 
+#ifdef RTE_LIBRTE_HV_PMD
+	if (rte_eal_vmbus_probe() < 0)
+		rte_panic("Cannot probe VMBUS\n");
+#endif
+
 	return fctret;
 }
 
diff --git a/lib/librte_eal/linuxapp/eal/eal_vmbus.c b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
new file mode 100644
index 0000000..4fdfc46
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
@@ -0,0 +1,639 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_vmbus.h>
+#include <rte_common.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_malloc.h>
+
+#include "eal_filesystem.h"
+#include "eal_private.h"
+
+#define PROC_MODULES "/proc/modules"
+#define VMBUS_DRV_PATH "/sys/bus/vmbus/drivers/%s"
+
+TAILQ_HEAD(vmbus_device_list, rte_vmbus_device); /**< VMBUS devices in D-linked Q. */
+TAILQ_HEAD(vmbus_driver_list, rte_vmbus_driver); /**< VMBUS drivers in D-linked Q. */
+
+static struct vmbus_driver_list vmbus_driver_list =
+	TAILQ_HEAD_INITIALIZER(vmbus_driver_list);
+static struct vmbus_device_list vmbus_device_list =
+	TAILQ_HEAD_INITIALIZER(vmbus_device_list);
+
+/* Description of one UIO memory region, filled from sysfs maps/mapN. */
+struct uio_map {
+	void *addr;      /* virtual address the region was mapped at */
+	uint64_t offset; /* sysfs "offset" value; later replaced by the mmap file offset */
+	uint64_t size;   /* sysfs "size" value: length of the region in bytes */
+	uint64_t phaddr; /* sysfs "addr" value: physical address of the region */
+};
+
+/*
+ * For multi-process we need to reproduce all vmbus mappings in secondary
+ * processes, so save them in a tailq.
+ *
+ * NOTE(review): no list head for this type is visible in this file; confirm
+ * where instances are expected to be linked.
+ */
+struct uio_resource {
+	TAILQ_ENTRY(uio_resource) next;         /* tailq linkage */
+
+	struct rte_vmbus_id vmbus_addr;         /* copy of the owning device's id */
+	char path[PATH_MAX];                    /* UIO character device, /dev/uioX */
+	size_t nb_maps;                         /* number of valid entries in maps[] */
+	struct uio_map maps[VMBUS_MAX_RESOURCE]; /* per-region mapping details */
+};
+
+/*
+ * Parse a sysfs file containing one integer value.
+ * Different to the eal version, as it needs to work with 64-bit values.
+ *
+ * The first line of the file must be a number (any base accepted by
+ * strtoull() with base 0) immediately followed by a newline.
+ *
+ * On success the value is stored in *val and 0 is returned.  On any
+ * failure (open, read, no digits, trailing garbage, out of range) -1 is
+ * returned and *val is left untouched.
+ */
+static int
+vmbus_parse_sysfs_value(const char *filename, uint64_t *val)
+{
+	FILE *f;
+	char buf[BUFSIZ];
+	char *end = NULL;
+	unsigned long long parsed;
+	int ret = -1;
+
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+				__func__, filename);
+		return -1;
+	}
+
+	if (fgets(buf, sizeof(buf), f) == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+				__func__, filename);
+		goto out;
+	}
+
+	/* errno must be cleared first: strtoull() only sets it on overflow */
+	errno = 0;
+	parsed = strtoull(buf, &end, 0);
+
+	/* end == buf means no digits were consumed (e.g. an empty line) */
+	if ((end == buf) || (*end != '\n') || (errno == ERANGE)) {
+		RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
+				__func__, filename);
+		goto out;
+	}
+
+	*val = parsed;
+	ret = 0;
+
+out:
+	fclose(f);
+	return ret;
+}
+
+/*
+ * Largest value representable in the signed off_t type.  Computed in
+ * uint64_t so no signed overflow is involved; casting -1 would instead
+ * yield all ones and make the range check below a no-op.
+ */
+#define OFF_MAX              ((((uint64_t)1 << (sizeof(off_t) * 8 - 2)) - 1) * 2 + 1)
+
+/*
+ * Read the offset, size and physical address of the UIO regions exposed
+ * under <devname>/maps/map0, map1, ... into maps[].
+ *
+ * Scanning stops at the first missing mapN directory or after nb_maps
+ * entries.  Returns the number of regions described, or -1 if an attribute
+ * cannot be parsed or does not fit in off_t / size_t.
+ */
+static ssize_t
+vmbus_uio_get_mappings(const char *devname, struct uio_map maps[], size_t nb_maps)
+{
+	size_t i;
+	char dirname[PATH_MAX];
+	char filename[PATH_MAX];
+	uint64_t offset, size;
+
+	for (i = 0; i != nb_maps; i++) {
+
+		/* check if map directory exists */
+		snprintf(dirname, sizeof(dirname),
+				"%s/maps/map%zu", devname, i);
+
+		RTE_LOG(DEBUG, EAL, "Scanning maps in %s\n", dirname);
+
+		if (access(dirname, F_OK) != 0)
+			break;
+
+		/* get mapping offset */
+		snprintf(filename, sizeof(filename),
+				"%s/offset", dirname);
+		if (vmbus_parse_sysfs_value(filename, &offset) < 0) {
+			RTE_LOG(ERR, EAL,
+					"%s(): cannot parse offset of %s\n",
+					__func__, dirname);
+			return -1;
+		}
+
+		/* get mapping size */
+		snprintf(filename, sizeof(filename),
+				"%s/size", dirname);
+		if (vmbus_parse_sysfs_value(filename, &size) < 0) {
+			RTE_LOG(ERR, EAL,
+					"%s(): cannot parse size of %s\n",
+					__func__, dirname);
+			return -1;
+		}
+
+		/* get mapping physical address */
+		snprintf(filename, sizeof(filename),
+				"%s/addr", dirname);
+		if (vmbus_parse_sysfs_value(filename, &maps[i].phaddr) < 0) {
+			RTE_LOG(ERR, EAL,
+					"%s(): cannot parse addr of %s\n",
+					__func__, dirname);
+			return -1;
+		}
+
+		/* both values are later narrowed to off_t / size_t for mmap() */
+		if ((offset > OFF_MAX) || (size > SIZE_MAX)) {
+			RTE_LOG(ERR, EAL,
+					"%s(): offset/size exceed system max value\n",
+					__func__);
+			return -1;
+		}
+
+		maps[i].offset = offset;
+		maps[i].size = size;
+	}
+	return i;
+}
+
+/* maximum time to wait that /dev/uioX appears */
+#define UIO_DEV_WAIT_TIMEOUT 3 /* seconds */
+
+/*
+ * Map a particular resource from a file.
+ *
+ * The UIO device is opened on first use and the descriptor is cached in
+ * dev->uio_fd (a value <= 0 means "not open").  In the primary process the
+ * descriptor stays open for later calls; in any other process it is closed
+ * again once the mapping exists.
+ *
+ * Returns the mapped address, or NULL on failure.  When requested_addr is
+ * non-NULL the mapping must land exactly there, otherwise it is undone and
+ * the call fails.
+ */
+static void *
+vmbus_map_resource(struct rte_vmbus_device *dev, void *requested_addr,
+		const char *devname, off_t offset, size_t size)
+{
+	int fd;
+	void *mapaddr;
+
+	if (dev->uio_fd <= 0)
+		fd = open(devname, O_RDWR);
+	else
+		fd = dev->uio_fd;
+
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+				devname, strerror(errno));
+		return NULL;
+	}
+
+	dev->uio_fd = fd;
+	/* Map the memory resource of device */
+	mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
+			MAP_SHARED, fd, offset);
+	if (mapaddr == MAP_FAILED ||
+			(requested_addr != NULL && mapaddr != requested_addr)) {
+		RTE_LOG(ERR, EAL, "%s(): cannot mmap(%s(%d), %p, 0x%lx, 0x%lx):"
+				" %s (%p)\n", __func__, devname, fd, requested_addr,
+				(unsigned long)size, (unsigned long)offset,
+				strerror(errno), mapaddr);
+		/* do not leak a mapping that landed at the wrong address */
+		if (mapaddr != MAP_FAILED)
+			munmap(mapaddr, size);
+		close(fd);
+		/* back to the "not open" state so the closed fd is never reused */
+		dev->uio_fd = 0;
+		return NULL;
+	}
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		close(fd);
+		/* force a fresh open() on the next call instead of a stale fd */
+		dev->uio_fd = 0;
+	}
+
+	RTE_LOG(DEBUG, EAL, "  VMBUS memory mapped at %p\n", mapaddr);
+
+	return mapaddr;
+}
+
+/*
+ * Map the resources of a vmbus device in virtual memory.
+ *
+ * Finds the UIO instance of the device below /sys/bus/vmbus/devices/,
+ * reads the region descriptions from its maps/mapN attributes and maps
+ * every region that has a non-zero physical address through /dev/uioN.
+ * The results are recorded in dev->mem_resource[].
+ *
+ * Returns 0 on success, -1 on failure.
+ *
+ * NOTE(review): on a mapping failure, regions mapped earlier in the same
+ * call stay mapped.  On success uio_res is not linked into any list in this
+ * function; confirm who is meant to own it.
+ */
+int
+vmbus_uio_map_resource(struct rte_vmbus_device *dev)
+{
+	int i;
+	struct dirent *e;
+	DIR *dir;
+	char dirname[PATH_MAX];
+	char dirname2[PATH_MAX];
+	char devname[PATH_MAX]; /* contains the /dev/uioX */
+	void *mapaddr;
+	unsigned uio_num = 0;
+	uint64_t phaddr;
+	uint64_t offset;
+	uint64_t pagesz;
+	ssize_t nb_maps;
+	struct rte_vmbus_id *loc = &dev->id;
+	struct uio_resource *uio_res;
+	struct uio_map *maps;
+
+	/* depending on kernel version, uio can be located in uio/uioX
+	 * or uio:uioX */
+	snprintf(dirname, sizeof(dirname),
+			"/sys/bus/vmbus/devices/" VMBUS_PRI_FMT "/uio", loc->sysfs_num);
+
+	dir = opendir(dirname);
+	if (dir == NULL) {
+		/* retry with the parent directory */
+		snprintf(dirname, sizeof(dirname),
+				"/sys/bus/vmbus/devices/" VMBUS_PRI_FMT, loc->sysfs_num);
+		dir = opendir(dirname);
+
+		if (dir == NULL) {
+			RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname);
+			return -1;
+		}
+	}
+
+	/* take the first file starting with "uio" */
+	while ((e = readdir(dir)) != NULL) {
+		/* format could be uio%d ...*/
+		const size_t shortprefix_len = sizeof("uio") - 1;
+		/* ... or uio:uio%d */
+		const size_t longprefix_len = sizeof("uio:uio") - 1;
+		const char *num;
+		char *endptr;
+
+		if (strncmp(e->d_name, "uio", shortprefix_len) != 0)
+			continue;
+
+		/* first try uio%d; endptr == num means no digit was consumed */
+		num = e->d_name + shortprefix_len;
+		errno = 0;
+		uio_num = strtoull(num, &endptr, 10);
+		if (errno == 0 && endptr != num) {
+			snprintf(dirname2, sizeof(dirname2),
+					"%s/uio%u", dirname, uio_num);
+			break;
+		}
+
+		/* then try uio:uio%d, but never read past a shorter name */
+		if (strncmp(e->d_name, "uio:uio", longprefix_len) != 0)
+			continue;
+
+		num = e->d_name + longprefix_len;
+		errno = 0;
+		uio_num = strtoull(num, &endptr, 10);
+		if (errno == 0 && endptr != num) {
+			snprintf(dirname2, sizeof(dirname2),
+					"%s/uio:uio%u", dirname, uio_num);
+			break;
+		}
+	}
+	closedir(dir);
+
+	/* No uio resource found */
+	if (e == NULL) {
+		RTE_LOG(WARNING, EAL, "  "VMBUS_PRI_FMT" not managed by UIO driver, "
+				"skipping\n", loc->sysfs_num);
+		return -1;
+	}
+
+	/* allocate the mapping details for secondary processes*/
+	uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
+	if (uio_res == NULL) {
+		RTE_LOG(ERR, EAL,
+				"%s(): cannot store uio mmap details\n", __func__);
+		return -1;
+	}
+
+	snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
+	snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
+	memcpy(&uio_res->vmbus_addr, &dev->id, sizeof(uio_res->vmbus_addr));
+
+	/* collect info about device mappings */
+	nb_maps = vmbus_uio_get_mappings(dirname2, uio_res->maps,
+			sizeof(uio_res->maps) / sizeof(uio_res->maps[0]));
+	if (nb_maps < 0) {
+		rte_free(uio_res);
+		return -1;
+	}
+
+	RTE_LOG(DEBUG, EAL, "Found %d memory maps for device "VMBUS_PRI_FMT"\n",
+			(int)nb_maps, loc->sysfs_num);
+
+	uio_res->nb_maps = nb_maps;
+
+	pagesz = sysconf(_SC_PAGESIZE);
+
+	maps = uio_res->maps;
+	/* entries at or beyond nb_maps are still zeroed and describe nothing */
+	for (i = 0; i < nb_maps; i++) {
+		phaddr = maps[i].phaddr;
+		if (phaddr == 0)
+			continue;
+
+		/* cast explicitly: uint64_t is not unsigned long on every ABI */
+		RTE_LOG(DEBUG, EAL, "	mem_map%d: addr=0x%llx len = %llu\n",
+				i,
+				(unsigned long long)maps[i].phaddr,
+				(unsigned long long)maps[i].size);
+
+		/* the mmap() file offset selects the region: index * page size */
+		offset = i * pagesz;
+		mapaddr = vmbus_map_resource(dev, NULL, devname, (off_t)offset,
+				(size_t)maps[i].size);
+		if (mapaddr == NULL) {
+			rte_free(uio_res);
+			return -1;
+		}
+
+		/* Important: offset for mapping can be non-zero, pad the addr */
+		mapaddr = ((char *)mapaddr + maps[i].offset);
+		maps[i].addr = mapaddr;
+		maps[i].offset = offset;
+		dev->mem_resource[i].addr = mapaddr;
+		dev->mem_resource[i].phys_addr = phaddr;
+		dev->mem_resource[i].len = maps[i].size;
+	}
+
+	return 0;
+}
+
+/*
+ * Ordering predicate for VMBUS device addresses: returns 1 when id has a
+ * larger device_id than id2, 0 otherwise.
+ */
+static int
+vmbus_compare(struct rte_vmbus_id *id, struct rte_vmbus_id *id2)
+{
+	if (id->device_id > id2->device_id)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Scan one vmbus sysfs entry, and fill the devices list from it.
+ *
+ * Reads the id, monitor_id and class_id attributes of the entry.  Entries
+ * whose class_id does not start with VMBUS_NETWORK_DEVICE are skipped;
+ * the others are inserted into vmbus_device_list in ascending device_id
+ * order.
+ *
+ * Returns 0 when the entry was added or skipped, -1 on allocation, parse
+ * or read failure.
+ */
+static int
+vmbus_scan_one(const char *name)
+{
+	char path[PATH_MAX];
+	char class_id[BUFSIZ];
+	char devdir[PATH_MAX];
+	unsigned long value;
+	unsigned int sysfs_num;
+	struct rte_vmbus_device *dev;
+	struct rte_vmbus_device *pos = NULL;
+	FILE *fp;
+	int ret = -1;
+
+	dev = rte_zmalloc("vmbus_device", sizeof(*dev), 0);
+	if (dev == NULL)
+		return -1;
+
+	snprintf(devdir, sizeof(devdir), "%s/%s",
+		 SYSFS_VMBUS_DEVICES, name);
+
+	/* the number in the directory name is not necessarily the device
+	 * id read from the "id" attribute below, so keep both.
+	 */
+	if (sscanf(name, VMBUS_PRI_FMT, &sysfs_num) != 1) {
+		RTE_LOG(ERR, EAL, "Unable to parse vmbus sysfs name\n");
+		goto discard;
+	}
+	dev->id.sysfs_num = sysfs_num;
+
+	/* device id */
+	snprintf(path, sizeof(path), "%s/id", devdir);
+	if (eal_parse_sysfs_value(path, &value) < 0)
+		goto discard;
+	dev->id.device_id = (uint16_t)value;
+
+	/* monitor id */
+	snprintf(path, sizeof(path), "%s/monitor_id", devdir);
+	if (eal_parse_sysfs_value(path, &value) < 0)
+		goto discard;
+	dev->vmbus_monitor_id = value;
+
+	/* class_id must be {f8615163-df3e-46c5-913ff2d2f965ed0e} */
+	snprintf(path, sizeof(path), "%s/class_id", devdir);
+	fp = fopen(path, "r");
+	if (fp == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+				__func__, path);
+		goto discard;
+	}
+	if (fgets(class_id, sizeof(class_id), fp) == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+				__func__, path);
+		fclose(fp);
+		goto discard;
+	}
+	fclose(fp);
+
+	if (strncmp(class_id, VMBUS_NETWORK_DEVICE,
+			strlen(VMBUS_NETWORK_DEVICE)) != 0) {
+		RTE_LOG(DEBUG, EAL, "%s(): skip vmbus_0_%u with class_id = %s",
+				__func__, dev->id.sysfs_num, class_id);
+		/* a foreign device class is not an error */
+		ret = 0;
+		goto discard;
+	}
+
+	/* device is valid: insert before the first entry that does not
+	 * sort below it, or at the tail if there is none
+	 */
+	RTE_LOG(DEBUG, EAL, "Adding vmbus device %d\n", dev->id.device_id);
+	TAILQ_FOREACH(pos, &vmbus_device_list, next) {
+		if (!vmbus_compare(&dev->id, &pos->id)) {
+			TAILQ_INSERT_BEFORE(pos, dev, next);
+			return 0;
+		}
+	}
+
+	TAILQ_INSERT_TAIL(&vmbus_device_list, dev, next);
+
+	return 0;
+
+discard:
+	rte_free(dev);
+	return ret;
+}
+
+/*
+ * Check that a sysfs entry name looks like a vmbus device, i.e. that the
+ * text after its last '_' is a decimal number ('vmbus_0_%d').
+ *
+ * bufsize is kept for interface compatibility; the name is expected to be
+ * NUL-terminated and is parsed in place.
+ *
+ * Returns 0 when the name is acceptable, -1 otherwise.
+ */
+static int
+check_vmbus_device(const char *buf, int bufsize)
+{
+	const char *n = strrchr(buf, '_');
+	char *end = NULL;
+
+	(void)bufsize;
+
+	/* the format is 'vmbus_0_%d' */
+	if (n == NULL)
+		return -1;
+	n++;
+
+	/* strtoul() only sets errno on failure, so a stale value from an
+	 * earlier call must be cleared first
+	 */
+	errno = 0;
+	(void)strtoul(n, &end, 10);
+	if (errno != 0) {
+		RTE_LOG(ERR, EAL, "%s(): can't parse devid: %s\n",
+				__func__, strerror(errno));
+		return -1;
+	}
+
+	/* no digits at all, or trailing characters: not a device entry */
+	if (end == n || *end != '\0') {
+		RTE_LOG(DEBUG, EAL, "%s(): %s has no numeric device suffix\n",
+				__func__, buf);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Walk SYSFS_VMBUS_DEVICES and hand every entry that passes
+ * check_vmbus_device() to vmbus_scan_one().
+ *
+ * A missing sysfs directory is not an error.  Returns 0 on success and -1
+ * if the directory cannot be opened for another reason or an entry fails
+ * to scan.
+ */
+static int
+vmbus_scan(void)
+{
+	struct dirent *entry;
+	DIR *dir;
+	int ret = 0;
+
+	dir = opendir(SYSFS_VMBUS_DEVICES);
+	if (dir == NULL) {
+		if (errno == ENOENT)
+			return 0;
+
+		RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
+				__func__, strerror(errno));
+		return -1;
+	}
+
+	while ((entry = readdir(dir)) != NULL) {
+		/* skip hidden entries, "." and ".." */
+		if (entry->d_name[0] == '.')
+			continue;
+
+		if (check_vmbus_device(entry->d_name, sizeof(entry->d_name)) != 0)
+			continue;
+
+		if (vmbus_scan_one(entry->d_name) < 0) {
+			ret = -1;
+			break;
+		}
+	}
+
+	closedir(dir);
+	return ret;
+}
+
+/*
+ * Init the VMBUS EAL subsystem: scan sysfs for devices unless VMBUS
+ * support is disabled in the internal configuration.
+ * Returns 0 on success (or when disabled), -1 if the scan fails.
+ */
+int rte_eal_vmbus_init(void)
+{
+	if (!internal_config.no_vmbus && vmbus_scan() < 0) {
+		RTE_LOG(ERR, EAL, "%s(): Cannot scan vmbus\n", __func__);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Below is PROBE part of eal_vmbus library */
+
+/*
+ * Call the devinit() function of the driver for the given device.
+ *
+ * Only the first id_table entry is looked at: if it is the VMBUS_ID_ANY
+ * terminator the driver is reported as not matching (return 1).  No
+ * device id comparison is done beyond that.
+ *
+ * Returns 0 for a blacklisted device, -1 if the resources cannot be
+ * mapped, otherwise whatever devinit() returns.
+ */
+int
+rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
+		struct rte_vmbus_device *dev)
+{
+	struct rte_vmbus_id *id_table = dr->id_table;
+	struct rte_vmbus_id *loc = &dev->id;
+
+	/* an empty id table: return positive value, driver is not found */
+	if (id_table->device_id == VMBUS_ID_ANY)
+		return 1;
+
+	RTE_LOG(DEBUG, EAL, "VMBUS device "VMBUS_PRI_FMT"\n",
+			loc->sysfs_num);
+
+	RTE_LOG(DEBUG, EAL, "  probe driver: %s\n", dr->name);
+
+	/* blacklisted devices are left alone, and that is not an error */
+	if (dev->blacklisted) {
+		RTE_LOG(DEBUG, EAL, "  Device is blacklisted, not initializing\n");
+		return 0;
+	}
+
+	/* the driver needs the device memory mapped before it starts */
+	if (vmbus_uio_map_resource(dev) < 0)
+		return -1;
+
+	/* remember which driver owns the device, then initialise it */
+	dev->driver = dr;
+
+	return dr->devinit(dr, dev);
+}
+
+/*
+ * Call the devinit() function of all registered drivers for the vmbus
+ * device, stopping at the first driver that probes successfully.
+ *
+ * Returns 0 once a driver has claimed the device, -1 if a driver fails to
+ * probe or if no driver is found for this class of vmbus device.
+ * The present assumption is that we have drivers only for vmbus network
+ * devices. That's why we don't check driver's id_table now.
+ */
+static int
+vmbus_probe_all_drivers(struct rte_vmbus_device *dev)
+{
+	struct rte_vmbus_driver *dr = NULL;
+	int ret;
+
+	TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
+		ret = rte_eal_vmbus_probe_one_driver(dr, dev);
+		if (ret < 0) {
+			/* negative value is an error */
+			RTE_LOG(ERR, EAL, "Failed to probe driver %s\n", dr->name);
+			break;
+		}
+		if (ret > 0) {
+			/* positive value means driver not found */
+			RTE_LOG(DEBUG, EAL, "Driver %s not found\n", dr->name);
+			continue;
+		}
+
+		RTE_LOG(DEBUG, EAL, "OK. Driver was found and probed.\n");
+		return 0;
+	}
+	return -1;
+}
+
+
+/*
+ * Walk the list of discovered vmbus devices and try every registered
+ * driver on each of them.
+ *
+ * The per-device probe result is not propagated: this function always
+ * returns 0.
+ */
+int
+rte_eal_vmbus_probe(void)
+{
+	struct rte_vmbus_device *dev;
+
+	for (dev = TAILQ_FIRST(&vmbus_device_list); dev != NULL;
+			dev = TAILQ_NEXT(dev, next)) {
+		RTE_LOG(DEBUG, EAL, "Probing driver for device %d ...\n",
+				dev->id.device_id);
+		(void)vmbus_probe_all_drivers(dev);
+	}
+
+	return 0;
+}
+
+/*
+ * Register a vmbus driver: append it to the tail of vmbus_driver_list.
+ * The list stores the pointer itself, not a copy of the structure.
+ */
+void
+rte_eal_vmbus_register(struct rte_vmbus_driver *driver)
+{
+	TAILQ_INSERT_TAIL(&vmbus_driver_list, driver, next);
+}
+
+/*
+ * Unregister a vmbus driver: unlink it from vmbus_driver_list.
+ * NOTE(review): presumably the driver must currently be on the list;
+ * nothing here checks that.
+ */
+void
+rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver)
+{
+	TAILQ_REMOVE(&vmbus_driver_list, driver, next);
+}
+
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index afe6923..4dd0bf4 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -379,6 +379,57 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev)
 	return 0;
 }
 
+#ifdef RTE_LIBRTE_HV_PMD
+/*
+ * devinit() hook installed for VMBUS Ethernet drivers.
+ *
+ * Allocates an ethdev named "<device_id>_<sysfs_num>", allocates its
+ * private data in the primary process, then runs the PMD's
+ * eth_dev_init().
+ *
+ * Returns 0 on success, -ENOMEM if no ethdev can be allocated, or the
+ * PMD's error code; in the last case the private data is released and the
+ * port count is rolled back.
+ */
+static int
+rte_vmbus_dev_init(struct rte_vmbus_driver *vmbus_drv,
+		   struct rte_vmbus_device *vmbus_dev)
+{
+	char ethdev_name[RTE_ETH_NAME_MAX_LEN];
+	struct eth_driver *eth_drv = (struct eth_driver *)vmbus_drv;
+	struct rte_eth_dev *eth_dev;
+	int diag;
+
+	snprintf(ethdev_name, sizeof(ethdev_name), "%u_%u",
+		 vmbus_dev->id.device_id, vmbus_dev->id.sysfs_num);
+
+	eth_dev = rte_eth_dev_allocate(ethdev_name);
+	if (eth_dev == NULL)
+		return -ENOMEM;
+
+	/* only the primary process allocates the private port data */
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
+				  eth_drv->dev_private_size,
+				  RTE_CACHE_LINE_SIZE);
+		if (eth_dev->data->dev_private == NULL)
+			rte_panic("Cannot allocate memzone for private port data\n");
+	}
+
+	eth_dev->vmbus_dev = vmbus_dev;
+	eth_dev->driver = eth_drv;
+	eth_dev->data->rx_mbuf_alloc_failed = 0;
+
+	/* start with an empty user callback list */
+	TAILQ_INIT(&(eth_dev->callbacks));
+
+	/* default maximum frame size */
+	eth_dev->data->mtu = ETHER_MTU;
+
+	/* hand over to the PMD; on failure undo the allocations above */
+	diag = (*eth_drv->eth_dev_init)(eth_drv, eth_dev);
+	if (diag != 0) {
+		PMD_DEBUG_TRACE("driver %s: eth_dev_init(device_id=0x%x)"
+				" failed\n", vmbus_drv->name,
+				(unsigned) vmbus_dev->id.device_id);
+		if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+			rte_free(eth_dev->data->dev_private);
+		nb_ports--;
+	}
+
+	return diag;
+}
+#endif
+
 /**
  * Register an Ethernet [Poll Mode] driver.
  *
@@ -398,7 +449,21 @@ rte_eth_driver_register(struct eth_driver *eth_drv)
 {
 	eth_drv->pci_drv.devinit = rte_eth_dev_init;
 	eth_drv->pci_drv.devuninit = rte_eth_dev_uninit;
-	rte_eal_pci_register(&eth_drv->pci_drv);
+
+	switch (eth_drv->bus_type) {
+	case RTE_BUS_PCI:
+		eth_drv->pci_drv.devinit = rte_eth_dev_init;
+		rte_eal_pci_register(&eth_drv->pci_drv);
+		break;
+#ifdef RTE_LIBRTE_HV_PMD
+	case RTE_BUS_VMBUS:
+		eth_drv->vmbus_drv.devinit = rte_vmbus_dev_init;
+		rte_eal_vmbus_register(&eth_drv->vmbus_drv);
+		break;
+#endif
+	default:
+		rte_panic("unknown bus type %u\n", eth_drv->bus_type);
+	}
 }
 
 static int
@@ -1387,6 +1452,9 @@ rte_eth_has_link_state(uint8_t port_id)
 	}
 	dev = &rte_eth_devices[port_id];
 
+	if (dev->driver->bus_type != RTE_BUS_PCI)
+		return 0;
+
 	return (dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) != 0;
 }
 
@@ -1937,9 +2005,17 @@ rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info)
 
 	FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get);
 	(*dev->dev_ops->dev_infos_get)(dev, dev_info);
-	dev_info->pci_dev = dev->pci_dev;
-	if (dev->driver)
-		dev_info->driver_name = dev->driver->pci_drv.name;
+
+	if (dev->driver) {
+		switch (dev->driver->bus_type) {
+		case RTE_BUS_PCI:
+			dev_info->driver_name = dev->driver->pci_drv.name;
+			dev_info->pci_dev = dev->pci_dev;
+			break;
+		case RTE_BUS_VMBUS:
+			dev_info->driver_name = dev->driver->vmbus_drv.name;
+		}
+	}
 }
 
 void
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 124117a..21f7a08 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -175,6 +175,7 @@ extern "C" {
 #include <rte_log.h>
 #include <rte_interrupts.h>
 #include <rte_pci.h>
+#include <rte_vmbus.h>
 #include <rte_dev.h>
 #include <rte_devargs.h>
 #include <rte_mbuf.h>
@@ -1669,7 +1670,14 @@ typedef int (*eth_dev_uninit_t)(struct rte_eth_dev *eth_dev);
  * - The size of the private data to allocate for each matching device.
  */
 struct eth_driver {
-	struct rte_pci_driver pci_drv;    /**< The PMD is also a PCI driver. */
+	union {
+		struct rte_pci_driver pci_drv;    /**< The PMD is also a PCI driver. */
+		struct rte_vmbus_driver vmbus_drv;/**< The PMD is also a VMBUS drv. */
+	};
+	enum {
+		RTE_BUS_PCI=0,
+		RTE_BUS_VMBUS
+	} bus_type; 			  /**< Device bus type. */
 	eth_dev_init_t eth_dev_init;      /**< Device init function. */
 	eth_dev_uninit_t eth_dev_uninit;  /**< Device uninit function. */
 	unsigned int dev_private_size;    /**< Size of device private data. */
diff --git a/lib/librte_pmd_fm10k/fm10k_ethdev.c b/lib/librte_pmd_fm10k/fm10k_ethdev.c
index 0c7a80c..acde928 100644
--- a/lib/librte_pmd_fm10k/fm10k_ethdev.c
+++ b/lib/librte_pmd_fm10k/fm10k_ethdev.c
@@ -1843,7 +1843,7 @@ static struct rte_pci_id pci_id_fm10k_map[] = {
 };
 
 static struct eth_driver rte_pmd_fm10k = {
-	{
+	.pci_drv = {
 		.name = "rte_pmd_fm10k",
 		.id_table = pci_id_fm10k_map,
 		.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
-- 
2.1.4

^ permalink raw reply	[relevance 1%]

* Re: [dpdk-dev] tools brainstorming
  2015-03-20 14:51  4% [dpdk-dev] tools brainstorming Thomas Monjalon
                   ` (3 preceding siblings ...)
  2015-03-23  8:41  0% ` Cao, Waterman
@ 2015-03-23 16:18  0% ` Mcnamara, John
    5 siblings, 0 replies; 200+ results
From: Mcnamara, John @ 2015-03-23 16:18 UTC (permalink / raw)
  To: Thomas Monjalon, dev

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> Sent: Friday, March 20, 2015 2:51 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] tools brainstorming
> 
> As we are lazy developers, writing guidelines is not enough. It must be
> coupled with the integration of some tools. Let's work on these ones:
> 	- make autotests easier and faster to run for smoke testing
> 	- automated basic testpmd check
> 	- build check with various options combinations
> 	- abi check (started with validate-abi.sh)
> 	- static analyze (clang, free online coverity)


Hi,

There was a discussion about using Coverity several months ago and Vincent set up an initial project.

    http://dpdk.org/ml/archives/dev/2014-August/004962.html

I created a similar Coverity project and have been uploading snapshots since January and comparing the results to a similar tool we use internally (it compares well). 

If anyone would like to view the analysis results you can sign up to Coverity and apply for access to the DPDK project. 

    https://scan.coverity.com/users/sign_up
    https://scan.coverity.com/projects/4005 (DPDK)

Alternatively, drop me an email and I'll send you an invite. Apply as Contributor/Member if you plan to review/close issues or as Defect Viewer if you just wish to see the issues.

Like any static analysis tool there may be false positives. These can be flagged as such.

John
-- 

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH] ethdev: additional parameter in RX callback
  2015-03-23 15:16  0%                   ` Thomas Monjalon
  2015-03-23 15:29  0%                     ` Bruce Richardson
@ 2015-03-23 16:00  3%                     ` Neil Horman
  2015-03-30 19:52  0%                       ` Thomas Monjalon
  1 sibling, 1 reply; 200+ results
From: Neil Horman @ 2015-03-23 16:00 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

On Mon, Mar 23, 2015 at 04:16:36PM +0100, Thomas Monjalon wrote:
> 2015-03-13 19:15, Neil Horman:
> > On Fri, Mar 13, 2015 at 06:28:31PM +0000, Mcnamara, John wrote:
> > > From: Neil Horman [mailto:nhorman@tuxdriver.com]
> > > > 
> > > > > Is encoding the information in the array really a better solution here?
> > > > The cb->param already exists for passing in user defined information to
> > > > the callback. The proposed patch merely transmits the parent function
> > > > arguments to the enclosed callback.
> > > > >
> > > > The cb->param can't be used here, because its opaque to the internals of
> > > > the DPDK.  rte_eth_rx_burst doesn't (and can't) know where in the cb-
> > > > >params pointer to store that information.  Thats why you added an
> > > > additional parameter in the first place, isn't it?
> > > 
> > > Yes. That is correct.
> > > 
> > Then why did you suggest doing so?
> > 
> > > > My point is that using
> > > > an array terminator keeps us out of this habbit of just adding parameters
> > > > to communicate more information (as thats an ABI breaking method, and not
> > > > particularly scalable if there is more information to be transmitted in
> > > > the future).  Using a context sensitive API set goes beyond even that, and
> > > > allows to retrieve arbitrary information form callbacks as needed in an
> > > > ABI safe manner
> > > 
> > > Again I can agree with this in the general case, but it isn't necessary,
> > > in this case, to encode the information in the array since it is already
> > > local to and available in the function. It seems artificial, at this point,
> > > to implement an array terminator solution to protect an API that,
> > > effectively, hasn't been published yet.
> > > 
> > You indicate that you agree an alternate solution is preferable in the general
> > case, so as to provide an API that is extensible in a way that isn't subject to
> > ABI breakage, correct?  If so, why do assert that its not necessecary in this
> > specific case?  If you feel you need to add information so that callbacks can be
> > more flexible (in this case specifying the size of a passed in array), why
> > immediately shoehorn another parmeter in place, and break the consistency
> > between rx and tx callbacks, when you don't have to?  I don't care if you break
> > ABI today (although to call it unpublished I think is disingenuous, as lots of
> > testing and development has already taken place with the ABI as it currently
> > stands).  I care, as I noted above about not getting into the habbit of just
> > assuming a change like this requires that you invaliate ABI somehow.  You don't
> > have to, you can create an API that is fairly invariant to it here if you like.
> > The question in my mind is, why don't you?
> 
> I think John is saying that the API of rte_eth_rx_burst() already includes
> the nb_pkts parameter. So it's natural to push it to the callback.
> I also think Neil is saying that this parameter is useless in the callback
> and in rte_eth_rx_burst() if the array was null terminated.
> In any case, having a mix (null termination + parameter in rte_eth_rx_burst)
> is not acceptable.
> Moreover, I wonder how efficient are the compiler optimizations in each loop
> case (index and null termination).
> 
> As the API was using an integer count, my opinion is to keep it and push it to
> the callback for 2.0.
> If null termination is validated to be better, it could be a later rework.
> 

I'm fine with this if that's the consensus. I'm more interested in making sure we
think about these problems in such a way that we're not just running from ABI
issues, because we're eventually going to have to deal with them.
Neil

> Is there something I'm missing?
> Thoughts?
> 

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH] ethdev: additional parameter in RX callback
  2015-03-23 15:16  0%                   ` Thomas Monjalon
@ 2015-03-23 15:29  0%                     ` Bruce Richardson
  2015-03-23 16:00  3%                     ` Neil Horman
  1 sibling, 0 replies; 200+ results
From: Bruce Richardson @ 2015-03-23 15:29 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

On Mon, Mar 23, 2015 at 04:16:36PM +0100, Thomas Monjalon wrote:
> 2015-03-13 19:15, Neil Horman:
> > On Fri, Mar 13, 2015 at 06:28:31PM +0000, Mcnamara, John wrote:
> > > From: Neil Horman [mailto:nhorman@tuxdriver.com]
> > > > 
> > > > > Is encoding the information in the array really a better solution here?
> > > > The cb->param already exists for passing in user defined information to
> > > > the callback. The proposed patch merely transmits the parent function
> > > > arguments to the enclosed callback.
> > > > >
> > > > The cb->param can't be used here, because its opaque to the internals of
> > > > the DPDK.  rte_eth_rx_burst doesn't (and can't) know where in the cb-
> > > > >params pointer to store that information.  Thats why you added an
> > > > additional parameter in the first place, isn't it?
> > > 
> > > Yes. That is correct.
> > > 
> > Then why did you suggest doing so?
> > 
> > > > My point is that using
> > > > an array terminator keeps us out of this habbit of just adding parameters
> > > > to communicate more information (as thats an ABI breaking method, and not
> > > > particularly scalable if there is more information to be transmitted in
> > > > the future).  Using a context sensitive API set goes beyond even that, and
> > > > allows to retrieve arbitrary information form callbacks as needed in an
> > > > ABI safe manner
> > > 
> > > Again I can agree with this in the general case, but it isn't necessary,
> > > in this case, to encode the information in the array since it is already
> > > local to and available in the function. It seems artificial, at this point,
> > > to implement an array terminator solution to protect an API that,
> > > effectively, hasn't been published yet.
> > > 
> > You indicate that you agree an alternate solution is preferable in the general
> > case, so as to provide an API that is extensible in a way that isn't subject to
> > ABI breakage, correct?  If so, why do assert that its not necessecary in this
> > specific case?  If you feel you need to add information so that callbacks can be
> > more flexible (in this case specifying the size of a passed in array), why
> > immediately shoehorn another parmeter in place, and break the consistency
> > between rx and tx callbacks, when you don't have to?  I don't care if you break
> > ABI today (although to call it unpublished I think is disingenuous, as lots of
> > testing and development has already taken place with the ABI as it currently
> > stands).  I care, as I noted above about not getting into the habbit of just
> > assuming a change like this requires that you invaliate ABI somehow.  You don't
> > have to, you can create an API that is fairly invariant to it here if you like.
> > The question in my mind is, why don't you?
> 
> I think John is saying that the API of rte_eth_rx_burst() already includes
> the nb_pkts parameter. So it's natural to push it to the callback.
> I also think Neil is saying that this parameter is useless in the callback
> and in rte_eth_rx_burst() if the array was null terminated.
> In any case, having a mix (null termination + parameter in rte_eth_rx_burst)
> is not acceptable.
> Moreover, I wonder how efficient are the compiler optimizations in each loop
> case (index and null termination).

Compiler can't optimize/unroll the loop in the null termination case. For the
passing-the-size through option, in any app where the RX buffer size is constant,
i.e. probably a lot of them - like in our examples, the compiler can do loop
unrolling, and possibly other optimizations on the known value. [Whether it chooses
to or not is not something we have tested :-)]

/Bruce

> 
> As the API was using an integer count, my opinion is to keep it and push it to
> the callback for 2.0.
> If null termination is validated to be better, it could be a later rework.
> 
> Is there something I'm missing?
> Thoughts?

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH] ethdev: additional parameter in RX callback
  2015-03-13 23:15  5%                 ` Neil Horman
@ 2015-03-23 15:16  0%                   ` Thomas Monjalon
  2015-03-23 15:29  0%                     ` Bruce Richardson
  2015-03-23 16:00  3%                     ` Neil Horman
  0 siblings, 2 replies; 200+ results
From: Thomas Monjalon @ 2015-03-23 15:16 UTC (permalink / raw)
  To: Neil Horman, Mcnamara, John; +Cc: dev

2015-03-13 19:15, Neil Horman:
> On Fri, Mar 13, 2015 at 06:28:31PM +0000, Mcnamara, John wrote:
> > From: Neil Horman [mailto:nhorman@tuxdriver.com]
> > > 
> > > > Is encoding the information in the array really a better solution here?
> > > The cb->param already exists for passing in user defined information to
> > > the callback. The proposed patch merely transmits the parent function
> > > arguments to the enclosed callback.
> > > >
> > > The cb->param can't be used here, because its opaque to the internals of
> > > the DPDK.  rte_eth_rx_burst doesn't (and can't) know where in the cb-
> > > >params pointer to store that information.  Thats why you added an
> > > additional parameter in the first place, isn't it?
> > 
> > Yes. That is correct.
> > 
> Then why did you suggest doing so?
> 
> > > My point is that using
> > > an array terminator keeps us out of this habbit of just adding parameters
> > > to communicate more information (as thats an ABI breaking method, and not
> > > particularly scalable if there is more information to be transmitted in
> > > the future).  Using a context sensitive API set goes beyond even that, and
> > > allows to retrieve arbitrary information form callbacks as needed in an
> > > ABI safe manner
> > 
> > Again I can agree with this in the general case, but it isn't necessary,
> > in this case, to encode the information in the array since it is already
> > local to and available in the function. It seems artificial, at this point,
> > to implement an array terminator solution to protect an API that,
> > effectively, hasn't been published yet.
> > 
> You indicate that you agree an alternate solution is preferable in the general
> case, so as to provide an API that is extensible in a way that isn't subject to
> ABI breakage, correct?  If so, why do assert that its not necessecary in this
> specific case?  If you feel you need to add information so that callbacks can be
> more flexible (in this case specifying the size of a passed in array), why
> immediately shoehorn another parmeter in place, and break the consistency
> between rx and tx callbacks, when you don't have to?  I don't care if you break
> ABI today (although to call it unpublished I think is disingenuous, as lots of
> testing and development has already taken place with the ABI as it currently
> stands).  I care, as I noted above about not getting into the habbit of just
> assuming a change like this requires that you invaliate ABI somehow.  You don't
> have to, you can create an API that is fairly invariant to it here if you like.
> The question in my mind is, why don't you?

I think John is saying that the API of rte_eth_rx_burst() already includes
the nb_pkts parameter. So it's natural to push it to the callback.
I also think Neil is saying that this parameter is useless in the callback
and in rte_eth_rx_burst() if the array was null terminated.
In any case, having a mix (null termination + parameter in rte_eth_rx_burst)
is not acceptable.
Moreover, I wonder how efficient are the compiler optimizations in each loop
case (index and null termination).

As the API was using an integer count, my opinion is to keep it and push it to
the callback for 2.0.
If null termination is validated to be better, it could be a later rework.

Is there something I'm missing?
Thoughts?

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] tools brainstorming
  2015-03-20 14:51  4% [dpdk-dev] tools brainstorming Thomas Monjalon
                   ` (2 preceding siblings ...)
  2015-03-20 15:18  0% ` Simon Kågström
@ 2015-03-23  8:41  0% ` Cao, Waterman
  2015-03-23 16:18  0% ` Mcnamara, John
    5 siblings, 0 replies; 200+ results
From: Cao, Waterman @ 2015-03-23  8:41 UTC (permalink / raw)
  To: Thomas Monjalon, dev

On 2015/3/20 22:52, Thomas Monjalon wrote:
> Hi,
>
> As you probably know, a MAINTAINERS file is being filled, which is a great
> help to request patch reviews and discuss design with the knowledgeable people
> of this young DPDK community:
> 	http://dpdk.org/browse/dpdk/tree/MAINTAINERS
>
> The next step is to clearly define what are the guidelines to review a patch
> and accept it. So let's write a new document CONTRIBUTING (or another
> capitalized file ;). It will help contributors to do the right checks
> before submitting, and will help reviewers.
>
> As we are lazy developers, writing guidelines is not enough. It must be
> coupled with the integration of some tools. Let's work on these ones:
> 	- make autotests easier and faster to run for smoke testing
> 	- automated basic testpmd check
> 	- build check with various options combinations
> 	- abi check (started with validate-abi.sh)
> 	- static analyze (clang, free online coverity)
> 	- comment check (doxygen, codespell, kerspell)
> 	- format check (customized checkpatch)
>
> I'm sure this last item will trigger a lot of debate.
> Actually, format checking can be of two kinds:
> 	- commit message formatting (how to write the title, how and when adding
> 	Fixes tag, Signed-off-by tag, etc);
> 	- coding style might deserve its own document.
>
> At the end, we should be able to pass a "make check" on the whole code and
> a "make checkpatch" before submitting.
> Then the result of these tools could be automatically checked and displayed
> in patchwork or in an adapted version of qemu's patchew. But this is
> obviously a later step.
> When all automatic lights are green and human design review is properly done,
> the patch can be acknowledged by one or many reviewers. Speaking about that,
> it would be helpful to have a column in our patchwork to summarize the counts
> of tests, reviews and acknowledgements.
>
> Comments and contributions are more than welcome!
>
Hi Thomas,

     It's a good idea to check a patch before merging it into a branch.
     We can perform a basic test on each patch and improve the quality of
patches.
    
    As you know, the Intel DPDK test team currently maintains an automated
test tool to perform build checks and smoke tests on a lot of mainstream
platforms.
    It will be a good chance to share this knowledge with the whole DPDK
community.

- Daily Build Test
     So far, Intel test team run daily build test on CentOS6.5, Fedora
18/20/21, RedHat 6.5/7.0, SUSE 11 SP2/SP3, Ubuntu 12/14, Oracle Linux
6.4 and FreeBSD 10.
     In addition, we also verified with different compilers, kernels and
DPDK build options.
     Our daily build test focuses on the master branch and only
monitors the latest code changes.
     Maybe we don't need to check so many OSes for each patch; we could just
make a quick build check with a short list.
     We can share our build script with contributors/maintainers. They
can use it to verify their patch sets.

- Automated Smoke Test
   Based on DTS (DPDK test suite), we have already built up an automated smoke
test on FC16/18/20/21, Ubuntu and Red Hat. It's composed of unit tests
and functional tests for the DPDK sample applications.
    I think that it's easy to build up a per-patch automated smoke
test; we just need to define which test cases should be included in the
list, and make sure it can complete in the shortest time.

- Bug Tracking
    During our test cycle, we found some defects in release candidates.
But it's difficult to track/report them without a public bug tool.
    It would be really helpful to have one formal tool to manage this
information and speed up bug fixing.
   
In addition, I think that patchwork is a good tool, which provides a
place to show the test results for each patch.
But patchwork is focused on the patch level; we need to think about how to
test the latest code branch at the package level.
Finally, we are eager to share our validation experience with the DPDK
community.
We would like to contribute tools and scripts, and help to improve
the quality of DPDK releases.

regards

Waterman
 
   

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] tools brainstorming
  2015-03-20 14:51  4% [dpdk-dev] tools brainstorming Thomas Monjalon
  2015-03-20 15:07  0% ` Butler, Siobhan A
  2015-03-20 15:16  3% ` Neil Horman
@ 2015-03-20 15:18  0% ` Simon Kågström
  2015-03-23  8:41  0% ` Cao, Waterman
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 200+ results
From: Simon Kågström @ 2015-03-20 15:18 UTC (permalink / raw)
  To: dev

On 2015-03-20 15:51, Thomas Monjalon wrote:
> As we are lazy developers, writing guidelines is not enough. It must be
> coupled with the integration of some tools. Let's work on these ones:
> 	- make autotests easier and faster to run for smoke testing
> 	- automated basic testpmd check
> 	- build check with various options combinations
> 	- abi check (started with validate-abi.sh)
> 	- static analyze (clang, free online coverity)
> 	- comment check (doxygen, codespell, kerspell)
> 	- format check (customized checkpatch)

Code coverage for automated tests can be useful as well.

In a way I'm speaking in my own interests here since I've written a tool
to do just this (and produce nice HTML etc output), kcov, that can be
found at github (https://github.com/SimonKagstrom/kcov).

// Simon

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] tools brainstorming
  2015-03-20 14:51  4% [dpdk-dev] tools brainstorming Thomas Monjalon
  2015-03-20 15:07  0% ` Butler, Siobhan A
@ 2015-03-20 15:16  3% ` Neil Horman
  2015-03-20 15:18  0% ` Simon Kågström
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 200+ results
From: Neil Horman @ 2015-03-20 15:16 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

On Fri, Mar 20, 2015 at 03:51:11PM +0100, Thomas Monjalon wrote:
> Hi,
> 
> As you probably know, a MAINTAINERS file is being filled, which is a great
> help to request patch reviews and discuss design with the knowledgeable people
> of this young DPDK community:
> 	http://dpdk.org/browse/dpdk/tree/MAINTAINERS
> 
> The next step is to clearly define what are the guidelines to review a patch
> and accept it. So let's write a new document CONTRIBUTING (or another
> capitalized file ;). It will help contributors to do the right checks
> before submitting, and will help reviewers.
> 
+100.  This is a great idea.  A few thoughts.

> As we are lazy developers, writing guidelines is not enough. It must be
> coupled with the integration of some tools. Let's work on these ones:
> 	- make autotests easier and faster to run for smoke testing
> 	- automated basic testpmd check
> 	- build check with various options combinations
The kernel does this with some special make targets (make allyesconfig, make
randconfig, etc).  They basically act as build time fuzzers and are very useful.
I'm not sure that the DPDK build system is really conducive to that yet though,
since it's made up of static configuration files.  This may require some
build environment changes.

> 	- abi check (started with validate-abi.sh)
This will need continued improvement, as it is currently a very interactive
tool. I'm not sure that in its current form it will ever be fully automated, save
for perhaps being able to give you a boolean response (yes, ABI is compatible,
or no it is not).


> 	- static analyze (clang, free online coverity)
> 	- comment check (doxygen, codespell, kerspell)
> 	- format check (customized checkpatch)
> 
> I'm sure this last item will trigger a lot of debate.
> Actually, format checking can be of two kinds:
> 	- commit message formatting (how to write the title, how and when adding
> 	Fixes tag, Signed-off-by tag, etc);
> 	- coding style might deserve its own document.
> 

I think both of these are worthwhile, especially if they're not too egregious in
terms of work overhead.  A coding style is pretty common to enforce.  Commit
messages are a bit less so, but it would be reasonable to do some simple things,
like add a subsystem tag, signed off line, etc.

> At the end, we should be able to pass a "make check" on the whole code and
> a "make checkpatch" before submitting.
> Then the result of these tools could be automatically checked and displayed
> in patchwork or in an adapted version of qemu's patchew. But this is
> obviously a later step.
> When all automatic lights are green and human design review is properly done,
> the patch can be acknowledged by one or many reviewers. Speaking about that,
> it would be helpful to have a column in our patchwork to summarize the counts
> of tests, reviews and acknowledgements.
> 
> Comments and contributions are more than welcome!
> 

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] tools brainstorming
  2015-03-20 14:51  4% [dpdk-dev] tools brainstorming Thomas Monjalon
@ 2015-03-20 15:07  0% ` Butler, Siobhan A
  2015-03-20 15:16  3% ` Neil Horman
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 200+ results
From: Butler, Siobhan A @ 2015-03-20 15:07 UTC (permalink / raw)
  To: Thomas Monjalon, dev



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> Sent: Friday, March 20, 2015 2:51 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] tools brainstorming
> 
> Hi,
> 
> As you probably know, a MAINTAINERS file is being filled, which is a great
> help to request patch reviews and discuss design with the knowledgeable
> people of this young DPDK community:
> 	http://dpdk.org/browse/dpdk/tree/MAINTAINERS
> 
> The next step is to clearly define what are the guidelines to review a patch
> and accept it. So let's write a new document CONTRIBUTING (or another
> capitalized file ;). It will help contributors to do the right checks before
> submitting, and will help reviewers.
> 
> As we are lazy developers, writing guidelines is not enough. It must be
> coupled with the integration of some tools. Let's work on these ones:
> 	- make autotests easier and faster to run for smoke testing
> 	- automated basic testpmd check
> 	- build check with various options combinations
> 	- abi check (started with validate-abi.sh)
> 	- static analyze (clang, free online coverity)
> 	- comment check (doxygen, codespell, kerspell)
> 	- format check (customized checkpatch)

This is a great list Thomas, totally agree with you we need some guidelines,
and some ways of automating basic checks to catch basic issues,
save time and traffic on the mailing list.

I propose we also add a bug tracking tool (e.g. Bugzilla or other).

And also a standalone page/document/archive of FAQ's.

> 
> I'm sure this last item will trigger a lot of debate.
> Actually, format checking can be of two kinds:
> 	- commit message formatting (how to write the title, how and when
> adding
> 	Fixes tag, Signed-off-by tag, etc);
> 	- coding style might deserve its own document.
> 
> At the end, we should be able to pass a "make check" on the whole code and
> a "make checkpatch" before submitting.
> Then the result of these tools could be automatically checked and displayed
> in patchwork or in an adapted version of qemu's patchew. But this is
> obviously a later step.
> When all automatic lights are green and human design review is properly
> done, the patch can be acknowledged by one or many reviewers. Speaking
> about that, it would be helpful to have a column in our patchwork to
> summarize the counts of tests, reviews and acknowledgements.
> 
> Comments and contributions are more than welcome!

^ permalink raw reply	[relevance 0%]

* [dpdk-dev] tools brainstorming
@ 2015-03-20 14:51  4% Thomas Monjalon
  2015-03-20 15:07  0% ` Butler, Siobhan A
                   ` (5 more replies)
  0 siblings, 6 replies; 200+ results
From: Thomas Monjalon @ 2015-03-20 14:51 UTC (permalink / raw)
  To: dev

Hi,

As you probably know, a MAINTAINERS file is being filled, which is a great
help to request patch reviews and discuss design with the knowledgeable people
of this young DPDK community:
	http://dpdk.org/browse/dpdk/tree/MAINTAINERS

The next step is to clearly define what are the guidelines to review a patch
and accept it. So let's write a new document CONTRIBUTING (or another
capitalized file ;). It will help contributors to do the right checks
before submitting, and will help reviewers.

As we are lazy developers, writing guidelines is not enough. It must be
coupled with the integration of some tools. Let's work on these ones:
	- make autotests easier and faster to run for smoke testing
	- automated basic testpmd check
	- build check with various options combinations
	- abi check (started with validate-abi.sh)
	- static analyze (clang, free online coverity)
	- comment check (doxygen, codespell, kerspell)
	- format check (customized checkpatch)

I'm sure this last item will trigger a lot of debate.
Actually, format checking can be of two kinds:
	- commit message formatting (how to write the title, how and when adding
	Fixes tag, Signed-off-by tag, etc);
	- coding style might deserve its own document.

At the end, we should be able to pass a "make check" on the whole code and
a "make checkpatch" before submitting.
Then the result of these tools could be automatically checked and displayed
in patchwork or in an adapted version of qemu's patchew. But this is
obviously a later step.
When all automatic lights are green and human design review is properly done,
the patch can be acknowledged by one or many reviewers. Speaking about that,
it would be helpful to have a column in our patchwork to summarize the counts
of tests, reviews and acknowledgements.

Comments and contributions are more than welcome!

^ permalink raw reply	[relevance 4%]

* [dpdk-dev] [dpdk-announce] release candidate 2.0.0-rc2
@ 2015-03-17 22:43  3% Thomas Monjalon
  0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2015-03-17 22:43 UTC (permalink / raw)
  To: announce

A new DPDK release candidate is ready for testing:
	http://dpdk.org/browse/dpdk/tag/?id=v2.0.0-rc2

We are approaching the planned release date (end of the month).
It means that we must prioritize work on doc, important fixes and review
of pending fixes. Some build issues remain in some environments.
Please help especially on reviews:
	http://dpdk.org/dev/patchwork

Changelog (main changes since 2.0.0-rc1)
	- enhancements:
		* enable big contigmem blocks in BSD
		* i40e TSO
		* remove static tailqs from EAL
		* ABI checking utility
	- fixes for:
		* doc
		* build
		* memory leaks
		* devargs
		* kvargs
		* port hotplug
		* vhost
		* virtio
		* ixgbe preconditions
		* ixgbe flow director
		* ixgbe endianness
		* ixgbe Rx CRC stripping for X540
		* ixgbevf Tx for X550
		* testpmd

Thank you

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v5] ABI: Add abi checking utility
  2015-03-17 18:08 29% ` [dpdk-dev] [PATCH v5] " Neil Horman
@ 2015-03-17 21:17  5%   ` Thomas Monjalon
  0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2015-03-17 21:17 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

2015-03-17 14:08, Neil Horman:
> There was a request for an abi validation utilty for the ongoing ABI stability
> work.  As it turns out there is a abi compliance checker in development that
> seems to be under active development and provides fairly detailed ABI compliance
> reports.  Its not yet intellegent enough to understand symbol versioning, but it
> does provide the ability to identify symbols which have changed between
> releases, along with details of the change, and offers developers the
> opportunity to identify which symbols then need versioning and validation for a
> given update via manual testing.
> 
> This script automates the use of the compliance checker between two arbitrarily
> specified tags within the dpdk tree.  To execute enter the $RTE_SDK directory
> and run:
> 
> ./scripts/validate_abi.sh $GIT_TAG1 $GIT_TAG2 $CONFIG
> 
> where $GIT_TAG1 and 2 are git tags and $CONFIG is a config specification
> suitable for passing as the T= variable in the make config command.
> 
> Note the upstream source for the abi compliance checker is here:
> http://ispras.linuxbase.org/index.php/ABI_compliance_checker
> 
> It generates a report for each DSO built from the requested tags that developers
> can review to find ABI compliance issues.
> 
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> ---
> 
> Change Notes:
> 
> v2) Fixed some typos as requested by Thomas
> 
> v3) Fixed some additional typos Thomas requested
>     Improved script to work from detached state
>     Added some documentation to the changelog
>     Added some comments to the scripts
> 
> v4) Remove duplicate exports.
>     Move restoration of starting branch/comit to cleanup_and_exit
> 
> v5) Fixed exit cleanup
>     Added MAINTAINERS entry

Acked-by: Thomas Monjalon <thomas.monjalon@6wind.com>

Applied, thanks

^ permalink raw reply	[relevance 5%]

* [dpdk-dev] [PATCH v5] ABI: Add abi checking utility
                     ` (2 preceding siblings ...)
  2015-03-13 14:09 17% ` [dpdk-dev] [PATCH v4] " Neil Horman
@ 2015-03-17 18:08 29% ` Neil Horman
  2015-03-17 21:17  5%   ` Thomas Monjalon
  3 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-03-17 18:08 UTC (permalink / raw)
  To: dev

There was a request for an abi validation utility for the ongoing ABI stability
work.  As it turns out there is an abi compliance checker in development that
seems to be under active development and provides fairly detailed ABI compliance
reports.  It's not yet intelligent enough to understand symbol versioning, but it
does provide the ability to identify symbols which have changed between
releases, along with details of the change, and offers developers the
opportunity to identify which symbols then need versioning and validation for a
given update via manual testing.

This script automates the use of the compliance checker between two arbitrarily
specified tags within the dpdk tree.  To execute enter the $RTE_SDK directory
and run:

./scripts/validate-abi.sh $GIT_TAG1 $GIT_TAG2 $CONFIG

where $GIT_TAG1 and 2 are git tags and $CONFIG is a config specification
suitable for passing as the T= variable in the make config command.

Note the upstream source for the abi compliance checker is here:
http://ispras.linuxbase.org/index.php/ABI_compliance_checker

It generates a report for each DSO built from the requested tags that developers
can review to find ABI compliance issues.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

---

Change Notes:

v2) Fixed some typos as requested by Thomas

v3) Fixed some additional typos Thomas requested
    Improved script to work from detached state
    Added some documentation to the changelog
    Added some comments to the scripts

v4) Remove duplicate exports.
    Move restoration of starting branch/commit to cleanup_and_exit

v5) Fixed exit cleanup
    Added MAINTAINERS entry
---
 MAINTAINERS             |   1 +
 scripts/validate-abi.sh | 245 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 246 insertions(+)
 create mode 100755 scripts/validate-abi.sh

diff --git a/MAINTAINERS b/MAINTAINERS
index 07fdf5e..fa309ff 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -59,6 +59,7 @@ ABI versioning
 M: Neil Horman <nhorman@tuxdriver.com>
 F: lib/librte_compat/
 F: doc/guides/rel_notes/abi.rst
+F: scripts/validate-abi.sh
 
 Environment Abstraction Layer
 -----------------------------
diff --git a/scripts/validate-abi.sh b/scripts/validate-abi.sh
new file mode 100755
index 0000000..369ea8a
--- /dev/null
+++ b/scripts/validate-abi.sh
@@ -0,0 +1,245 @@
+#!/bin/sh
+#   BSD LICENSE
+#
+#   Copyright(c) 2015 Neil Horman. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+TAG1=$1
+TAG2=$2
+TARGET=$3
+ABI_DIR=`mktemp -d -p /tmp ABI.XXXXXX`
+
+usage() {
+	echo "$0 <TAG1> <TAG2> <TARGET>"
+}
+
+log() {
+	local level=$1
+	shift
+	echo "$*"
+}
+
+validate_tags() {
+	git tag -l | grep -q "$TAG1"
+	if [ $? -ne 0 ]
+	then
+		echo "$TAG1 is invalid"
+		return
+	fi
+	git tag -l | grep -q "$TAG2"
+	if [ $? -ne 0 ]
+	then
+		echo "$TAG2 is invalid"
+		return
+	fi
+}
+
+validate_args() {
+	if [ -z "$TAG1" ]
+	then
+		echo "Must Specify TAG1"
+		return
+	fi
+	if [ -z "$TAG2" ]
+	then
+		echo "Must Specify TAG2"
+		return
+	fi
+	if [ -z "$TARGET" ]
+	then
+		echo "Must Specify a build target"
+	fi
+}
+
+
+cleanup_and_exit() {
+	rm -rf $ABI_DIR
+	git checkout $CURRENT_BRANCH
+	exit $1
+}
+
+###########################################
+#START
+############################################
+
+#trap on ctrl-c to clean up
+trap cleanup_and_exit SIGINT
+
+#Save the current branch
+CURRENT_BRANCH=`git branch | grep \* | cut -d' ' -f2`
+
+if [ -z "$CURRENT_BRANCH" ]
+then
+	CURRENT_BRANCH=`git log --pretty=format:%H HEAD~1..HEAD`
+fi
+
+if [ -n "$VERBOSE" ]
+then
+	export VERBOSE=/dev/stdout
+else
+	export VERBOSE=/dev/null
+fi
+
+# Validate that we have all the arguments we need
+res=$(validate_args)
+if [ -n "$res" ]
+then
+	echo $res
+	usage
+	cleanup_and_exit 1
+fi
+
+# Make sure our tags exist
+res=$(validate_tags)
+if [ -n "$res" ]
+then
+	echo $res
+	cleanup_and_exit 1
+fi
+
+ABICHECK=`which abi-compliance-checker 2>/dev/null`
+if [ $? -ne 0 ]
+then
+	log "INFO" "Cant find abi-compliance-checker utility"
+	cleanup_and_exit 1
+fi
+
+ABIDUMP=`which abi-dumper 2>/dev/null`
+if [ $? -ne 0 ]
+then
+	log "INFO" "Cant find abi-dumper utility"
+	cleanup_and_exit 1
+fi
+
+log "INFO" "We're going to check and make sure that applications built"
+log "INFO" "against DPDK DSOs from tag $TAG1 will still run when executed"
+log "INFO" "against DPDK DSOs built from tag $TAG2."
+log "INFO" ""
+
+# Check to make sure we have a clean tree
+git status | grep -q clean
+if [ $? -ne 0 ]
+then
+	log "WARN" "Working directory not clean, aborting"
+	cleanup_and_exit 1
+fi
+
+# Move to the root of the git tree
+cd $(dirname $0)/..
+
+log "INFO" "Checking out version $TAG1 of the dpdk"
+# Move to the old version of the tree
+git checkout $TAG1
+
+# Make sure we configure SHARED libraries
+# Also turn off IGB and KNI as those require kernel headers to build
+sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
+sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
+sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
+
+# Checking abi compliance relies on using the dwarf information in
+# The shared objects.  Thats only included in the DSO's if we build
+# with -g
+export EXTRA_CFLAGS=-g
+export EXTRA_LDFLAGS=-g
+
+# Now configure the build
+log "INFO" "Configuring DPDK $TAG1"
+make config T=$TARGET O=$TARGET > $VERBOSE 2>&1
+
+log "INFO" "Building DPDK $TAG1. This might take a moment"
+make O=$TARGET > $VERBOSE 2>&1
+
+if [ $? -ne 0 ]
+then
+	log "INFO" "THE BUILD FAILED.  ABORTING"
+	cleanup_and_exit 1
+fi
+
+# Move to the lib directory
+cd $TARGET/lib
+log "INFO" "COLLECTING ABI INFORMATION FOR $TAG1"
+for i in `ls *.so`
+do
+	$ABIDUMP $i -o $ABI_DIR/$i-ABI-0.dump -lver $TAG1
+done
+cd ../..
+
+# Now clean the tree, checkout the second tag, and rebuild
+git clean -f -d
+git reset --hard
+# Move to the new version of the tree
+log "INFO" "Checking out version $TAG2 of the dpdk"
+git checkout $TAG2
+
+# Make sure we configure SHARED libraries
+# Also turn off IGB and KNI as those require kernel headers to build
+sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
+sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
+sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
+
+# Now configure the build
+log "INFO" "Configuring DPDK $TAG2"
+make config T=$TARGET O=$TARGET > $VERBOSE 2>&1
+
+log "INFO" "Building DPDK $TAG2. This might take a moment"
+make O=$TARGET > $VERBOSE 2>&1
+
+if [ $? -ne 0 ]
+then
+	log "INFO" "THE BUILD FAILED.  ABORTING"
+	cleanup_and_exit 1
+fi
+
+cd $TARGET/lib
+log "INFO" "COLLECTING ABI INFORMATION FOR $TAG2"
+for i in `ls *.so`
+do
+	$ABIDUMP $i -o $ABI_DIR/$i-ABI-1.dump -lver $TAG2
+done
+cd ../..
+
+# Start comparison of ABI dumps
+for i in `ls $ABI_DIR/*-1.dump`
+do
+	NEWNAME=`basename $i`
+	OLDNAME=`basename $i | sed -e"s/1.dump/0.dump/"`
+	LIBNAME=`basename $i | sed -e"s/-ABI-1.dump//"`
+
+	if [ ! -f $ABI_DIR/$OLDNAME ]
+	then
+		log "INFO" "$OLDNAME DOES NOT EXIST IN $TAG1. SKIPPING..."
+	fi
+
+	#compare the abi dumps
+	$ABICHECK -l $LIBNAME -old $ABI_DIR/$OLDNAME -new $ABI_DIR/$NEWNAME
+done
+
+git reset --hard
+log "INFO" "ABI CHECK COMPLETE.  REPORTS ARE IN compat_report directory"
+cleanup_and_exit 0
+
+
-- 
2.1.0

^ permalink raw reply	[relevance 29%]

* Re: [dpdk-dev] [PATCH v4] ABI: Add abi checking utility
  2015-03-17 15:42  5%   ` Thomas Monjalon
  2015-03-17 16:47  9%     ` Thomas Monjalon
@ 2015-03-17 18:08  9%     ` Neil Horman
  1 sibling, 0 replies; 200+ results
From: Neil Horman @ 2015-03-17 18:08 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

On Tue, Mar 17, 2015 at 04:42:31PM +0100, Thomas Monjalon wrote:
> Hi Neil,
> 
> I tested this tool and I see few small improvements possible.
> 
I'll fix the bug you found, but I'm not going to go chasing every feature that
you happen to note.  Not saying they're not fine features, but I don't have time
to implement features that you happen to note might be nice to have, especially
not in time for 2.0. 

Regarding your question about report tolerance, it's not that kind of tool.  The
ABI checker simply calls a developer's attention to symbols that have
inadvertently changed due to code or data structure modifications.  It is
incumbent on the developer to make a well informed decision about how to handle
those changes (via deprecation/versioning/etc), and to defend his/her reasoning.

Neil

> 2015-03-13 10:09, Neil Horman:
> > There was a request for an abi validation utilty for the ongoing ABI stability
>                                             utility
> > work.  As it turns out there is a abi compliance checker in development that
> > seems to be under active development and provides fairly detailed ABI compliance
> > reports.  Its not yet intellegent enough to understand symbol versioning, but it
>                         intelligent
> > does provide the ability to identify symbols which have changed between
> > releases, along with details of the change, and offers developers the
> > opportunity to identify which symbols then need versioning and validation for a
> > given update via manual testing.
> > 
> > This script automates the use of the compliance checker between two arbitrarily
> > specified tags within the dpdk tree.  To execute enter the $RTE_SDK directory
> > and run:
> > 
> > ./scripts/validate_abi.sh $GIT_TAG1 $GIT_TAG2 $CONFIG
> > 
> > where $GIT_TAG1 and 2 are git tags and $CONFIG is a config specification
> > suitable for passing as the T= variable in the make config command.
> > 
> > Note the upstream source for the abi compliance checker is here:
> > http://ispras.linuxbase.org/index.php/ABI_compliance_checker
> > 
> > It generates a report for each DSO built from the requested tags that developers
> > can review to find ABI compliance issues.
> > 
> > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> > 
> > ---
> > 
> > Change Notes:
> > 
> > v2) Fixed some typos as requested by Thomas
> > 
> > v3) Fixed some additional typos Thomas requested
> >     Improved script to work from detached state
> >     Added some documentation to the changelog
> >     Added some comments to the scripts
> > 
> > v4) Remove duplicate exports.
> >     Move restoration of starting branch/comit to cleanup_and_exit
> > ---
> [...]
> > +TAG1=$1
> > +TAG2=$2
> > +TARGET=$3
> > +ABI_DIR=`mktemp -d -p /tmp ABI.XXXXXX`
> 
> +JOBS=$(grep -c '^processor' /proc/cpuinfo)
> 
> [...]
> > +cleanup_and_exit() {
> > +	rm -rf $ABI_DIR
> > +	exit $1
> > +	git checkout $CURRENT_BRANCH
> 
> Checkout is never done because of previous exit.
> 
> > +}
> [...]
> > +log "INFO" "Checking out version $TAG1 of the dpdk"
> > +# Move to the old version of the tree
> > +git checkout $TAG1
> 
> What about -q for quiet mode?
> 
> [...]
> > +log "INFO" "Building DPDK $TAG1. This might take a moment"
> > +make O=$TARGET > $VERBOSE 2>&1
> 
> -j$JOBS would improve building time
> 
> [...]
> > +# Move to the new version of the tree
> > +log "INFO" "Checking out version $TAG2 of the dpdk"
> > +git checkout $TAG2
> 
> -q ?
> 
> [...]
> > +log "INFO" "Building DPDK $TAG2. This might take a moment"
> > +make O=$TARGET > $VERBOSE 2>&1
> 
> -j ?
> 
> [...]
> > +# Start comparison of ABI dumps
> > +for i in `ls $ABI_DIR/*-1.dump`
> > +do
> > +	NEWNAME=`basename $i`
> > +	OLDNAME=`basename $i | sed -e"s/1.dump/0.dump/"`
> > +	LIBNAME=`basename $i | sed -e"s/-ABI-1.dump//"`
> > +
> > +	if [ ! -f $ABI_DIR/$OLDNAME ]
> > +	then
> > +		log "INFO" "$OLDNAME DOES NOT EXIST IN $TAG1. SKIPPING..."
> > +	fi
> > +
> > +	#compare the abi dumps
> > +	$ABICHECK -l $LIBNAME -old $ABI_DIR/$OLDNAME -new $ABI_DIR/$NEWNAME
> > +done
> 
> It would be more convenient to generate an HTML index giving access to every
> reports for every DSOs.
> 
> > +
> > +git reset --hard
> > +log "INFO" "ABI CHECK COMPLETE.  REPORTS ARE IN compat_report directory"
> > +cleanup_and_exit 0
> 
> After reading the report, it's not clear what would be tolerated or not.
> Should we forbid every defects?
> 
> 

^ permalink raw reply	[relevance 9%]

* Re: [dpdk-dev] [PATCH v4] ABI: Add abi checking utility
  2015-03-17 15:42  5%   ` Thomas Monjalon
@ 2015-03-17 16:47  9%     ` Thomas Monjalon
  2015-03-17 18:08  9%     ` Neil Horman
  1 sibling, 0 replies; 200+ results
From: Thomas Monjalon @ 2015-03-17 16:47 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

More comments:
Please rename to validate-abi.sh (with a hyphen) to be more consistent with
other scripts.
Please add it in the MAINTAINERS file.

Thanks

2015-03-17 16:42, Thomas Monjalon:
> Hi Neil,
> 
> I tested this tool and I see few small improvements possible.
> 
> 2015-03-13 10:09, Neil Horman:
> > There was a request for an abi validation utilty for the ongoing ABI stability
>                                             utility
> > work.  As it turns out there is a abi compliance checker in development that
> > seems to be under active development and provides fairly detailed ABI compliance
> > reports.  Its not yet intellegent enough to understand symbol versioning, but it
>                         intelligent
> > does provide the ability to identify symbols which have changed between
> > releases, along with details of the change, and offers developers the
> > opportunity to identify which symbols then need versioning and validation for a
> > given update via manual testing.
> > 
> > This script automates the use of the compliance checker between two arbitrarily
> > specified tags within the dpdk tree.  To execute enter the $RTE_SDK directory
> > and run:
> > 
> > ./scripts/validate_abi.sh $GIT_TAG1 $GIT_TAG2 $CONFIG
> > 
> > where $GIT_TAG1 and 2 are git tags and $CONFIG is a config specification
> > suitable for passing as the T= variable in the make config command.
> > 
> > Note the upstream source for the abi compliance checker is here:
> > http://ispras.linuxbase.org/index.php/ABI_compliance_checker
> > 
> > It generates a report for each DSO built from the requested tags that developers
> > can review to find ABI compliance issues.
> > 
> > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> > 
> > ---
> > 
> > Change Notes:
> > 
> > v2) Fixed some typos as requested by Thomas
> > 
> > v3) Fixed some additional typos Thomas requested
> >     Improved script to work from detached state
> >     Added some documentation to the changelog
> >     Added some comments to the scripts
> > 
> > v4) Remove duplicate exports.
> >     Move restoration of starting branch/comit to cleanup_and_exit
> > ---
> [...]
> > +TAG1=$1
> > +TAG2=$2
> > +TARGET=$3
> > +ABI_DIR=`mktemp -d -p /tmp ABI.XXXXXX`
> 
> +JOBS=$(grep -c '^processor' /proc/cpuinfo)
> 
> [...]
> > +cleanup_and_exit() {
> > +	rm -rf $ABI_DIR
> > +	exit $1
> > +	git checkout $CURRENT_BRANCH
> 
> Checkout is never done because of previous exit.
> 
> > +}
> [...]
> > +log "INFO" "Checking out version $TAG1 of the dpdk"
> > +# Move to the old version of the tree
> > +git checkout $TAG1
> 
> What about -q for quiet mode?
> 
> [...]
> > +log "INFO" "Building DPDK $TAG1. This might take a moment"
> > +make O=$TARGET > $VERBOSE 2>&1
> 
> -j$JOBS would improve building time
> 
> [...]
> > +# Move to the new version of the tree
> > +log "INFO" "Checking out version $TAG2 of the dpdk"
> > +git checkout $TAG2
> 
> -q ?
> 
> [...]
> > +log "INFO" "Building DPDK $TAG2. This might take a moment"
> > +make O=$TARGET > $VERBOSE 2>&1
> 
> -j ?
> 
> [...]
> > +# Start comparison of ABI dumps
> > +for i in `ls $ABI_DIR/*-1.dump`
> > +do
> > +	NEWNAME=`basename $i`
> > +	OLDNAME=`basename $i | sed -e"s/1.dump/0.dump/"`
> > +	LIBNAME=`basename $i | sed -e"s/-ABI-1.dump//"`
> > +
> > +	if [ ! -f $ABI_DIR/$OLDNAME ]
> > +	then
> > +		log "INFO" "$OLDNAME DOES NOT EXIST IN $TAG1. SKIPPING..."
> > +	fi
> > +
> > +	#compare the abi dumps
> > +	$ABICHECK -l $LIBNAME -old $ABI_DIR/$OLDNAME -new $ABI_DIR/$NEWNAME
> > +done
> 
> It would be more convenient to generate an HTML index giving access to every
> reports for every DSOs.
> 
> > +
> > +git reset --hard
> > +log "INFO" "ABI CHECK COMPLETE.  REPORTS ARE IN compat_report directory"
> > +cleanup_and_exit 0
> 
> After reading the report, it's not clear what would be tolerated or not.
> Should we forbid every defects?
> 

^ permalink raw reply	[relevance 9%]

* Re: [dpdk-dev] [PATCH v4] ABI: Add abi checking utility
  2015-03-13 14:09 17% ` [dpdk-dev] [PATCH v4] " Neil Horman
@ 2015-03-17 15:42  5%   ` Thomas Monjalon
  2015-03-17 16:47  9%     ` Thomas Monjalon
  2015-03-17 18:08  9%     ` Neil Horman
  0 siblings, 2 replies; 200+ results
From: Thomas Monjalon @ 2015-03-17 15:42 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

Hi Neil,

I tested this tool and I see a few small improvements possible.

2015-03-13 10:09, Neil Horman:
> There was a request for an abi validation utilty for the ongoing ABI stability
                                            utility
> work.  As it turns out there is a abi compliance checker in development that
> seems to be under active development and provides fairly detailed ABI compliance
> reports.  Its not yet intellegent enough to understand symbol versioning, but it
                        intelligent
> does provide the ability to identify symbols which have changed between
> releases, along with details of the change, and offers developers the
> opportunity to identify which symbols then need versioning and validation for a
> given update via manual testing.
> 
> This script automates the use of the compliance checker between two arbitrarily
> specified tags within the dpdk tree.  To execute enter the $RTE_SDK directory
> and run:
> 
> ./scripts/validate_abi.sh $GIT_TAG1 $GIT_TAG2 $CONFIG
> 
> where $GIT_TAG1 and 2 are git tags and $CONFIG is a config specification
> suitable for passing as the T= variable in the make config command.
> 
> Note the upstream source for the abi compliance checker is here:
> http://ispras.linuxbase.org/index.php/ABI_compliance_checker
> 
> It generates a report for each DSO built from the requested tags that developers
> can review to find ABI compliance issues.
> 
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> 
> ---
> 
> Change Notes:
> 
> v2) Fixed some typos as requested by Thomas
> 
> v3) Fixed some additional typos Thomas requested
>     Improved script to work from detached state
>     Added some documentation to the changelog
>     Added some comments to the scripts
> 
> v4) Remove duplicate exports.
>     Move restoration of starting branch/comit to cleanup_and_exit
> ---
[...]
> +TAG1=$1
> +TAG2=$2
> +TARGET=$3
> +ABI_DIR=`mktemp -d -p /tmp ABI.XXXXXX`

+JOBS=$(grep -c '^processor' /proc/cpuinfo)

[...]
> +cleanup_and_exit() {
> +	rm -rf $ABI_DIR
> +	exit $1
> +	git checkout $CURRENT_BRANCH

Checkout is never done because of previous exit.

> +}
[...]
> +log "INFO" "Checking out version $TAG1 of the dpdk"
> +# Move to the old version of the tree
> +git checkout $TAG1

What about -q for quiet mode?

[...]
> +log "INFO" "Building DPDK $TAG1. This might take a moment"
> +make O=$TARGET > $VERBOSE 2>&1

-j$JOBS would improve building time

[...]
> +# Move to the new version of the tree
> +log "INFO" "Checking out version $TAG2 of the dpdk"
> +git checkout $TAG2

-q ?

[...]
> +log "INFO" "Building DPDK $TAG2. This might take a moment"
> +make O=$TARGET > $VERBOSE 2>&1

-j ?

[...]
> +# Start comparison of ABI dumps
> +for i in `ls $ABI_DIR/*-1.dump`
> +do
> +	NEWNAME=`basename $i`
> +	OLDNAME=`basename $i | sed -e"s/1.dump/0.dump/"`
> +	LIBNAME=`basename $i | sed -e"s/-ABI-1.dump//"`
> +
> +	if [ ! -f $ABI_DIR/$OLDNAME ]
> +	then
> +		log "INFO" "$OLDNAME DOES NOT EXIST IN $TAG1. SKIPPING..."
> +	fi
> +
> +	#compare the abi dumps
> +	$ABICHECK -l $LIBNAME -old $ABI_DIR/$OLDNAME -new $ABI_DIR/$NEWNAME
> +done

It would be more convenient to generate an HTML index giving access to every
report for every DSO.

> +
> +git reset --hard
> +log "INFO" "ABI CHECK COMPLETE.  REPORTS ARE IN compat_report directory"
> +cleanup_and_exit 0

After reading the report, it's not clear what would be tolerated or not.
Should we forbid every defect?

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [PATCH] ethdev: additional parameter in RX callback
  2015-03-13 18:28  0%               ` Mcnamara, John
@ 2015-03-13 23:15  5%                 ` Neil Horman
  2015-03-23 15:16  0%                   ` Thomas Monjalon
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-03-13 23:15 UTC (permalink / raw)
  To: Mcnamara, John; +Cc: dev

On Fri, Mar 13, 2015 at 06:28:31PM +0000, Mcnamara, John wrote:
> 
> 
> > -----Original Message-----
> > From: Neil Horman [mailto:nhorman@tuxdriver.com]
> > Sent: Friday, March 13, 2015 5:32 PM
> > To: Mcnamara, John
> > Cc: Richardson, Bruce; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH] ethdev: additional parameter in RX
> > callback
> > 
> > > Is encoding the information in the array really a better solution here?
> > The cb->param already exists for passing in user defined information to
> > the callback. The proposed patch merely transmits the parent function
> > arguments to the enclosed callback.
> > >
> > The cb->param can't be used here, because its opaque to the internals of
> > the DPDK.  rte_eth_rx_burst doesn't (and can't) know where in the cb-
> > >params pointer to store that information.  Thats why you added an
> > additional parameter in the first place, isn't it?
> 
> Yes. That is correct.
> 
Then why did you suggest doing so?

> > My point is that using
> > an array terminator keeps us out of this habbit of just adding parameters
> > to communicate more information (as thats an ABI breaking method, and not
> > particularly scalable if there is more information to be transmitted in
> > the future).  Using a context sensitive API set goes beyond even that, and
> > allows to retrieve arbitrary information form callbacks as needed in an
> > ABI safe manner
> 
> Again I can agree with this in the general case, but it isn't necessary, in this case, to encode the information in the array since it is already local to and available in the function. It seems artificial, at this point, to implement an array terminator solution to protect an API that, effectively, hasn't been published yet.
> 
You indicate that you agree an alternate solution is preferable in the general
case, so as to provide an API that is extensible in a way that isn't subject to
ABI breakage, correct?  If so, why do you assert that it's not necessary in this
specific case?  If you feel you need to add information so that callbacks can be
more flexible (in this case specifying the size of a passed in array), why
immediately shoehorn another parameter in place, and break the consistency
between rx and tx callbacks, when you don't have to?  I don't care if you break
ABI today (although to call it unpublished I think is disingenuous, as lots of
testing and development has already taken place with the ABI as it currently
stands).  I care, as I noted above, about not getting into the habit of just
assuming a change like this requires that you invalidate ABI somehow.  You don't
have to, you can create an API that is fairly invariant to it here if you like.
The question in my mind is, why don't you?

Neil


> John
> 
>  
> 
> 
> 

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [PATCH] ethdev: additional parameter in RX callback
  2015-03-13 17:31  4%             ` Neil Horman
@ 2015-03-13 18:28  0%               ` Mcnamara, John
  2015-03-13 23:15  5%                 ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Mcnamara, John @ 2015-03-13 18:28 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev



> -----Original Message-----
> From: Neil Horman [mailto:nhorman@tuxdriver.com]
> Sent: Friday, March 13, 2015 5:32 PM
> To: Mcnamara, John
> Cc: Richardson, Bruce; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] ethdev: additional parameter in RX
> callback
> 
> > Is encoding the information in the array really a better solution here?
> The cb->param already exists for passing in user defined information to
> the callback. The proposed patch merely transmits the parent function
> arguments to the enclosed callback.
> >
> The cb->param can't be used here, because its opaque to the internals of
> the DPDK.  rte_eth_rx_burst doesn't (and can't) know where in the cb-
> >params pointer to store that information.  Thats why you added an
> additional parameter in the first place, isn't it?

Yes. That is correct.

> My point is that using
> an array terminator keeps us out of this habbit of just adding parameters
> to communicate more information (as thats an ABI breaking method, and not
> particularly scalable if there is more information to be transmitted in
> the future).  Using a context sensitive API set goes beyond even that, and
> allows to retrieve arbitrary information form callbacks as needed in an
> ABI safe manner

Again I can agree with this in the general case, but it isn't necessary, in this case, to encode the information in the array since it is already local to and available in the function. It seems artificial, at this point, to implement an array terminator solution to protect an API that, effectively, hasn't been published yet.

John

 

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH] ethdev: additional parameter in RX callback
  2015-03-13 16:26  0%           ` Mcnamara, John
@ 2015-03-13 17:31  4%             ` Neil Horman
  2015-03-13 18:28  0%               ` Mcnamara, John
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-03-13 17:31 UTC (permalink / raw)
  To: Mcnamara, John; +Cc: dev

On Fri, Mar 13, 2015 at 04:26:52PM +0000, Mcnamara, John wrote:
> 
> 
> > -----Original Message-----
> > From: Neil Horman [mailto:nhorman@tuxdriver.com]
> > Sent: Friday, March 13, 2015 3:09 PM
> > To: Richardson, Bruce
> > Cc: Mcnamara, John; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH] ethdev: additional parameter in RX
> > callback
> > 
> > Plese set asside the ABI issue for a moment.  I get that you're trying to
> > get it in prior to needing to version it.  Thats not the argument.  The
> > argument is how best to codify the new information you want to express in
> > the callback.  For this specific case, I think there are better ways to do
> > this than to just blindly add a new parameter.
> 
> Hi Neil,
> 
> I think that is good advice is the general case but this is a very specific case. The modified callback is only used in rte_eth_rx_burst(). For context here is the function in its entirety (without #defs). The substantive change (the addition of nb_pkts) is on the line with an asterisk:
> 
> 
>     static inline uint16_t
>     rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
>              struct rte_mbuf **rx_pkts, const uint16_t nb_pkts)
>     {
>         struct rte_eth_dev *dev;
> 
>         dev = &rte_eth_devices[port_id];
> 
>         int16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
>                 rx_pkts, nb_pkts);
> 
>         struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
> 
>         if (unlikely(cb != NULL)) {
>             do {
>                 nb_rx = cb->fn.rx(port_id, queue_id, rx_pkts, nb_rx,
>     *						nb_pkts, cb->param);
>                 cb = cb->next;
>             } while (cb != NULL);
>         }
> 
>         return nb_rx;
>     }
> 

Not sure I grok your point here.  What impact does the number of internal
callouts for the callback api have on how we structure the API to external
consumers?


> > Encoding the array size
> > implicitly with a terminating marker lets you use this equally well with
> > the tx and rx callbacks (should you ever need it on the latter)
> 
> Is encoding the information in the array really a better solution here? The cb->param already exists for passing in user defined information to the callback. The proposed patch merely transmits the parent function arguments to the enclosed callback.
> 
The cb->param can't be used here, because it's opaque to the internals of the
DPDK.  rte_eth_rx_burst doesn't (and can't) know where in the cb->params pointer
to store that information.  That's why you added an additional parameter in the
first place, isn't it?  My point is that using an array terminator keeps us out
of this habit of just adding parameters to communicate more information (as
that's an ABI breaking method, and not particularly scalable if there is more
information to be transmitted in the future).  Using a context sensitive API set
goes beyond even that, and allows one to retrieve arbitrary information from
callbacks as needed in an ABI safe manner.

Neil

> John
> 
> 
> 
> 
> 
> 
> 

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [PATCH] ethdev: additional parameter in RX callback
  2015-03-13 15:09  4%         ` Neil Horman
@ 2015-03-13 16:26  0%           ` Mcnamara, John
  2015-03-13 17:31  4%             ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Mcnamara, John @ 2015-03-13 16:26 UTC (permalink / raw)
  To: Neil Horman, Richardson, Bruce; +Cc: dev



> -----Original Message-----
> From: Neil Horman [mailto:nhorman@tuxdriver.com]
> Sent: Friday, March 13, 2015 3:09 PM
> To: Richardson, Bruce
> Cc: Mcnamara, John; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] ethdev: additional parameter in RX
> callback
> 
> Plese set asside the ABI issue for a moment.  I get that you're trying to
> get it in prior to needing to version it.  Thats not the argument.  The
> argument is how best to codify the new information you want to express in
> the callback.  For this specific case, I think there are better ways to do
> this than to just blindly add a new parameter.

Hi Neil,

I think that is good advice in the general case but this is a very specific case. The modified callback is only used in rte_eth_rx_burst(). For context here is the function in its entirety (without #defs). The substantive change (the addition of nb_pkts) is on the line with an asterisk:


    static inline uint16_t
    rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
             struct rte_mbuf **rx_pkts, const uint16_t nb_pkts)
    {
        struct rte_eth_dev *dev;

        dev = &rte_eth_devices[port_id];

        int16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
                rx_pkts, nb_pkts);

        struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];

        if (unlikely(cb != NULL)) {
            do {
                nb_rx = cb->fn.rx(port_id, queue_id, rx_pkts, nb_rx,
    *						nb_pkts, cb->param);
                cb = cb->next;
            } while (cb != NULL);
        }

        return nb_rx;
    }

> Encoding the array size
> implicitly with a terminating marker lets you use this equally well with
> the tx and rx callbacks (should you ever need it on the latter)

Is encoding the information in the array really a better solution here? The cb->param already exists for passing in user defined information to the callback. The proposed patch merely transmits the parent function arguments to the enclosed callback.

John

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v3] ABI: Add abi checking utility
  2015-03-13 14:58  5%         ` Neil Horman
@ 2015-03-13 15:49  5%           ` Kavanagh, Mark B
  0 siblings, 0 replies; 200+ results
From: Kavanagh, Mark B @ 2015-03-13 15:49 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev



>-----Original Message-----
>From: Neil Horman [mailto:nhorman@tuxdriver.com]
>Sent: Friday, March 13, 2015 2:59 PM
>To: Kavanagh, Mark B
>Cc: dev@dpdk.org
>Subject: Re: [dpdk-dev] [PATCH v3] ABI: Add abi checking utility
>
>On Fri, Mar 13, 2015 at 02:25:17PM +0000, Kavanagh, Mark B wrote:
>> >On Fri, Mar 13, 2015 at 11:56:59AM +0000, Kavanagh, Mark B wrote:
>> >>
>> >>
>> >> >-----Original Message-----
>> >> >From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Neil Horman
>> >> >Sent: Wednesday, March 4, 2015 4:27 PM
>> >> >To: dev@dpdk.org
>> >> >Subject: [dpdk-dev] [PATCH v3] ABI: Add abi checking utility
>> >> >
>>
>>
>> (snip)
>>
>> >> >+log "INFO" "Building DPDK $TAG1. This might take a moment"
>> >> >+make O=$TARGET > $VERBOSE 2>&1
>> >> >+
>> >> >+if [ $? -ne 0 ]
>> >> >+then
>> >> >+	log "INFO" "THE BUILD FAILED.  ABORTING"
>> >>
>> >> If the build fails while TAG1 is checked out, the user must check out their original
>> >local branch manually. I'd prefer it if the script checked out $CURRENT_BRANCH in the
>> >'cleanup_and_exit' function.
>> >>
>> >Sure, its in V4.
>>
>> Cool.
>>
>> >
>> >> Same applies to TAG2, if the user CTRL-C's out of the script, and to any other
>command
>> >that might fail when a particular branch/tag is checked out (for example, the 'sed'
>> >commands fail when I run the script; however, they work when I run them on the command
>> >line - I'm investigating this currently).
>> >>
>> >What does the log say?  Please post it here.  If it helps add a set -x to the
>> >top of the script for additional verbosity.
>> >
>>
>> Hey Neil - this is the error, but it's not a problem with the script; presumably I'd
>cleaned my DPDK installation directory, so 'sed' couldn't find the defconfig file:
>> "sed: can't read config/defconfig_x86_64-ivshmem-linuxapp-gcc/: Not a directory"
>>
>Actually, it looks to me like you added a trailing "/" to the end of the third
>argument on the script command line, so sed bombs when it tries to modify a
>directory instead of a file.  Try specifying:
>x86_64-ivshmem-linuxapp-gcc
>instead of
>x86_64-ivshmem-linuxapp-gcc/
>
>Neil

Nice catch - thanks!

>
>> Thanks,
>> Mark
>>
>> >Neil
>>
>>

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [PATCH] ethdev: additional parameter in RX callback
  2015-03-13 14:50  0%       ` Bruce Richardson
@ 2015-03-13 15:09  4%         ` Neil Horman
  2015-03-13 16:26  0%           ` Mcnamara, John
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-03-13 15:09 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: dev

On Fri, Mar 13, 2015 at 02:50:03PM +0000, Bruce Richardson wrote:
> On Fri, Mar 13, 2015 at 09:45:14AM -0400, Neil Horman wrote:
> > On Fri, Mar 13, 2015 at 09:41:33AM +0000, Bruce Richardson wrote:
> > > On Thu, Mar 12, 2015 at 03:15:40PM -0400, Neil Horman wrote:
> > > > On Thu, Mar 12, 2015 at 04:54:27PM +0000, John McNamara wrote:
> > > > > 
> > > > > This patch is a minor extension to the recent patchset for RX/TX callbacks
> > > > > based on feedback from users implementing solutions based on it.
> > > > > 
> > > > > The patch adds a new parameter to the RX callback to pass in the number of
> > > > > available RX packets in addition to the number of dequeued packets.
> > > > > This provides the RX callback functions with additional information
> > > > > that can be used to decide how packets from a burst are handled.
> > > > > 
> > > > > The TX callback doesn't require this additional parameter so the RX
> > > > > and TX callbacks no longer have the same function parameters. As such
> > > > > the single RX/TX callback has been refactored into two separate callbacks.
> > > > > 
> > > > > Since this is an API change we hope it can be included in 2.0.0 to avoid
> > > > > changing the API in a subsequent release.    
> > > > > 
> > > > > 
> > > > > John McNamara (1):
> > > > >   ethdev: added additional packet count parameter to RX callbacks
> > > > > 
> > > > >  examples/rxtx_callbacks/main.c |    3 +-
> > > > >  lib/librte_ether/rte_ethdev.c  |    8 ++--
> > > > >  lib/librte_ether/rte_ethdev.h  |   74 ++++++++++++++++++++++++++--------------
> > > > >  3 files changed, 54 insertions(+), 31 deletions(-)
> > > > > 
> > > > > -- 
> > > > > 1.7.4.1
> > > > > 
> > > > > 
> > > > 
> > > > 
> > > > Well, we're well past the new feature phase of this cycle, so I would say NACK.
> > > > I would also suggest that you don't need to modify ABI to accomodate this
> > > > feature.  Instead just document the pkts array to be terminated by a reserved
> > > > value, so that the callback can determine its size dynamically.  You could
> > > > alternatively create a new api call that allows you to retrieve that information
> > > > from the context of the callback.
> > > > 
> > > > Neil
> > > > 
> > > 
> > > Yes, I would agree we are past the new feature phase. However, given that we
> > > are making a change to the API, and a fairly small change too - adding one extra
> > > parameter - we think that the benefit of including this now outweighs any risk
> > > of merging the patch. It seems a bit crazy to ship a release with a new API and
> > > then immediately change the API straight after release. Is it not better to
> > > take the received feedback on the API and fix/improve it pre-release before it
> > > gets set-in-stone?
> > > 
> > > /Bruce
> > > 
> > > 
> > 
> > See above, the API doesn't need to change at all to accomodate this as far as I
> > can see.
> > 
> > Neil
> Yes, there are alternative ways to see about accomplishing the same thing, but
> they are not nearly as clear as adding in the extra param. That's why we'd like
> to see this change go in before release, if possible. If it doesn't make 2.0
> I'd like to see it in 2.1, but at the cost of having an API change and the
> additional versionning and deprecation that ensues.
> 
Please set aside the ABI issue for a moment.  I get that you're trying to get it
in prior to needing to version it.  That's not the argument.  The argument is how
best to codify the new information you want to express in the callback.  For
this specific case, I think there are better ways to do this than to just
blindly add a new parameter.  Encoding the array size implicitly with a
terminating marker lets you use this equally well with the tx and rx callbacks
(should you ever need it on the latter), and isn't uncommon to do at all.  It
also lets you avoid the odd bugs that arise should the caller ever mis-encode
the array length such that it doesn't match the actual array size.

Using additional context sensitive functions is also nice, because they are
additive without being ABI breaking.  That is to say, in the future, if you want
to export more information to a callback you can do so by adding an API call
that simply didn't exist before.  That's a nice feature to be able to support.

Just adding more parameters isn't the only (nor, in my view, the more flexible)
way to do this.

Neil

> Regards,
> /Bruce
> 
> 

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [PATCH v3] ABI: Add abi checking utility
  2015-03-13 14:25  5%       ` Kavanagh, Mark B
@ 2015-03-13 14:58  5%         ` Neil Horman
  2015-03-13 15:49  5%           ` Kavanagh, Mark B
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-03-13 14:58 UTC (permalink / raw)
  To: Kavanagh, Mark B; +Cc: dev

On Fri, Mar 13, 2015 at 02:25:17PM +0000, Kavanagh, Mark B wrote:
> >On Fri, Mar 13, 2015 at 11:56:59AM +0000, Kavanagh, Mark B wrote:
> >>
> >>
> >> >-----Original Message-----
> >> >From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Neil Horman
> >> >Sent: Wednesday, March 4, 2015 4:27 PM
> >> >To: dev@dpdk.org
> >> >Subject: [dpdk-dev] [PATCH v3] ABI: Add abi checking utility
> >> >
> 
> 
> (snip)
> 
> >> >+log "INFO" "Building DPDK $TAG1. This might take a moment"
> >> >+make O=$TARGET > $VERBOSE 2>&1
> >> >+
> >> >+if [ $? -ne 0 ]
> >> >+then
> >> >+	log "INFO" "THE BUILD FAILED.  ABORTING"
> >>
> >> If the build fails while TAG1 is checked out, the user must check out their original
> >local branch manually. I'd prefer it if the script checked out $CURRENT_BRANCH in the
> >'cleanup_and_exit' function.
> >>
> >Sure, its in V4.
> 
> Cool.
> 
> >
> >> Same applies to TAG2, if the user CTRL-C's out of the script, and to any other command
> >that might fail when a particular branch/tag is checked out (for example, the 'sed'
> >commands fail when I run the script; however, they work when I run them on the command
> >line - I'm investigating this currently).
> >>
> >What does the log say?  Please post it here.  If it helps add a set -x to the
> >top of the script for additional verbosity.
> >
> 
> Hey Neil - this is the error, but it's not a problem with the script; presumably I'd cleaned my DPDK installation directory, so 'sed' couldn't find the defconfig file:
> "sed: can't read config/defconfig_x86_64-ivshmem-linuxapp-gcc/: Not a directory"
> 
Actually, it looks to me like you added a trailing "/" to the end of the third
argument on the script command line, so sed bombs when it tries to modify a
directory instead of a file.  Try specifying:
x86_64-ivshmem-linuxapp-gcc
instead of
x86_64-ivshmem-linuxapp-gcc/

Neil

> Thanks,
> Mark
> 
> >Neil
> 
> 

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [PATCH] ethdev: additional parameter in RX callback
  2015-03-13 13:45  0%     ` Neil Horman
@ 2015-03-13 14:50  0%       ` Bruce Richardson
  2015-03-13 15:09  4%         ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Bruce Richardson @ 2015-03-13 14:50 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On Fri, Mar 13, 2015 at 09:45:14AM -0400, Neil Horman wrote:
> On Fri, Mar 13, 2015 at 09:41:33AM +0000, Bruce Richardson wrote:
> > On Thu, Mar 12, 2015 at 03:15:40PM -0400, Neil Horman wrote:
> > > On Thu, Mar 12, 2015 at 04:54:27PM +0000, John McNamara wrote:
> > > > 
> > > > This patch is a minor extension to the recent patchset for RX/TX callbacks
> > > > based on feedback from users implementing solutions based on it.
> > > > 
> > > > The patch adds a new parameter to the RX callback to pass in the number of
> > > > available RX packets in addition to the number of dequeued packets.
> > > > This provides the RX callback functions with additional information
> > > > that can be used to decide how packets from a burst are handled.
> > > > 
> > > > The TX callback doesn't require this additional parameter so the RX
> > > > and TX callbacks no longer have the same function parameters. As such
> > > > the single RX/TX callback has been refactored into two separate callbacks.
> > > > 
> > > > Since this is an API change we hope it can be included in 2.0.0 to avoid
> > > > changing the API in a subsequent release.    
> > > > 
> > > > 
> > > > John McNamara (1):
> > > >   ethdev: added additional packet count parameter to RX callbacks
> > > > 
> > > >  examples/rxtx_callbacks/main.c |    3 +-
> > > >  lib/librte_ether/rte_ethdev.c  |    8 ++--
> > > >  lib/librte_ether/rte_ethdev.h  |   74 ++++++++++++++++++++++++++--------------
> > > >  3 files changed, 54 insertions(+), 31 deletions(-)
> > > > 
> > > > -- 
> > > > 1.7.4.1
> > > > 
> > > > 
> > > 
> > > 
> > > Well, we're well past the new feature phase of this cycle, so I would say NACK.
> > > I would also suggest that you don't need to modify ABI to accomodate this
> > > feature.  Instead just document the pkts array to be terminated by a reserved
> > > value, so that the callback can determine its size dynamically.  You could
> > > alternatively create a new api call that allows you to retrieve that information
> > > from the context of the callback.
> > > 
> > > Neil
> > > 
> > 
> > Yes, I would agree we are past the new feature phase. However, given that we
> > are making a change to the API, and a fairly small change too - adding one extra
> > parameter - we think that the benefit of including this now outweighs any risk
> > of merging the patch. It seems a bit crazy to ship a release with a new API and
> > then immediately change the API straight after release. Is it not better to
> > take the received feedback on the API and fix/improve it pre-release before it
> > gets set-in-stone?
> > 
> > /Bruce
> > 
> > 
> 
> See above, the API doesn't need to change at all to accomodate this as far as I
> can see.
> 
> Neil
Yes, there are alternative ways to see about accomplishing the same thing, but
they are not nearly as clear as adding in the extra param. That's why we'd like
to see this change go in before release, if possible. If it doesn't make 2.0
I'd like to see it in 2.1, but at the cost of having an API change and the
additional versioning and deprecation that ensues.

Regards,
/Bruce

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v3] ABI: Add abi checking utility
  2015-03-13 14:10  5%     ` Neil Horman
@ 2015-03-13 14:25  5%       ` Kavanagh, Mark B
  2015-03-13 14:58  5%         ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Kavanagh, Mark B @ 2015-03-13 14:25 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

>On Fri, Mar 13, 2015 at 11:56:59AM +0000, Kavanagh, Mark B wrote:
>>
>>
>> >-----Original Message-----
>> >From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Neil Horman
>> >Sent: Wednesday, March 4, 2015 4:27 PM
>> >To: dev@dpdk.org
>> >Subject: [dpdk-dev] [PATCH v3] ABI: Add abi checking utility
>> >


(snip)

>> >+log "INFO" "Building DPDK $TAG1. This might take a moment"
>> >+make O=$TARGET > $VERBOSE 2>&1
>> >+
>> >+if [ $? -ne 0 ]
>> >+then
>> >+	log "INFO" "THE BUILD FAILED.  ABORTING"
>>
>> If the build fails while TAG1 is checked out, the user must check out their original
>local branch manually. I'd prefer it if the script checked out $CURRENT_BRANCH in the
>'cleanup_and_exit' function.
>>
>Sure, its in V4.

Cool.

>
>> Same applies to TAG2, if the user CTRL-C's out of the script, and to any other command
>that might fail when a particular branch/tag is checked out (for example, the 'sed'
>commands fail when I run the script; however, they work when I run them on the command
>line - I'm investigating this currently).
>>
>What does the log say?  Please post it here.  If it helps add a set -x to the
>top of the script for additional verbosity.
>

Hey Neil - this is the error, but it's not a problem with the script; presumably I'd cleaned my DPDK installation directory, so 'sed' couldn't find the defconfig file:
"sed: can't read config/defconfig_x86_64-ivshmem-linuxapp-gcc/: Not a directory"

Thanks,
Mark

>Neil

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [PATCH v3] ABI: Add abi checking utility
  2015-03-13 11:56  5%   ` Kavanagh, Mark B
@ 2015-03-13 14:10  5%     ` Neil Horman
  2015-03-13 14:25  5%       ` Kavanagh, Mark B
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-03-13 14:10 UTC (permalink / raw)
  To: Kavanagh, Mark B; +Cc: dev

On Fri, Mar 13, 2015 at 11:56:59AM +0000, Kavanagh, Mark B wrote:
> 
> 
> >-----Original Message-----
> >From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Neil Horman
> >Sent: Wednesday, March 4, 2015 4:27 PM
> >To: dev@dpdk.org
> >Subject: [dpdk-dev] [PATCH v3] ABI: Add abi checking utility
> >
> >There was a request for an abi validation utilty for the ongoing ABI stability
> >work.  As it turns out there is a abi compliance checker in development that
> >seems to be under active development and provides fairly detailed ABI compliance
> >reports.  Its not yet intellegent enough to understand symbol versioning, but it
> >does provide the ability to identify symbols which have changed between
> >releases, along with details of the change, and offers developers the
> >opportunity to identify which symbols then need versioning and validation for a
> >given update via manual testing.
> >
> >This script automates the use of the compliance checker between two arbitrarily
> >specified tags within the dpdk tree.  To execute enter the $RTE_SDK directory
> >and run:
> >
> >./scripts/validate_abi.sh $GIT_TAG1 $GIT_TAG2 $CONFIG
> >
> >where $GIT_TAG1 and 2 are git tags and $CONFIG is a config specification
> >suitable for passing as the T= variable in the make config command.
> >
> >Note the upstream source for the abi compliance checker is here:
> >http://ispras.linuxbase.org/index.php/ABI_compliance_checker
> >
> >It generates a report for each DSO built from the requested tags that developers
> >can review to find ABI compliance issues.
> >
> >Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> >
> >---
> >
> >Change Notes:
> >
> >v2) Fixed some typos as requested by Thomas
> >
> >v3) Fixed some additional typos Thomas requested
> >    Improved script to work from detached state
> >    Added some documentation to the changelog
> >    Added some comments to the scripts
> >---
> > scripts/validate_abi.sh | 248 ++++++++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 248 insertions(+)
> > create mode 100755 scripts/validate_abi.sh
> >
> >diff --git a/scripts/validate_abi.sh b/scripts/validate_abi.sh
> >new file mode 100755
> >index 0000000..899cf5f
> >--- /dev/null
> >+++ b/scripts/validate_abi.sh
> >@@ -0,0 +1,248 @@
> >+#!/bin/sh
> >+#   BSD LICENSE
> >+#
> >+#   Copyright(c) 2015 Neil Horman. All rights reserved.
> >+#   All rights reserved.
> >+#
> >+#   Redistribution and use in source and binary forms, with or without
> >+#   modification, are permitted provided that the following conditions
> >+#   are met:
> >+#
> >+#     * Redistributions of source code must retain the above copyright
> >+#       notice, this list of conditions and the following disclaimer.
> >+#     * Redistributions in binary form must reproduce the above copyright
> >+#       notice, this list of conditions and the following disclaimer in
> >+#       the documentation and/or other materials provided with the
> >+#       distribution.
> >+#
> >+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> >+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> >+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> >+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> >+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> >+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> >+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> >+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> >+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> >+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> >+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> >+
> >+TAG1=$1
> >+TAG2=$2
> >+TARGET=$3
> >+ABI_DIR=`mktemp -d -p /tmp ABI.XXXXXX`
> >+
> >+usage() {
> >+	echo "$0 <TAG1> <TAG2> <TARGET>"
> >+}
> >+
> >+log() {
> >+	local level=$1
> >+	shift
> >+	echo "$*"
> >+}
> >+
> >+validate_tags() {
> >+	git tag -l | grep -q "$TAG1"
> >+	if [ $? -ne 0 ]
> >+	then
> >+		echo "$TAG1 is invalid"
> >+		return
> >+	fi
> >+	git tag -l | grep -q "$TAG2"
> >+	if [ $? -ne 0 ]
> >+	then
> >+		echo "$TAG2 is invalid"
> >+		return
> >+	fi
> >+}
> >+
> >+validate_args() {
> >+	if [ -z "$TAG1" ]
> >+	then
> >+		echo "Must Specify TAG1"
> >+		return
> >+	fi
> >+	if [ -z "$TAG2" ]
> >+	then
> >+		echo "Must Specify TAG2"
> >+		return
> >+	fi
> >+	if [ -z "$TARGET" ]
> >+	then
> >+		echo "Must Specify a build target"
> >+	fi
> >+}
> >+
> >+
> >+cleanup_and_exit() {
> >+	rm -rf $ABI_DIR
> >+	exit $1
> >+}
> >+
> >+###########################################
> >+#START
> >+############################################
> >+
> >+#trap on ctrl-c to clean up
> >+trap cleanup_and_exit SIGINT
> >+
> >+#Save the current branch
> >+CURRENT_BRANCH=`git branch | grep \* | cut -d' ' -f2`
> >+
> >+if [ -z "$CURRENT_BRANCH" ]
> >+then
> >+	CURRENT_BRANCH=`git log --pretty=format:%H HEAD~1..HEAD`
> >+fi
> >+
> >+if [ -n "$VERBOSE" ]
> >+then
> >+	export VERBOSE=/dev/stdout
> >+else
> >+	export VERBOSE=/dev/null
> >+fi
> >+
> >+# Validate that we have all the arguments we need
> >+res=$(validate_args)
> >+if [ -n "$res" ]
> >+then
> >+	echo $res
> >+	usage
> >+	cleanup_and_exit 1
> >+fi
> >+
> >+# Make sure our tags exist
> >+res=$(validate_tags)
> >+if [ -n "$res" ]
> >+then
> >+	echo $res
> >+	cleanup_and_exit 1
> >+fi
> >+
> >+ABICHECK=`which abi-compliance-checker 2>/dev/null`
> >+if [ $? -ne 0 ]
> >+then
> >+	log "INFO" "Cant find abi-compliance-checker utility"
> >+	cleanup_and_exit 1
> >+fi
> >+
> >+ABIDUMP=`which abi-dumper 2>/dev/null`
> >+if [ $? -ne 0 ]
> >+then
> >+	log "INFO" "Cant find abi-dumper utility"
> >+	cleanup_and_exit 1
> >+fi
> >+
> >+log "INFO" "We're going to check and make sure that applications built"
> >+log "INFO" "against DPDK DSOs from tag $TAG1 will still run when executed"
> >+log "INFO" "against DPDK DSOs built from tag $TAG2."
> >+log "INFO" ""
> >+
> >+# Check to make sure we have a clean tree
> >+git status | grep -q clean
> >+if [ $? -ne 0 ]
> >+then
> >+	log "WARN" "Working directory not clean, aborting"
> >+	cleanup_and_exit 1
> >+fi
> >+
> >+# Move to the root of the git tree
> >+cd $(dirname $0)/..
> >+
> >+log "INFO" "Checking out version $TAG1 of the dpdk"
> >+# Move to the old version of the tree
> >+git checkout $TAG1
> >+
> >+# Make sure we configure SHARED libraries
> >+# Also turn off IGB and KNI as those require kernel headers to build
> >+sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
> >+sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
> >+sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
> >+
> >+# Checking abi compliance relies on using the dwarf information in
> >+# The shared objects.  Thats only included in the DSO's if we build
> >+# with -g
> >+export EXTRA_CFLAGS=-g
> >+export EXTRA_LDFLAGS=-g
> >+
> >+# Now configure the build
> >+log "INFO" "Configuring DPDK $TAG1"
> >+make config T=$TARGET O=$TARGET > $VERBOSE 2>&1
> >+
> >+log "INFO" "Building DPDK $TAG1. This might take a moment"
> >+make O=$TARGET > $VERBOSE 2>&1
> >+
> >+if [ $? -ne 0 ]
> >+then
> >+	log "INFO" "THE BUILD FAILED.  ABORTING"
> 
> If the build fails while TAG1 is checked out, the user must check out their original local branch manually. I'd prefer it if the script checked out $CURRENT_BRANCH in the 'cleanup_and_exit' function. 
> 
Sure, it's in V4.

> Same applies to TAG2, if the user CTRL-C's out of the script, and to any other command that might fail when a particular branch/tag is checked out (for example, the 'sed' commands fail when I run the script; however, they work when I run them on the command line - I'm investigating this currently).
> 
What does the log say?  Please post it here.  If it helps add a set -x to the
top of the script for additional verbosity.

Neil

^ permalink raw reply	[relevance 5%]

* [dpdk-dev] [PATCH v4] ABI: Add abi checking utility
      2015-03-04 16:26 17% ` [dpdk-dev] [PATCH v3] " Neil Horman
@ 2015-03-13 14:09 17% ` Neil Horman
  2015-03-17 15:42  5%   ` Thomas Monjalon
  2015-03-17 18:08 29% ` [dpdk-dev] [PATCH v5] " Neil Horman
  3 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-03-13 14:09 UTC (permalink / raw)
  To: dev

There was a request for an ABI validation utility for the ongoing ABI stability
work.  As it turns out there is an ABI compliance checker in development that
seems to be under active development and provides fairly detailed ABI compliance
reports.  It's not yet intelligent enough to understand symbol versioning, but it
does provide the ability to identify symbols which have changed between
releases, along with details of the change, and offers developers the
opportunity to identify which symbols then need versioning and validation for a
given update via manual testing.

This script automates the use of the compliance checker between two arbitrarily
specified tags within the dpdk tree.  To execute enter the $RTE_SDK directory
and run:

./scripts/validate_abi.sh $GIT_TAG1 $GIT_TAG2 $CONFIG

where $GIT_TAG1 and 2 are git tags and $CONFIG is a config specification
suitable for passing as the T= variable in the make config command.

Note the upstream source for the abi compliance checker is here:
http://ispras.linuxbase.org/index.php/ABI_compliance_checker

It generates a report for each DSO built from the requested tags that developers
can review to find ABI compliance issues.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

---

Change Notes:

v2) Fixed some typos as requested by Thomas

v3) Fixed some additional typos Thomas requested
    Improved script to work from detached state
    Added some documentation to the changelog
    Added some comments to the scripts

v4) Remove duplicate exports.
    Move restoration of starting branch/commit to cleanup_and_exit
---
 scripts/validate_abi.sh | 245 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 245 insertions(+)
 create mode 100755 scripts/validate_abi.sh

diff --git a/scripts/validate_abi.sh b/scripts/validate_abi.sh
new file mode 100755
index 0000000..bdec431
--- /dev/null
+++ b/scripts/validate_abi.sh
@@ -0,0 +1,245 @@
+#!/bin/sh
+#   BSD LICENSE
+#
+#   Copyright(c) 2015 Neil Horman. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+TAG1=$1
+TAG2=$2
+TARGET=$3
+ABI_DIR=`mktemp -d -p /tmp ABI.XXXXXX`
+
+usage() {
+	echo "$0 <TAG1> <TAG2> <TARGET>"
+}
+
+log() {
+	local level=$1
+	shift
+	echo "$*"
+}
+
+validate_tags() {
+	git tag -l | grep -q "$TAG1"
+	if [ $? -ne 0 ]
+	then
+		echo "$TAG1 is invalid"
+		return
+	fi
+	git tag -l | grep -q "$TAG2"
+	if [ $? -ne 0 ]
+	then
+		echo "$TAG2 is invalid"
+		return
+	fi
+}
+
+validate_args() {
+	if [ -z "$TAG1" ]
+	then
+		echo "Must Specify TAG1"
+		return
+	fi
+	if [ -z "$TAG2" ]
+	then
+		echo "Must Specify TAG2"
+		return
+	fi
+	if [ -z "$TARGET" ]
+	then
+		echo "Must Specify a build target"
+	fi
+}
+
+
+cleanup_and_exit() {	# remove temp dir, restore starting branch, exit with status $1
+	rm -rf $ABI_DIR
+	git checkout $CURRENT_BRANCH	# must run before exit, otherwise it is never reached
+	exit $1
+}
+
+###########################################
+#START
+############################################
+
+#trap on ctrl-c to clean up
+trap cleanup_and_exit SIGINT
+
+#Save the current branch
+CURRENT_BRANCH=`git branch | grep \* | cut -d' ' -f2`
+
+if [ -z "$CURRENT_BRANCH" ]
+then
+	CURRENT_BRANCH=`git log --pretty=format:%H HEAD~1..HEAD`
+fi
+
+if [ -n "$VERBOSE" ]
+then
+	export VERBOSE=/dev/stdout
+else
+	export VERBOSE=/dev/null
+fi
+
+# Validate that we have all the arguments we need
+res=$(validate_args)
+if [ -n "$res" ]
+then
+	echo $res
+	usage
+	cleanup_and_exit 1
+fi
+
+# Make sure our tags exist
+res=$(validate_tags)
+if [ -n "$res" ]
+then
+	echo $res
+	cleanup_and_exit 1
+fi
+
+ABICHECK=`which abi-compliance-checker 2>/dev/null`
+if [ $? -ne 0 ]
+then
+	log "INFO" "Cant find abi-compliance-checker utility"
+	cleanup_and_exit 1
+fi
+
+ABIDUMP=`which abi-dumper 2>/dev/null`
+if [ $? -ne 0 ]
+then
+	log "INFO" "Cant find abi-dumper utility"
+	cleanup_and_exit 1
+fi
+
+log "INFO" "We're going to check and make sure that applications built"
+log "INFO" "against DPDK DSOs from tag $TAG1 will still run when executed"
+log "INFO" "against DPDK DSOs built from tag $TAG2."
+log "INFO" ""
+
+# Check to make sure we have a clean tree
+git status | grep -q clean
+if [ $? -ne 0 ]
+then
+	log "WARN" "Working directory not clean, aborting"
+	cleanup_and_exit 1
+fi
+
+# Move to the root of the git tree
+cd $(dirname $0)/..
+
+log "INFO" "Checking out version $TAG1 of the dpdk"
+# Move to the old version of the tree
+git checkout $TAG1
+
+# Make sure we configure SHARED libraries
+# Also turn off IGB and KNI as those require kernel headers to build
+sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
+sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
+sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
+
+# Checking abi compliance relies on using the dwarf information in
+# The shared objects.  Thats only included in the DSO's if we build
+# with -g
+export EXTRA_CFLAGS=-g
+export EXTRA_LDFLAGS=-g
+
+# Now configure the build
+log "INFO" "Configuring DPDK $TAG1"
+make config T=$TARGET O=$TARGET > $VERBOSE 2>&1
+
+log "INFO" "Building DPDK $TAG1. This might take a moment"
+make O=$TARGET > $VERBOSE 2>&1
+
+if [ $? -ne 0 ]
+then
+	log "INFO" "THE BUILD FAILED.  ABORTING"
+	cleanup_and_exit 1
+fi
+
+# Move to the lib directory
+cd $TARGET/lib
+log "INFO" "COLLECTING ABI INFORMATION FOR $TAG1"
+for i in `ls *.so`
+do
+	$ABIDUMP $i -o $ABI_DIR/$i-ABI-0.dump -lver $TAG1
+done
+cd ../..
+
+# Now clean the tree, checkout the second tag, and rebuild
+git clean -f -d
+git reset --hard
+# Move to the new version of the tree
+log "INFO" "Checking out version $TAG2 of the dpdk"
+git checkout $TAG2
+
+# Make sure we configure SHARED libraries
+# Also turn off IGB and KNI as those require kernel headers to build
+sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
+sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
+sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
+
+# Now configure the build
+log "INFO" "Configuring DPDK $TAG2"
+make config T=$TARGET O=$TARGET > $VERBOSE 2>&1
+
+log "INFO" "Building DPDK $TAG2. This might take a moment"
+make O=$TARGET > $VERBOSE 2>&1
+
+if [ $? -ne 0 ]
+then
+	log "INFO" "THE BUILD FAILED.  ABORTING"
+	cleanup_and_exit 1
+fi
+
+cd $TARGET/lib
+log "INFO" "COLLECTING ABI INFORMATION FOR $TAG2"
+for i in `ls *.so`
+do
+	$ABIDUMP $i -o $ABI_DIR/$i-ABI-1.dump -lver $TAG2
+done
+cd ../..
+
+# Start comparison of ABI dumps
+for i in `ls $ABI_DIR/*-1.dump`
+do
+	NEWNAME=`basename $i`
+	OLDNAME=`basename $i | sed -e"s/1.dump/0.dump/"`
+	LIBNAME=`basename $i | sed -e"s/-ABI-1.dump//"`
+
+	if [ ! -f $ABI_DIR/$OLDNAME ]
+	then
+		log "INFO" "$OLDNAME DOES NOT EXIST IN $TAG1. SKIPPING..."
+	fi
+
+	#compare the abi dumps
+	$ABICHECK -l $LIBNAME -old $ABI_DIR/$OLDNAME -new $ABI_DIR/$NEWNAME
+done
+
+git reset --hard
+log "INFO" "ABI CHECK COMPLETE.  REPORTS ARE IN compat_report directory"
+cleanup_and_exit 0
+
+
-- 
2.1.0

^ permalink raw reply	[relevance 17%]

* Re: [dpdk-dev] [PATCH] ethdev: additional parameter in RX callback
  2015-03-13  9:41  0%   ` Bruce Richardson
@ 2015-03-13 13:45  0%     ` Neil Horman
  2015-03-13 14:50  0%       ` Bruce Richardson
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-03-13 13:45 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: dev

On Fri, Mar 13, 2015 at 09:41:33AM +0000, Bruce Richardson wrote:
> On Thu, Mar 12, 2015 at 03:15:40PM -0400, Neil Horman wrote:
> > On Thu, Mar 12, 2015 at 04:54:27PM +0000, John McNamara wrote:
> > > 
> > > This patch is a minor extension to the recent patchset for RX/TX callbacks
> > > based on feedback from users implementing solutions based on it.
> > > 
> > > The patch adds a new parameter to the RX callback to pass in the number of
> > > available RX packets in addition to the number of dequeued packets.
> > > This provides the RX callback functions with additional information
> > > that can be used to decide how packets from a burst are handled.
> > > 
> > > The TX callback doesn't require this additional parameter so the RX
> > > and TX callbacks no longer have the same function parameters. As such
> > > the single RX/TX callback has been refactored into two separate callbacks.
> > > 
> > > Since this is an API change we hope it can be included in 2.0.0 to avoid
> > > changing the API in a subsequent release.    
> > > 
> > > 
> > > John McNamara (1):
> > >   ethdev: added additional packet count parameter to RX callbacks
> > > 
> > >  examples/rxtx_callbacks/main.c |    3 +-
> > >  lib/librte_ether/rte_ethdev.c  |    8 ++--
> > >  lib/librte_ether/rte_ethdev.h  |   74 ++++++++++++++++++++++++++--------------
> > >  3 files changed, 54 insertions(+), 31 deletions(-)
> > > 
> > > -- 
> > > 1.7.4.1
> > > 
> > > 
> > 
> > 
> > Well, we're well past the new feature phase of this cycle, so I would say NACK.
> > I would also suggest that you don't need to modify ABI to accomodate this
> > feature.  Instead just document the pkts array to be terminated by a reserved
> > value, so that the callback can determine its size dynamically.  You could
> > alternatively create a new api call that allows you to retrieve that information
> > from the context of the callback.
> > 
> > Neil
> > 
> 
> Yes, I would agree we are past the new feature phase. However, given that we
> are making a change to the API, and a fairly small change too - adding one extra
> parameter - we think that the benefit of including this now outweighs any risk
> of merging the patch. It seems a bit crazy to ship a release with a new API and
> then immediately change the API straight after release. Is it not better to
> take the received feedback on the API and fix/improve it pre-release before it
> gets set-in-stone?
> 
> /Bruce
> 
> 

See above, the API doesn't need to change at all to accommodate this as far as I
can see.

Neil

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v3] ABI: Add abi checking utility
  2015-03-04 16:26 17% ` [dpdk-dev] [PATCH v3] " Neil Horman
  2015-03-04 16:49  9%   ` Thomas Monjalon
@ 2015-03-13 11:56  5%   ` Kavanagh, Mark B
  2015-03-13 14:10  5%     ` Neil Horman
  1 sibling, 1 reply; 200+ results
From: Kavanagh, Mark B @ 2015-03-13 11:56 UTC (permalink / raw)
  To: Neil Horman, dev



>-----Original Message-----
>From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Neil Horman
>Sent: Wednesday, March 4, 2015 4:27 PM
>To: dev@dpdk.org
>Subject: [dpdk-dev] [PATCH v3] ABI: Add abi checking utility
>
>There was a request for an abi validation utilty for the ongoing ABI stability
>work.  As it turns out there is a abi compliance checker in development that
>seems to be under active development and provides fairly detailed ABI compliance
>reports.  Its not yet intellegent enough to understand symbol versioning, but it
>does provide the ability to identify symbols which have changed between
>releases, along with details of the change, and offers developers the
>opportunity to identify which symbols then need versioning and validation for a
>given update via manual testing.
>
>This script automates the use of the compliance checker between two arbitrarily
>specified tags within the dpdk tree.  To execute enter the $RTE_SDK directory
>and run:
>
>./scripts/validate_abi.sh $GIT_TAG1 $GIT_TAG2 $CONFIG
>
>where $GIT_TAG1 and 2 are git tags and $CONFIG is a config specification
>suitable for passing as the T= variable in the make config command.
>
>Note the upstream source for the abi compliance checker is here:
>http://ispras.linuxbase.org/index.php/ABI_compliance_checker
>
>It generates a report for each DSO built from the requested tags that developers
>can review to find ABI compliance issues.
>
>Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
>
>---
>
>Change Notes:
>
>v2) Fixed some typos as requested by Thomas
>
>v3) Fixed some additional typos Thomas requested
>    Improved script to work from detached state
>    Added some documentation to the changelog
>    Added some comments to the scripts
>---
> scripts/validate_abi.sh | 248 ++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 248 insertions(+)
> create mode 100755 scripts/validate_abi.sh
>
>diff --git a/scripts/validate_abi.sh b/scripts/validate_abi.sh
>new file mode 100755
>index 0000000..899cf5f
>--- /dev/null
>+++ b/scripts/validate_abi.sh
>@@ -0,0 +1,248 @@
>+#!/bin/sh
>+#   BSD LICENSE
>+#
>+#   Copyright(c) 2015 Neil Horman. All rights reserved.
>+#   All rights reserved.
>+#
>+#   Redistribution and use in source and binary forms, with or without
>+#   modification, are permitted provided that the following conditions
>+#   are met:
>+#
>+#     * Redistributions of source code must retain the above copyright
>+#       notice, this list of conditions and the following disclaimer.
>+#     * Redistributions in binary form must reproduce the above copyright
>+#       notice, this list of conditions and the following disclaimer in
>+#       the documentation and/or other materials provided with the
>+#       distribution.
>+#
>+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>+
>+TAG1=$1
>+TAG2=$2
>+TARGET=$3
>+ABI_DIR=`mktemp -d -p /tmp ABI.XXXXXX`
>+
>+usage() {
>+	echo "$0 <TAG1> <TAG2> <TARGET>"
>+}
>+
>+log() {
>+	local level=$1
>+	shift
>+	echo "$*"
>+}
>+
>+validate_tags() {
>+	git tag -l | grep -q "$TAG1"
>+	if [ $? -ne 0 ]
>+	then
>+		echo "$TAG1 is invalid"
>+		return
>+	fi
>+	git tag -l | grep -q "$TAG2"
>+	if [ $? -ne 0 ]
>+	then
>+		echo "$TAG2 is invalid"
>+		return
>+	fi
>+}
>+
>+validate_args() {
>+	if [ -z "$TAG1" ]
>+	then
>+		echo "Must Specify TAG1"
>+		return
>+	fi
>+	if [ -z "$TAG2" ]
>+	then
>+		echo "Must Specify TAG2"
>+		return
>+	fi
>+	if [ -z "$TARGET" ]
>+	then
>+		echo "Must Specify a build target"
>+	fi
>+}
>+
>+
>+cleanup_and_exit() {
>+	rm -rf $ABI_DIR
>+	exit $1
>+}
>+
>+###########################################
>+#START
>+############################################
>+
>+#trap on ctrl-c to clean up
>+trap cleanup_and_exit SIGINT
>+
>+#Save the current branch
>+CURRENT_BRANCH=`git branch | grep \* | cut -d' ' -f2`
>+
>+if [ -z "$CURRENT_BRANCH" ]
>+then
>+	CURRENT_BRANCH=`git log --pretty=format:%H HEAD~1..HEAD`
>+fi
>+
>+if [ -n "$VERBOSE" ]
>+then
>+	export VERBOSE=/dev/stdout
>+else
>+	export VERBOSE=/dev/null
>+fi
>+
>+# Validate that we have all the arguments we need
>+res=$(validate_args)
>+if [ -n "$res" ]
>+then
>+	echo $res
>+	usage
>+	cleanup_and_exit 1
>+fi
>+
>+# Make sure our tags exist
>+res=$(validate_tags)
>+if [ -n "$res" ]
>+then
>+	echo $res
>+	cleanup_and_exit 1
>+fi
>+
>+ABICHECK=`which abi-compliance-checker 2>/dev/null`
>+if [ $? -ne 0 ]
>+then
>+	log "INFO" "Cant find abi-compliance-checker utility"
>+	cleanup_and_exit 1
>+fi
>+
>+ABIDUMP=`which abi-dumper 2>/dev/null`
>+if [ $? -ne 0 ]
>+then
>+	log "INFO" "Cant find abi-dumper utility"
>+	cleanup_and_exit 1
>+fi
>+
>+log "INFO" "We're going to check and make sure that applications built"
>+log "INFO" "against DPDK DSOs from tag $TAG1 will still run when executed"
>+log "INFO" "against DPDK DSOs built from tag $TAG2."
>+log "INFO" ""
>+
>+# Check to make sure we have a clean tree
>+git status | grep -q clean
>+if [ $? -ne 0 ]
>+then
>+	log "WARN" "Working directory not clean, aborting"
>+	cleanup_and_exit 1
>+fi
>+
>+# Move to the root of the git tree
>+cd $(dirname $0)/..
>+
>+log "INFO" "Checking out version $TAG1 of the dpdk"
>+# Move to the old version of the tree
>+git checkout $TAG1
>+
>+# Make sure we configure SHARED libraries
>+# Also turn off IGB and KNI as those require kernel headers to build
>+sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
>+sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
>+sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
>+
>+# Checking abi compliance relies on using the dwarf information in
>+# The shared objects.  Thats only included in the DSO's if we build
>+# with -g
>+export EXTRA_CFLAGS=-g
>+export EXTRA_LDFLAGS=-g
>+
>+# Now configure the build
>+log "INFO" "Configuring DPDK $TAG1"
>+make config T=$TARGET O=$TARGET > $VERBOSE 2>&1
>+
>+log "INFO" "Building DPDK $TAG1. This might take a moment"
>+make O=$TARGET > $VERBOSE 2>&1
>+
>+if [ $? -ne 0 ]
>+then
>+	log "INFO" "THE BUILD FAILED.  ABORTING"

If the build fails while TAG1 is checked out, the user must check out their original local branch manually. I'd prefer it if the script checked out $CURRENT_BRANCH in the 'cleanup_and_exit' function. 

Same applies to TAG2, if the user CTRL-C's out of the script, and to any other command that might fail when a particular branch/tag is checked out (for example, the 'sed' commands fail when I run the script; however, they work when I run them on the command line - I'm investigating this currently).

>+	cleanup_and_exit 1
>+fi
>+
>+# Move to the lib directory
>+cd $TARGET/lib
>+log "INFO" "COLLECTING ABI INFORMATION FOR $TAG1"
>+for i in `ls *.so`
>+do
>+	$ABIDUMP $i -o $ABI_DIR/$i-ABI-0.dump -lver $TAG1
>+done
>+cd ../..
>+
>+# Now clean the tree, checkout the second tag, and rebuild
>+git clean -f -d
>+git reset --hard
>+# Move to the new version of the tree
>+log "INFO" "Checking out version $TAG2 of the dpdk"
>+git checkout $TAG2
>+
>+export EXTRA_CFLAGS=-g
>+export EXTRA_LDFLAGS=-g
>+
>+# Make sure we configure SHARED libraries
>+# Also turn off IGB and KNI as those require kernel headers to build
>+sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
>+sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
>+sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
>+
>+# Now configure the build
>+log "INFO" "Configuring DPDK $TAG2"
>+make config T=$TARGET O=$TARGET > $VERBOSE 2>&1
>+
>+log "INFO" "Building DPDK $TAG2. This might take a moment"
>+make O=$TARGET > $VERBOSE 2>&1
>+
>+if [ $? -ne 0 ]
>+then
>+	log "INFO" "THE BUILD FAILED.  ABORTING"
>+	cleanup_and_exit 1
>+fi
>+
>+cd $TARGET/lib
>+log "INFO" "COLLECTING ABI INFORMATION FOR $TAG2"
>+for i in `ls *.so`
>+do
>+	$ABIDUMP $i -o $ABI_DIR/$i-ABI-1.dump -lver $TAG2
>+done
>+cd ../..
>+
>+# Start comparison of ABI dumps
>+for i in `ls $ABI_DIR/*-1.dump`
>+do
>+	NEWNAME=`basename $i`
>+	OLDNAME=`basename $i | sed -e"s/1.dump/0.dump/"`
>+	LIBNAME=`basename $i | sed -e"s/-ABI-1.dump//"`
>+
>+	if [ ! -f $ABI_DIR/$OLDNAME ]
>+	then
>+		log "INFO" "$OLDNAME DOES NOT EXIST IN $TAG1. SKIPPING..."
>+	fi
>+
>+	#compare the abi dumps
>+	$ABICHECK -l $LIBNAME -old $ABI_DIR/$OLDNAME -new $ABI_DIR/$NEWNAME
>+done
>+
>+git reset --hard
>+git checkout $CURRENT_BRANCH
>+log "INFO" "ABI CHECK COMPLETE.  REPORTS ARE IN compat_report directory"
>+cleanup_and_exit 0
>+
>+
>--
>2.1.0

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [PATCH] ethdev: additional parameter in RX callback
  2015-03-12 19:15  3% ` Neil Horman
@ 2015-03-13  9:41  0%   ` Bruce Richardson
  2015-03-13 13:45  0%     ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Bruce Richardson @ 2015-03-13  9:41 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On Thu, Mar 12, 2015 at 03:15:40PM -0400, Neil Horman wrote:
> On Thu, Mar 12, 2015 at 04:54:27PM +0000, John McNamara wrote:
> > 
> > This patch is a minor extension to the recent patchset for RX/TX callbacks
> > based on feedback from users implementing solutions based on it.
> > 
> > The patch adds a new parameter to the RX callback to pass in the number of
> > available RX packets in addition to the number of dequeued packets.
> > This provides the RX callback functions with additional information
> > that can be used to decide how packets from a burst are handled.
> > 
> > The TX callback doesn't require this additional parameter so the RX
> > and TX callbacks no longer have the same function parameters. As such
> > the single RX/TX callback has been refactored into two separate callbacks.
> > 
> > Since this is an API change we hope it can be included in 2.0.0 to avoid
> > changing the API in a subsequent release.    
> > 
> > 
> > John McNamara (1):
> >   ethdev: added additional packet count parameter to RX callbacks
> > 
> >  examples/rxtx_callbacks/main.c |    3 +-
> >  lib/librte_ether/rte_ethdev.c  |    8 ++--
> >  lib/librte_ether/rte_ethdev.h  |   74 ++++++++++++++++++++++++++--------------
> >  3 files changed, 54 insertions(+), 31 deletions(-)
> > 
> > -- 
> > 1.7.4.1
> > 
> > 
> 
> 
> Well, we're well past the new feature phase of this cycle, so I would say NACK.
> I would also suggest that you don't need to modify ABI to accomodate this
> feature.  Instead just document the pkts array to be terminated by a reserved
> value, so that the callback can determine its size dynamically.  You could
> alternatively create a new api call that allows you to retrieve that information
> from the context of the callback.
> 
> Neil
> 

Yes, I would agree we are past the new feature phase. However, given that we
are making a change to the API, and a fairly small change too - adding one extra
parameter - we think that the benefit of including this now outweighs any risk
of merging the patch. It seems a bit crazy to ship a release with a new API and
then immediately change the API straight after release. Is it not better to
take the received feedback on the API and fix/improve it pre-release before it
gets set-in-stone?

/Bruce

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v3] ABI: Add abi checking utility
  2015-03-11 19:36  5%       ` Neil Horman
@ 2015-03-13  8:51  5%         ` Thomas Monjalon
  0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2015-03-13  8:51 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

Hi Neil,

2015-03-11 15:36, Neil Horman:
> On Thu, Mar 05, 2015 at 11:57:27AM -0500, Neil Horman wrote:
> > On Wed, Mar 04, 2015 at 05:49:50PM +0100, Thomas Monjalon wrote:
> > > 2015-03-04 11:26, Neil Horman:
> > > > +#trap on ctrl-c to clean up
> > > > +trap cleanup_and_exit SIGINT
> > > 
> > > I think INT is preffered over SIGINT.
> > > You may also add QUIT and TERM.
> > > With QUIT, you can replace cleanup_and_exit calls by a simple exit.
> > > 
> > > > +	CURRENT_BRANCH=`git log --pretty=format:%H HEAD~1..HEAD`
> > > 
> > > May be simpler "git log -1 --format=%H"
> > > 
> > It might be, but the above is equivalent, and --format is a more recent git-log
> > feature.  Older versions still require --pretty=format
> > 
> > > > +log "INFO" "We're going to check and make sure that applications built"
> > > > +log "INFO" "against DPDK DSOs from tag $TAG1 will still run when executed"
> > > > +log "INFO" "against DPDK DSOs built from tag $TAG2."
> > > 
> > > I think it may be removed as no app is run.
> > > 
> > The above doesn't indicate that an application will be run, only that the
> > purpose of this script is to ensure that older applications will still run,
> > which I think is appropriate.
> > 
> > > > +# Make sure we configure SHARED libraries
> > > > +# Also turn off IGB and KNI as those require kernel headers to build
> > > > +sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
> > > > +sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
> > > > +sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
> > > 
> > > So you prefer modifying defconfig instead of .config, right?
> > > (you sent it while I was answering on v2)
> > > 
> > Yes, correct.
> > 
> > > > +# Checking abi compliance relies on using the dwarf information in
> > > > +# The shared objects.  Thats only included in the DSO's if we build
> > > > +# with -g
> > > > +export EXTRA_CFLAGS=-g
> > > > +export EXTRA_LDFLAGS=-g
> > > [...]
> > > > +export EXTRA_CFLAGS=-g
> > > > +export EXTRA_LDFLAGS=-g
> > > 
> > > Already exported.
> > > 
> > Yeah, I'll clean that up later.

OK, could you send a v4 please?

> > > > +	OLDNAME=`basename $i | sed -e"s/1.dump/0.dump/"`
> > > 
> > > Could be OLDNAME=$(basename $i 1.dump)0.dump
> > > 
> > > > +	LIBNAME=`basename $i | sed -e"s/-ABI-1.dump//"`
> > > 
> > > Could be LIBNAME=$(basename $i -ABI-1.dump)
> > > 
> > It could be, but I prefer the clarity of the sed replacement.
> > 
> > Neil
> > 
> > > Thanks
> > > 
> > > 
> > 
> 
> Ping Thomas, is this going to make 2.0?

Yes sure, waiting a v4.

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [PATCH] ethdev: additional parameter in RX callback
  @ 2015-03-12 19:15  3% ` Neil Horman
  2015-03-13  9:41  0%   ` Bruce Richardson
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-03-12 19:15 UTC (permalink / raw)
  To: John McNamara; +Cc: dev

On Thu, Mar 12, 2015 at 04:54:27PM +0000, John McNamara wrote:
> 
> This patch is a minor extension to the recent patchset for RX/TX callbacks
> based on feedback from users implementing solutions based on it.
> 
> The patch adds a new parameter to the RX callback to pass in the number of
> available RX packets in addition to the number of dequeued packets.
> This provides the RX callback functions with additional information
> that can be used to decide how packets from a burst are handled.
> 
> The TX callback doesn't require this additional parameter so the RX
> and TX callbacks no longer have the same function parameters. As such
> the single RX/TX callback has been refactored into two separate callbacks.
> 
> Since this is an API change we hope it can be included in 2.0.0 to avoid
> changing the API in a subsequent release.    
> 
> 
> John McNamara (1):
>   ethdev: added additional packet count parameter to RX callbacks
> 
>  examples/rxtx_callbacks/main.c |    3 +-
>  lib/librte_ether/rte_ethdev.c  |    8 ++--
>  lib/librte_ether/rte_ethdev.h  |   74 ++++++++++++++++++++++++++--------------
>  3 files changed, 54 insertions(+), 31 deletions(-)
> 
> -- 
> 1.7.4.1
> 
> 


Well, we're well past the new feature phase of this cycle, so I would say NACK.
I would also suggest that you don't need to modify ABI to accommodate this
feature.  Instead just document the pkts array to be terminated by a reserved
value, so that the callback can determine its size dynamically.  You could
alternatively create a new api call that allows you to retrieve that information
from the context of the callback.

Neil

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v3] ABI: Add abi checking utility
  2015-03-05 16:57  5%     ` Neil Horman
@ 2015-03-11 19:36  5%       ` Neil Horman
  2015-03-13  8:51  5%         ` Thomas Monjalon
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-03-11 19:36 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

On Thu, Mar 05, 2015 at 11:57:27AM -0500, Neil Horman wrote:
> On Wed, Mar 04, 2015 at 05:49:50PM +0100, Thomas Monjalon wrote:
> > 2015-03-04 11:26, Neil Horman:
> > > +#trap on ctrl-c to clean up
> > > +trap cleanup_and_exit SIGINT
> > 
> > I think INT is preffered over SIGINT.
> > You may also add QUIT and TERM.
> > With QUIT, you can replace cleanup_and_exit calls by a simple exit.
> > 
> > > +	CURRENT_BRANCH=`git log --pretty=format:%H HEAD~1..HEAD`
> > 
> > May be simpler "git log -1 --format=%H"
> > 
> It might be, but the above is equivalent, and --format is a more recent git-log
> feature.  Older versions still require --pretty=format
> 
> > > +log "INFO" "We're going to check and make sure that applications built"
> > > +log "INFO" "against DPDK DSOs from tag $TAG1 will still run when executed"
> > > +log "INFO" "against DPDK DSOs built from tag $TAG2."
> > 
> > I think it may be removed as no app is run.
> > 
> The above doesn't indicate that an application will be run, only that the
> purpose of this script is to ensure that older applications will still run,
> which I think is appropriate.
> 
> > > +# Make sure we configure SHARED libraries
> > > +# Also turn off IGB and KNI as those require kernel headers to build
> > > +sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
> > > +sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
> > > +sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
> > 
> > So you prefer modifying defconfig instead of .config, right?
> > (you sent it while I was answering on v2)
> > 
> Yes, correct.
> 
> > > +# Checking abi compliance relies on using the dwarf information in
> > > +# The shared objects.  Thats only included in the DSO's if we build
> > > +# with -g
> > > +export EXTRA_CFLAGS=-g
> > > +export EXTRA_LDFLAGS=-g
> > [...]
> > > +export EXTRA_CFLAGS=-g
> > > +export EXTRA_LDFLAGS=-g
> > 
> > Already exported.
> > 
> Yeah, I'll clean that up later.
> 
> > > +	OLDNAME=`basename $i | sed -e"s/1.dump/0.dump/"`
> > 
> > Could be OLDNAME=$(basename $i 1.dump)0.dump
> > 
> > > +	LIBNAME=`basename $i | sed -e"s/-ABI-1.dump//"`
> > 
> > Could be LIBNAME=$(basename $i -ABI-1.dump)
> > 
> It could be, but I prefer the clarity of the sed replacement.
> 
> Neil
> 
> > Thanks
> > 
> > 
> 

Ping Thomas, is this going to make 2.0?

Thanks
Neil

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [RFC] resolve conflict between net/ethernet.h and rte_ethdev.h
  2015-03-10 13:29  3%     ` Thomas Monjalon
@ 2015-03-10 15:46  0%       ` Stephen Hemminger
  0 siblings, 0 replies; 200+ results
From: Stephen Hemminger @ 2015-03-10 15:46 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

On Tue, 10 Mar 2015 14:29:11 +0100
Thomas Monjalon <thomas.monjalon@6wind.com> wrote:

> Hi Stephen,
> If, by any chance, you are willing to reply to this thread,
> maybe you would like to send a non-rfc patch with these 2 additions:
> - rename addr_bytes to ether_addr_octet everywhere
> - mark addr_bytes as deprecated in doc/guides/rel_notes/abi.rst

I can respin with the global changes, just not a big fan of deprecation
since it only pisses off users.

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [RFC] resolve conflict between net/ethernet.h and rte_ethdev.h
  @ 2015-03-10 13:29  3%     ` Thomas Monjalon
  2015-03-10 15:46  0%       ` Stephen Hemminger
  0 siblings, 1 reply; 200+ results
From: Thomas Monjalon @ 2015-03-10 13:29 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev

2015-03-05 00:16, Thomas Monjalon:
> 2015-01-06 11:44, Thomas Monjalon:
> > 2014-12-27 15:13, Stephen Hemminger:
> > > This is a patch to address the conflict between <net/ethernet.h>
> > > and the definitions in <rte_ethdev.h>. It has two side effects
> > > worth discussion:
> > >   1. It forces inclusion of net/ethernet.h
> > >   2. It has definition to deal with the differing structure elements
> > >      in the two versions of struct ether_addr.
> > > 
> > > By doing this ether_ntoa and related functions can be used without
> > > messing with prototypes.
> > > 
> > > Alternative is more complex #ifdef magic like linux/libc-compat.h
> > 
> > [...]
> > 
> > > +#include <net/ethernet.h>
> > 
> > [...]
> > 
> > > +/* Deprecated definition to allow for compatiablity with net/ethernet.h */
> > > +#define addr_bytes	ether_addr_octet
> > 
> > This is defining a common identifier without prefix.
> > So it will be forbidden to use addr_bytes as variable name.
> > I understand you are trying to keep compatibility with both structures,
> > but the drawback is really nasty.
> > Is there another solution? Or at least, we could mark it as deprecated and
> > remove it in release 2.1.
> 
> ping
> Any opinion?

Hi Stephen,
If, by any chance, you are willing to reply to this thread,
maybe you would like to send a non-rfc patch with these 2 additions:
- rename addr_bytes to ether_addr_octet everywhere
- mark addr_bytes as deprecated in doc/guides/rel_notes/abi.rst

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v3] ABI: Add abi checking utility
  2015-03-04 16:49  9%   ` Thomas Monjalon
@ 2015-03-05 16:57  5%     ` Neil Horman
  2015-03-11 19:36  5%       ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-03-05 16:57 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

On Wed, Mar 04, 2015 at 05:49:50PM +0100, Thomas Monjalon wrote:
> 2015-03-04 11:26, Neil Horman:
> > +#trap on ctrl-c to clean up
> > +trap cleanup_and_exit SIGINT
> 
> I think INT is preffered over SIGINT.
> You may also add QUIT and TERM.
> With QUIT, you can replace cleanup_and_exit calls by a simple exit.
> 
> > +	CURRENT_BRANCH=`git log --pretty=format:%H HEAD~1..HEAD`
> 
> May be simpler "git log -1 --format=%H"
> 
It might be, but the above is equivalent, and --format is a more recent git-log
feature.  Older versions still require --pretty=format

> > +log "INFO" "We're going to check and make sure that applications built"
> > +log "INFO" "against DPDK DSOs from tag $TAG1 will still run when executed"
> > +log "INFO" "against DPDK DSOs built from tag $TAG2."
> 
> I think it may be removed as no app is run.
> 
The above doesn't indicate that an application will be run, only that the
purpose of this script is to ensure that older applications will still run,
which I think is appropriate.

> > +# Make sure we configure SHARED libraries
> > +# Also turn off IGB and KNI as those require kernel headers to build
> > +sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
> > +sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
> > +sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
> 
> So you prefer modifying defconfig instead of .config, right?
> (you sent it while I was answering on v2)
> 
Yes, correct.

> > +# Checking abi compliance relies on using the dwarf information in
> > +# The shared objects.  Thats only included in the DSO's if we build
> > +# with -g
> > +export EXTRA_CFLAGS=-g
> > +export EXTRA_LDFLAGS=-g
> [...]
> > +export EXTRA_CFLAGS=-g
> > +export EXTRA_LDFLAGS=-g
> 
> Already exported.
> 
Yeah, I'll clean that up later.

> > +	OLDNAME=`basename $i | sed -e"s/1.dump/0.dump/"`
> 
> Could be OLDNAME=$(basename $i 1.dump)0.dump
> 
> > +	LIBNAME=`basename $i | sed -e"s/-ABI-1.dump//"`
> 
> Could be LIBNAME=$(basename $i -ABI-1.dump)
> 
It could be, but I prefer the clarity of the sed replacement.

Neil

> Thanks
> 
> 

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [PATCH v3] ABI: Add abi checking utility
  2015-03-04 16:26 17% ` [dpdk-dev] [PATCH v3] " Neil Horman
@ 2015-03-04 16:49  9%   ` Thomas Monjalon
  2015-03-05 16:57  5%     ` Neil Horman
  2015-03-13 11:56  5%   ` Kavanagh, Mark B
  1 sibling, 1 reply; 200+ results
From: Thomas Monjalon @ 2015-03-04 16:49 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

2015-03-04 11:26, Neil Horman:
> +#trap on ctrl-c to clean up
> +trap cleanup_and_exit SIGINT

I think INT is preferred over SIGINT.
You may also add QUIT and TERM.
With QUIT, you can replace cleanup_and_exit calls by a simple exit.

> +	CURRENT_BRANCH=`git log --pretty=format:%H HEAD~1..HEAD`

May be simpler "git log -1 --format=%H"

> +log "INFO" "We're going to check and make sure that applications built"
> +log "INFO" "against DPDK DSOs from tag $TAG1 will still run when executed"
> +log "INFO" "against DPDK DSOs built from tag $TAG2."

I think it may be removed as no app is run.

> +# Make sure we configure SHARED libraries
> +# Also turn off IGB and KNI as those require kernel headers to build
> +sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
> +sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
> +sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET

So you prefer modifying defconfig instead of .config, right?
(you sent it while I was answering on v2)

> +# Checking abi compliance relies on using the dwarf information in
> +# The shared objects.  Thats only included in the DSO's if we build
> +# with -g
> +export EXTRA_CFLAGS=-g
> +export EXTRA_LDFLAGS=-g
[...]
> +export EXTRA_CFLAGS=-g
> +export EXTRA_LDFLAGS=-g

Already exported.

> +	OLDNAME=`basename $i | sed -e"s/1.dump/0.dump/"`

Could be OLDNAME=$(basename $i 1.dump)0.dump

> +	LIBNAME=`basename $i | sed -e"s/-ABI-1.dump//"`

Could be LIBNAME=$(basename $i -ABI-1.dump)

Thanks

^ permalink raw reply	[relevance 9%]

* [dpdk-dev] [PATCH v3] ABI: Add abi checking utility
    @ 2015-03-04 16:26 17% ` Neil Horman
  2015-03-04 16:49  9%   ` Thomas Monjalon
  2015-03-13 11:56  5%   ` Kavanagh, Mark B
  2015-03-13 14:09 17% ` [dpdk-dev] [PATCH v4] " Neil Horman
  2015-03-17 18:08 29% ` [dpdk-dev] [PATCH v5] " Neil Horman
  3 siblings, 2 replies; 200+ results
From: Neil Horman @ 2015-03-04 16:26 UTC (permalink / raw)
  To: dev

There was a request for an abi validation utility for the ongoing ABI stability
work.  As it turns out there is an abi compliance checker in development that
seems to be under active development and provides fairly detailed ABI compliance
reports.  It's not yet intelligent enough to understand symbol versioning, but it
does provide the ability to identify symbols which have changed between
releases, along with details of the change, and offers developers the
opportunity to identify which symbols then need versioning and validation for a
given update via manual testing.

This script automates the use of the compliance checker between two arbitrarily
specified tags within the dpdk tree.  To execute enter the $RTE_SDK directory
and run:

./scripts/validate_abi.sh $GIT_TAG1 $GIT_TAG2 $CONFIG

where $GIT_TAG1 and 2 are git tags and $CONFIG is a config specification
suitable for passing as the T= variable in the make config command.

Note the upstream source for the abi compliance checker is here:
http://ispras.linuxbase.org/index.php/ABI_compliance_checker

It generates a report for each DSO built from the requested tags that developers
can review to find ABI compliance issues.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

---

Change Notes:

v2) Fixed some typos as requested by Thomas

v3) Fixed some additional typos Thomas requested
    Improved script to work from detached state
    Added some documentation to the changelog
    Added some comments to the scripts
---
 scripts/validate_abi.sh | 248 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 248 insertions(+)
 create mode 100755 scripts/validate_abi.sh

diff --git a/scripts/validate_abi.sh b/scripts/validate_abi.sh
new file mode 100755
index 0000000..899cf5f
--- /dev/null
+++ b/scripts/validate_abi.sh
@@ -0,0 +1,248 @@
+#!/bin/sh
+#   BSD LICENSE
+#
+#   Copyright(c) 2015 Neil Horman. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+TAG1=$1
+TAG2=$2
+TARGET=$3
+ABI_DIR=`mktemp -d -p /tmp ABI.XXXXXX`
+
+usage() {
+	echo "$0 <TAG1> <TAG2> <TARGET>"
+}
+
+log() {
+	local level=$1
+	shift
+	echo "$*"
+}
+
+validate_tags() {
+	git tag -l | grep -q "$TAG1"
+	if [ $? -ne 0 ]
+	then
+		echo "$TAG1 is invalid"
+		return
+	fi
+	git tag -l | grep -q "$TAG2"
+	if [ $? -ne 0 ]
+	then
+		echo "$TAG2 is invalid"
+		return
+	fi
+}
+
+validate_args() {
+	if [ -z "$TAG1" ]
+	then
+		echo "Must Specify TAG1"
+		return
+	fi
+	if [ -z "$TAG2" ]
+	then
+		echo "Must Specify TAG2"
+		return
+	fi
+	if [ -z "$TARGET" ]
+	then
+		echo "Must Specify a build target"
+	fi
+}
+
+
+cleanup_and_exit() {
+	rm -rf $ABI_DIR
+	exit $1
+}
+
+###########################################
+#START
+############################################
+
+#trap on ctrl-c to clean up
+trap cleanup_and_exit SIGINT
+
+#Save the current branch
+CURRENT_BRANCH=`git branch | grep \* | cut -d' ' -f2`
+
+if [ -z "$CURRENT_BRANCH" ]
+then
+	CURRENT_BRANCH=`git log --pretty=format:%H HEAD~1..HEAD`
+fi
+
+if [ -n "$VERBOSE" ]
+then
+	export VERBOSE=/dev/stdout
+else
+	export VERBOSE=/dev/null
+fi
+
+# Validate that we have all the arguments we need
+res=$(validate_args)
+if [ -n "$res" ]
+then
+	echo $res
+	usage
+	cleanup_and_exit 1
+fi
+
+# Make sure our tags exist
+res=$(validate_tags)
+if [ -n "$res" ]
+then
+	echo $res
+	cleanup_and_exit 1
+fi
+
+ABICHECK=`which abi-compliance-checker 2>/dev/null`
+if [ $? -ne 0 ]
+then
+	log "INFO" "Cant find abi-compliance-checker utility"
+	cleanup_and_exit 1
+fi
+
+ABIDUMP=`which abi-dumper 2>/dev/null`
+if [ $? -ne 0 ]
+then
+	log "INFO" "Cant find abi-dumper utility"
+	cleanup_and_exit 1
+fi
+
+log "INFO" "We're going to check and make sure that applications built"
+log "INFO" "against DPDK DSOs from tag $TAG1 will still run when executed"
+log "INFO" "against DPDK DSOs built from tag $TAG2."
+log "INFO" ""
+
+# Check to make sure we have a clean tree
+git status | grep -q clean
+if [ $? -ne 0 ]
+then
+	log "WARN" "Working directory not clean, aborting"
+	cleanup_and_exit 1
+fi
+
+# Move to the root of the git tree
+cd $(dirname $0)/..
+
+log "INFO" "Checking out version $TAG1 of the dpdk"
+# Move to the old version of the tree
+git checkout $TAG1
+
+# Make sure we configure SHARED libraries
+# Also turn off IGB and KNI as those require kernel headers to build
+sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
+sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
+sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
+
+# Checking abi compliance relies on using the dwarf information in
+# The shared objects.  Thats only included in the DSO's if we build
+# with -g
+export EXTRA_CFLAGS=-g
+export EXTRA_LDFLAGS=-g
+
+# Now configure the build
+log "INFO" "Configuring DPDK $TAG1"
+make config T=$TARGET O=$TARGET > $VERBOSE 2>&1
+
+log "INFO" "Building DPDK $TAG1. This might take a moment"
+make O=$TARGET > $VERBOSE 2>&1
+
+if [ $? -ne 0 ]
+then
+	log "INFO" "THE BUILD FAILED.  ABORTING"
+	cleanup_and_exit 1
+fi
+
+# Move to the lib directory
+cd $TARGET/lib
+log "INFO" "COLLECTING ABI INFORMATION FOR $TAG1"
+for i in `ls *.so`
+do
+	$ABIDUMP $i -o $ABI_DIR/$i-ABI-0.dump -lver $TAG1
+done
+cd ../..
+
+# Now clean the tree, checkout the second tag, and rebuild
+git clean -f -d
+git reset --hard
+# Move to the new version of the tree
+log "INFO" "Checking out version $TAG2 of the dpdk"
+git checkout $TAG2
+
+export EXTRA_CFLAGS=-g
+export EXTRA_LDFLAGS=-g
+
+# Make sure we configure SHARED libraries
+# Also turn off IGB and KNI as those require kernel headers to build
+sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
+sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
+sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
+
+# Now configure the build
+log "INFO" "Configuring DPDK $TAG2"
+make config T=$TARGET O=$TARGET > $VERBOSE 2>&1
+
+log "INFO" "Building DPDK $TAG2. This might take a moment"
+make O=$TARGET > $VERBOSE 2>&1
+
+if [ $? -ne 0 ]
+then
+	log "INFO" "THE BUILD FAILED.  ABORTING"
+	cleanup_and_exit 1
+fi
+
+cd $TARGET/lib
+log "INFO" "COLLECTING ABI INFORMATION FOR $TAG2"
+for i in `ls *.so`
+do
+	$ABIDUMP $i -o $ABI_DIR/$i-ABI-1.dump -lver $TAG2
+done
+cd ../..
+
+# Start comparison of ABI dumps
+for i in `ls $ABI_DIR/*-1.dump`
+do
+	NEWNAME=`basename $i`
+	OLDNAME=`basename $i | sed -e"s/1.dump/0.dump/"`
+	LIBNAME=`basename $i | sed -e"s/-ABI-1.dump//"`
+
+	if [ ! -f $ABI_DIR/$OLDNAME ]
+	then
+		log "INFO" "$OLDNAME DOES NOT EXIST IN $TAG1. SKIPPING..."
+	fi
+
+	#compare the abi dumps
+	$ABICHECK -l $LIBNAME -old $ABI_DIR/$OLDNAME -new $ABI_DIR/$NEWNAME
+done
+
+git reset --hard
+git checkout $CURRENT_BRANCH
+log "INFO" "ABI CHECK COMPLETE.  REPORTS ARE IN compat_report directory"
+cleanup_and_exit 0
+
+
-- 
2.1.0

^ permalink raw reply	[relevance 17%]

* Re: [dpdk-dev] [PATCH v2] ABI: Add abi checking utility
  2015-03-04 15:42  5%             ` Neil Horman
@ 2015-03-04 16:15  5%               ` Thomas Monjalon
  0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2015-03-04 16:15 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

2015-03-04 10:42, Neil Horman:
> On Wed, Mar 04, 2015 at 04:15:18PM +0100, Thomas Monjalon wrote:
> > 2015-03-04 09:39, Neil Horman:
> > > On Wed, Mar 04, 2015 at 01:54:49PM +0100, Thomas Monjalon wrote:
> > > > Hi Neil,
> > > > 
> > > > I remove parts that I agree and reply to those which deserve more discussion.
> > > > 
> > > > 2015-03-04 06:49, Neil Horman:
> > > > > On Tue, Mar 03, 2015 at 11:18:47PM +0100, Thomas Monjalon wrote:
> > > > > > 2015-02-02 13:18, Neil Horman:
> > > > > > > +# Make sure we configure SHARED libraries
> > > > > > > +# Also turn off IGB and KNI as those require kernel headers to build
> > > > > > > +sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
> > > > > > > +sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
> > > > > > > +sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
> > > > > > 
> > > > > > Why not tuning configuration after make config in .config file?
> > > > > > 
> > > > > Because this way we save a reconfig (from a developer viewpoint), you should run
> > > > > make config again after changing configs, and so this way you save doing that.
> > > > 
> > > > No, you run make config once and update .config file. That's the recommended
> > > > way to configure DPDK.
> > > > defconfig files are default configurations and should stay read-only.
> > > 
> > > They get overwritten when we do the git resets.  Its silly to modify your config
> > > file after you run make config, in the event the make target has to re-read any
> > > modified options and adjust dependent config files accordingly.  I understand
> > > that doesn't happen now, but its common practice for every open source project
> > > in existance.
> > 
> > I'm not sure to understand. Maybe an example would help.
> > By the way, your method works.
> 
> For example, the linux kernel.  The .config file that is generated in the root
> directory is converted to an autoconf.h in parallel with its generation, for
> applications to key off of.  If you change something in .config, you need to run
> make config again so that those changes are reflected into the other
> auto-generated files.  Thats common practice.  So its counter intuitive to
> assume that altering the generated .config file is automatically recognized by
> the rest of the build, without a subsequent make config (be it explicit or and
> implicit dependency of the make all target).

OK thanks, now I better understand how you think about DPDK config.
Note that in Linux you are modifying .config, not the defconfig.
I'm not going to debate how it could be improved now but I think we shouldn't
dynamically modify defconfig files to avoid confusion about their purpose.

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [PATCH v2] ABI: Add abi checking utility
  2015-03-04 15:15  5%           ` Thomas Monjalon
@ 2015-03-04 15:42  5%             ` Neil Horman
  2015-03-04 16:15  5%               ` Thomas Monjalon
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-03-04 15:42 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

On Wed, Mar 04, 2015 at 04:15:18PM +0100, Thomas Monjalon wrote:
> 2015-03-04 09:39, Neil Horman:
> > On Wed, Mar 04, 2015 at 01:54:49PM +0100, Thomas Monjalon wrote:
> > > Hi Neil,
> > > 
> > > I remove parts that I agree and reply to those which deserve more discussion.
> > > 
> > > 2015-03-04 06:49, Neil Horman:
> > > > On Tue, Mar 03, 2015 at 11:18:47PM +0100, Thomas Monjalon wrote:
> > > > > 2015-02-02 13:18, Neil Horman:
> > > > > > +# Validate that we have all the arguments we need
> > > > > > +if [ ! -d ./.git ]
> > > > > > +then
> > > > > > +	log "WARN" "You must be in the root of the dpdk git tree"
> > > > > > +	log "WARN" "You are in $PWD"
> > > > > > +	cleanup_and_exit 1
> > > > > > +fi
> > > > > 
> > > > > Why not cd $(dirname $0)/.. instead of returning an error?
> > > > 
> > > > Why would that help in finding the base of the git tree.  Theres no guarantee
> > > > that you are in a subdirectory of a git tree.  I suppose we can try it
> > > > recursively until we hit /, but it seems just as easy and clear to tell the user
> > > > whats needed.
> > > 
> > > No I'm saying that you could avoid this check by going into the right
> > > directory from the beginning.
> > > We know that the root dir is $(dirname $0)/.. because this script is in
> > > scripts/ directory.
> > > 
> > That only helps if you start from the right directory.  If you run this command
> > from some other location, your solution just breaks.
> 
> Why it would break? $(dirname $0) is always reachable because you launched $0.
> The only exception is for the case the PATH variable is used to find the DPDK
> scripts/ directory (should not happen).
> 
Ah!  Sorry, misunderstood, for some reason I was conflating $0 with $PWD.  Yes,
this will work and I'll update it

> > > > > > +# Make sure we configure SHARED libraries
> > > > > > +# Also turn off IGB and KNI as those require kernel headers to build
> > > > > > +sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
> > > > > > +sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
> > > > > > +sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
> > > > > 
> > > > > Why not tuning configuration after make config in .config file?
> > > > > 
> > > > Because this way we save a reconfig (from a developer viewpoint), you should run
> > > > make config again after changing configs, and so this way you save doing that.
> > > 
> > > No, you run make config once and update .config file. That's the recommended
> > > way to configure DPDK.
> > > defconfig files are default configurations and should stay read-only.
> > 
> > They get overwritten when we do the git resets.  Its silly to modify your config
> > file after you run make config, in the event the make target has to re-read any
> > modified options and adjust dependent config files accordingly.  I understand
> > that doesn't happen now, but its common practice for every open source project
> > in existance.
> 
> I'm not sure to understand. Maybe an example would help.
> By the way, your method works.
For example, the linux kernel.  The .config file that is generated in the root
directory is converted to an autoconf.h in parallel with its generation, for
applications to key off of.  If you change something in .config, you need to run
make config again so that those changes are reflected into the other
auto-generated files.  That's common practice.  So it's counterintuitive to
assume that altering the generated .config file is automatically recognized by
the rest of the build, without a subsequent make config (be it an explicit or an
implicit dependency of the make all target).

Neil


> 
> 

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [PATCH v2] ABI: Add abi checking utility
  2015-03-04 14:39  5%         ` Neil Horman
@ 2015-03-04 15:15  5%           ` Thomas Monjalon
  2015-03-04 15:42  5%             ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Thomas Monjalon @ 2015-03-04 15:15 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

2015-03-04 09:39, Neil Horman:
> On Wed, Mar 04, 2015 at 01:54:49PM +0100, Thomas Monjalon wrote:
> > Hi Neil,
> > 
> > I remove parts that I agree and reply to those which deserve more discussion.
> > 
> > 2015-03-04 06:49, Neil Horman:
> > > On Tue, Mar 03, 2015 at 11:18:47PM +0100, Thomas Monjalon wrote:
> > > > 2015-02-02 13:18, Neil Horman:
> > > > > +# Validate that we have all the arguments we need
> > > > > +if [ ! -d ./.git ]
> > > > > +then
> > > > > +	log "WARN" "You must be in the root of the dpdk git tree"
> > > > > +	log "WARN" "You are in $PWD"
> > > > > +	cleanup_and_exit 1
> > > > > +fi
> > > > 
> > > > Why not cd $(dirname $0)/.. instead of returning an error?
> > > 
> > > Why would that help in finding the base of the git tree.  Theres no guarantee
> > > that you are in a subdirectory of a git tree.  I suppose we can try it
> > > recursively until we hit /, but it seems just as easy and clear to tell the user
> > > whats needed.
> > 
> > No I'm saying that you could avoid this check by going into the right
> > directory from the beginning.
> > We know that the root dir is $(dirname $0)/.. because this script is in
> > scripts/ directory.
> > 
> That only helps if you start from the right directory.  If you run this command
> from some other location, your solution just breaks.

Why would it break? $(dirname $0) is always reachable because you launched $0.
The only exception is for the case the PATH variable is used to find the DPDK
scripts/ directory (should not happen).

> > > > > +# Make sure we configure SHARED libraries
> > > > > +# Also turn off IGB and KNI as those require kernel headers to build
> > > > > +sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
> > > > > +sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
> > > > > +sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
> > > > 
> > > > Why not tuning configuration after make config in .config file?
> > > > 
> > > Because this way we save a reconfig (from a developer viewpoint), you should run
> > > make config again after changing configs, and so this way you save doing that.
> > 
> > No, you run make config once and update .config file. That's the recommended
> > way to configure DPDK.
> > defconfig files are default configurations and should stay read-only.
> 
> They get overwritten when we do the git resets.  Its silly to modify your config
> file after you run make config, in the event the make target has to re-read any
> modified options and adjust dependent config files accordingly.  I understand
> that doesn't happen now, but its common practice for every open source project
> in existance.

I'm not sure I understand. Maybe an example would help.
By the way, your method works.

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [PATCH v2] ABI: Add abi checking utility
  2015-03-04 12:54  5%       ` Thomas Monjalon
@ 2015-03-04 14:39  5%         ` Neil Horman
  2015-03-04 15:15  5%           ` Thomas Monjalon
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-03-04 14:39 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

On Wed, Mar 04, 2015 at 01:54:49PM +0100, Thomas Monjalon wrote:
> Hi Neil,
> 
> I remove parts that I agree and reply to those which deserve more discussion.
> 
> 2015-03-04 06:49, Neil Horman:
> > On Tue, Mar 03, 2015 at 11:18:47PM +0100, Thomas Monjalon wrote:
> > > 2015-02-02 13:18, Neil Horman:
> > > > +# Validate that we have all the arguments we need
> > > > +res=$(validate_args)
> > > > +if [ -n "$res" ]
> > > > +then
> > > > +	echo $res
> > > 
> > > Should be redirected to stderr >&2
> > > 
> > Why? this is eactly what I intended.  All the other messages from log are
> > directed to stdout, so should this be.
> 
> I'm wondering if there's some normal output which could be redirected for
> further processing, and some error output.
> My comment was not only for this log but also for every error message.
> 

No, the report output is in html format and always to a file, so stdout isn't
used for any inline informational reporting.

> > > I guess this is the tool:
> > > 	http://ispras.linuxbase.org/index.php/ABI_compliance_checker
> > 
> > Correct.
> 
> So maybe you should add this URL in the commit log.
> 
sure, fine.

> > > > +log "INFO" "We're going to check and make sure that applications built"
> > > > +log "INFO" "against DPDK DSOs from tag $TAG1 will still run when executed"
> > > > +log "INFO" "against DPDK DSOs built from tag $TAG2."
> > > > +log "INFO" ""
> 
> > > > +if [ ! -d ./.git ]
> > > > +then
> > > > +	log "WARN" "You must be in the root of the dpdk git tree"
> > > > +	log "WARN" "You are in $PWD"
> > > > +	cleanup_and_exit 1
> > > > +fi
> > > 
> > > Why not cd $(dirname $0)/.. instead of returning an error?
> > 
> > Why would that help in finding the base of the git tree.  Theres no guarantee
> > that you are in a subdirectory of a git tree.  I suppose we can try it
> > recursively until we hit /, but it seems just as easy and clear to tell the user
> > whats needed.
> 
> No I'm saying that you could avoid this check by going into the right
> directory from the beginning.
> We know that the root dir is $(dirname $0)/.. because this script is in
> scripts/ directory.
> 
That only helps if you start from the right directory.  If you run this command
from some other location, your solution just breaks.

> > > > +# Make sure we configure SHARED libraries
> > > > +# Also turn off IGB and KNI as those require kernel headers to build
> > > > +sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
> > > > +sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
> > > > +sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
> > > 
> > > Why not tuning configuration after make config in .config file?
> > > 
> > Because this way we save a reconfig (from a developer viewpoint), you should run
> > make config again after changing configs, and so this way you save doing that.
> 
> No, you run make config once and update .config file. That's the recommended
> way to configure DPDK.
> defconfig files are default configurations and should stay read-only.
They get overwritten when we do the git resets.  It's silly to modify your config
file after you run make config, in the event the make target has to re-read any
modified options and adjust dependent config files accordingly.  I understand
that doesn't happen now, but it's common practice for every open source project
in existence.

> 
> > > > +for i in `ls *.so`
> > > 
> > > I think ls is useless.
> > > 
> > Um, I don't?  Not sure what you're getting at here.
> 
> I think "for i in *.so" should work.
> 
Then it's irrelevant in my mind.  They both work equally well.


> > > > +	#compare the abi dumps
> > > > +	$ABICHECK -l $LIBNAME -old $ABI_DIR/$OLDNAME -new $ABI_DIR/$NEWNAME
> > > 
> > > Do we need to do a visual check? I didn't try yet.
> > > 
> > Yes, it generates an html report of all the symbols exported in a build and
> > compares them with the alternate version.  That needs manual review.
> 
> OK I think it's important to explain in the commit log.
Ok.

> 
> > > So you compare the ABI dumps.
> > > Do we also need to run an app from TAG2 with libs from TAG1?
> > 
> > I started down that path, but its not really that helpful, as all it will do is
> > refuse to run if there is a symbol missing from a later version.  While that
> > might be helpful, its no where near as through as the full report from the
> > compliance checker.
> > 
> > The bottom line is that real ABI compliance requires a developer to be aware of
> > the changes going into the code and how they affect binary layout. A simple
> > "does it still work" test isn't sufficient.
> 
> I hope we'll be able to integrate this kind of tool in an automated sanity
> check in order to find obvious errors.
> 
> Thanks
> 

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [PATCH v2] ABI: Add abi checking utility
  2015-03-04 11:49  8%     ` Neil Horman
@ 2015-03-04 12:54  5%       ` Thomas Monjalon
  2015-03-04 14:39  5%         ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Thomas Monjalon @ 2015-03-04 12:54 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

Hi Neil,

I removed the parts that I agree with and reply to those which deserve more discussion.

2015-03-04 06:49, Neil Horman:
> On Tue, Mar 03, 2015 at 11:18:47PM +0100, Thomas Monjalon wrote:
> > 2015-02-02 13:18, Neil Horman:
> > > +# Validate that we have all the arguments we need
> > > +res=$(validate_args)
> > > +if [ -n "$res" ]
> > > +then
> > > +	echo $res
> > 
> > Should be redirected to stderr >&2
> > 
> Why? this is eactly what I intended.  All the other messages from log are
> directed to stdout, so should this be.

I'm wondering if there's some normal output which could be redirected for
further processing, and some error output.
My comment was not only for this log but also for every error message.

> > I guess this is the tool:
> > 	http://ispras.linuxbase.org/index.php/ABI_compliance_checker
> 
> Correct.

So maybe you should add this URL in the commit log.

> > > +log "INFO" "We're going to check and make sure that applications built"
> > > +log "INFO" "against DPDK DSOs from tag $TAG1 will still run when executed"
> > > +log "INFO" "against DPDK DSOs built from tag $TAG2."
> > > +log "INFO" ""

> > > +if [ ! -d ./.git ]
> > > +then
> > > +	log "WARN" "You must be in the root of the dpdk git tree"
> > > +	log "WARN" "You are in $PWD"
> > > +	cleanup_and_exit 1
> > > +fi
> > 
> > Why not cd $(dirname $0)/.. instead of returning an error?
> 
> Why would that help in finding the base of the git tree.  Theres no guarantee
> that you are in a subdirectory of a git tree.  I suppose we can try it
> recursively until we hit /, but it seems just as easy and clear to tell the user
> whats needed.

No I'm saying that you could avoid this check by going into the right
directory from the beginning.
We know that the root dir is $(dirname $0)/.. because this script is in
scripts/ directory.

> > > +# Make sure we configure SHARED libraries
> > > +# Also turn off IGB and KNI as those require kernel headers to build
> > > +sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
> > > +sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
> > > +sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
> > 
> > Why not tuning configuration after make config in .config file?
> > 
> Because this way we save a reconfig (from a developer viewpoint), you should run
> make config again after changing configs, and so this way you save doing that.

No, you run make config once and update .config file. That's the recommended
way to configure DPDK.
defconfig files are default configurations and should stay read-only.

> > > +for i in `ls *.so`
> > 
> > I think ls is useless.
> > 
> Um, I don't?  Not sure what you're getting at here.

I think "for i in *.so" should work.

> > > +	#compare the abi dumps
> > > +	$ABICHECK -l $LIBNAME -old $ABI_DIR/$OLDNAME -new $ABI_DIR/$NEWNAME
> > 
> > Do we need to do a visual check? I didn't try yet.
> > 
> Yes, it generates an html report of all the symbols exported in a build and
> compares them with the alternate version.  That needs manual review.

OK I think it's important to explain in the commit log.

> > So you compare the ABI dumps.
> > Do we also need to run an app from TAG2 with libs from TAG1?
> 
> I started down that path, but its not really that helpful, as all it will do is
> refuse to run if there is a symbol missing from a later version.  While that
> might be helpful, its no where near as through as the full report from the
> compliance checker.
> 
> The bottom line is that real ABI compliance requires a developer to be aware of
> the changes going into the code and how they affect binary layout. A simple
> "does it still work" test isn't sufficient.

I hope we'll be able to integrate this kind of tool in an automated sanity
check in order to find obvious errors.

Thanks

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [PATCH v2] ABI: Add abi checking utility
  2015-03-03 22:18 10%   ` Thomas Monjalon
@ 2015-03-04 11:49  8%     ` Neil Horman
  2015-03-04 12:54  5%       ` Thomas Monjalon
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-03-04 11:49 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

On Tue, Mar 03, 2015 at 11:18:47PM +0100, Thomas Monjalon wrote:
> 2015-02-02 13:18, Neil Horman:
> > There was a request for an abi validation utiltyfor the ongoing ABI stability
> > work.  As it turns out there is a abi compliance checker in development that
> > seems to be under active development and provides fairly detailed ABI compliance
> > reports.  Its not yet intellegent enough to understand symbol versioning, but it
> > does provide the ability to identify symbols which have changed between
> > releases, along with details of the change, and offers develoeprs the
> > opportunity to identify which symbols then need versioning and validation for a
> > given update via manaul testing.
> 
> There's a lot of typos in this text. Please check.
> 
Three.  There are 3 typos.  But sure, I'll fix them.

><snip>
> > +
> > +usage() {
> > +	echo "$0 <TAG1> <TAG2> <TARGET>"
> > +}
> > +
> > +log() {
> > +	local level=$1
> 
> level is not used later?
> 
Not yet, but you'll note all the log calls start with a log level to add
filtering.  I'd rather leave this here as it doesn't hurt anything and
effectively documents the parameter.

><snip>
> > +	shift
> > +	echo "$*"
> > +}
> > +
> > +validate_tags() {
> > +	git tag -l | grep -q "$TAG1"
> > +	if [ $? -ne 0 ]
> > +	then
> > +		echo "$TAG1 is invalid"
> > +		return
> > +	fi
> > +	git tag -l | grep -q "$TAG2"
> > +	if [ $? -ne 0 ]
> > +	then
> > +		echo "$TAG2 is invalid"
> > +		return
> > +	fi
> > +}
> > +
> > +validate_args() {
> > +	if [ -z "$TAG1" ]
> > +	then
> > +		echo "Must Specify TAG1"
> > +		return
> > +	fi
> > +	if [ -z "$TAG2" ]
> > +	then
> > +		echo "Must Specify TAG2"
> > +		return
> > +	fi
> > +	if [ -z "$TARGET" ]
> > +	then
> > +		echo "Must Specify a build target"
> > +	fi
> > +}
> > +
> > +
> > +cleanup_and_exit() {
> > +	rm -rf $ABI_DIR
> > +	exit $1
> > +}
> 
> This function could be automatically invoked with trap.
> 
Yes, I can add that.

> > +###########################################
> > +#START
> > +############################################
> > +
> > +#Save the current branch
> > +CURRENT_BRANCH=`git branch | grep \* | cut -d' ' -f2`
> 
> Will it work when not on any branch?
> 
No it won't, and I honestly wasn't that worried about it, as people
don't/shouldn't make changes in detached head state.  I can add a check to
ensure you're on a branch though.

> > +
> > +if [ -n "$VERBOSE" ]
> > +then
> > +	export VERBOSE=/dev/stdout
> > +else
> > +	export VERBOSE=/dev/null
> > +fi
> > +
> > +# Validate that we have all the arguments we need
> > +res=$(validate_args)
> > +if [ -n "$res" ]
> > +then
> > +	echo $res
> 
> Should be redirected to stderr >&2
> 
Why? This is exactly what I intended.  All the other messages from log are
directed to stdout, so should this be.

> > +	usage
> > +	cleanup_and_exit 1
> > +fi
> > +
> > +# Make sure our tags exist
> > +res=$(validate_tags)
> > +if [ -n "$res" ]
> > +then
> > +	echo $res
> > +	cleanup_and_exit 1
> > +fi
> > +
> > +ABICHECK=`which abi-compliance-checker 2>/dev/null`
> 
> Why not using the $() form like above?
> 
I don't honestly recall, but I do remember fighting trying to get output from
that format for some reason, and just left this as it was, as it wasn't
particularly relevant.

> I guess this is the tool:
> 	http://ispras.linuxbase.org/index.php/ABI_compliance_checker
> 
Correct.

> > +if [ $? -ne 0 ]
> > +then
> > +	log "INFO" "Cant find abi-compliance-checker utility"
> > +	cleanup_and_exit 1
> > +fi
> > +
> > +ABIDUMP=`which abi-dumper 2>/dev/null`
> > +if [ $? -ne 0 ]
> > +then
> > +	log "INFO" "Cant find abi-dumper utility"
> > +	cleanup_and_exit 1
> > +fi
> > +
> > +log "INFO" "We're going to check and make sure that applications built"
> > +log "INFO" "against DPDK DSOs from tag $TAG1 will still run when executed"
> > +log "INFO" "against DPDK DSOs built from tag $TAG2."
> > +log "INFO" ""
> > +
> > +# Check to make sure we have a clean tree
> > +git status | grep -q clean
> > +if [ $? -ne 0 ]
> > +then
> 
> You may compact in one line:
> if git status | grep -q clean ; then
> 
I explicitly do execution and error checking on separate lines as I think it's
more clear.  You'll find this style consistent in the script.

> > +	log "WARN" "Working directory not clean, aborting"
> > +	cleanup_and_exit 1
> > +fi
> > +
> > +if [ ! -d ./.git ]
> > +then
> > +	log "WARN" "You must be in the root of the dpdk git tree"
> > +	log "WARN" "You are in $PWD"
> > +	cleanup_and_exit 1
> > +fi
> 
> Why not cd $(dirname $0)/.. instead of returning an error?
> 
Why would that help in finding the base of the git tree?  There's no guarantee
that you are in a subdirectory of a git tree.  I suppose we can try it
recursively until we hit /, but it seems just as easy and clear to tell the user
what's needed.

> > +log "INFO" "Checking out version $TAG1 of the dpdk"
> > +# Move to the old version of the tree
> > +git checkout $TAG1
> > +
> > +# Make sure we configure SHARED libraries
> > +# Also turn off IGB and KNI as those require kernel headers to build
> > +sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
> > +sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
> > +sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
> 
> Why not tuning configuration after make config in .config file?
> 
Because this way we save a reconfig (from a developer viewpoint), you should run
make config again after changing configs, and so this way you save doing that.

> > +export EXTRA_CFLAGS=-g
> > +export EXTRA_LDFLAGS=-g
> 
> A comment is required (needed for abi-dumper?)
> 
Sure.

> > +# Now configure the build
> > +log "INFO" "Configuring DPDK $TAG1"
> > +make config T=$TARGET O=$TARGET > $VERBOSE 2>&1
> > +
> > +log "INFO" "Building DPDK $TAG1. This might take a moment"
> > +make O=$TARGET > $VERBOSE 2>&1
> 
> It would more efficient with a customizable -j option
> 
I'm sure it would, I'll look at that in a future enhancement.

> > +if [ $? -ne 0 ]
> > +then
> > +	log "INFO" "THE BUILD FAILED.  ABORTING"
> > +	cleanup_and_exit 1
> > +fi
> > +
> > +# Move to the lib directory
> > +cd $TARGET/lib
> > +log "INFO" "COLLECTING ABI INFORMATION FOR $TAG1"
> > +for i in `ls *.so`
> 
> I think ls is useless.
> 
Um, I don't?  Not sure what you're getting at here.

> > +do
> > +	$ABIDUMP $i -o $ABI_DIR/$i-ABI-0.dump -lver $TAG1
> > +done
> > +cd ../..
> > +
> > +# Now clean the tree, checkout the second tag, and rebuild
> > +git clean -f -d
> > +git reset --hard
> > +# Move to the new version of the tree
> > +log "INFO" "Checking out version $TAG2 of the dpdk"
> > +git checkout $TAG2
> > +
> > +export EXTRA_CFLAGS=-g
> > +export EXTRA_LDFLAGS=-g
> > +
> > +# Make sure we configure SHARED libraries
> > +# Also turn off IGB and KNI as those require kernel headers to build
> > +sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
> > +sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
> > +sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
> > +
> > +# Now configure the build
> > +log "INFO" "Configuring DPDK $TAG2"
> > +make config T=$TARGET O=$TARGET > $VERBOSE 2>&1
> > +
> > +log "INFO" "Building DPDK $TAG2. This might take a moment"
> > +make O=$TARGET > $VERBOSE 2>&1
> > +
> > +if [ $? -ne 0 ]
> > +then
> > +	log "INFO" "THE BUILD FAILED.  ABORTING"
> > +	cleanup_and_exit 1
> > +fi
> > +
> > +cd $TARGET/lib
> > +log "INFO" "COLLECTING ABI INFORMATION FOR $TAG2"
> > +for i in `ls *.so`
> > +do
> > +	$ABIDUMP $i -o $ABI_DIR/$i-ABI-1.dump -lver $TAG2
> > +done
> > +cd ../..
> > +
> > +# Start comparison of ABI dumps
> > +for i in `ls $ABI_DIR/*-1.dump`
> 
> Why ls?
> 
Because it performs the needed action for what I want to do here. Not sure what
you're proposing.

> > +do
> > +	NEWNAME=`basename $i`
> > +	OLDNAME=`basename $i | sed -e"s/1.dump/0.dump/"`
> > +	LIBNAME=`basename $i | sed -e"s/-ABI-1.dump//"`
> > +
> > +	if [ ! -f $ABI_DIR/$OLDNAME ]
> > +	then
> > +		log "INFO" "$OLDNAME DOES NOT EXIST IN $TAG1. SKIPPING..."
> > +	fi
> > +
> > +	#compare the abi dumps
> > +	$ABICHECK -l $LIBNAME -old $ABI_DIR/$OLDNAME -new $ABI_DIR/$NEWNAME
> 
> Do we need to do a visual check? I didn't try yet.
> 
Yes, it generates an html report of all the symbols exported in a build and
compares them with the alternate version.  That needs manual review.

> > +done
> > +
> > +git reset --hard
> > +git checkout $CURRENT_BRANCH
> > +log "INFO" "ABI CHECK COMPLETE.  REPORTS ARE IN compat_report directory"
> > +cleanup_and_exit 0
> 
> So you compare the ABI dumps.
> Do we also need to run an app from TAG2 with libs from TAG1?
> 
I started down that path, but it's not really that helpful, as all it will do is
refuse to run if there is a symbol missing from a later version.  While that
might be helpful, it's nowhere near as thorough as the full report from the
compliance checker.

The bottom line is that real ABI compliance requires a developer to be aware of
the changes going into the code and how they affect binary layout. A simple
"does it still work" test isn't sufficient.

Neil
> Thanks Neil
> 

^ permalink raw reply	[relevance 8%]

* Re: [dpdk-dev] [PATCH v2] ABI: Add abi checking utility
    2015-02-27 13:48  5%   ` Neil Horman
@ 2015-03-03 22:18 10%   ` Thomas Monjalon
  2015-03-04 11:49  8%     ` Neil Horman
  1 sibling, 1 reply; 200+ results
From: Thomas Monjalon @ 2015-03-03 22:18 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

2015-02-02 13:18, Neil Horman:
> There was a request for an abi validation utiltyfor the ongoing ABI stability
> work.  As it turns out there is a abi compliance checker in development that
> seems to be under active development and provides fairly detailed ABI compliance
> reports.  Its not yet intellegent enough to understand symbol versioning, but it
> does provide the ability to identify symbols which have changed between
> releases, along with details of the change, and offers develoeprs the
> opportunity to identify which symbols then need versioning and validation for a
> given update via manaul testing.

There are a lot of typos in this text. Please check.

> This script automates the use of the compliance checker between two arbitrarily
> specified tags within the dpdk tree.  To execute enter the $RTE_SDK directory
> and run:
> 
> ./scripts/validate_abi.sh $GIT_TAG1 $GIT_TAG2 $CONFIG
> 
> where $GIT_TAG1 and 2 are git tags and $CONFIG is a config specification
> suitable for passing as the T= variable in the make config command.
> 
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> 
> Change Notes:
> 
> v2) Fixed some typos as requested by Thomas
> ---
>  scripts/validate_abi.sh | 241 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 241 insertions(+)
>  create mode 100755 scripts/validate_abi.sh
> 
> diff --git a/scripts/validate_abi.sh b/scripts/validate_abi.sh
> new file mode 100755
> index 0000000..31583df
> --- /dev/null
> +++ b/scripts/validate_abi.sh
> @@ -0,0 +1,241 @@
> +#!/bin/sh
> +#   BSD LICENSE
> +#
> +#   Copyright(c) 2015 Neil Horman. All rights reserved.
> +#   All rights reserved.
> +#
> +#   Redistribution and use in source and binary forms, with or without
> +#   modification, are permitted provided that the following conditions
> +#   are met:
> +#
> +#     * Redistributions of source code must retain the above copyright
> +#       notice, this list of conditions and the following disclaimer.
> +#     * Redistributions in binary form must reproduce the above copyright
> +#       notice, this list of conditions and the following disclaimer in
> +#       the documentation and/or other materials provided with the
> +#       distribution.
> +#
> +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +
> +TAG1=$1
> +TAG2=$2
> +TARGET=$3
> +ABI_DIR=`mktemp -d -p /tmp ABI.XXXXXX`
> +
> +usage() {
> +	echo "$0 <TAG1> <TAG2> <TARGET>"
> +}
> +
> +log() {
> +	local level=$1

level is not used later?

> +	shift
> +	echo "$*"
> +}
> +
> +validate_tags() {
> +	git tag -l | grep -q "$TAG1"
> +	if [ $? -ne 0 ]
> +	then
> +		echo "$TAG1 is invalid"
> +		return
> +	fi
> +	git tag -l | grep -q "$TAG2"
> +	if [ $? -ne 0 ]
> +	then
> +		echo "$TAG2 is invalid"
> +		return
> +	fi
> +}
> +
> +validate_args() {
> +	if [ -z "$TAG1" ]
> +	then
> +		echo "Must Specify TAG1"
> +		return
> +	fi
> +	if [ -z "$TAG2" ]
> +	then
> +		echo "Must Specify TAG2"
> +		return
> +	fi
> +	if [ -z "$TARGET" ]
> +	then
> +		echo "Must Specify a build target"
> +	fi
> +}
> +
> +
> +cleanup_and_exit() {
> +	rm -rf $ABI_DIR
> +	exit $1
> +}

This function could be automatically invoked with trap.

> +###########################################
> +#START
> +############################################
> +
> +#Save the current branch
> +CURRENT_BRANCH=`git branch | grep \* | cut -d' ' -f2`

Will it work when not on any branch?

> +
> +if [ -n "$VERBOSE" ]
> +then
> +	export VERBOSE=/dev/stdout
> +else
> +	export VERBOSE=/dev/null
> +fi
> +
> +# Validate that we have all the arguments we need
> +res=$(validate_args)
> +if [ -n "$res" ]
> +then
> +	echo $res

Should be redirected to stderr >&2

> +	usage
> +	cleanup_and_exit 1
> +fi
> +
> +# Make sure our tags exist
> +res=$(validate_tags)
> +if [ -n "$res" ]
> +then
> +	echo $res
> +	cleanup_and_exit 1
> +fi
> +
> +ABICHECK=`which abi-compliance-checker 2>/dev/null`

Why not use the $() form like above?

I guess this is the tool:
	http://ispras.linuxbase.org/index.php/ABI_compliance_checker

> +if [ $? -ne 0 ]
> +then
> +	log "INFO" "Cant find abi-compliance-checker utility"
> +	cleanup_and_exit 1
> +fi
> +
> +ABIDUMP=`which abi-dumper 2>/dev/null`
> +if [ $? -ne 0 ]
> +then
> +	log "INFO" "Cant find abi-dumper utility"
> +	cleanup_and_exit 1
> +fi
> +
> +log "INFO" "We're going to check and make sure that applications built"
> +log "INFO" "against DPDK DSOs from tag $TAG1 will still run when executed"
> +log "INFO" "against DPDK DSOs built from tag $TAG2."
> +log "INFO" ""
> +
> +# Check to make sure we have a clean tree
> +git status | grep -q clean
> +if [ $? -ne 0 ]
> +then

You may compact in one line:
if git status | grep -q clean ; then

> +	log "WARN" "Working directory not clean, aborting"
> +	cleanup_and_exit 1
> +fi
> +
> +if [ ! -d ./.git ]
> +then
> +	log "WARN" "You must be in the root of the dpdk git tree"
> +	log "WARN" "You are in $PWD"
> +	cleanup_and_exit 1
> +fi

Why not cd $(dirname $0)/.. instead of returning an error?

> +log "INFO" "Checking out version $TAG1 of the dpdk"
> +# Move to the old version of the tree
> +git checkout $TAG1
> +
> +# Make sure we configure SHARED libraries
> +# Also turn off IGB and KNI as those require kernel headers to build
> +sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
> +sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
> +sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET

Why not tune the configuration in the .config file after make config?

> +export EXTRA_CFLAGS=-g
> +export EXTRA_LDFLAGS=-g

A comment is required (needed for abi-dumper?)

> +# Now configure the build
> +log "INFO" "Configuring DPDK $TAG1"
> +make config T=$TARGET O=$TARGET > $VERBOSE 2>&1
> +
> +log "INFO" "Building DPDK $TAG1. This might take a moment"
> +make O=$TARGET > $VERBOSE 2>&1

It would be more efficient with a customizable -j option.

> +if [ $? -ne 0 ]
> +then
> +	log "INFO" "THE BUILD FAILED.  ABORTING"
> +	cleanup_and_exit 1
> +fi
> +
> +# Move to the lib directory
> +cd $TARGET/lib
> +log "INFO" "COLLECTING ABI INFORMATION FOR $TAG1"
> +for i in `ls *.so`

I think ls is useless.

> +do
> +	$ABIDUMP $i -o $ABI_DIR/$i-ABI-0.dump -lver $TAG1
> +done
> +cd ../..
> +
> +# Now clean the tree, checkout the second tag, and rebuild
> +git clean -f -d
> +git reset --hard
> +# Move to the new version of the tree
> +log "INFO" "Checking out version $TAG2 of the dpdk"
> +git checkout $TAG2
> +
> +export EXTRA_CFLAGS=-g
> +export EXTRA_LDFLAGS=-g
> +
> +# Make sure we configure SHARED libraries
> +# Also turn off IGB and KNI as those require kernel headers to build
> +sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
> +sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
> +sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
> +
> +# Now configure the build
> +log "INFO" "Configuring DPDK $TAG2"
> +make config T=$TARGET O=$TARGET > $VERBOSE 2>&1
> +
> +log "INFO" "Building DPDK $TAG2. This might take a moment"
> +make O=$TARGET > $VERBOSE 2>&1
> +
> +if [ $? -ne 0 ]
> +then
> +	log "INFO" "THE BUILD FAILED.  ABORTING"
> +	cleanup_and_exit 1
> +fi
> +
> +cd $TARGET/lib
> +log "INFO" "COLLECTING ABI INFORMATION FOR $TAG2"
> +for i in `ls *.so`
> +do
> +	$ABIDUMP $i -o $ABI_DIR/$i-ABI-1.dump -lver $TAG2
> +done
> +cd ../..
> +
> +# Start comparison of ABI dumps
> +for i in `ls $ABI_DIR/*-1.dump`

Why ls?

> +do
> +	NEWNAME=`basename $i`
> +	OLDNAME=`basename $i | sed -e"s/1.dump/0.dump/"`
> +	LIBNAME=`basename $i | sed -e"s/-ABI-1.dump//"`
> +
> +	if [ ! -f $ABI_DIR/$OLDNAME ]
> +	then
> +		log "INFO" "$OLDNAME DOES NOT EXIST IN $TAG1. SKIPPING..."
> +	fi
> +
> +	#compare the abi dumps
> +	$ABICHECK -l $LIBNAME -old $ABI_DIR/$OLDNAME -new $ABI_DIR/$NEWNAME

Do we need to do a visual check? I didn't try yet.

> +done
> +
> +git reset --hard
> +git checkout $CURRENT_BRANCH
> +log "INFO" "ABI CHECK COMPLETE.  REPORTS ARE IN compat_report directory"
> +cleanup_and_exit 0

So you compare the ABI dumps.
Do we also need to run an app from TAG2 with libs from TAG1?

Thanks Neil

^ permalink raw reply	[relevance 10%]

* Re: [dpdk-dev] Error seen while compiling Pktgen-dpdk
  2015-02-28 14:00  3% ` Neil Horman
@ 2015-02-28 18:20  0%   ` Wiles, Keith
  0 siblings, 0 replies; 200+ results
From: Wiles, Keith @ 2015-02-28 18:20 UTC (permalink / raw)
  To: Neil Horman, Shankari Vaidyalingam; +Cc: dev



On 2/28/15, 8:00 AM, "Neil Horman" <nhorman@tuxdriver.com> wrote:

>On Sat, Feb 28, 2015 at 01:06:32PM +0530, Shankari Vaidyalingam wrote:
>> Hi,
>> 
>> I'm facing the below error while executing make on Pktgen-dpdk source.
>> I'm using 2.8 version of pktgen downloaded
>> I have built DPDK binaries and then tried building pktgen-dpdk.
>> RTE_TARGET is set to x86_64-pktgen-linuxapp-gcc and RTE_SDK is set to
>>the
>> directory where dpdk source files are present.
>> DPDK version - 1.7.1
>> Please let me know how to resolve this error.
>> 
>> controller@controller-VirtualBox:~/pktgen-2.8.0$ sudo
>> RTE_SDK=/home/controller/dpdk-1.7.1 make
>> make -C lib
>> make[1]: Entering directory `/home/controller/pktgen-2.8.0/lib'
>> == common
>> == lua
>> == src
>> make[1]: Leaving directory `/home/controller/pktgen-2.8.0/lib'
>> make -C app
>> make[1]: Entering directory `/home/controller/pktgen-2.8.0/app'
>>   CC lpktgenlib.o
>> lpktgenlib.c: In function ‘getf_etheraddr’:
>> lpktgenlib.c:174:9: error: passing argument 1 of
>>‘cmdline_parse_etheraddr’
>> from incompatible pointer type [-Werror]
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_etheraddr.h:75:5:
>> note: expected ‘struct cmdline_parse_token_hdr_t *’ but argument is of
>>type
>> ‘struct cmdline_etheraddr_t *’
>> lpktgenlib.c: In function Œgetf_ipaddr¹:
>> lpktgenlib.c:185:6: error: too many arguments to function
>> Œcmdline_parse_ipaddr¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_ipaddr.h:94:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_set¹:
>> lpktgenlib.c:233:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œset_seq¹:
>> lpktgenlib.c:290:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c:291:3: error: too many arguments to function
>> Œcmdline_parse_etheraddr¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_etheraddr.h:75:5:
>> note: declared here
>> lpktgenlib.c:292:3: error: too many arguments to function
>> Œcmdline_parse_etheraddr¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_etheraddr.h:75:5:
>> note: declared here
>> lpktgenlib.c:295:7: error: too many arguments to function
>> Œcmdline_parse_ipaddr¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_ipaddr.h:94:5:
>> note: declared here
>> lpktgenlib.c:298:7: error: too many arguments to function
>> Œcmdline_parse_ipaddr¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_ipaddr.h:94:5:
>> note: declared here
>> lpktgenlib.c: In function Œset_seqTable¹:
>> lpktgenlib.c:370:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_icmp¹:
>> lpktgenlib.c:466:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_sendARP¹:
>> lpktgenlib.c:494:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_set_mac¹:
>> lpktgenlib.c:521:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c:522:2: error: too many arguments to function
>> Œcmdline_parse_etheraddr¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_etheraddr.h:75:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_prototype¹:
>> lpktgenlib.c:582:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_set_ip_addr¹:
>> lpktgenlib.c:615:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c:619:2: error: too many arguments to function
>> Œcmdline_parse_ipaddr¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_ipaddr.h:94:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_set_type¹:
>> lpktgenlib.c:650:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_send_ping4¹:
>> lpktgenlib.c:679:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_pcap¹:
>> lpktgenlib.c:739:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_start¹:
>> lpktgenlib.c:768:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_stop¹:
>> lpktgenlib.c:796:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_prime¹:
>> lpktgenlib.c:845:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_clear¹:
>> lpktgenlib.c:1087:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_reset_config¹:
>> lpktgenlib.c:1172:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_dst_mac¹:
>> lpktgenlib.c:1201:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c:1202:2: error: too many arguments to function
>> Œcmdline_parse_etheraddr¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_etheraddr.h:75:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_src_mac¹:
>> lpktgenlib.c:1232:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c:1233:2: error: too many arguments to function
>> Œcmdline_parse_etheraddr¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_etheraddr.h:75:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_dst_ip¹:
>> lpktgenlib.c:1265:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c:1267:2: error: too many arguments to function
>> Œcmdline_parse_ipaddr¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_ipaddr.h:94:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_src_ip¹:
>> lpktgenlib.c:1300:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c:1302:2: error: too many arguments to function
>> Œcmdline_parse_ipaddr¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_ipaddr.h:94:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_dst_port¹:
>> lpktgenlib.c:1332:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_src_port¹:
>> lpktgenlib.c:1361:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_vlan_id¹:
>> lpktgenlib.c:1391:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_vlanid¹:
>> lpktgenlib.c:1424:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_vlan¹:
>> lpktgenlib.c:1455:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_mpls_entry¹:
>> lpktgenlib.c:1485:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_mpls¹:
>> lpktgenlib.c:1514:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_qinqids¹:
>> lpktgenlib.c:1544:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_qinq¹:
>> lpktgenlib.c:1579:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_gre_key¹:
>> lpktgenlib.c:1609:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_gre¹:
>> lpktgenlib.c:1638:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_gre_eth¹:
>> lpktgenlib.c:1666:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_pkt_size¹:
>> lpktgenlib.c:1697:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_range¹:
>> lpktgenlib.c:1728:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_process¹:
>> lpktgenlib.c:1802:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_garp¹:
>> lpktgenlib.c:1831:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_blink¹:
>> lpktgenlib.c:1860:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_isSending¹:
>> lpktgenlib.c:1916:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_linkState¹:
>> lpktgenlib.c:1974:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_portSizes¹:
>> lpktgenlib.c:2045:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_pktStats¹:
>> lpktgenlib.c:2114:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_portStats¹:
>> lpktgenlib.c:2189:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_decompile¹:
>> lpktgenlib.c:2325:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_sendPkt¹:
>> lpktgenlib.c:2358:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> lpktgenlib.c: In function Œpktgen_recvPkt¹:
>> lpktgenlib.c:2428:2: error: too many arguments to function
>> Œcmdline_parse_portlist¹
>> 
>>/home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_pa
>>rse_portlist.h:83:5:
>> note: declared here
>> cc1: all warnings being treated as errors
>> make[1]: *** [lpktgenlib.o] Error 1
>> make[1]: Leaving directory `/home/controller/pktgen-2.8.0/app'
>> make: *** [app] Error 2
>> 
>
>Looks like you ran afoul of commit
>aaa662e75c23c61a1d79bd4d1f9f35b4967c39db.  It
>changed the arguments to all the functions you are having compile errors
>with.
>You need to update pktgen to pass the new argument list properly.

I updated Pktgen-dpdk to build with the new changes; you can pull down the
2.8.3 version and it should build fine. Until it has been applied upstream,
the new version of Pktgen-dpdk requires the following patch to dpdk to
support the updated build style:
http://patchwork.dpdk.org/dev/patchwork/patch/3799/

Let me know if that does not help you as you are on DPDK 1.7.1.

Regards,
++Keith
>
>In the future this will hopefully be less of a problem now that we have
>some
>modicum of ABI compat infrastructure
>Neil
>

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] Error seen while compiling Pktgen-dpdk
  @ 2015-02-28 14:00  3% ` Neil Horman
  2015-02-28 18:20  0%   ` Wiles, Keith
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-02-28 14:00 UTC (permalink / raw)
  To: Shankari Vaidyalingam; +Cc: dev

On Sat, Feb 28, 2015 at 01:06:32PM +0530, Shankari Vaidyalingam wrote:
> Hi,
> 
> I'm facing the below error while executing make on Pktgen-dpdk source.
> I'm using 2.8 version of pktgen downloaded
> I have built DPDK binaries and then tried building pktgen-dpdk.
> RTE_TARGET is set to x86_64-pktgen-linuxapp-gcc and RTE_SDK is set to the
> directory where dpdk source files are present.
> DPDK version - 1.7.1
> Please let me know how to resolve this error.
> 
> controller@controller-VirtualBox:~/pktgen-2.8.0$ sudo
> RTE_SDK=/home/controller/dpdk-1.7.1 make
> make -C lib
> make[1]: Entering directory `/home/controller/pktgen-2.8.0/lib'
> == common
> == lua
> == src
> make[1]: Leaving directory `/home/controller/pktgen-2.8.0/lib'
> make -C app
> make[1]: Entering directory `/home/controller/pktgen-2.8.0/app'
>   CC lpktgenlib.o
> lpktgenlib.c: In function ‘getf_etheraddr’:
> lpktgenlib.c:174:9: error: passing argument 1 of ‘cmdline_parse_etheraddr’
> from incompatible pointer type [-Werror]
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_etheraddr.h:75:5:
> note: expected ‘struct cmdline_parse_token_hdr_t *’ but argument is of type
> ‘struct cmdline_etheraddr_t *’
> lpktgenlib.c: In function ‘getf_ipaddr’:
> lpktgenlib.c:185:6: error: too many arguments to function
> ‘cmdline_parse_ipaddr’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_ipaddr.h:94:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_set’:
> lpktgenlib.c:233:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘set_seq’:
> lpktgenlib.c:290:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c:291:3: error: too many arguments to function
> ‘cmdline_parse_etheraddr’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_etheraddr.h:75:5:
> note: declared here
> lpktgenlib.c:292:3: error: too many arguments to function
> ‘cmdline_parse_etheraddr’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_etheraddr.h:75:5:
> note: declared here
> lpktgenlib.c:295:7: error: too many arguments to function
> ‘cmdline_parse_ipaddr’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_ipaddr.h:94:5:
> note: declared here
> lpktgenlib.c:298:7: error: too many arguments to function
> ‘cmdline_parse_ipaddr’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_ipaddr.h:94:5:
> note: declared here
> lpktgenlib.c: In function ‘set_seqTable’:
> lpktgenlib.c:370:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_icmp’:
> lpktgenlib.c:466:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_sendARP’:
> lpktgenlib.c:494:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_set_mac’:
> lpktgenlib.c:521:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c:522:2: error: too many arguments to function
> ‘cmdline_parse_etheraddr’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_etheraddr.h:75:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_prototype’:
> lpktgenlib.c:582:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_set_ip_addr’:
> lpktgenlib.c:615:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c:619:2: error: too many arguments to function
> ‘cmdline_parse_ipaddr’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_ipaddr.h:94:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_set_type’:
> lpktgenlib.c:650:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_send_ping4’:
> lpktgenlib.c:679:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_pcap’:
> lpktgenlib.c:739:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_start’:
> lpktgenlib.c:768:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_stop’:
> lpktgenlib.c:796:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_prime’:
> lpktgenlib.c:845:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_clear’:
> lpktgenlib.c:1087:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_reset_config’:
> lpktgenlib.c:1172:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_dst_mac’:
> lpktgenlib.c:1201:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c:1202:2: error: too many arguments to function
> ‘cmdline_parse_etheraddr’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_etheraddr.h:75:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_src_mac’:
> lpktgenlib.c:1232:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c:1233:2: error: too many arguments to function
> ‘cmdline_parse_etheraddr’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_etheraddr.h:75:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_dst_ip’:
> lpktgenlib.c:1265:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c:1267:2: error: too many arguments to function
> ‘cmdline_parse_ipaddr’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_ipaddr.h:94:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_src_ip’:
> lpktgenlib.c:1300:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c:1302:2: error: too many arguments to function
> ‘cmdline_parse_ipaddr’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_ipaddr.h:94:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_dst_port’:
> lpktgenlib.c:1332:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_src_port’:
> lpktgenlib.c:1361:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_vlan_id’:
> lpktgenlib.c:1391:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_vlanid’:
> lpktgenlib.c:1424:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_vlan’:
> lpktgenlib.c:1455:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_mpls_entry’:
> lpktgenlib.c:1485:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_mpls’:
> lpktgenlib.c:1514:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_qinqids’:
> lpktgenlib.c:1544:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_qinq’:
> lpktgenlib.c:1579:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_gre_key’:
> lpktgenlib.c:1609:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_gre’:
> lpktgenlib.c:1638:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_gre_eth’:
> lpktgenlib.c:1666:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_pkt_size’:
> lpktgenlib.c:1697:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_range’:
> lpktgenlib.c:1728:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_process’:
> lpktgenlib.c:1802:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_garp’:
> lpktgenlib.c:1831:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_blink’:
> lpktgenlib.c:1860:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_isSending’:
> lpktgenlib.c:1916:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_linkState’:
> lpktgenlib.c:1974:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_portSizes’:
> lpktgenlib.c:2045:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_pktStats’:
> lpktgenlib.c:2114:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_portStats’:
> lpktgenlib.c:2189:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_decompile’:
> lpktgenlib.c:2325:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_sendPkt’:
> lpktgenlib.c:2358:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> lpktgenlib.c: In function ‘pktgen_recvPkt’:
> lpktgenlib.c:2428:2: error: too many arguments to function
> ‘cmdline_parse_portlist’
> /home/controller/dpdk-1.7.1/x86_64-native-linuxapp-gcc/include/cmdline_parse_portlist.h:83:5:
> note: declared here
> cc1: all warnings being treated as errors
> make[1]: *** [lpktgenlib.o] Error 1
> make[1]: Leaving directory `/home/controller/pktgen-2.8.0/app'
> make: *** [app] Error 2
> 

Looks like you ran afoul of commit aaa662e75c23c61a1d79bd4d1f9f35b4967c39db.  It
changed the arguments to all the functions you are having compile errors with.
You need to update pktgen to pass the new argument list properly.

In the future this will hopefully be less of a problem, now that we have some
modicum of ABI compat infrastructure.
Neil

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v2] ABI: Add abi checking utility
  2015-02-27 13:48  5%   ` Neil Horman
@ 2015-02-27 13:55  5%     ` Thomas Monjalon
  0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2015-02-27 13:55 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

Hi Neil,

2015-02-27 08:48, Neil Horman:
> On Mon, Feb 02, 2015 at 01:18:26PM -0500, Neil Horman wrote:
> > There was a request for an ABI validation utility for the ongoing ABI stability
> > work.  As it turns out there is an ABI compliance checker in development that
> > seems to be under active development and provides fairly detailed ABI compliance
> > reports.  It's not yet intelligent enough to understand symbol versioning, but it
> > does provide the ability to identify symbols which have changed between
> > releases, along with details of the change, and offers developers the
> > opportunity to identify which symbols then need versioning and validation for a
> > given update via manual testing.
> > 
> > This script automates the use of the compliance checker between two arbitrarily
> > specified tags within the dpdk tree.  To execute enter the $RTE_SDK directory
> > and run:
> > 
> > ./scripts/validate_abi.sh $GIT_TAG1 $GIT_TAG2 $CONFIG
> > 
> > where $GIT_TAG1 and 2 are git tags and $CONFIG is a config specification
> > suitable for passing as the T= variable in the make config command.
> > 
> > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> 
> What's the disposition of this? I don't see it in the repository yet.

I plan to review it carefully during next week.
We can integrate this tool any time until the end of March, which is why it
was not the highest priority.

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [PATCH v2] ABI: Add abi checking utility
  @ 2015-02-27 13:48  5%   ` Neil Horman
  2015-02-27 13:55  5%     ` Thomas Monjalon
  2015-03-03 22:18 10%   ` Thomas Monjalon
  1 sibling, 1 reply; 200+ results
From: Neil Horman @ 2015-02-27 13:48 UTC (permalink / raw)
  To: dev

On Mon, Feb 02, 2015 at 01:18:26PM -0500, Neil Horman wrote:
> There was a request for an ABI validation utility for the ongoing ABI stability
> work.  As it turns out there is an ABI compliance checker in development that
> seems to be under active development and provides fairly detailed ABI compliance
> reports.  It's not yet intelligent enough to understand symbol versioning, but it
> does provide the ability to identify symbols which have changed between
> releases, along with details of the change, and offers developers the
> opportunity to identify which symbols then need versioning and validation for a
> given update via manual testing.
> 
> This script automates the use of the compliance checker between two arbitrarily
> specified tags within the dpdk tree.  To execute enter the $RTE_SDK directory
> and run:
> 
> ./scripts/validate_abi.sh $GIT_TAG1 $GIT_TAG2 $CONFIG
> 
> where $GIT_TAG1 and 2 are git tags and $CONFIG is a config specification
> suitable for passing as the T= variable in the make config command.
> 
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> 
> Change Notes:
> 
> v2) Fixed some typos as requested by Thomas
> ---
>  scripts/validate_abi.sh | 241 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 241 insertions(+)
>  create mode 100755 scripts/validate_abi.sh
> 
> diff --git a/scripts/validate_abi.sh b/scripts/validate_abi.sh
> new file mode 100755
> index 0000000..31583df
> --- /dev/null
> +++ b/scripts/validate_abi.sh
> @@ -0,0 +1,241 @@
> +#!/bin/sh
> +#   BSD LICENSE
> +#
> +#   Copyright(c) 2015 Neil Horman. All rights reserved.
> +#   All rights reserved.
> +#
> +#   Redistribution and use in source and binary forms, with or without
> +#   modification, are permitted provided that the following conditions
> +#   are met:
> +#
> +#     * Redistributions of source code must retain the above copyright
> +#       notice, this list of conditions and the following disclaimer.
> +#     * Redistributions in binary form must reproduce the above copyright
> +#       notice, this list of conditions and the following disclaimer in
> +#       the documentation and/or other materials provided with the
> +#       distribution.
> +#
> +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +
> +TAG1=$1
> +TAG2=$2
> +TARGET=$3
> +ABI_DIR=`mktemp -d -p /tmp ABI.XXXXXX`
> +
> +usage() {
> +	echo "$0 <TAG1> <TAG2> <TARGET>"
> +}
> +
> +log() {
> +	local level=$1
> +	shift
> +	echo "$*"
> +}
> +
> +validate_tags() {
> +	git tag -l | grep -q "$TAG1"
> +	if [ $? -ne 0 ]
> +	then
> +		echo "$TAG1 is invalid"
> +		return
> +	fi
> +	git tag -l | grep -q "$TAG2"
> +	if [ $? -ne 0 ]
> +	then
> +		echo "$TAG2 is invalid"
> +		return
> +	fi
> +}
> +
> +validate_args() {
> +	if [ -z "$TAG1" ]
> +	then
> +		echo "Must Specify TAG1"
> +		return
> +	fi
> +	if [ -z "$TAG2" ]
> +	then
> +		echo "Must Specify TAG2"
> +		return
> +	fi
> +	if [ -z "$TARGET" ]
> +	then
> +		echo "Must Specify a build target"
> +	fi
> +}
> +
> +
> +cleanup_and_exit() {
> +	rm -rf $ABI_DIR
> +	exit $1
> +}
> +
> +###########################################
> +#START
> +############################################
> +
> +#Save the current branch
> +CURRENT_BRANCH=`git branch | grep \* | cut -d' ' -f2`
> +
> +if [ -n "$VERBOSE" ]
> +then
> +	export VERBOSE=/dev/stdout
> +else
> +	export VERBOSE=/dev/null
> +fi
> +
> +# Validate that we have all the arguments we need
> +res=$(validate_args)
> +if [ -n "$res" ]
> +then
> +	echo $res
> +	usage
> +	cleanup_and_exit 1
> +fi
> +
> +# Make sure our tags exist
> +res=$(validate_tags)
> +if [ -n "$res" ]
> +then
> +	echo $res
> +	cleanup_and_exit 1
> +fi
> +
> +ABICHECK=`which abi-compliance-checker 2>/dev/null`
> +if [ $? -ne 0 ]
> +then
> +	log "INFO" "Cant find abi-compliance-checker utility"
> +	cleanup_and_exit 1
> +fi
> +
> +ABIDUMP=`which abi-dumper 2>/dev/null`
> +if [ $? -ne 0 ]
> +then
> +	log "INFO" "Cant find abi-dumper utility"
> +	cleanup_and_exit 1
> +fi
> +
> +log "INFO" "We're going to check and make sure that applications built"
> +log "INFO" "against DPDK DSOs from tag $TAG1 will still run when executed"
> +log "INFO" "against DPDK DSOs built from tag $TAG2."
> +log "INFO" ""
> +
> +# Check to make sure we have a clean tree
> +git status | grep -q clean
> +if [ $? -ne 0 ]
> +then
> +	log "WARN" "Working directory not clean, aborting"
> +	cleanup_and_exit 1
> +fi
> +
> +if [ ! -d ./.git ]
> +then
> +	log "WARN" "You must be in the root of the dpdk git tree"
> +	log "WARN" "You are in $PWD"
> +	cleanup_and_exit 1
> +fi
> +
> +log "INFO" "Checking out version $TAG1 of the dpdk"
> +# Move to the old version of the tree
> +git checkout $TAG1
> +
> +# Make sure we configure SHARED libraries
> +# Also turn off IGB and KNI as those require kernel headers to build
> +sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
> +sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
> +sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
> +
> +export EXTRA_CFLAGS=-g
> +export EXTRA_LDFLAGS=-g
> +
> +# Now configure the build
> +log "INFO" "Configuring DPDK $TAG1"
> +make config T=$TARGET O=$TARGET > $VERBOSE 2>&1
> +
> +log "INFO" "Building DPDK $TAG1. This might take a moment"
> +make O=$TARGET > $VERBOSE 2>&1
> +
> +if [ $? -ne 0 ]
> +then
> +	log "INFO" "THE BUILD FAILED.  ABORTING"
> +	cleanup_and_exit 1
> +fi
> +
> +# Move to the lib directory
> +cd $TARGET/lib
> +log "INFO" "COLLECTING ABI INFORMATION FOR $TAG1"
> +for i in `ls *.so`
> +do
> +	$ABIDUMP $i -o $ABI_DIR/$i-ABI-0.dump -lver $TAG1
> +done
> +cd ../..
> +
> +# Now clean the tree, checkout the second tag, and rebuild
> +git clean -f -d
> +git reset --hard
> +# Move to the new version of the tree
> +log "INFO" "Checking out version $TAG2 of the dpdk"
> +git checkout $TAG2
> +
> +export EXTRA_CFLAGS=-g
> +export EXTRA_LDFLAGS=-g
> +
> +# Make sure we configure SHARED libraries
> +# Also turn off IGB and KNI as those require kernel headers to build
> +sed -i -e"$ a\CONFIG_RTE_BUILD_SHARED_LIB=y" config/defconfig_$TARGET
> +sed -i -e"$ a\CONFIG_RTE_EAL_IGB_UIO=n" config/defconfig_$TARGET
> +sed -i -e"$ a\CONFIG_RTE_LIBRTE_KNI=n" config/defconfig_$TARGET
> +
> +# Now configure the build
> +log "INFO" "Configuring DPDK $TAG2"
> +make config T=$TARGET O=$TARGET > $VERBOSE 2>&1
> +
> +log "INFO" "Building DPDK $TAG2. This might take a moment"
> +make O=$TARGET > $VERBOSE 2>&1
> +
> +if [ $? -ne 0 ]
> +then
> +	log "INFO" "THE BUILD FAILED.  ABORTING"
> +	cleanup_and_exit 1
> +fi
> +
> +cd $TARGET/lib
> +log "INFO" "COLLECTING ABI INFORMATION FOR $TAG2"
> +for i in `ls *.so`
> +do
> +	$ABIDUMP $i -o $ABI_DIR/$i-ABI-1.dump -lver $TAG2
> +done
> +cd ../..
> +
> +# Start comparison of ABI dumps
> +for i in `ls $ABI_DIR/*-1.dump`
> +do
> +	NEWNAME=`basename $i`
> +	OLDNAME=`basename $i | sed -e"s/1.dump/0.dump/"`
> +	LIBNAME=`basename $i | sed -e"s/-ABI-1.dump//"`
> +
> +	if [ ! -f $ABI_DIR/$OLDNAME ]
> +	then
> +		log "INFO" "$OLDNAME DOES NOT EXIST IN $TAG1. SKIPPING..."
> +	fi
> +
> +	#compare the abi dumps
> +	$ABICHECK -l $LIBNAME -old $ABI_DIR/$OLDNAME -new $ABI_DIR/$NEWNAME
> +done
> +
> +git reset --hard
> +git checkout $CURRENT_BRANCH
> +log "INFO" "ABI CHECK COMPLETE.  REPORTS ARE IN compat_report directory"
> +cleanup_and_exit 0
> +
> +
> -- 
> 2.1.0
> 
> 
What's the disposition of this? I don't see it in the repository yet.

Neil

^ permalink raw reply	[relevance 5%]

* Re: [dpdk-dev] [PATCH v6 0/8] Interrupt mode PMD
  2015-02-27  4:56  3% ` [dpdk-dev] [PATCH v6 0/8] Interrupt mode PMD Cunming Liang
  2015-02-27  4:56  2%   ` [dpdk-dev] [PATCH v6 5/8] ethdev: add rx interrupt enable/disable functions Cunming Liang
@ 2015-02-27  8:00  0%   ` Liu, Yong
  2015-05-05  5:39  3%   ` [dpdk-dev] From: Cunming Liang <cunming.liang@intel.com> Cunming Liang
  2015-05-05  5:53  3%   ` [dpdk-dev] [PATCH v7 00/10] Interrupt mode PMD Cunming Liang
  3 siblings, 0 replies; 200+ results
From: Liu, Yong @ 2015-02-27  8:00 UTC (permalink / raw)
  To: Liang, Cunming, dev

Tested-by: Yong Liu <yong.liu@intel.com>

- Tested Commit: 00c685634b8a43e4594e26949a6c4f1cf5b67047
- OS: Fedora20 3.15.8-200.fc20.x86_64
- GCC: gcc version 4.8.3 20140911 (Red Hat 4.8.3-7) (GCC)
- CPU: Intel(R) Xeon(R) CPU E5-2680 v2 @ 2.80GHz
- NIC: Intel Corporation 82599ES 10-Gigabit SFI/SFP+ Network Connection
- Default x86_64-native-linuxapp-gcc configuration
- Total 4 cases, 4 passed, 0 failed

- Case: interrupt pmd on PF with single queue
  Description: Check interrupt pmd work with single queue
  Command / instruction:
    Bind ports to vfio-pci.
      modprobe vfio
      modprobe vfio-pci
      ./tools/dpdk_nic_bind.py --bind=vfio-pci 0000:08:00.0 0000:08:00.1
    Start l3fwd-power with one queue per port.
      l3fwd-power -c 7 -n 4 -- -p 0x3 -P --config="(0,0,1),(1,0,2)"
    Send one packet to Port0 and Port1, and check that the threads on core1 and
    core2 are woken up.
      L3FWD_POWER: lcore 1 is waked up from rx interrupt on port1,rxq0
      L3FWD_POWER: lcore 2 is waked up from rx interrupt on port1,rxq0
  Expected test result:
    l3fwd-power can forward packets normally, and the threads on core1 and
    core2 sleep when no packets are received.

- Case: interrupt pmd on PF with multi queue
  Description: Check interrupt pmd work with multiple queues
  Command / instruction:
    Start l3fwd-power with two queues per port.
      l3fwd-power -c 1f -n 4 -- -p 0x3 \
      --config="(0,0,1),(0,1,2)(1,0,3),(1,1,4)"
    Send packet with increased dest IP to Port0 and Port1, check that thread 
    on core1,core2,core3,core4 waked up.
      L3FWD_POWER: lcore 1 is waked up from rx interrupt on port1,rxq0
      L3FWD_POWER: lcore 2 is waked up from rx interrupt on port1,rxq1
      L3FWD_POWER: lcore 3 is waked up from rx interrupt on port1,rxq0
      L3FWD_POWER: lcore 4 is waked up from rx interrupt on port1,rxq1
  Expected test result:
    l3fwd-power can forward packets normally, and the threads on core1-core4
    sleep when no packets are received.

- Case: interrupt pmd on PF with max Rx queues
  Description: Check interrupt pmd work with maximum queues
  Command / instruction:
    Start l3fwd-power with 32 queues per port.
    l3fwd-power -c ffffffff -n 4 -- -p 0x3 -P --config="(0,0,0),(0,1,1),\
      (0,2,2),(0,3,3),(0,4,4),(0,5,5),(0,6,6),(0,7,7),(0,8,8),
      (0,9,9),(0,10,10),(0,11,11),(0,12,12),(0,13,13),(0,14,14),\
      (0,15,15),\
      (1,0,16),(1,1,17),(1,2,18),(1,3,19),(1,4,20),(1,5,21),(1,6,22),\
      (1,7,23),(1,8,24),(1,9,25),(1,10,26),(1,11,27),(1,12,28),\
      (1,13,29),(1,14,30),\(1,15,31)"
    Send packet with increased dest IP to Port0 and Port1, check that all 
    threads waked up.
  Expected test result:
    l3fwd-power can forward packets normally and thread on core1-core31
    will sleep when there's no packet received.
		
- Case: interrupt pmd on VF with single queue
  Description: Check interrupt pmd work on VF device
  Command / instruction:
    Bind ports back to the ixgbe driver.
      ./tools/dpdk_nic_bind.py --bind=ixgbe 0000:08:00.0 0000:08:00.1
    Create one VF per port on the host and make sure the PF interfaces are up.
      echo 1 > /sys/bus/pci/devices/0000\:08\:00.0/sriov_numvfs
      echo 1 > /sys/bus/pci/devices/0000\:08\:00.1/sriov_numvfs
      ifconfig p786p1 up
      ifconfig p786p2 up	  
    Bind VF device to vfio-pci.
      ./tools/dpdk_nic_bind.py --bind=vfio-pci 0000:08:10.0 0000:08:10.1
    Start l3fwd-power on host with one queue per port.
      l3fwd-power -c 1f -n 4 -- -p 0x3 -P --config="(0,0,1),(1,0,2)"
    Send one packet to Port0 and Port1, and check that the threads on core1 and
    core2 are woken up.
      L3FWD_POWER: lcore 1 is waked up from rx interrupt on port1,rxq0
      L3FWD_POWER: lcore 2 is waked up from rx interrupt on port1,rxq0
  Expected test result:
    l3fwd-power can forward packets normally on VF and thread on core1 and 
    core2 will sleep when there's no packet received.

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Cunming Liang
> Sent: Friday, February 27, 2015 12:56 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v6 0/8] Interrupt mode PMD
> 
> v6 changes
>  - split rte_intr_wait_rx_pkt into two APIs 'wait' and 'set'.
>  - rewrite rte_intr_rx_wait/rte_intr_rx_set.
>  - using vector number instead of queue_id as interrupt API params.
>  - patch reorder and split.
> 
> v5 changes
>  - Rebase the patchset onto the HEAD
>  - Isolate ethdev from EAL for new-added wait-for-rx interrupt function
>  - Export wait-for-rx interrupt function for shared libraries
>  - Split-off a new patch file for changed struct rte_intr_handle that
>    other patches depend on, to avoid breaking git bisect
>  - Change sample application to accommodate EAL function spec change
>    accordingly
> 
> v4 changes
>  - Export interrupt enable/disable functions for shared libraries
>  - Adjust position of new-added structure fields and functions to
>    avoid breaking ABI
> 
> v3 changes
>  - Add return value for interrupt enable/disable functions
>  - Move spinlock from PMD to L3fwd-power
>  - Remove unnecessary variables in e1000_mac_info
>  - Fix miscellaneous review comments
> 
> v2 changes
>  - Fix compilation issue in Makefile for missed header file.
>  - Consolidate internal and community review comments of v1 patch set.
> 
> The patch series introduce low-latency one-shot rx interrupt into DPDK
> with
> polling and interrupt mode switch control example.
> 
> DPDK userspace interrupt notification and handling mechanism is based on
> UIO
> with below limitation:
> 1) It is designed to handle LSC interrupt only with inefficient suspended
>    pthread wakeup procedure (e.g. UIO wakes up LSC interrupt handling
> thread
>    which then wakes up DPDK polling thread). In this way, it introduces
>    non-deterministic wakeup latency for DPDK polling thread as well as
> packet
>    latency if it is used to handle Rx interrupt.
> 2) UIO only supports a single interrupt vector which has to be shared by
>    LSC interrupt and interrupts assigned to dedicated rx queues.
> 
> This patchset includes below features:
> 1) Enable one-shot rx queue interrupt in ixgbe PMD(PF & VF) and igb PMD(PF
> only).
> 2) Build on top of the VFIO mechanism instead of UIO, so it could support
>    up to 64 interrupt vectors for rx queue interrupts.
> 3) Have 1 DPDK polling thread handle per Rx queue interrupt with a
> dedicated
>    VFIO eventfd, which eliminates non-deterministic pthread wakeup latency
> in
>    user space.
> 4) Demonstrate interrupt control APIs and userspace NAPI-like
> polling/interrupt
>    switch algorithms in L3fwd-power example.
> 
> Known limitations:
> 1) It does not work for UIO due to a single interrupt eventfd shared by
> LSC
>    and rx queue interrupt handlers causes a mess.
> 2) LSC interrupt is not supported by VF driver, so it is by default
> disabled
>    in L3fwd-power now. Feel free to turn it on if you want to support both
> LSC
>    and rx queue interrupts on a PF.
> 
> Cunming Liang (5):
>   eal: declare new interrupt api
>   eal/linux: add rx queue interrupt FDs to intr handle struct
>   eal/bsd: dummy for new intr definition
>   eal/linux: add per rx queue interrupt handling based on VFIO
>   ethdev: add rx interrupt enable/disable functions
> 
> Zhou, Danny (3):
>   ixgbe: enable rx queue interrupts for both PF and VF
>   igb: enable rx queue interrupts for PF
>   l3fwd-power: enable one-shot rx interrupt and polling/interrupt mode
>     switch
> 
>  examples/l3fwd-power/main.c                        | 194 ++++++++---
>  lib/librte_eal/bsdapp/eal/eal_interrupts.c         |  15 +
>  .../bsdapp/eal/include/exec-env/rte_interrupts.h   |   4 +
>  lib/librte_eal/bsdapp/eal/rte_eal_version.map      |   2 +
>  lib/librte_eal/common/include/rte_interrupts.h     |  38 +++
>  lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 224 +++++++++---
>  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         |  23 +-
>  .../linuxapp/eal/include/exec-env/rte_interrupts.h |   9 +
>  lib/librte_eal/linuxapp/eal/rte_eal_version.map    |   2 +
>  lib/librte_ether/rte_ethdev.c                      |  66 ++++
>  lib/librte_ether/rte_ethdev.h                      |  77 +++++
>  lib/librte_ether/rte_ether_version.map             |   3 +
>  lib/librte_pmd_e1000/e1000_ethdev.h                |   3 +
>  lib/librte_pmd_e1000/igb_ethdev.c                  | 231 +++++++++++--
>  lib/librte_pmd_ixgbe/ixgbe_ethdev.c                | 377
> ++++++++++++++++++++-
>  lib/librte_pmd_ixgbe/ixgbe_ethdev.h                |   7 +
>  16 files changed, 1156 insertions(+), 119 deletions(-)
> 
> --
> 1.8.1.4

^ permalink raw reply	[relevance 0%]

* [dpdk-dev] [PATCH v6 5/8] ethdev: add rx interrupt enable/disable functions
  2015-02-27  4:56  3% ` [dpdk-dev] [PATCH v6 0/8] Interrupt mode PMD Cunming Liang
@ 2015-02-27  4:56  2%   ` Cunming Liang
  2015-02-27  8:00  0%   ` [dpdk-dev] [PATCH v6 0/8] Interrupt mode PMD Liu, Yong
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 200+ results
From: Cunming Liang @ 2015-02-27  4:56 UTC (permalink / raw)
  To: dev

Add three dev_ops functions to enable and disable rx queue interrupts, and to retrieve the vector number which the specified queue is associated with.

Signed-off-by: Danny Zhou <danny.zhou@intel.com>
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
v6 changes
 - add rx_intr_vec_get to retrieve the vector num of the queue.

v5 changes
 - Rebase the patchset onto the HEAD

v4 changes
 - Export interrupt enable/disable functions for shared libraries
 - Put new functions at the end of eth_dev_ops to avoid breaking ABI

v3 changes
 - Add return value for interrupt enable/disable functions

 lib/librte_ether/rte_ethdev.c          | 66 +++++++++++++++++++++++++++++
 lib/librte_ether/rte_ethdev.h          | 77 ++++++++++++++++++++++++++++++++++
 lib/librte_ether/rte_ether_version.map |  3 ++
 3 files changed, 146 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index bb94ccb..6654917 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -3320,6 +3320,72 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 	}
 	rte_spinlock_unlock(&rte_eth_dev_cb_lock);
 }
+
+int
+rte_eth_dev_rx_intr_vec_get(uint8_t port_id, uint16_t queue_id,
+			    uint32_t *vec)
+{
+	struct rte_eth_dev *dev;
+	struct rte_intr_handle *intr_handle;
+
+	if (port_id >= nb_ports) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (dev == NULL) {
+		PMD_DEBUG_TRACE("Invalid port device\n");
+		return -ENODEV;
+	}
+
+	intr_handle = &dev->pci_dev->intr_handle;
+	*vec = intr_handle->vec_num[queue_id];
+	return 0;
+}
+
+int
+rte_eth_dev_rx_intr_enable(uint8_t port_id,
+			   uint16_t queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	if (port_id >= nb_ports) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (dev == NULL) {
+		PMD_DEBUG_TRACE("Invalid port device\n");
+		return -ENODEV;
+	}
+
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP);
+	return (*dev->dev_ops->rx_queue_intr_enable)(dev, queue_id);
+}
+
+int
+rte_eth_dev_rx_intr_disable(uint8_t port_id,
+			    uint16_t queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	if (port_id >= nb_ports) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (dev == NULL) {
+		PMD_DEBUG_TRACE("Invalid port device\n");
+		return -ENODEV;
+	}
+
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP);
+	return (*dev->dev_ops->rx_queue_intr_disable)(dev, queue_id);
+}
+
 #ifdef RTE_NIC_BYPASS
 int rte_eth_dev_bypass_init(uint8_t port_id)
 {
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 8db3127..9cdde82 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -825,6 +825,8 @@ struct rte_eth_fdir {
 struct rte_intr_conf {
 	/** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */
 	uint16_t lsc;
+	/** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */
+	uint16_t rxq;
 };
 
 /**
@@ -1030,6 +1032,14 @@ typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
 				    const struct rte_eth_txconf *tx_conf);
 /**< @internal Setup a transmit queue of an Ethernet device. */
 
+typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
+				    uint16_t rx_queue_id);
+/**< @internal Enable interrupt of a receive queue of an Ethernet device. */
+
+typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
+				    uint16_t rx_queue_id);
+/**< @internal Disable interrupt of a receive queue of an Ethernet device. */
+
 typedef void (*eth_queue_release_t)(void *queue);
 /**< @internal Release memory resources allocated by given RX/TX queue. */
 
@@ -1381,6 +1391,10 @@ struct eth_dev_ops {
 	/** Get current RSS hash configuration. */
 	rss_hash_conf_get_t rss_hash_conf_get;
 	eth_filter_ctrl_t              filter_ctrl;          /**< common filter control*/
+
+	/** Enable/disable Rx queue interrupt. */
+	eth_rx_enable_intr_t       rx_queue_intr_enable; /**< Enable Rx queue interrupt. */
+	eth_rx_disable_intr_t      rx_queue_intr_disable; /**< Disable Rx queue interrupt.*/
 };
 
 /**
@@ -2846,6 +2860,69 @@ void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 				enum rte_eth_event_type event);
 
 /**
+ * When there is no rx packet coming in Rx Queue for a long time, we can
+ * sleep lcore related to RX Queue for power saving, and enable rx interrupt
+ * to be triggered when rx packet arrives.
+ *
+ * The rte_eth_dev_rx_intr_enable() function enables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_intr_enable(uint8_t port_id,
+			       uint16_t queue_id);
+
+/**
+ * When lcore wakes up from rx interrupt indicating packet coming, disable rx
+ * interrupt and returns to polling mode.
+ *
+ * The rte_eth_dev_rx_intr_disable() function disables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_intr_disable(uint8_t port_id,
+				uint16_t queue_id);
+
+/**
+ * It retrieves the interrupt vector number on specific rx queue of a port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param vec
+ *   The interrupt vector number of the specified queue.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_intr_vec_get(uint8_t port_id, uint16_t queue_id,
+				uint32_t *vec);
+
+/**
  * Turn on the LED on the Ethernet device.
  * This function turns on the LED on the Ethernet device.
  *
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index 0d46578..7f93156 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -47,6 +47,9 @@ DPDK_2.0 {
 	rte_eth_dev_rss_hash_update;
 	rte_eth_dev_rss_reta_query;
 	rte_eth_dev_rss_reta_update;
+	rte_eth_dev_rx_intr_disable;
+	rte_eth_dev_rx_intr_enable;
+	rte_eth_dev_rx_intr_vec_get;
 	rte_eth_dev_rx_queue_start;
 	rte_eth_dev_rx_queue_stop;
 	rte_eth_dev_set_link_down;
-- 
1.8.1.4

^ permalink raw reply	[relevance 2%]

* [dpdk-dev] [PATCH v6 0/8] Interrupt mode PMD
  2015-02-23 16:55  3% [dpdk-dev] [PATCH v5 0/6] " Zhou Danny
  2015-02-23 16:55  3% ` [dpdk-dev] [PATCH v5 1/6] ethdev: add rx interrupt enable/disable functions Zhou Danny
@ 2015-02-27  4:56  3% ` Cunming Liang
  2015-02-27  4:56  2%   ` [dpdk-dev] [PATCH v6 5/8] ethdev: add rx interrupt enable/disable functions Cunming Liang
                     ` (3 more replies)
  1 sibling, 4 replies; 200+ results
From: Cunming Liang @ 2015-02-27  4:56 UTC (permalink / raw)
  To: dev

v6 changes
 - split rte_intr_wait_rx_pkt into two APIs 'wait' and 'set'.
 - rewrite rte_intr_rx_wait/rte_intr_rx_set.
 - using vector number instead of queue_id as interrupt API params.
 - patch reorder and split.

v5 changes
 - Rebase the patchset onto the HEAD
 - Isolate ethdev from EAL for new-added wait-for-rx interrupt function
 - Export wait-for-rx interrupt function for shared libraries
 - Split-off a new patch file for changed struct rte_intr_handle that
   other patches depend on, to avoid breaking git bisect
 - Change sample application to accommodate EAL function spec change
   accordingly

v4 changes
 - Export interrupt enable/disable functions for shared libraries
 - Adjust position of new-added structure fields and functions to
   avoid breaking ABI
 
v3 changes
 - Add return value for interrupt enable/disable functions
 - Move spinlock from PMD to L3fwd-power
 - Remove unnecessary variables in e1000_mac_info
 - Fix miscellaneous review comments
 
v2 changes
 - Fix compilation issue in Makefile for missed header file.
 - Consolidate internal and community review comments of v1 patch set.
 
The patch series introduces low-latency one-shot rx interrupt into DPDK with
polling and interrupt mode switch control example.
 
DPDK userspace interrupt notification and handling mechanism is based on UIO
with below limitation:
1) It is designed to handle LSC interrupt only with inefficient suspended
   pthread wakeup procedure (e.g. UIO wakes up LSC interrupt handling thread
   which then wakes up DPDK polling thread). In this way, it introduces
   non-deterministic wakeup latency for DPDK polling thread as well as packet
   latency if it is used to handle Rx interrupt.
2) UIO only supports a single interrupt vector which has to be shared by
   LSC interrupt and interrupts assigned to dedicated rx queues.
 
This patchset includes below features:
1) Enable one-shot rx queue interrupt in ixgbe PMD(PF & VF) and igb PMD(PF only).
2) Build on top of the VFIO mechanism instead of UIO, so it could support
   up to 64 interrupt vectors for rx queue interrupts.
3) Have 1 DPDK polling thread handle per Rx queue interrupt with a dedicated
   VFIO eventfd, which eliminates non-deterministic pthread wakeup latency in
   user space.
4) Demonstrate interrupts control APIs and userspace NAPI-like polling/interrupt
   switch algorithms in L3fwd-power example.

Known limitations:
1) It does not work for UIO because a single interrupt eventfd shared by LSC
   and rx queue interrupt handlers causes a mess.
2) LSC interrupt is not supported by VF driver, so it is by default disabled
   in L3fwd-power now. Feel free to turn it on if you want to support both LSC
   and rx queue interrupts on a PF.

Cunming Liang (5):
  eal: declare new interrupt api
  eal/linux: add rx queue interrupt FDs to intr handle struct
  eal/bsd: dummy for new intr definition
  eal/linux: add per rx queue interrupt handling based on VFIO
  ethdev: add rx interrupt enable/disable functions

Zhou, Danny (3):
  ixgbe: enable rx queue interrupts for both PF and VF
  igb: enable rx queue interrupts for PF
  l3fwd-power: enable one-shot rx interrupt and polling/interrupt mode
    switch

 examples/l3fwd-power/main.c                        | 194 ++++++++---
 lib/librte_eal/bsdapp/eal/eal_interrupts.c         |  15 +
 .../bsdapp/eal/include/exec-env/rte_interrupts.h   |   4 +
 lib/librte_eal/bsdapp/eal/rte_eal_version.map      |   2 +
 lib/librte_eal/common/include/rte_interrupts.h     |  38 +++
 lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 224 +++++++++---
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         |  23 +-
 .../linuxapp/eal/include/exec-env/rte_interrupts.h |   9 +
 lib/librte_eal/linuxapp/eal/rte_eal_version.map    |   2 +
 lib/librte_ether/rte_ethdev.c                      |  66 ++++
 lib/librte_ether/rte_ethdev.h                      |  77 +++++
 lib/librte_ether/rte_ether_version.map             |   3 +
 lib/librte_pmd_e1000/e1000_ethdev.h                |   3 +
 lib/librte_pmd_e1000/igb_ethdev.c                  | 231 +++++++++++--
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c                | 377 ++++++++++++++++++++-
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h                |   7 +
 16 files changed, 1156 insertions(+), 119 deletions(-)

-- 
1.8.1.4

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] closing version 2.0.0-rc1
  2015-02-24 21:50  4% [dpdk-dev] closing version 2.0.0-rc1 Thomas Monjalon
  2015-02-25  0:56  0% ` Stephen Hemminger
@ 2015-02-27  0:42  0% ` Zhang, Helin
  1 sibling, 0 replies; 200+ results
From: Zhang, Helin @ 2015-02-27  0:42 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

Hi Thomas

For "unified packet type", it has been acked by Konstantin, and carefully reviewed by Oliver, and reviewed by Cunming, Bruce, Jingjing, etc.
The only open comment is that detailed description of each packet type should be added. I will complete it today and hopefully it can be in R2.0. Could you help to check it again and if it can be in R2.0? Thank you very much!

Regards,
Helin

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> Sent: Wednesday, February 25, 2015 5:50 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] closing version 2.0.0-rc1
> 
> It is time to close the first release candidate for DPDK 2.0.
> Then we are going to enter into the RC testing phase which was planned from
> 23rd February to 13th March. Only fixes, and cleanups will be accepted.
> At the end, the release should be out on 31st March.
> 
> This is the list of the integrated features:
> 	* ABI versioning
> 	* x32 ABI
> 	* non-eal thread supports
> 	* multi-pthread per core
> 	* jobstats library
> 	* reorder library
> 	* acl for AVX2
> 	* crc hash arch-independent
> 	* uio_pci_generic support
> 	* kni optimizations
> 	* vhost-user support
> 	* virtio (link, vlan, mac, port IO, perf)
> 	* ixgbevf RSS
> 	* i40e hash filtering
> 	* i40e nvgre offloading
> 	* i40e switch
> 	* fm10k driver
> 	* bonding mode 4 tests
> 	* bonding mode 6
> 	* Rx/Tx callbacks
> 	* unified flow types
> 	* remove old filtering API (flow director, flex, ethertype, syn, ntuple)
> 	* remove device arguments limit
> 	* add indirect attached mbuf flag
> 	* use default port configuration in testpmd
> 	* tunnel offloading in testpmd
> 
> Some missing features may still be integrated in the last minute run:
> 	* hotplug
> 	* mlx4 driver
> 	* interrupt mode
> 	* sched enhancements
> 
> Some other features were submitted too late or haven't been properly
> reviewed, so they are postponed (deferred state in patchwork):
> 	* tile arch
> 	* ixgbe base update
> 	* i40e TSO
> 	* xen net-front driver
> 	* hyper-v driver
> 	* bnx2x driver
> 	* unified packet type
> 
> Thanks for your attention

^ permalink raw reply	[relevance 0%]

* [dpdk-dev] [dpdk-announce] release candidate 2.0.0-rc1
@ 2015-02-26  0:12  4% Thomas Monjalon
  0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2015-02-26  0:12 UTC (permalink / raw)
  To: announce

A new DPDK release candidate is ready for testing:
	http://dpdk.org/browse/dpdk/tag/?id=v2.0.0-rc1

This is the first release candidate for DPDK 2.0.
It means we are entering in the RC testing phase.
Only fixes and cleanups will be accepted.

Changelog (main changes since 1.8.0)
	- enhancements:
		* ABI versioning
		* x32 ABI
		* non-eal thread supports
		* multi-pthread per core
		* port hotplug
		* jobstats library
		* reorder library
		* memcpy optimization
		* acl for AVX2
		* crc hash arch-independent
		* uio_pci_generic support
		* kni optimizations
		* vhost-user support
		* virtio (link, vlan, mac, port IO, perf)
		* ixgbevf RSS
		* i40e hash filtering
		* i40e nvgre offloading
		* i40e switch
		* fm10k driver
		* mlx4 driver
		* bonding mode 4 tests
		* bonding mode 6
		* Rx/Tx callbacks
		* unified flow types
		* remove old filtering API (flow director, flex, ethertype, syn, ntuple)
		* remove device arguments limit
		* add indirect attached mbuf flag
		* use default port configuration in testpmd
		* tunnel offloading in testpmd
	- fixes for:
		* build
		* big endian
		* cmdline
		* timer
		* lpm
		* pipeline
		* bonding
		* pcap
		* ixgbe vector
		* i40e

We are making good progress in organization and reviewing.
Please let's continue to sustain the reviewing effort.
You are welcome to check pending patches from other developers
in http://dpdk.org/dev/patchwork and make some comments:
	http://dpdk.org/dev#review

Thank you everyone for making it possible.

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] closing version 2.0.0-rc1
  2015-02-24 21:50  4% [dpdk-dev] closing version 2.0.0-rc1 Thomas Monjalon
@ 2015-02-25  0:56  0% ` Stephen Hemminger
  2015-02-27  0:42  0% ` Zhang, Helin
  1 sibling, 0 replies; 200+ results
From: Stephen Hemminger @ 2015-02-25  0:56 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

On Tue, 24 Feb 2015 22:50:29 +0100
Thomas Monjalon <thomas.monjalon@6wind.com> wrote:

> It is time to close the first release candidate for DPDK 2.0.
> Then we are going to enter into the RC testing phase which was planned
> from 23rd February to 13th March. Only fixes, and cleanups will be accepted.
> At the end, the release should be out on 31st March.
> 
> This is the list of the integrated features:
> 	* ABI versioning
> 	* x32 ABI
> 	* non-eal thread supports
> 	* multi-pthread per core
> 	* jobstats library
> 	* reorder library
> 	* acl for AVX2
> 	* crc hash arch-independent
> 	* uio_pci_generic support
> 	* kni optimizations
> 	* vhost-user support
> 	* virtio (link, vlan, mac, port IO, perf)
> 	* ixgbevf RSS
> 	* i40e hash filtering
> 	* i40e nvgre offloading
> 	* i40e switch
> 	* fm10k driver
> 	* bonding mode 4 tests
> 	* bonding mode 6
> 	* Rx/Tx callbacks
> 	* unified flow types
> 	* remove old filtering API (flow director, flex, ethertype, syn, ntuple)
> 	* remove device arguments limit
> 	* add indirect attached mbuf flag
> 	* use default port configuration in testpmd
> 	* tunnel offloading in testpmd

I have a bunch more fixes to vmxnet3, but they are small.

^ permalink raw reply	[relevance 0%]

* [dpdk-dev] closing version 2.0.0-rc1
@ 2015-02-24 21:50  4% Thomas Monjalon
  2015-02-25  0:56  0% ` Stephen Hemminger
  2015-02-27  0:42  0% ` Zhang, Helin
  0 siblings, 2 replies; 200+ results
From: Thomas Monjalon @ 2015-02-24 21:50 UTC (permalink / raw)
  To: dev

It is time to close the first release candidate for DPDK 2.0.
Then we are going to enter into the RC testing phase which was planned
from 23rd February to 13th March. Only fixes, and cleanups will be accepted.
At the end, the release should be out on 31st March.

This is the list of the integrated features:
	* ABI versioning
	* x32 ABI
	* non-eal thread supports
	* multi-pthread per core
	* jobstats library
	* reorder library
	* acl for AVX2
	* crc hash arch-independent
	* uio_pci_generic support
	* kni optimizations
	* vhost-user support
	* virtio (link, vlan, mac, port IO, perf)
	* ixgbevf RSS
	* i40e hash filtering
	* i40e nvgre offloading
	* i40e switch
	* fm10k driver
	* bonding mode 4 tests
	* bonding mode 6
	* Rx/Tx callbacks
	* unified flow types
	* remove old filtering API (flow director, flex, ethertype, syn, ntuple)
	* remove device arguments limit
	* add indirect attached mbuf flag
	* use default port configuration in testpmd
	* tunnel offloading in testpmd

Some missing features may still be integrated in the last minute run:
	* hotplug
	* mlx4 driver
	* interrupt mode
	* sched enhancements

Some other features were submitted too late or haven't been properly reviewed,
so they are postponed (deferred state in patchwork):
	* tile arch
	* ixgbe base update
	* i40e TSO
	* xen net-front driver
	* hyper-v driver
	* bnx2x driver
	* unified packet type

Thanks for your attention

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [PATCH 0/8] Improve build process
  2015-02-23 18:23  0%                               ` Neil Horman
@ 2015-02-24 13:24  0%                                 ` Gonzalez Monroy, Sergio
  0 siblings, 0 replies; 200+ results
From: Gonzalez Monroy, Sergio @ 2015-02-24 13:24 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On 23/02/2015 18:23, Neil Horman wrote:
> On Mon, Feb 23, 2015 at 02:58:30PM +0000, Gonzalez Monroy, Sergio wrote:
>> On 23/02/2015 13:52, Neil Horman wrote:
>>> On Mon, Feb 23, 2015 at 10:25:01AM +0000, Gonzalez Monroy, Sergio wrote:
>>>> On 22/02/2015 23:37, Neil Horman wrote:
>>>>> On Fri, Feb 20, 2015 at 02:31:36PM +0000, Gonzalez Monroy, Sergio wrote:
>>>>>> On 13/02/2015 12:51, Neil Horman wrote:
>>>>>>> On Fri, Feb 13, 2015 at 11:08:02AM +0000, Gonzalez Monroy, Sergio wrote:
>>>>>>>> On 13/02/2015 10:14, Panu Matilainen wrote:
>>>>>>>>> On 02/12/2015 05:52 PM, Neil Horman wrote:
>>>>>>>>>> On Thu, Feb 12, 2015 at 04:07:50PM +0200, Panu Matilainen wrote:
>>>>>>>>>>> On 02/12/2015 02:23 PM, Neil Horman wrote:
>>>>>>>>> [...snip...]
>>>>>>>>>>>>>>> So I just realized that I was not having into account a possible
>>>>>>>>>>>>>>> scenario, where
>>>>>>>>>>>>>>> we have an app built with static dpdk libs then loading a dso
>>>>>>>>>>>>>>> with -d
>>>>>>>>>>>>>>> option.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> In such case, because the pmd would have DT_NEEDED entries,
>>>>>>>>>>>>>>> dlopen will
>>>>>>>>>>>>>>> fail.
>>>>>>>>>>>>>>> So to enable such scenario we would need to build PMDs without
>>>>>>>>>>>>>>> DT_NEEDED
>>>>>>>>>>>>>>> entries.
>>>>>>>>>>>>>> Hmm, for that to be a problem you'd need to have the PMD built
>>>>>>>>>>>>>> against
>>>>>>>>>>>>>> shared dpdk libs and while the application is built against
>>>>>>>>>>>>>> static dpdk
>>>>>>>>>>>>>> libs. I dont think that's a supportable scenario in any case.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Or is there some other scenario that I'm not seeing?
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>     - Panu -
>>>>>>>>>>>>>>
>>>>>>>>>>>>> I agree with you. I suppose it comes down to, do we want to
>>>>>>>>>>>>> support such
>>>>>>>>>>>>> scenario?
>>>>>>>>>>>>>
>>>>>>>>>>>>>  From what I can see, it seems that we do currently support such
>>>>>>>>>>>>> scenario by
>>>>>>>>>>>>> building dpdk apps against all static dpdk libs using
>>>>>>>>>>>>> --whole-archive (all
>>>>>>>>>>>>> libs and not only PMDs).
>>>>>>>>>>>>> http://dpdk.org/browse/dpdk/commit/?id=20afd76a504155e947c770783ef5023e87136ad8
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Am I misunderstanding this?
>>>>>>>>>>>>>
>>>>>>>>>>>> Shoot, you're right, I missed the static build aspect to this.  Yes,
>>>>>>>>>>>> if we do the following:
>>>>>>>>>>>>
>>>>>>>>>>>> 1) Build the DPDK as a static library
>>>>>>>>>>>> 2) Link an application against (1)
>>>>>>>>>>>> 3) Use the dlopen mechanism to load a PMD built as a DSO
>>>>>>>>>>>>
>>>>>>>>>>>> Then the DT_NEEDED entries in the DSO will go unsatisfied, because
>>>>>>>>>>>> the shared
>>>>>>>>>>>> objects on which it (the PMD) depends will not exist in the file
>>>>>>>>>>>> system.
>>>>>>>>>>> I think its even more twisty:
>>>>>>>>>>>
>>>>>>>>>>> 1) Build the DPDK as a static library
>>>>>>>>>>> 2) Link an application against (1)
>>>>>>>>>>> 3) Do another build of DPDK as a shared library
>>>>>>>>>>> 4) In app 2), use the dlopen mechanism to load a PMD built as a part
>>>>>>>>>>> of or
>>>>>>>>>>> against 3)
>>>>>>>>>>>
>>>>>>>>>>> Somehow I doubt this would work very well.
>>>>>>>>>>>
>>>>>>>>>> Ideally it should, presuming the ABI is preserved between (1) and (3),
>>>>>>>>>> though I
>>>>>>>>>> agree, up until recently, that was an assumption that was unreliable.
>>>>>>>>> Versioning is a big and important step towards reliability but there are
>>>>>>>>> more issues to solve. This of course getting pretty far from the original
>>>>>>>>> topic, but at least one such issue is that there are some cases where a
>>>>>>>>> config value affects what are apparently public structs (rte_mbuf wrt
>>>>>>>>> RTE_MBUF_REFCNT for example), which really is a no-go.
>>>>>>>>>
>>>>>>>> Agree, the RTE_MBUF_REFCNT is something that needs to be dealt with asap.
>>>>>>>> I'll look into it.
>>>>>>>>
>>>>>>>>>>>> I think the problem is a little bit orthogonal to the libdpdk_core
>>>>>>>>>>>> problem you
>>>>>>>>>>>> were initially addressing.  That is to say, this problem of
>>>>>>>>>>>> dlopen-ed PMD's
>>>>>>>>>>>> exists regardless of whether you build the DPDK as part of a static
>>>>>>>>>>>> or dynamic
>>>>>>>>>>>> library.  The problems just happen to intersect in their
>>>>>>>>>>>> manipulation of the
>>>>>>>>>>>> DT_NEEDED entries.
>>>>>>>>>>>>
>>>>>>>>>>>> Ok, so, given the above, I would say your approach is likely
>>>>>>>>>>>> correct, just
>>>>>>>>>>>> prevent DT_NEEDED entries from getting added to PMD's. Doing so will
>>>>>>>>>>>> sidestep
>>>>>>>>>>>> loading issue for libraries that may not exist in the filesystem,
>>>>>>>>>>>> but thats ok,
>>>>>>>>>>>> because by all rights, the symbols codified in those needed
>>>>>>>>>>>> libraries should
>>>>>>>>>>>> already be present in the running application (either made available
>>>>>>>>>>>> by the
>>>>>>>>>>>> application having statically linked them, or having the linker load
>>>>>>>>>>>> them from
>>>>>>>>>>>> the proper libraries at run time).
>>>>>>>>>>> My 5c is that I'd much rather see the common case (all static or all
>>>>>>>>>>> shared)
>>>>>>>>>>> be simple and reliable, which in case of DSOs includes no lying
>>>>>>>>>>> (whether by
>>>>>>>>>>> omission or otherwise) about DT_NEEDED, ever. That way the issue is
>>>>>>>>>>> dealt
>>>>>>>>>>> once where it belongs. If somebody wants to go down the rabbit hole of
>>>>>>>>>>> mixed
>>>>>>>>>>> shared + static linkage, let them dig the hole by themselves :)
>>>>>>>>>>>
>>>>>>>>>> This is a fair point.  Can DT_NEEDED sections be stripped via tools like
>>>>>>>>>> objcopy
>>>>>>>>>> after the build is complete?  If so, end users can hack this corner case
>>>>>>>>>> to work
>>>>>>>>>> as needed.
>>>>>>>>> Patchelf (http://nixos.org/patchelf.html) appears to support that, but
>>>>>>>>> given that source is available it'd be easier to just modify the makefiles
>>>>>>>>> if that's really needed.
>>>>>>>>>
>>>>>>>> I think we agree on the issue.
>>>>>>>>
>>>>>>>> So I'll be sending a patch to add DT_NEEDED entries to all libraries and
>>>>>>>> PMDs. The only exception would be librte_eal, which would not have proper
>>>>>>>> NEEDED entries.
>>>>>>>> Do we bother adding a linker script for librte_eal that would include
>>>>>>>> dependent libraries?
>>>>>>>>
>>>>>>> I say yes to the linker script, but will happily bow to an alternate consensus
>>>>>>> Neil
>>>>>>>
>>>>>> So the case we want to solve is the following circular dependencies:
>>>>>> eal             -> mempool, malloc
>>>>>> mempool -> eal , malloc, ring
>>>>>> malloc      -> eal
>>>>>> ring           -> eal, malloc
>>>>>>
>>>>>> We cannot write/create the proposed (below) linker script at least until we
>>>>>> have built mempool and malloc.
>>>>>> INPUT ( -lrte_eal.so -lrte_mempool -lrte_malloc )
>>>>>>
>>>>> Not sure I understand why you have a build time dependency on this.  Link time
>>>>> perhaps, but not build time.  Or am I reading too much into your use of the term
>>>>> 'built' above?
>>>> I meant 'built' as compiled + linked. Am I misusing the term?
>>> No, you're not (though I misused the term link time above, I meant to say load
>>> time).  So you're saying that when you build shared libraries, you get linker
>>> errors indicating that, during the build, you're missing symbols, is that
>>> correct?  I guess I'm confused because I don't see how thats not happening for
>>> everyone, right now.  In other words, I'm not sure what about your changes is
>>> giving rise to that problem.
>>>
>>>>>> Few ways I have thought about implementing this (not particularly fond of
>>>>>> any of them) :
>>>>>>   - Have the linker script file in the repo (scripts/ ?) in a fixed location
>>>>>> and just copy it to $(RTE_OUTPUT)/lib/ once all libs have finished building.
>>>>>>   - Generate the file on build time from a defined make variable once all
>>>>>> libs have finished
>>>>>>
>>>>> I'm still not sure I understand.  Why does this dependency exist at build time?
>>>>> The dependency between malloc and eal shouldn't be a problem during the build,
>>>>> as symbols from each other should just remain undefined, and get resolved at
>>>>> load time.
>>>> Is that not the way it is currently implemented?
>>>> I get the impression that we are talking about different goals (correct me
>>>> if it is not the case)
>>>>
>>> We may well be, I'm not sure yet.
>>>
>>>> I thought that the agreed solution was to:
>>>> 1) NOT to create/generate a 'core' library
>>>> 2) Add DT_NEEDED entries for all libraries (except eal which is the first
>>>> library we link)
>>>> 3) Use linker script for eal
>>>>
>>> Ok, we're definitely on the same page, as that's what I thought the goal was as
>>> well.
>>>
>>>> Given the previously mentioned circular dependencies between eal, mempool,
>>>> malloc and ring:
>>>> - eal would not be linked against other libraries (no NEEDED entries)
>>>> - malloc is linked against eal (previously built), so malloc would have a
>>>> NEEDED entry for eal.
>>>>
>>>> In that scenario, if the linker script is setup/created after we build eal,
>>>> then when we try to link malloc
>>>> against eal, the linker will pull mempool and malloc too (because we
>>>> included them in the linker script).
>>>> Therefore, the link fails as none of those libraries (malloc and mempool)
>>>> have been built yet.
>>>>
>>> Ah, I see now, I wasn't thinking about the extra requirements that DT_NEEDED
>>> entries placed on the build conditions.
>>>
>>> I see now, apologies for being dense previously.  Given what you indicate I
>>> would say that the solution here is to link the libraries against individual
>>> other specific libraries, not the core library that you generate as a linker
>>> script.  That way you avoid the circular dependency, and the core library just
>>> becomes a convenience for application developers looking to link to a single
>>> library.
>>>
>> I'm not sure I quite understand your suggestion.
>>
>> Could you roughly describe steps for building eal, malloc and mempool libs ?
>> For example, something like this?
>> 1) build eal, which creates librte_eal.so.1
>> 2) write linker script for librte_eal.so
>> 3) build malloc against eal (-lrte_eal )
>> etc
> Hm, so I spent a bit of time looking at this, and you're right, I thought this was
> really just an artifact of the introduction of --as-needed to the build to force
> DT_NEEDED entries, and my suggestion was that you simply not link the libraries
> that were causing the circular dependency.  I had assumed that the link
> directives included -lrte_malloc -lrte_mempool for the eal library, but they
> weren't really needed, so you could remove them and it would work out.
>
> Unfortunately that turns out to not be the case.  librte_eal does explicitly use
> calls in librte_malloc, and vice versa.  The current use of -no-as-needed in the
> build system (which I was previously unaware of), is a hack to avoid having to
> address that problem.
>
> That throws a monkey wrench into this plan.  I would see 3 ways forward:
>
> 1) Fix the problem - That is to say, remove the use of --no-as-needed from the
> build, and address the circular dependencies that arise.  This could/will mean
> actually merging libraries with circular dependencies into a single library, as
> they should be so that they can completely resolve all of the symbols they use
> at link time
>
> 2) Ignore the problem.  If we just keep the lack of DT_NEEDED entries in place,
> I think the problem goes away, and we can continue on.
>
> I think option 1 is likely the more correct approach, as removing DT_NEEDED to
> avoid a circular dependency is a hack, but it may not be the most pragmatic
> approach.  just living without DT_NEEDED entries and documenting link time needs
> will certainly be faster, and might be the better course of action, especially
> if we provide a 'core' pseudo library/linker script that embodies that action
> for the end user.
>
> Neil
>
So basically for 1) the approach of creating a core library would be a 
solution.
The last suggestion for the core library was to not merge the sources 
but generate a single library.
The problem with that was the versioning wouldn't work as it currently 
is, given that is per library.
So if we were to create a core library, we just need to merge the 
version.map files of each library
into a single version.map for the core library. This approach, as you 
noted, would be the proper fix.

The other solution would be to just leave eal without DT_NEEDED entries 
and specify in the docs
that apps require eal, mempool, malloc and ring to be linked, such as:
--no-as-needed -lrte_eal -lrte_mempool -lrte_malloc -lrte_ring --as-needed
With those flags it should work regardless of --as-needed being set 
beforehand.

With this second approach we would have all libraries, but eal, with 
DT_NEEDED entries and we
would not need to create a core library. We don't really need to create 
a linker script for that
(not sure we can even create such linker script with those flags) and 
just documenting link time
needs as you mentioned should be enough.

So should I go forward with last suggested approach?

Regards,
Sergio
>> I suppose that another way to go about this, instead of creating the linker
>> script that pulls
>> dependent libraries, is to always link (using --no-as-needed in case gcc
>> adds it by default)
>> against these libraries (eal, mempool, malloc, and ring) with necessary doc
>> about how to build apps.
>>
>> Sergio
>>> Neil
>>>
>>>> Was your suggestion to leave all of these libraries (eal, mempool, malloc,
>>>> ring) without NEEDED entries?
>>>>
>>> No, you can add NEEDED entries there, they will just be for the individual
>>> libraries, not the core linker script library.
>>>
>>> Best
>>> Neil
>>>
>>>> Regards,
>>>> Sergio
>>>>> What is the error you are getting?
>>>>>
>>>>> Best
>>>>> Neil
>>>>>
>>>>>> Thoughts? Any other approach is more than welcome!
>>>>>>
>>>>>> Sergio
>>>>>>
>>>>>> PS: Thinking again on the core library and the issue of having multiple
>>>>>> version.map files, we could have a core_version.map instead of
>>>>>> multiple files per core library (eal, mempool, etc)
>>>>>>
>>>>>>
>>>>
>>
>>

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v6 0/3] DPDK ethdev callback support
  2015-02-23 18:30  4%   ` [dpdk-dev] [PATCH v6 " John McNamara
@ 2015-02-23 23:39  0%     ` Thomas Monjalon
  0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2015-02-23 23:39 UTC (permalink / raw)
  To: John McNamara, bruce.richardson; +Cc: dev

2015-02-23 18:30, John McNamara:
> This patchset is for a small optional addition to the ethdev library,
> to add support for callbacks at the RX and TX stages. This allows
> packet processing to be done on packets before they get returned
> to applications using rte_eth_rx_burst call.
> 
> See the RFC cover letter for the use cases:
> 
>     http://dpdk.org/ml/archives/dev/2014-December/010491.html
> 
> For this version we spent some time investigating Stephen Hemminger's
> suggestion of using the userspace RCU (read-copy-update) library for
> SMP safety:
> 
>    http://urcu.so/
> 
> The default liburcu (which defaulted to liburcu-mb) requires the least
> interaction from the end user but showed a 25% drop in packet throughput
> in the callback sample app.
> 
> The liburcu-qsbr (quiescent state) variant showed a 1% drop in packet
> throughput in the callback sample app. However it requires registered
> RCU threads in the program to periodically announce quiescent states.
> This makes it more difficult to implement for end user applications.
> 
> For this release we will document that adding and removing callbacks
> is not thread safe.
> 
> Note: Sample application documentation to follow in a patch update.
> 
> Version 6 changes:
>     * RX/TX callback functions are no longer #ifdefed out if callback
>       option is off. Instead they return ENOTSUP.
>     * Simplified callbacks #ifdefs in rte_ethdev.
> 
> Version 5 changes:
>     * Turned the callback feature on by default.
>     * Simplified #define name.
> 
> Version 4 changes:
>     * Made the callback feature a compile time option.
> 
> Version 3 changes:
>     * Removed unnecessary header file from example folder
>       (which included baremetal reference).
>     * Renamed the interrupt, RX and TX callbacks to make their function
>       clearer (using the names suggested in the mailing list comments).
>     * Squashed ABI version update into the commit it relates to.
>     * Fixed various checkpatch warnings.
> 
> Version 2 changes:
>     * Added ABI versioning.
>     * Doxygen clarifications.
> 
> Version 1 changes:
>     * Added callback removal functions.
>     * Minor fixes.
> 
> Richardson, Bruce (3):
>   ethdev: rename callbacks field to link_intr_cbs
>   ethdev: add optional rxtx callback support
>   examples: example showing use of callbacks.

Applied, thanks

^ permalink raw reply	[relevance 0%]

* [dpdk-dev] [PATCH v6 0/3] DPDK ethdev callback support
  2015-02-18 17:42  4% ` [dpdk-dev] [PATCH v3 " John McNamara
  2015-02-19 17:56  4%   ` [dpdk-dev] [PATCH v4 " John McNamara
  2015-02-20 17:03  4%   ` [dpdk-dev] [PATCH v5 " John McNamara
@ 2015-02-23 18:30  4%   ` John McNamara
  2015-02-23 23:39  0%     ` Thomas Monjalon
  2 siblings, 1 reply; 200+ results
From: John McNamara @ 2015-02-23 18:30 UTC (permalink / raw)
  To: dev

This patchset is for a small optional addition to the ethdev library,
to add support for callbacks at the RX and TX stages. This allows
packet processing to be done on packets before they get returned
to applications using rte_eth_rx_burst call.

See the RFC cover letter for the use cases:

    http://dpdk.org/ml/archives/dev/2014-December/010491.html

For this version we spent some time investigating Stephen Hemminger's
suggestion of using the userspace RCU (read-copy-update) library for
SMP safety:

   http://urcu.so/

The default liburcu (which defaulted to liburcu-mb) requires the least
interaction from the end user but showed a 25% drop in packet throughput
in the callback sample app.

The liburcu-qsbr (quiescent state) variant showed a 1% drop in packet
throughput in the callback sample app. However it requires registered
RCU threads in the program to periodically announce quiescent states.
This makes it more difficult to implement for end user applications.

For this release we will document that adding and removing callbacks
is not thread safe.

Note: Sample application documentation to follow in a patch update.

Version 6 changes:
    * RX/TX callback functions are no longer #ifdefed out if callback
      option is off. Instead they return ENOTSUP.
    * Simplified callbacks #ifdefs in rte_ethdev.

Version 5 changes:
    * Turned the callback feature on by default.
    * Simplified #define name.

Version 4 changes:
    * Made the callback feature a compile time option.

Version 3 changes:
    * Removed unnecessary header file from example folder
      (which included baremetal reference).
    * Renamed the interrupt, RX and TX callbacks to make their function
      clearer (using the names suggested in the mailing list comments).
    * Squashed ABI version update into the commit it relates to.
    * Fixed various checkpatch warnings.

Version 2 changes:
    * Added ABI versioning.
    * Doxygen clarifications.

Version 1 changes:
    * Added callback removal functions.
    * Minor fixes.


Richardson, Bruce (3):
  ethdev: rename callbacks field to link_intr_cbs
  ethdev: add optional rxtx callback support
  examples: example showing use of callbacks.

 MAINTAINERS                            |    4 +
 app/test/virtual_pmd.c                 |    2 +-
 config/common_bsdapp                   |    1 +
 config/common_linuxapp                 |    1 +
 examples/Makefile                      |    1 +
 examples/rxtx_callbacks/Makefile       |   57 ++++++++
 examples/rxtx_callbacks/main.c         |  228 ++++++++++++++++++++++++++++++++
 lib/librte_ether/rte_ethdev.c          |  216 ++++++++++++++++++++++++++++--
 lib/librte_ether/rte_ethdev.h          |  203 ++++++++++++++++++++++++++++-
 lib/librte_ether/rte_ether_version.map |    4 +
 lib/librte_pmd_bond/rte_eth_bond_api.c |    2 +-
 lib/librte_pmd_ring/rte_eth_ring.c     |    2 +-
 12 files changed, 706 insertions(+), 15 deletions(-)
 create mode 100644 examples/rxtx_callbacks/Makefile
 create mode 100644 examples/rxtx_callbacks/main.c

-- 
1.7.4.1

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [PATCH 0/8] Improve build process
  2015-02-23 14:58  0%                             ` Gonzalez Monroy, Sergio
@ 2015-02-23 18:23  0%                               ` Neil Horman
  2015-02-24 13:24  0%                                 ` Gonzalez Monroy, Sergio
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-02-23 18:23 UTC (permalink / raw)
  To: Gonzalez Monroy, Sergio; +Cc: dev

On Mon, Feb 23, 2015 at 02:58:30PM +0000, Gonzalez Monroy, Sergio wrote:
> On 23/02/2015 13:52, Neil Horman wrote:
> >On Mon, Feb 23, 2015 at 10:25:01AM +0000, Gonzalez Monroy, Sergio wrote:
> >>On 22/02/2015 23:37, Neil Horman wrote:
> >>>On Fri, Feb 20, 2015 at 02:31:36PM +0000, Gonzalez Monroy, Sergio wrote:
> >>>>On 13/02/2015 12:51, Neil Horman wrote:
> >>>>>On Fri, Feb 13, 2015 at 11:08:02AM +0000, Gonzalez Monroy, Sergio wrote:
> >>>>>>On 13/02/2015 10:14, Panu Matilainen wrote:
> >>>>>>>On 02/12/2015 05:52 PM, Neil Horman wrote:
> >>>>>>>>On Thu, Feb 12, 2015 at 04:07:50PM +0200, Panu Matilainen wrote:
> >>>>>>>>>On 02/12/2015 02:23 PM, Neil Horman wrote:
> >>>>>>>[...snip...]
> >>>>>>>>>>>>>So I just realized that I was not having into account a possible
> >>>>>>>>>>>>>scenario, where
> >>>>>>>>>>>>>we have an app built with static dpdk libs then loading a dso
> >>>>>>>>>>>>>with -d
> >>>>>>>>>>>>>option.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>In such case, because the pmd would have DT_NEEDED entries,
> >>>>>>>>>>>>>dlopen will
> >>>>>>>>>>>>>fail.
> >>>>>>>>>>>>>So to enable such scenario we would need to build PMDs without
> >>>>>>>>>>>>>DT_NEEDED
> >>>>>>>>>>>>>entries.
> >>>>>>>>>>>>Hmm, for that to be a problem you'd need to have the PMD built
> >>>>>>>>>>>>against
> >>>>>>>>>>>>shared dpdk libs and while the application is built against
> >>>>>>>>>>>>static dpdk
> >>>>>>>>>>>>libs. I dont think that's a supportable scenario in any case.
> >>>>>>>>>>>>
> >>>>>>>>>>>>Or is there some other scenario that I'm not seeing?
> >>>>>>>>>>>>
> >>>>>>>>>>>>    - Panu -
> >>>>>>>>>>>>
> >>>>>>>>>>>I agree with you. I suppose it comes down to, do we want to
> >>>>>>>>>>>support such
> >>>>>>>>>>>scenario?
> >>>>>>>>>>>
> >>>>>>>>>>> From what I can see, it seems that we do currently support such
> >>>>>>>>>>>scenario by
> >>>>>>>>>>>building dpdk apps against all static dpdk libs using
> >>>>>>>>>>>--whole-archive (all
> >>>>>>>>>>>libs and not only PMDs).
> >>>>>>>>>>>http://dpdk.org/browse/dpdk/commit/?id=20afd76a504155e947c770783ef5023e87136ad8
> >>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>>Am I misunderstanding this?
> >>>>>>>>>>>
> >>>>>>>>>>Shoot, you're right, I missed the static build aspect to this.  Yes,
> >>>>>>>>>>if we do the following:
> >>>>>>>>>>
> >>>>>>>>>>1) Build the DPDK as a static library
> >>>>>>>>>>2) Link an application against (1)
> >>>>>>>>>>3) Use the dlopen mechanism to load a PMD built as a DSO
> >>>>>>>>>>
> >>>>>>>>>>Then the DT_NEEDED entries in the DSO will go unsatisfied, because
> >>>>>>>>>>the shared
> >>>>>>>>>>objects on which it (the PMD) depends will not exist in the file
> >>>>>>>>>>system.
> >>>>>>>>>I think its even more twisty:
> >>>>>>>>>
> >>>>>>>>>1) Build the DPDK as a static library
> >>>>>>>>>2) Link an application against (1)
> >>>>>>>>>3) Do another build of DPDK as a shared library
> >>>>>>>>>4) In app 2), use the dlopen mechanism to load a PMD built as a part
> >>>>>>>>>of or
> >>>>>>>>>against 3)
> >>>>>>>>>
> >>>>>>>>>Somehow I doubt this would work very well.
> >>>>>>>>>
> >>>>>>>>Ideally it should, presuming the ABI is preserved between (1) and (3),
> >>>>>>>>though I
> >>>>>>>>agree, up until recently, that was an assumption that was unreliable.
> >>>>>>>Versioning is a big and important step towards reliability but there are
> >>>>>>>more issues to solve. This of course getting pretty far from the original
> >>>>>>>topic, but at least one such issue is that there are some cases where a
> >>>>>>>config value affects what are apparently public structs (rte_mbuf wrt
> >>>>>>>RTE_MBUF_REFCNT for example), which really is a no-go.
> >>>>>>>
> >>>>>>Agree, the RTE_MBUF_REFCNT is something that needs to be dealt with asap.
> >>>>>>I'll look into it.
> >>>>>>
> >>>>>>>>>>I think the problem is a little bit orthogonal to the libdpdk_core
> >>>>>>>>>>problem you
> >>>>>>>>>>were initially addressing.  That is to say, this problem of
> >>>>>>>>>>dlopen-ed PMD's
> >>>>>>>>>>exists regardless of weather you build the DPDK as part of a static
> >>>>>>>>>>or dynamic
> >>>>>>>>>>library.  The problems just happen to intersect in their
> >>>>>>>>>>manipulation of the
> >>>>>>>>>>DT_NEEDED entries.
> >>>>>>>>>>
> >>>>>>>>>>Ok, so, given the above, I would say your approach is likely
> >>>>>>>>>>correct, just
> >>>>>>>>>>prevent DT_NEEDED entries from getting added to PMD's. Doing so will
> >>>>>>>>>>sidestep
> >>>>>>>>>>loading issue for libraries that may not exist in the filesystem,
> >>>>>>>>>>but thats ok,
> >>>>>>>>>>because by all rights, the symbols codified in those needed
> >>>>>>>>>>libraries should
> >>>>>>>>>>already be present in the running application (either made available
> >>>>>>>>>>by the
> >>>>>>>>>>application having statically linked them, or having the linker load
> >>>>>>>>>>them from
> >>>>>>>>>>the proper libraries at run time).
> >>>>>>>>>My 5c is that I'd much rather see the common case (all static or all
> >>>>>>>>>shared)
> >>>>>>>>>be simple and reliable, which in case of DSOs includes no lying
> >>>>>>>>>(whether by
> >>>>>>>>>omission or otherwise) about DT_NEEDED, ever. That way the issue is
> >>>>>>>>>dealt
> >>>>>>>>>once where it belongs. If somebody wants to go down the rabbit hole of
> >>>>>>>>>mixed
> >>>>>>>>>shared + static linkage, let them dig the hole by themselves :)
> >>>>>>>>>
> >>>>>>>>This is a fair point.  Can DT_NEEDED sections be stripped via tools like
> >>>>>>>>objcopy
> >>>>>>>>after the build is complete?  If so, end users can hack this corner case
> >>>>>>>>to work
> >>>>>>>>as needed.
> >>>>>>>Patchelf (http://nixos.org/patchelf.html) appears to support that, but
> >>>>>>>given that source is available it'd be easier to just modify the makefiles
> >>>>>>>if that's really needed.
> >>>>>>>
> >>>>>>I think we agree on the issue.
> >>>>>>
> >>>>>>So I'll be sending a patch to add DT_NEEDED entries to all libraries and
> >>>>>>PMDs. The only exception would be librte_eal, which would not have proper
> >>>>>>NEEDED entries.
> >>>>>>Do we bother adding a linker script for librte_eal that would include
> >>>>>>dependent libraries?
> >>>>>>
> >>>>>I say yes to the linker script, but will happily bow to an alternate consensus
> >>>>>Neil
> >>>>>
> >>>>So the case we want to solve is the following circular dependencies:
> >>>>eal             -> mempool, malloc
> >>>>mempool -> eal , malloc, ring
> >>>>malloc      -> eal
> >>>>ring           -> eal, malloc
> >>>>
> >>>>We cannot write/create the proposed (below) linker script at least until we
> >>>>have built mempool and malloc.
> >>>>INPUT ( -lrte_eal.so -lrte_mempool -lrte_malloc )
> >>>>
> >>>Not sure I understand why you have a build time dependency on this.  Link time
> >>>perhaps, but not build time.  Or am I reading too much into your use of the term
> >>>'built' above?
> >>I meant 'built' as compiled + linked. Am I misusing the term?
> >No, you're not (though I misused the term link time above, I meant to say load
> >time).  So you're saying that when you build shared libraries, you get linker
> >errors indicating that, during the build, you're missing symbols, is that
> >correct?  I guess I'm confused because I don't see how thats not happening for
> >everyone, right now.  In other words, I'm not sure what about your changes is
> >giving rise to that problem.
> >
> >>>>Few ways I have thought about implementing this (not particularly fond of
> >>>>any of them) :
> >>>>  - Have the linker script file in the repo (scripts/ ?) in a fixed location
> >>>>and just copy it to $(RTE_OUTPUT)/lib/ once all libs have finished building.
> >>>>  - Generate the file on build time from a defined make variable once all
> >>>>libs have finished
> >>>>
> >>>I'm still not sure I understand.  Why does this dependency exist at build time?
> >>>The dependency between malloc and eal shouldn't be a problem during the build,
> >>>as symbols from each other should just remain undefined, and get resolved at
> >>>load time.
> >>Is that not the way it is currently implemented?
> >>I get the impression that we are talking about different goals (correct me
> >>if it is not the case)
> >>
> >We may well be, I'm not sure yet.
> >
> >>I thought that the agreed solution was to:
> >>1) NOT to create/generate a 'core' library
> >>2) Add DT_NEEDED entries for all libraries (except eal which is the first
> >>library we link)
> >>3) Use linker script for eal
> >>
> >Ok, we're definately on the same page, as thats what I thought the goal was as
> >well.
> >
> >>Given the previously mentioned circular dependencies between eal, mempool,
> >>malloc and ring:
> >>- eal would not be linked against other libraries (no NEEDED entries)
> >>- malloc is linked against eal (previously built), so malloc would have a
> >>NEEDED entry for eal.
> >>
> >>In that scenario, if the linker script is setup/created after we build eal,
> >>then when we try to link malloc
> >>against eal, the linker will pull mempool and malloc too (because we
> >>included them in the linker script).
> >>Therefore, the link fails as none of those libraries (malloc and mempool)
> >>have been built yet.
> >>
> >Ah, I see now, I wasn't thinking about the extra requirements that DT_NEEDED
> >entries placed on the build conditions.
> >
> >I see now, apologies for being dense previously.  Given what you indicate I
> >would say that the solution here is to link the libraries against individual
> >other specific libraries, not the core library that you generate as a linker
> >script.  That way you avoid the circular dependency, and the core library just
> >becomes a convienience for application developers looking to link to a single
> >library.
> >
> I'm not sure I quite understand your suggestion.
> 
> Could you roughly describe steps for building eal, malloc and mempool libs ?
> For example, something like this?
> 1) build eal, which creates librte_eal.so.1
> 2) write linker script for librte_eal.so
> 3) build malloc against eal (-lrte_eal )
> etc

Hm, so I spent a bit of time looking at this, and you're right, I thought this was
really just an artifact of the introduction of --as-needed to the build to force
DT_NEEDED entries, and my suggestion was that you simply not link the libraries
that were causing the circular dependency.  I had assumed that the link
directives included -lrte_malloc -lrte_mempool for the eal library, but they
weren't really needed, so you could remove them and it would work out.

Unfortunately that turns out to not be the case.  librte_eal does explicitly use
calls in librte_malloc, and vice versa.  The current use of --no-as-needed in the
build system (which I was previously unaware of), is a hack to avoid having to
address that problem.

That throws a monkey wrench into this plan.  I would see 2 ways forward:

1) Fix the problem - That is to say, remove the use of --no-as-needed from the
build, and address the circular dependencies that arise.  This could/will mean
actually merging libraries with circular dependencies into a single library, as
they should be so that they can completely resolve all of the symbols they use
at link time

2) Ignore the problem.  If we just keep the lack of DT_NEEDED entries in place,
I think the problem goes away, and we can continue on.

I think option 1 is likely the more correct approach, as removing DT_NEEDED to
avoid a circular dependency is a hack, but it may not be the most pragmatic
approach.  Just living without DT_NEEDED entries and documenting link time needs
will certainly be faster, and might be the better course of action, especially
if we provide a 'core' pseudo library/linker script that embodies that action
for the end user.

Neil


> I suppose that another way to go about this, instead of creating the linker
> script that pulls
> dependent libraries, is to always link (using --no-as-needed in case gcc
> adds it by default)
> against these libraries (eal, mempool, malloc, and ring) with necessary doc
> about how to build apps.
> 
> Sergio
> >Neil
> >
> >>Was your suggestion to leave all of these libraries (eal, mempool, malloc,
> >>ring) without NEEDED entries?
> >>
> >No, you can add NEEDED entries there, they will just be for the individual
> >libraries, not the core linker script library.
> >
> >Best
> >Neil
> >
> >>Regards,
> >>Sergio
> >>>What is the error you are getting?
> >>>
> >>>Best
> >>>Neil
> >>>
> >>>>Thoughts? Any other approach is more than welcome!
> >>>>
> >>>>Sergio
> >>>>
> >>>>PS: Thinking again on the core library and the issue of having multiple
> >>>>version.map files, we could have a core_version.map instead of
> >>>>multiple files per core library (eal, mempool, etc)
> >>>>
> >>>>
> >>
> >>
> 
> 
> 

^ permalink raw reply	[relevance 0%]

* [dpdk-dev] [PATCH v5 1/6] ethdev: add rx interrupt enable/disable functions
  2015-02-23 16:55  3% [dpdk-dev] [PATCH v5 0/6] " Zhou Danny
@ 2015-02-23 16:55  3% ` Zhou Danny
  2015-02-27  4:56  3% ` [dpdk-dev] [PATCH v6 0/8] Interrupt mode PMD Cunming Liang
  1 sibling, 0 replies; 200+ results
From: Zhou Danny @ 2015-02-23 16:55 UTC (permalink / raw)
  To: dev

v5 changes
- Rebase the patchset onto the HEAD

v4 changes
- Export interrupt enable/disable functions for shared libraries
- Put new functions at the end of eth_dev_ops to avoid breaking ABI

v3 changes
- Add return value for interrupt enable/disable functions

Add two dev_ops functions to enable and disable rx queue interrupts

Signed-off-by: Danny Zhou <danny.zhou@intel.com>
Tested-by: Yong Liu <yong.liu@intel.com>
---
 lib/librte_ether/rte_ethdev.c          | 43 +++++++++++++++++++++++++
 lib/librte_ether/rte_ethdev.h          | 59 ++++++++++++++++++++++++++++++++++
 lib/librte_ether/rte_ether_version.map |  2 ++
 3 files changed, 104 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 27bbb0b..eaf29de 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -2830,6 +2830,49 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 	}
 	rte_spinlock_unlock(&rte_eth_dev_cb_lock);
 }
+
+int
+rte_eth_dev_rx_queue_intr_enable(uint8_t port_id,
+				uint16_t queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	if (port_id >= nb_ports) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return (-ENODEV);
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (dev == NULL) {
+		PMD_DEBUG_TRACE("Invalid port device\n");
+		return (-ENODEV);
+	}
+
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP);
+	return (*dev->dev_ops->rx_queue_intr_enable)(dev, queue_id);
+}
+
+int
+rte_eth_dev_rx_queue_intr_disable(uint8_t port_id,
+				uint16_t queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	if (port_id >= nb_ports) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return (-ENODEV);
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (dev == NULL) {
+		PMD_DEBUG_TRACE("Invalid port device\n");
+		return (-ENODEV);
+	}
+
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP);
+	return (*dev->dev_ops->rx_queue_intr_disable)(dev, queue_id);
+}
+
 #ifdef RTE_NIC_BYPASS
 int rte_eth_dev_bypass_init(uint8_t port_id)
 {
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 4acd595..7aa6c81 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -823,6 +823,8 @@ struct rte_eth_fdir {
 struct rte_intr_conf {
 	/** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */
 	uint16_t lsc;
+	/** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */
+	uint16_t rxq;
 };
 
 /**
@@ -1028,6 +1030,14 @@ typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
 				    const struct rte_eth_txconf *tx_conf);
 /**< @internal Setup a transmit queue of an Ethernet device. */
 
+typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
+				    uint16_t rx_queue_id);
+/**< @internal Enable interrupt of a receive queue of an Ethernet device. */
+
+typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
+				    uint16_t rx_queue_id);
+/**< @internal Disable interrupt of a receive queue of an Ethernet device. */
+
 typedef void (*eth_queue_release_t)(void *queue);
 /**< @internal Release memory resources allocated by given RX/TX queue. */
 
@@ -1379,6 +1389,10 @@ struct eth_dev_ops {
 	/** Get current RSS hash configuration. */
 	rss_hash_conf_get_t rss_hash_conf_get;
 	eth_filter_ctrl_t              filter_ctrl;          /**< common filter control*/
+
+	/** Enable/disable Rx queue interrupt. */
+	eth_rx_enable_intr_t       rx_queue_intr_enable; /**< Enable Rx queue interrupt. */
+	eth_rx_disable_intr_t      rx_queue_intr_disable; /**< Disable Rx queue interrupt.*/
 };
 
 /**
@@ -2672,6 +2686,51 @@ void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 				enum rte_eth_event_type event);
 
 /**
+ * When there is no rx packet coming in Rx Queue for a long time, we can
+ * sleep lcore related to RX Queue for power saving, and enable rx interrupt
+ * to be triggered when rx packect arrives.
+ *
+ * The rte_eth_dev_rx_queue_intr_enable() function enables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_queue_intr_enable(uint8_t port_id,
+				uint16_t queue_id);
+
+/**
+ * When lcore wakes up from rx interrupt indicating packet coming, disable rx
+ * interrupt and returns to polling mode.
+ *
+ * The rte_eth_dev_rx_queue_intr_disable() function disables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_queue_intr_disable(uint8_t port_id,
+				uint16_t queue_id);
+
+/**
  * Turn on the LED on the Ethernet device.
  * This function turns on the LED on the Ethernet device.
  *
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index f66fd2d..6fef09e 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -42,6 +42,8 @@ DPDK_2.0 {
 	rte_eth_dev_rss_hash_update;
 	rte_eth_dev_rss_reta_query;
 	rte_eth_dev_rss_reta_update;
+	rte_eth_dev_rx_queue_intr_disable;
+	rte_eth_dev_rx_queue_intr_enable;
 	rte_eth_dev_rx_queue_start;
 	rte_eth_dev_rx_queue_stop;
 	rte_eth_dev_set_link_down;
-- 
1.8.1.4

^ permalink raw reply	[relevance 3%]

* [dpdk-dev] [PATCH v5 0/6] Interrupt mode PMD
@ 2015-02-23 16:55  3% Zhou Danny
  2015-02-23 16:55  3% ` [dpdk-dev] [PATCH v5 1/6] ethdev: add rx interrupt enable/disable functions Zhou Danny
  2015-02-27  4:56  3% ` [dpdk-dev] [PATCH v6 0/8] Interrupt mode PMD Cunming Liang
  0 siblings, 2 replies; 200+ results
From: Zhou Danny @ 2015-02-23 16:55 UTC (permalink / raw)
  To: dev

v5 changes
- Rebase the patchset onto the HEAD
- Isolate ethdev from EAL for new-added wait-for-rx interrupt function
- Export wait-for-rx interrupt function for shared libraries
- Split-off a new patch file for changed struct rte_intr_handle that
other patches depend on, to avoid breaking git bisect
- Change sample application to accommodate EAL function spec change
accordingly

v4 changes
- Export interrupt enable/disable functions for shared libraries
- Adjust position of new-added structure fields and functions to
avoid breaking ABI
 
v3 changes
- Add return value for interrupt enable/disable functions
- Move spinlock from PMD to L3fwd-power
- Remove unnecessary variables in e1000_mac_info
- Fix miscellaneous review comments
 
v2 changes
- Fix compilation issue in Makefile for missed header file.
- Consolidate internal and community review comments of v1 patch set.
 
The patch series introduce low-latency one-shot rx interrupt into DPDK with
polling and interrupt mode switch control example.
 
DPDK userspace interrupt notification and handling mechanism is based on UIO
with below limitation:
1) It is designed to handle LSC interrupt only with inefficient suspended
pthread wakeup procedure (e.g. UIO wakes up LSC interrupt handling thread
which then wakes up DPDK polling thread). In this way, it introduces
non-deterministic wakeup latency for DPDK polling thread as well as packet
latency if it is used to handle Rx interrupt.
2) UIO only supports a single interrupt vector which has to be shared by
LSC interrupt and interrupts assigned to dedicated rx queues.
 
This patchset includes below features:
1) Enable one-shot rx queue interrupt in ixgbe PMD(PF & VF) and igb PMD(PF only).
2) Build on top of the VFIO mechanism instead of UIO, so it could support
up to 64 interrupt vectors for rx queue interrupts.
3) Have 1 DPDK polling thread handle per Rx queue interrupt with a dedicated
VFIO eventfd, which eliminates non-deterministic pthread wakeup latency in
user space.
4) Demonstrate interrupt control APIs and userspace NAPI-like polling/interrupt
switch algorithms in L3fwd-power example.

Known limitations:
1) It does not work for UIO because a single interrupt eventfd shared by LSC
and rx queue interrupt handlers causes a mess.
2) LSC interrupt is not supported by VF driver, so it is by default disabled
in L3fwd-power now. Feel free to turn it on if you want to support both LSC
and rx queue interrupts on a PF.

Danny Zhou (6):
  ethdev: add rx interrupt enable/disable functions
  eal: add rx queue interrupt FDs to intr handle struct
  ixgbe: enable rx queue interrupts for both PF and VF
  igb: enable rx queue interrupts for PF
  eal: add per rx queue interrupt handling based on VFIO
  l3fwd-power: enable one-shot rx interrupt and polling/interrupt mode  
      switch

 examples/l3fwd-power/main.c                        | 155 ++++++---
 lib/librte_eal/bsdapp/eal/rte_eal_version.map      |   1 +
 lib/librte_eal/common/include/rte_eal.h            |   1 +
 lib/librte_eal/common/include/rte_interrupts.h     |  12 +
 lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 191 ++++++++---
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         |  12 +-
 .../linuxapp/eal/include/exec-env/rte_interrupts.h |   4 +
 lib/librte_eal/linuxapp/eal/rte_eal_version.map    |   1 +
 lib/librte_ether/rte_ethdev.c                      |  43 +++
 lib/librte_ether/rte_ethdev.h                      |  59 ++++
 lib/librte_ether/rte_ether_version.map             |   2 +
 lib/librte_pmd_e1000/e1000_ethdev.h                |   3 +
 lib/librte_pmd_e1000/igb_ethdev.c                  | 228 +++++++++++--
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c                | 365 ++++++++++++++++++++-
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h                |   7 +
 15 files changed, 970 insertions(+), 114 deletions(-)

-- 
1.8.1.4

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH 0/8] Improve build process
  2015-02-23 13:52  0%                           ` Neil Horman
@ 2015-02-23 14:58  0%                             ` Gonzalez Monroy, Sergio
  2015-02-23 18:23  0%                               ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Gonzalez Monroy, Sergio @ 2015-02-23 14:58 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On 23/02/2015 13:52, Neil Horman wrote:
> On Mon, Feb 23, 2015 at 10:25:01AM +0000, Gonzalez Monroy, Sergio wrote:
>> On 22/02/2015 23:37, Neil Horman wrote:
>>> On Fri, Feb 20, 2015 at 02:31:36PM +0000, Gonzalez Monroy, Sergio wrote:
>>>> On 13/02/2015 12:51, Neil Horman wrote:
>>>>> On Fri, Feb 13, 2015 at 11:08:02AM +0000, Gonzalez Monroy, Sergio wrote:
>>>>>> On 13/02/2015 10:14, Panu Matilainen wrote:
>>>>>>> On 02/12/2015 05:52 PM, Neil Horman wrote:
>>>>>>>> On Thu, Feb 12, 2015 at 04:07:50PM +0200, Panu Matilainen wrote:
>>>>>>>>> On 02/12/2015 02:23 PM, Neil Horman wrote:
>>>>>>> [...snip...]
>>>>>>>>>>>>> So I just realized that I was not taking into account a possible
>>>>>>>>>>>>> scenario, where
>>>>>>>>>>>>> we have an app built with static dpdk libs then loading a dso
>>>>>>>>>>>>> with -d
>>>>>>>>>>>>> option.
>>>>>>>>>>>>>
>>>>>>>>>>>>> In such case, because the pmd would have DT_NEEDED entries,
>>>>>>>>>>>>> dlopen will
>>>>>>>>>>>>> fail.
>>>>>>>>>>>>> So to enable such scenario we would need to build PMDs without
>>>>>>>>>>>>> DT_NEEDED
>>>>>>>>>>>>> entries.
>>>>>>>>>>>> Hmm, for that to be a problem you'd need to have the PMD built
>>>>>>>>>>>> against
>>>>>>>>>>>> shared dpdk libs and while the application is built against
>>>>>>>>>>>> static dpdk
>>>>>>>>>>>> libs. I dont think that's a supportable scenario in any case.
>>>>>>>>>>>>
>>>>>>>>>>>> Or is there some other scenario that I'm not seeing?
>>>>>>>>>>>>
>>>>>>>>>>>>     - Panu -
>>>>>>>>>>>>
>>>>>>>>>>> I agree with you. I suppose it comes down to, do we want to
>>>>>>>>>>> support such
>>>>>>>>>>> scenario?
>>>>>>>>>>>
>>>>>>>>>>>  From what I can see, it seems that we do currently support such
>>>>>>>>>>> scenario by
>>>>>>>>>>> building dpdk apps against all static dpdk libs using
>>>>>>>>>>> --whole-archive (all
>>>>>>>>>>> libs and not only PMDs).
>>>>>>>>>>> http://dpdk.org/browse/dpdk/commit/?id=20afd76a504155e947c770783ef5023e87136ad8
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Am I misunderstanding this?
>>>>>>>>>>>
>>>>>>>>>> Shoot, you're right, I missed the static build aspect to this.  Yes,
>>>>>>>>>> if we do the following:
>>>>>>>>>>
>>>>>>>>>> 1) Build the DPDK as a static library
>>>>>>>>>> 2) Link an application against (1)
>>>>>>>>>> 3) Use the dlopen mechanism to load a PMD built as a DSO
>>>>>>>>>>
>>>>>>>>>> Then the DT_NEEDED entries in the DSO will go unsatisfied, because
>>>>>>>>>> the shared
>>>>>>>>>> objects on which it (the PMD) depends will not exist in the file
>>>>>>>>>> system.
>>>>>>>>> I think its even more twisty:
>>>>>>>>>
>>>>>>>>> 1) Build the DPDK as a static library
>>>>>>>>> 2) Link an application against (1)
>>>>>>>>> 3) Do another build of DPDK as a shared library
>>>>>>>>> 4) In app 2), use the dlopen mechanism to load a PMD built as a part
>>>>>>>>> of or
>>>>>>>>> against 3)
>>>>>>>>>
>>>>>>>>> Somehow I doubt this would work very well.
>>>>>>>>>
>>>>>>>> Ideally it should, presuming the ABI is preserved between (1) and (3),
>>>>>>>> though I
>>>>>>>> agree, up until recently, that was an assumption that was unreliable.
>>>>>>> Versioning is a big and important step towards reliability but there are
>>>>>>> more issues to solve. This of course getting pretty far from the original
>>>>>>> topic, but at least one such issue is that there are some cases where a
>>>>>>> config value affects what are apparently public structs (rte_mbuf wrt
>>>>>>> RTE_MBUF_REFCNT for example), which really is a no-go.
>>>>>>>
>>>>>> Agree, the RTE_MBUF_REFCNT is something that needs to be dealt with asap.
>>>>>> I'll look into it.
>>>>>>
>>>>>>>>>> I think the problem is a little bit orthogonal to the libdpdk_core
>>>>>>>>>> problem you
>>>>>>>>>> were initially addressing.  That is to say, this problem of
>>>>>>>>>> dlopen-ed PMD's
>>>>>>>>>> exists regardless of whether you build the DPDK as part of a static
>>>>>>>>>> or dynamic
>>>>>>>>>> library.  The problems just happen to intersect in their
>>>>>>>>>> manipulation of the
>>>>>>>>>> DT_NEEDED entries.
>>>>>>>>>>
>>>>>>>>>> Ok, so, given the above, I would say your approach is likely
>>>>>>>>>> correct, just
>>>>>>>>>> prevent DT_NEEDED entries from getting added to PMD's. Doing so will
>>>>>>>>>> sidestep
>>>>>>>>>> loading issue for libraries that may not exist in the filesystem,
>>>>>>>>>> but thats ok,
>>>>>>>>>> because by all rights, the symbols codified in those needed
>>>>>>>>>> libraries should
>>>>>>>>>> already be present in the running application (either made available
>>>>>>>>>> by the
>>>>>>>>>> application having statically linked them, or having the linker load
>>>>>>>>>> them from
>>>>>>>>>> the proper libraries at run time).
>>>>>>>>> My 5c is that I'd much rather see the common case (all static or all
>>>>>>>>> shared)
>>>>>>>>> be simple and reliable, which in case of DSOs includes no lying
>>>>>>>>> (whether by
>>>>>>>>> omission or otherwise) about DT_NEEDED, ever. That way the issue is
>>>>>>>>> dealt
>>>>>>>>> once where it belongs. If somebody wants to go down the rabbit hole of
>>>>>>>>> mixed
>>>>>>>>> shared + static linkage, let them dig the hole by themselves :)
>>>>>>>>>
>>>>>>>> This is a fair point.  Can DT_NEEDED sections be stripped via tools like
>>>>>>>> objcopy
>>>>>>>> after the build is complete?  If so, end users can hack this corner case
>>>>>>>> to work
>>>>>>>> as needed.
>>>>>>> Patchelf (http://nixos.org/patchelf.html) appears to support that, but
>>>>>>> given that source is available it'd be easier to just modify the makefiles
>>>>>>> if that's really needed.
>>>>>>>
>>>>>> I think we agree on the issue.
>>>>>>
>>>>>> So I'll be sending a patch to add DT_NEEDED entries to all libraries and
>>>>>> PMDs. The only exception would be librte_eal, which would not have proper
>>>>>> NEEDED entries.
>>>>>> Do we bother adding a linker script for librte_eal that would include
>>>>>> dependent libraries?
>>>>>>
>>>>> I say yes to the linker script, but will happily bow to an alternate consensus
>>>>> Neil
>>>>>
>>>> So the case we want to solve is the following circular dependencies:
>>>> eal             -> mempool, malloc
>>>> mempool -> eal , malloc, ring
>>>> malloc      -> eal
>>>> ring           -> eal, malloc
>>>>
>>>> We cannot write/create the proposed (below) linker script at least until we
>>>> have built mempool and malloc.
>>>> INPUT ( -lrte_eal.so -lrte_mempool -lrte_malloc )
>>>>
>>> Not sure I understand why you have a build time dependency on this.  Link time
>>> perhaps, but not build time.  Or am I reading too much into your use of the term
>>> 'built' above?
>> I meant 'built' as compiled + linked. Am I misusing the term?
> No, you're not (though I misused the term link time above, I meant to say load
> time).  So you're saying that when you build shared libraries, you get linker
> errors indicating that, during the build, you're missing symbols, is that
> correct?  I guess I'm confused because I don't see how thats not happening for
> everyone, right now.  In other words, I'm not sure what about your changes is
> giving rise to that problem.
>
>>>> Few ways I have thought about implementing this (not particularly fond of
>>>> any of them) :
>>>>   - Have the linker script file in the repo (scripts/ ?) in a fixed location
>>>> and just copy it to $(RTE_OUTPUT)/lib/ once all libs have finished building.
>>>>   - Generate the file on build time from a defined make variable once all
>>>> libs have finished
>>>>
>>> I'm still not sure I understand.  Why does this dependency exist at build time?
>>> The dependency between malloc and eal shouldn't be a problem during the build,
>>> as symbols from each other should just remain undefined, and get resolved at
>>> load time.
>> Is that not the way it is currently implemented?
>> I get the impression that we are talking about different goals (correct me
>> if it is not the case)
>>
> We may well be, I'm not sure yet.
>
>> I thought that the agreed solution was to:
>> 1) NOT to create/generate a 'core' library
>> 2) Add DT_NEEDED entries for all libraries (except eal which is the first
>> library we link)
>> 3) Use linker script for eal
>>
> Ok, we're definitely on the same page, as that's what I thought the goal was as
> well.
>
>> Given the previously mentioned circular dependencies between eal, mempool,
>> malloc and ring:
>> - eal would not be linked against other libraries (no NEEDED entries)
>> - malloc is linked against eal (previously built), so malloc would have a
>> NEEDED entry for eal.
>>
>> In that scenario, if the linker script is setup/created after we build eal,
>> then when we try to link malloc
>> against eal, the linker will pull mempool and malloc too (because we
>> included them in the linker script).
>> Therefore, the link fails as none of those libraries (malloc and mempool)
>> have been built yet.
>>
> Ah, I see now, I wasn't thinking about the extra requirements that DT_NEEDED
> entries placed on the build conditions.
>
> I see now, apologies for being dense previously.  Given what you indicate I
> would say that the solution here is to link the libraries against individual
> other specific libraries, not the core library that you generate as a linker
> script.  That way you avoid the circular dependency, and the core library just
> becomes a convenience for application developers looking to link to a single
> library.
>
I'm not sure I quite understand your suggestion.

Could you roughly describe steps for building eal, malloc and mempool libs ?
For example, something like this?
1) build eal, which creates librte_eal.so.1
2) write linker script for librte_eal.so
3) build malloc against eal (-lrte_eal )
etc

I suppose that another way to go about this, instead of creating the 
linker script that pulls
dependent libraries, is to always link (using --no-as-needed in case gcc 
adds it by default)
against these libraries (eal, mempool, malloc, and ring) with necessary 
doc about how to build apps.

Sergio
> Neil
>
>> Was your suggestion to leave all of these libraries (eal, mempool, malloc,
>> ring) without NEEDED entries?
>>
> No, you can add NEEDED entries there, they will just be for the individual
> libraries, not the core linker script library.
>
> Best
> Neil
>
>> Regards,
>> Sergio
>>> What is the error you are getting?
>>>
>>> Best
>>> Neil
>>>
>>>> Thoughts? Any other approach is more than welcome!
>>>>
>>>> Sergio
>>>>
>>>> PS: Thinking again on the core library and the issue of having multiple
>>>> version.map files, we could have a core_version.map instead of
>>>> multiple files per core library (eal, mempool, etc)
>>>>
>>>>
>>
>>

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH 0/8] Improve build process
  2015-02-23 10:25  0%                         ` Gonzalez Monroy, Sergio
@ 2015-02-23 13:52  0%                           ` Neil Horman
  2015-02-23 14:58  0%                             ` Gonzalez Monroy, Sergio
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-02-23 13:52 UTC (permalink / raw)
  To: Gonzalez Monroy, Sergio; +Cc: dev

On Mon, Feb 23, 2015 at 10:25:01AM +0000, Gonzalez Monroy, Sergio wrote:
> On 22/02/2015 23:37, Neil Horman wrote:
> >On Fri, Feb 20, 2015 at 02:31:36PM +0000, Gonzalez Monroy, Sergio wrote:
> >>On 13/02/2015 12:51, Neil Horman wrote:
> >>>On Fri, Feb 13, 2015 at 11:08:02AM +0000, Gonzalez Monroy, Sergio wrote:
> >>>>On 13/02/2015 10:14, Panu Matilainen wrote:
> >>>>>On 02/12/2015 05:52 PM, Neil Horman wrote:
> >>>>>>On Thu, Feb 12, 2015 at 04:07:50PM +0200, Panu Matilainen wrote:
> >>>>>>>On 02/12/2015 02:23 PM, Neil Horman wrote:
> >>>>>[...snip...]
> >>>>>>>>>>>So I just realized that I was not taking into account a possible
> >>>>>>>>>>>scenario, where
> >>>>>>>>>>>we have an app built with static dpdk libs then loading a dso
> >>>>>>>>>>>with -d
> >>>>>>>>>>>option.
> >>>>>>>>>>>
> >>>>>>>>>>>In such case, because the pmd would have DT_NEEDED entries,
> >>>>>>>>>>>dlopen will
> >>>>>>>>>>>fail.
> >>>>>>>>>>>So to enable such scenario we would need to build PMDs without
> >>>>>>>>>>>DT_NEEDED
> >>>>>>>>>>>entries.
> >>>>>>>>>>Hmm, for that to be a problem you'd need to have the PMD built
> >>>>>>>>>>against
> >>>>>>>>>>shared dpdk libs and while the application is built against
> >>>>>>>>>>static dpdk
> >>>>>>>>>>libs. I dont think that's a supportable scenario in any case.
> >>>>>>>>>>
> >>>>>>>>>>Or is there some other scenario that I'm not seeing?
> >>>>>>>>>>
> >>>>>>>>>>    - Panu -
> >>>>>>>>>>
> >>>>>>>>>I agree with you. I suppose it comes down to, do we want to
> >>>>>>>>>support such
> >>>>>>>>>scenario?
> >>>>>>>>>
> >>>>>>>>> From what I can see, it seems that we do currently support such
> >>>>>>>>>scenario by
> >>>>>>>>>building dpdk apps against all static dpdk libs using
> >>>>>>>>>--whole-archive (all
> >>>>>>>>>libs and not only PMDs).
> >>>>>>>>>http://dpdk.org/browse/dpdk/commit/?id=20afd76a504155e947c770783ef5023e87136ad8
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>>Am I misunderstanding this?
> >>>>>>>>>
> >>>>>>>>Shoot, you're right, I missed the static build aspect to this.  Yes,
> >>>>>>>>if we do the following:
> >>>>>>>>
> >>>>>>>>1) Build the DPDK as a static library
> >>>>>>>>2) Link an application against (1)
> >>>>>>>>3) Use the dlopen mechanism to load a PMD built as a DSO
> >>>>>>>>
> >>>>>>>>Then the DT_NEEDED entries in the DSO will go unsatisfied, because
> >>>>>>>>the shared
> >>>>>>>>objects on which it (the PMD) depends will not exist in the file
> >>>>>>>>system.
> >>>>>>>I think its even more twisty:
> >>>>>>>
> >>>>>>>1) Build the DPDK as a static library
> >>>>>>>2) Link an application against (1)
> >>>>>>>3) Do another build of DPDK as a shared library
> >>>>>>>4) In app 2), use the dlopen mechanism to load a PMD built as a part
> >>>>>>>of or
> >>>>>>>against 3)
> >>>>>>>
> >>>>>>>Somehow I doubt this would work very well.
> >>>>>>>
> >>>>>>Ideally it should, presuming the ABI is preserved between (1) and (3),
> >>>>>>though I
> >>>>>>agree, up until recently, that was an assumption that was unreliable.
> >>>>>Versioning is a big and important step towards reliability but there are
> >>>>>more issues to solve. This of course getting pretty far from the original
> >>>>>topic, but at least one such issue is that there are some cases where a
> >>>>>config value affects what are apparently public structs (rte_mbuf wrt
> >>>>>RTE_MBUF_REFCNT for example), which really is a no-go.
> >>>>>
> >>>>Agree, the RTE_MBUF_REFCNT is something that needs to be dealt with asap.
> >>>>I'll look into it.
> >>>>
> >>>>>>>>I think the problem is a little bit orthogonal to the libdpdk_core
> >>>>>>>>problem you
> >>>>>>>>were initially addressing.  That is to say, this problem of
> >>>>>>>>dlopen-ed PMD's
> >>>>>>>>exists regardless of whether you build the DPDK as part of a static
> >>>>>>>>or dynamic
> >>>>>>>>library.  The problems just happen to intersect in their
> >>>>>>>>manipulation of the
> >>>>>>>>DT_NEEDED entries.
> >>>>>>>>
> >>>>>>>>Ok, so, given the above, I would say your approach is likely
> >>>>>>>>correct, just
> >>>>>>>>prevent DT_NEEDED entries from getting added to PMD's. Doing so will
> >>>>>>>>sidestep
> >>>>>>>>loading issue for libraries that may not exist in the filesystem,
> >>>>>>>>but thats ok,
> >>>>>>>>because by all rights, the symbols codified in those needed
> >>>>>>>>libraries should
> >>>>>>>>already be present in the running application (either made available
> >>>>>>>>by the
> >>>>>>>>application having statically linked them, or having the linker load
> >>>>>>>>them from
> >>>>>>>>the proper libraries at run time).
> >>>>>>>My 5c is that I'd much rather see the common case (all static or all
> >>>>>>>shared)
> >>>>>>>be simple and reliable, which in case of DSOs includes no lying
> >>>>>>>(whether by
> >>>>>>>omission or otherwise) about DT_NEEDED, ever. That way the issue is
> >>>>>>>dealt
> >>>>>>>once where it belongs. If somebody wants to go down the rabbit hole of
> >>>>>>>mixed
> >>>>>>>shared + static linkage, let them dig the hole by themselves :)
> >>>>>>>
> >>>>>>This is a fair point.  Can DT_NEEDED sections be stripped via tools like
> >>>>>>objcopy
> >>>>>>after the build is complete?  If so, end users can hack this corner case
> >>>>>>to work
> >>>>>>as needed.
> >>>>>Patchelf (http://nixos.org/patchelf.html) appears to support that, but
> >>>>>given that source is available it'd be easier to just modify the makefiles
> >>>>>if that's really needed.
> >>>>>
> >>>>I think we agree on the issue.
> >>>>
> >>>>So I'll be sending a patch to add DT_NEEDED entries to all libraries and
> >>>>PMDs. The only exception would be librte_eal, which would not have proper
> >>>>NEEDED entries.
> >>>>Do we bother adding a linker script for librte_eal that would include
> >>>>dependent libraries?
> >>>>
> >>>I say yes to the linker script, but will happily bow to an alternate consensus
> >>>Neil
> >>>
> >>So the case we want to solve is the following circular dependencies:
> >>eal             -> mempool, malloc
> >>mempool -> eal , malloc, ring
> >>malloc      -> eal
> >>ring           -> eal, malloc
> >>
> >>We cannot write/create the proposed (below) linker script at least until we
> >>have built mempool and malloc.
> >>INPUT ( -lrte_eal.so -lrte_mempool -lrte_malloc )
> >>
> >Not sure I understand why you have a build time dependency on this.  Link time
> >perhaps, but not build time.  Or am I reading too much into your use of the term
> >'built' above?
> I meant 'built' as compiled + linked. Am I misusing the term?
No, you're not (though I misused the term link time above, I meant to say load
time).  So you're saying that when you build shared libraries, you get linker
errors indicating that, during the build, you're missing symbols, is that
correct?  I guess I'm confused because I don't see how thats not happening for
everyone, right now.  In other words, I'm not sure what about your changes is
giving rise to that problem.

> >>Few ways I have thought about implementing this (not particularly fond of
> >>any of them) :
> >>  - Have the linker script file in the repo (scripts/ ?) in a fixed location
> >>and just copy it to $(RTE_OUTPUT)/lib/ once all libs have finished building.
> >>  - Generate the file on build time from a defined make variable once all
> >>libs have finished
> >>
> >I'm still not sure I understand.  Why does this dependency exist at build time?
> >The dependency between malloc and eal shouldn't be a problem during the build,
> >as symbols from each other should just remain undefined, and get resolved at
> >load time.
> Is that not the way it is currently implemented?
> I get the impression that we are talking about different goals (correct me
> if it is not the case)
> 
We may well be, I'm not sure yet.

> I thought that the agreed solution was to:
> 1) NOT to create/generate a 'core' library
> 2) Add DT_NEEDED entries for all libraries (except eal which is the first
> library we link)
> 3) Use linker script for eal
> 
Ok, we're definitely on the same page, as that's what I thought the goal was as
well.

> Given the previously mentioned circular dependencies between eal, mempool,
> malloc and ring:
> - eal would not be linked against other libraries (no NEEDED entries)
> - malloc is linked against eal (previously built), so malloc would have a
> NEEDED entry for eal.
> 
> In that scenario, if the linker script is setup/created after we build eal,
> then when we try to link malloc
> against eal, the linker will pull mempool and malloc too (because we
> included them in the linker script).
> Therefore, the link fails as none of those libraries (malloc and mempool)
> have been built yet.
> 
Ah, I see now, I wasn't thinking about the extra requirements that DT_NEEDED
entries placed on the build conditions.

I see now, apologies for being dense previously.  Given what you indicate I
would say that the solution here is to link the libraries against individual
other specific libraries, not the core library that you generate as a linker
script.  That way you avoid the circular dependency, and the core library just
becomes a convenience for application developers looking to link to a single
library.

Neil

> Was your suggestion to leave all of these libraries (eal, mempool, malloc,
> ring) without NEEDED entries?
> 
No, you can add NEEDED entries there, they will just be for the individual
libraries, not the core linker script library.

Best
Neil

> Regards,
> Sergio
> >What is the error you are getting?
> >
> >Best
> >Neil
> >
> >>Thoughts? Any other approach is more than welcome!
> >>
> >>Sergio
> >>
> >>PS: Thinking again on the core library and the issue of having multiple
> >>version.map files, we could have a core_version.map instead of
> >>multiple files per core library (eal, mempool, etc)
> >>
> >>
> 
> 

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH 0/8] Improve build process
  2015-02-22 23:37  0%                       ` Neil Horman
@ 2015-02-23 10:25  0%                         ` Gonzalez Monroy, Sergio
  2015-02-23 13:52  0%                           ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Gonzalez Monroy, Sergio @ 2015-02-23 10:25 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On 22/02/2015 23:37, Neil Horman wrote:
> On Fri, Feb 20, 2015 at 02:31:36PM +0000, Gonzalez Monroy, Sergio wrote:
>> On 13/02/2015 12:51, Neil Horman wrote:
>>> On Fri, Feb 13, 2015 at 11:08:02AM +0000, Gonzalez Monroy, Sergio wrote:
>>>> On 13/02/2015 10:14, Panu Matilainen wrote:
>>>>> On 02/12/2015 05:52 PM, Neil Horman wrote:
>>>>>> On Thu, Feb 12, 2015 at 04:07:50PM +0200, Panu Matilainen wrote:
>>>>>>> On 02/12/2015 02:23 PM, Neil Horman wrote:
>>>>> [...snip...]
>>>>>>>>>>> So I just realized that I was not taking into account a possible
>>>>>>>>>>> scenario, where
>>>>>>>>>>> we have an app built with static dpdk libs then loading a dso
>>>>>>>>>>> with -d
>>>>>>>>>>> option.
>>>>>>>>>>>
>>>>>>>>>>> In such case, because the pmd would have DT_NEEDED entries,
>>>>>>>>>>> dlopen will
>>>>>>>>>>> fail.
>>>>>>>>>>> So to enable such scenario we would need to build PMDs without
>>>>>>>>>>> DT_NEEDED
>>>>>>>>>>> entries.
>>>>>>>>>> Hmm, for that to be a problem you'd need to have the PMD built
>>>>>>>>>> against
>>>>>>>>>> shared dpdk libs and while the application is built against
>>>>>>>>>> static dpdk
>>>>>>>>>> libs. I dont think that's a supportable scenario in any case.
>>>>>>>>>>
>>>>>>>>>> Or is there some other scenario that I'm not seeing?
>>>>>>>>>>
>>>>>>>>>>     - Panu -
>>>>>>>>>>
>>>>>>>>> I agree with you. I suppose it comes down to, do we want to
>>>>>>>>> support such
>>>>>>>>> scenario?
>>>>>>>>>
>>>>>>>>>  From what I can see, it seems that we do currently support such
>>>>>>>>> scenario by
>>>>>>>>> building dpdk apps against all static dpdk libs using
>>>>>>>>> --whole-archive (all
>>>>>>>>> libs and not only PMDs).
>>>>>>>>> http://dpdk.org/browse/dpdk/commit/?id=20afd76a504155e947c770783ef5023e87136ad8
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Am I misunderstanding this?
>>>>>>>>>
>>>>>>>> Shoot, you're right, I missed the static build aspect to this.  Yes,
>>>>>>>> if we do the following:
>>>>>>>>
>>>>>>>> 1) Build the DPDK as a static library
>>>>>>>> 2) Link an application against (1)
>>>>>>>> 3) Use the dlopen mechanism to load a PMD built as a DSO
>>>>>>>>
>>>>>>>> Then the DT_NEEDED entries in the DSO will go unsatisfied, because
>>>>>>>> the shared
>>>>>>>> objects on which it (the PMD) depends will not exist in the file
>>>>>>>> system.
>>>>>>> I think its even more twisty:
>>>>>>>
>>>>>>> 1) Build the DPDK as a static library
>>>>>>> 2) Link an application against (1)
>>>>>>> 3) Do another build of DPDK as a shared library
>>>>>>> 4) In app 2), use the dlopen mechanism to load a PMD built as a part
>>>>>>> of or
>>>>>>> against 3)
>>>>>>>
>>>>>>> Somehow I doubt this would work very well.
>>>>>>>
>>>>>> Ideally it should, presuming the ABI is preserved between (1) and (3),
>>>>>> though I
>>>>>> agree, up until recently, that was an assumption that was unreliable.
>>>>> Versioning is a big and important step towards reliability but there are
>>>>> more issues to solve. This of course getting pretty far from the original
>>>>> topic, but at least one such issue is that there are some cases where a
>>>>> config value affects what are apparently public structs (rte_mbuf wrt
>>>>> RTE_MBUF_REFCNT for example), which really is a no-go.
>>>>>
>>>> Agree, the RTE_MBUF_REFCNT is something that needs to be dealt with asap.
>>>> I'll look into it.
>>>>
>>>>>>>> I think the problem is a little bit orthogonal to the libdpdk_core
>>>>>>>> problem you
>>>>>>>> were initially addressing.  That is to say, this problem of
>>>>>>>> dlopen-ed PMD's
>>>>>>>> exists regardless of whether you build the DPDK as part of a static
>>>>>>>> or dynamic
>>>>>>>> library.  The problems just happen to intersect in their
>>>>>>>> manipulation of the
>>>>>>>> DT_NEEDED entries.
>>>>>>>>
>>>>>>>> Ok, so, given the above, I would say your approach is likely
>>>>>>>> correct, just
>>>>>>>> prevent DT_NEEDED entries from getting added to PMD's. Doing so will
>>>>>>>> sidestep
>>>>>>>> loading issue for libraries that may not exist in the filesystem,
>>>>>>>> but thats ok,
>>>>>>>> because by all rights, the symbols codified in those needed
>>>>>>>> libraries should
>>>>>>>> already be present in the running application (either made available
>>>>>>>> by the
>>>>>>>> application having statically linked them, or having the linker load
>>>>>>>> them from
>>>>>>>> the proper libraries at run time).
>>>>>>> My 5c is that I'd much rather see the common case (all static or all
>>>>>>> shared)
>>>>>>> be simple and reliable, which in case of DSOs includes no lying
>>>>>>> (whether by
>>>>>>> omission or otherwise) about DT_NEEDED, ever. That way the issue is
>>>>>>> dealt
>>>>>>> once where it belongs. If somebody wants to go down the rabbit hole of
>>>>>>> mixed
>>>>>>> shared + static linkage, let them dig the hole by themselves :)
>>>>>>>
>>>>>> This is a fair point.  Can DT_NEEDED sections be stripped via tools like
>>>>>> objcopy
>>>>>> after the build is complete?  If so, end users can hack this corner case
>>>>>> to work
>>>>>> as needed.
>>>>> Patchelf (http://nixos.org/patchelf.html) appears to support that, but
>>>>> given that source is available it'd be easier to just modify the makefiles
>>>>> if that's really needed.
>>>>>
>>>> I think we agree on the issue.
>>>>
>>>> So I'll be sending a patch to add DT_NEEDED entries to all libraries and
>>>> PMDs. The only exception would be librte_eal, which would not have proper
>>>> NEEDED entries.
>>>> Do we bother adding a linker script for librte_eal that would include
>>>> dependent libraries?
>>>>
>>> I say yes to the linker script, but will happily bow to an alternate consensus
>>> Neil
>>>
>> So the case we want to solve is the following circular dependencies:
>> eal             -> mempool, malloc
>> mempool -> eal , malloc, ring
>> malloc      -> eal
>> ring           -> eal, malloc
>>
>> We cannot write/create the proposed (below) linker script at least until we
>> have built mempool and malloc.
>> INPUT ( -lrte_eal.so -lrte_mempool -lrte_malloc )
>>
> Not sure I understand why you have a build time dependency on this.  Link time
> perhaps, but not build time.  Or am I reading too much into your use of the term
> 'built' above?
I meant 'built' as compiled + linked. Am I misusing the term?
>> Few ways I have thought about implementing this (not particularly fond of
>> any of them) :
>>   - Have the linker script file in the repo (scripts/ ?) in a fixed location
>> and just copy it to $(RTE_OUTPUT)/lib/ once all libs have finished building.
>>   - Generate the file on build time from a defined make variable once all
>> libs have finished
>>
> I'm still not sure I understand.  Why does this dependency exist at build time?
> The dependency between malloc and eal shouldn't be a problem during the build,
> as symbols from each other should just remain undefined, and get resolved at
> load time.
Is that not the way it is currently implemented?
I get the impression that we are talking about different goals (correct 
me if it is not the case)

I thought that the agreed solution was to:
1) NOT to create/generate a 'core' library
2) Add DT_NEEDED entries for all libraries (except eal which is the 
first library we link)
3) Use linker script for eal

Given the previously mentioned circular dependencies between eal, 
mempool, malloc and ring:
- eal would not be linked against other libraries (no NEEDED entries)
- malloc is linked against eal (previously built), so malloc would have 
a NEEDED entry for eal.

In that scenario, if the linker script is setup/created after we build 
eal, then when we try to link malloc
against eal, the linker will pull mempool and malloc too (because we 
included them in the linker script).
Therefore, the link fails as none of those libraries (malloc and 
mempool) have been built yet.

Was your suggestion to leave all of these libraries (eal, mempool, 
malloc, ring) without NEEDED entries?

Regards,
Sergio
> What is the error you are getting?
>
> Best
> Neil
>
>> Thoughts? Any other approach is more than welcome!
>>
>> Sergio
>>
>> PS: Thinking again on the core library and the issue of having multiple
>> version.map files, we could have a core_version.map instead of
>> multiple files per core library (eal, mempool, etc)
>>
>>

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH 0/8] Improve build process
  2015-02-20 14:31  0%                     ` Gonzalez Monroy, Sergio
@ 2015-02-22 23:37  0%                       ` Neil Horman
  2015-02-23 10:25  0%                         ` Gonzalez Monroy, Sergio
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-02-22 23:37 UTC (permalink / raw)
  To: Gonzalez Monroy, Sergio; +Cc: dev

On Fri, Feb 20, 2015 at 02:31:36PM +0000, Gonzalez Monroy, Sergio wrote:
> On 13/02/2015 12:51, Neil Horman wrote:
> >On Fri, Feb 13, 2015 at 11:08:02AM +0000, Gonzalez Monroy, Sergio wrote:
> >>On 13/02/2015 10:14, Panu Matilainen wrote:
> >>>On 02/12/2015 05:52 PM, Neil Horman wrote:
> >>>>On Thu, Feb 12, 2015 at 04:07:50PM +0200, Panu Matilainen wrote:
> >>>>>On 02/12/2015 02:23 PM, Neil Horman wrote:
> >>>[...snip...]
> >>>>>>>>>So I just realized that I was not taking into account a possible
> >>>>>>>>>scenario, where
> >>>>>>>>>we have an app built with static dpdk libs then loading a dso
> >>>>>>>>>with -d
> >>>>>>>>>option.
> >>>>>>>>>
> >>>>>>>>>In such case, because the pmd would have DT_NEEDED entries,
> >>>>>>>>>dlopen will
> >>>>>>>>>fail.
> >>>>>>>>>So to enable such scenario we would need to build PMDs without
> >>>>>>>>>DT_NEEDED
> >>>>>>>>>entries.
> >>>>>>>>Hmm, for that to be a problem you'd need to have the PMD built
> >>>>>>>>against
> >>>>>>>>shared dpdk libs and while the application is built against
> >>>>>>>>static dpdk
> >>>>>>>>libs. I dont think that's a supportable scenario in any case.
> >>>>>>>>
> >>>>>>>>Or is there some other scenario that I'm not seeing?
> >>>>>>>>
> >>>>>>>>    - Panu -
> >>>>>>>>
> >>>>>>>I agree with you. I suppose it comes down to, do we want to
> >>>>>>>support such
> >>>>>>>scenario?
> >>>>>>>
> >>>>>>> From what I can see, it seems that we do currently support such
> >>>>>>>scenario by
> >>>>>>>building dpdk apps against all static dpdk libs using
> >>>>>>>--whole-archive (all
> >>>>>>>libs and not only PMDs).
> >>>>>>>http://dpdk.org/browse/dpdk/commit/?id=20afd76a504155e947c770783ef5023e87136ad8
> >>>>>>>
> >>>>>>>
> >>>>>>>Am I misunderstanding this?
> >>>>>>>
> >>>>>>Shoot, you're right, I missed the static build aspect to this.  Yes,
> >>>>>>if we do the following:
> >>>>>>
> >>>>>>1) Build the DPDK as a static library
> >>>>>>2) Link an application against (1)
> >>>>>>3) Use the dlopen mechanism to load a PMD built as a DSO
> >>>>>>
> >>>>>>Then the DT_NEEDED entries in the DSO will go unsatisfied, because
> >>>>>>the shared
> >>>>>>objects on which it (the PMD) depends will not exist in the file
> >>>>>>system.
> >>>>>I think its even more twisty:
> >>>>>
> >>>>>1) Build the DPDK as a static library
> >>>>>2) Link an application against (1)
> >>>>>3) Do another build of DPDK as a shared library
> >>>>>4) In app 2), use the dlopen mechanism to load a PMD built as a part
> >>>>>of or
> >>>>>against 3)
> >>>>>
> >>>>>Somehow I doubt this would work very well.
> >>>>>
> >>>>Ideally it should, presuming the ABI is preserved between (1) and (3),
> >>>>though I
> >>>>agree, up until recently, that was an assumption that was unreliable.
> >>>Versioning is a big and important step towards reliability but there are
> >>>more issues to solve. This of course getting pretty far from the original
> >>>topic, but at least one such issue is that there are some cases where a
> >>>config value affects what are apparently public structs (rte_mbuf wrt
> >>>RTE_MBUF_REFCNT for example), which really is a no-go.
> >>>
> >>Agree, the RTE_MBUF_REFCNT is something that needs to be dealt with asap.
> >>I'll look into it.
> >>
> >>>>>>I think the problem is a little bit orthogonal to the libdpdk_core
> >>>>>>problem you
> >>>>>>were initially addressing.  That is to say, this problem of
> >>>>>>dlopen-ed PMD's
> >>>>>>exists regardless of whether you build the DPDK as part of a static
> >>>>>>or dynamic
> >>>>>>library.  The problems just happen to intersect in their
> >>>>>>manipulation of the
> >>>>>>DT_NEEDED entries.
> >>>>>>
> >>>>>>Ok, so, given the above, I would say your approach is likely
> >>>>>>correct, just
> >>>>>>prevent DT_NEEDED entries from getting added to PMD's. Doing so will
> >>>>>>sidestep
> >>>>>>loading issue for libraries that may not exist in the filesystem,
> >>>>>>but thats ok,
> >>>>>>because by all rights, the symbols codified in those needed
> >>>>>>libraries should
> >>>>>>already be present in the running application (either made available
> >>>>>>by the
> >>>>>>application having statically linked them, or having the linker load
> >>>>>>them from
> >>>>>>the proper libraries at run time).
> >>>>>My 5c is that I'd much rather see the common case (all static or all
> >>>>>shared)
> >>>>>be simple and reliable, which in case of DSOs includes no lying
> >>>>>(whether by
> >>>>>omission or otherwise) about DT_NEEDED, ever. That way the issue is
> >>>>>dealt
> >>>>>once where it belongs. If somebody wants to go down the rabbit hole of
> >>>>>mixed
> >>>>>shared + static linkage, let them dig the hole by themselves :)
> >>>>>
> >>>>This is a fair point.  Can DT_NEEDED sections be stripped via tools like
> >>>>objcopy
> >>>>after the build is complete?  If so, end users can hack this corner case
> >>>>to work
> >>>>as needed.
> >>>Patchelf (http://nixos.org/patchelf.html) appears to support that, but
> >>>given that source is available it'd be easier to just modify the makefiles
> >>>if that's really needed.
> >>>
> >>I think we agree on the issue.
> >>
> >>So I'll be sending a patch to add DT_NEEDED entries to all libraries and
> >>PMDs. The only exception would be librte_eal, which would not have proper
> >>NEEDED entries.
> >>Do we bother adding a linker script for librte_eal that would include
> >>dependent libraries?
> >>
> >I say yes to the linker script, but will happily bow to an alternate consensus
> >Neil
> >
> So the case we want to solve is the following circular dependencies:
> eal             -> mempool, malloc
> mempool -> eal , malloc, ring
> malloc      -> eal
> ring           -> eal, malloc
> 
> We cannot write/create the proposed (below) linker script at least until we
> have built mempool and malloc.
> INPUT ( -lrte_eal.so -lrte_mempool -lrte_malloc )
> 
Not sure I understand why you have a build time dependency on this.  Link time
perhaps, but not build time.  Or am I reading too much into your use of the term
'built' above?

> Few ways I have thought about implementing this (not particularly fond of
> any of them) :
>  - Have the linker script file in the repo (scripts/ ?) in a fixed location
> and just copy it to $(RTE_OUTPUT)/lib/ once all libs have finished building.
>  - Generate the file on build time from a defined make variable once all
> libs have finished
> 
I'm still not sure I understand.  Why does this dependency exist at build time?
The dependency between malloc and eal shouldn't be a problem during the build,
as symbols from each other should just remain undefined, and get resolved at
load time.

What is the error you are getting?

Best
Neil

> Thoughts? Any other approach is more than welcome!
> 
> Sergio
> 
> PS: Thinking again on the core library and the issue of having multiple
> version.map files, we could have a core_version.map instead of
> multiple files per core library (eal, mempool, etc)
> 
> 

^ permalink raw reply	[relevance 0%]

* [dpdk-dev] [PATCH v5 0/3] DPDK ethdev callback support
  2015-02-18 17:42  4% ` [dpdk-dev] [PATCH v3 " John McNamara
  2015-02-19 17:56  4%   ` [dpdk-dev] [PATCH v4 " John McNamara
@ 2015-02-20 17:03  4%   ` John McNamara
  2015-02-23 18:30  4%   ` [dpdk-dev] [PATCH v6 " John McNamara
  2 siblings, 0 replies; 200+ results
From: John McNamara @ 2015-02-20 17:03 UTC (permalink / raw)
  To: dev

This patchset is for a small optional addition to the ethdev library,
to add support for callbacks at the RX and TX stages. This allows
packet processing to be done on packets before they get returned
to applications using rte_eth_rx_burst call.

See the RFC cover letter for the use cases:

    http://dpdk.org/ml/archives/dev/2014-December/010491.html

For this version we spent some time investigating Stephen Hemminger's
suggestion of using the userspace RCU (read-copy-update) library for
SMP safety:

   http://urcu.so/

The default liburcu (which defaulted to liburcu-mb) requires the least
interaction from the end user but showed a 25% drop in packet throughput
in the callback sample app.

The liburcu-qsbr (quiescent state) variant showed a 1% drop in packet
throughput in the callback sample app. However it requires registered
RCU threads in the program to periodically announce quiescent states.
This makes it more difficult to implement for end user applications.

For this release we will document that adding and removing callbacks
is not thread safe.

Note: Sample application documentation to follow in a patch update.

Version 5 changes:
    * Turn the callback feature on by default.
    * Simplify #define name.

Version 4 changes:
    * Make the callback feature a compile time option.

Version 3 changes:
    * Removed unnecessary header file from example folder
      (which included baremetal reference).
    * Renamed the interrupt, RX and TX callbacks to make their function
      clearer (using the names suggested in the mailing list comments).
    * Squashed ABI version update into the commit it relates to.
    * Fixed various checkpatch warnings.

Version 2 changes:
    * Added ABI versioning.
    * Doxygen clarifications.

Version 1 changes:
    * Added callback removal functions.
    * Minor fixes.


Richardson, Bruce (3):
  ethdev: rename callbacks field to link_intr_cbs
  ethdev: add optional rxtx callback support
  examples: example showing use of callbacks.

 MAINTAINERS                            |    4 +
 app/test/virtual_pmd.c                 |    2 +-
 config/common_bsdapp                   |    1 +
 config/common_linuxapp                 |    1 +
 examples/Makefile                      |    1 +
 examples/rxtx_callbacks/Makefile       |   57 ++++++++
 examples/rxtx_callbacks/main.c         |  228 ++++++++++++++++++++++++++++++++
 lib/librte_ether/rte_ethdev.c          |  204 +++++++++++++++++++++++++++--
 lib/librte_ether/rte_ethdev.h          |  204 ++++++++++++++++++++++++++++-
 lib/librte_ether/rte_ether_version.map |    4 +
 lib/librte_pmd_bond/rte_eth_bond_api.c |    2 +-
 lib/librte_pmd_ring/rte_eth_ring.c     |    2 +-
 12 files changed, 696 insertions(+), 14 deletions(-)
 create mode 100644 examples/rxtx_callbacks/Makefile
 create mode 100644 examples/rxtx_callbacks/main.c

-- 
1.7.4.1

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [PATCH 0/8] Improve build process
  @ 2015-02-20 14:31  0%                     ` Gonzalez Monroy, Sergio
  2015-02-22 23:37  0%                       ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Gonzalez Monroy, Sergio @ 2015-02-20 14:31 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On 13/02/2015 12:51, Neil Horman wrote:
> On Fri, Feb 13, 2015 at 11:08:02AM +0000, Gonzalez Monroy, Sergio wrote:
>> On 13/02/2015 10:14, Panu Matilainen wrote:
>>> On 02/12/2015 05:52 PM, Neil Horman wrote:
>>>> On Thu, Feb 12, 2015 at 04:07:50PM +0200, Panu Matilainen wrote:
>>>>> On 02/12/2015 02:23 PM, Neil Horman wrote:
>>> [...snip...]
>>>>>>>>> So I just realized that I was not taking into account a possible
>>>>>>>>> scenario, where
>>>>>>>>> we have an app built with static dpdk libs then loading a dso
>>>>>>>>> with -d
>>>>>>>>> option.
>>>>>>>>>
>>>>>>>>> In such case, because the pmd would have DT_NEEDED entries,
>>>>>>>>> dlopen will
>>>>>>>>> fail.
>>>>>>>>> So to enable such scenario we would need to build PMDs without
>>>>>>>>> DT_NEEDED
>>>>>>>>> entries.
>>>>>>>> Hmm, for that to be a problem you'd need to have the PMD built
>>>>>>>> against
>>>>>>>> shared dpdk libs and while the application is built against
>>>>>>>> static dpdk
>>>>>>>> libs. I dont think that's a supportable scenario in any case.
>>>>>>>>
>>>>>>>> Or is there some other scenario that I'm not seeing?
>>>>>>>>
>>>>>>>>     - Panu -
>>>>>>>>
>>>>>>> I agree with you. I suppose it comes down to, do we want to
>>>>>>> support such
>>>>>>> scenario?
>>>>>>>
>>>>>>>  From what I can see, it seems that we do currently support such
>>>>>>> scenario by
>>>>>>> building dpdk apps against all static dpdk libs using
>>>>>>> --whole-archive (all
>>>>>>> libs and not only PMDs).
>>>>>>> http://dpdk.org/browse/dpdk/commit/?id=20afd76a504155e947c770783ef5023e87136ad8
>>>>>>>
>>>>>>>
>>>>>>> Am I misunderstanding this?
>>>>>>>
>>>>>> Shoot, you're right, I missed the static build aspect to this.  Yes,
>>>>>> if we do the following:
>>>>>>
>>>>>> 1) Build the DPDK as a static library
>>>>>> 2) Link an application against (1)
>>>>>> 3) Use the dlopen mechanism to load a PMD built as a DSO
>>>>>>
>>>>>> Then the DT_NEEDED entries in the DSO will go unsatisfied, because
>>>>>> the shared
>>>>>> objects on which it (the PMD) depends will not exist in the file
>>>>>> system.
>>>>> I think its even more twisty:
>>>>>
>>>>> 1) Build the DPDK as a static library
>>>>> 2) Link an application against (1)
>>>>> 3) Do another build of DPDK as a shared library
>>>>> 4) In app 2), use the dlopen mechanism to load a PMD built as a part
>>>>> of or
>>>>> against 3)
>>>>>
>>>>> Somehow I doubt this would work very well.
>>>>>
>>>> Ideally it should, presuming the ABI is preserved between (1) and (3),
>>>> though I
>>>> agree, up until recently, that was an assumption that was unreliable.
>>> Versioning is a big and important step towards reliability but there are
>>> more issues to solve. This of course getting pretty far from the original
>>> topic, but at least one such issue is that there are some cases where a
>>> config value affects what are apparently public structs (rte_mbuf wrt
>>> RTE_MBUF_REFCNT for example), which really is a no-go.
>>>
>> Agree, the RTE_MBUF_REFCNT is something that needs to be dealt with asap.
>> I'll look into it.
>>
>>>>>> I think the problem is a little bit orthogonal to the libdpdk_core
>>>>>> problem you
>>>>>> were initially addressing.  That is to say, this problem of
>>>>>> dlopen-ed PMD's
>>>>>> exists regardless of whether you build the DPDK as part of a static
>>>>>> or dynamic
>>>>>> library.  The problems just happen to intersect in their
>>>>>> manipulation of the
>>>>>> DT_NEEDED entries.
>>>>>>
>>>>>> Ok, so, given the above, I would say your approach is likely
>>>>>> correct, just
>>>>>> prevent DT_NEEDED entries from getting added to PMD's. Doing so will
>>>>>> sidestep
>>>>>> loading issue for libraries that may not exist in the filesystem,
>>>>>> but thats ok,
>>>>>> because by all rights, the symbols codified in those needed
>>>>>> libraries should
>>>>>> already be present in the running application (either made available
>>>>>> by the
>>>>>> application having statically linked them, or having the linker load
>>>>>> them from
>>>>>> the proper libraries at run time).
>>>>> My 5c is that I'd much rather see the common case (all static or all
>>>>> shared)
>>>>> be simple and reliable, which in case of DSOs includes no lying
>>>>> (whether by
>>>>> omission or otherwise) about DT_NEEDED, ever. That way the issue is
>>>>> dealt
>>>>> once where it belongs. If somebody wants to go down the rabbit hole of
>>>>> mixed
>>>>> shared + static linkage, let them dig the hole by themselves :)
>>>>>
>>>> This is a fair point.  Can DT_NEEDED sections be stripped via tools like
>>>> objcopy
>>>> after the build is complete?  If so, end users can hack this corner case
>>>> to work
>>>> as needed.
>>> Patchelf (http://nixos.org/patchelf.html) appears to support that, but
>>> given that source is available it'd be easier to just modify the makefiles
>>> if that's really needed.
>>>
>> I think we agree on the issue.
>>
>> So I'll be sending a patch to add DT_NEEDED entries to all libraries and
>> PMDs. The only exception would be librte_eal, which would not have proper
>> NEEDED entries.
>> Do we bother adding a linker script for librte_eal that would include
>> dependent libraries?
>>
> I say yes to the linker script, but will happily bow to an alternate consensus
> Neil
>
So the case we want to solve is the following circular dependencies:
eal             -> mempool, malloc
mempool -> eal , malloc, ring
malloc      -> eal
ring           -> eal, malloc

We cannot write/create the proposed (below) linker script at least until 
we have built mempool and malloc.
INPUT ( -lrte_eal.so -lrte_mempool -lrte_malloc )

Few ways I have thought about implementing this (not particularly fond 
of any of them) :
  - Have the linker script file in the repo (scripts/ ?) in a fixed 
location and just copy it to $(RTE_OUTPUT)/lib/ once all libs have 
finished building.
  - Generate the file on build time from a defined make variable once 
all libs have finished

Thoughts? Any other approach is more than welcome!

Sergio

PS: Thinking again on the core library and the issue of having multiple 
version.map files, we could have a core_version.map instead of 
multiple files per core library (eal, mempool, etc)

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v4 0/5] Interrupt mode PMD
  2015-02-19 13:48  3% [dpdk-dev] [PATCH v4 0/5] Interrupt mode PMD Zhou Danny
  2015-02-19 13:48  3% ` [dpdk-dev] [PATCH v4 1/5] ethdev: add rx interrupt enable/disable functions Zhou Danny
@ 2015-02-20  8:50  0% ` Gonzalez Monroy, Sergio
  1 sibling, 0 replies; 200+ results
From: Gonzalez Monroy, Sergio @ 2015-02-20  8:50 UTC (permalink / raw)
  To: Zhou Danny, dev

On 19/02/2015 13:48, Zhou Danny wrote:
> v4 changes
> - Export interrupt enable/disable functions for shared libraries
> - Adjust position of new-added structure fields and functions to
> avoid breaking ABI
>
> v3 changes
> - Add return value for interrupt enable/disable functions
> - Move spinlock from PMD to L3fwd-power
> - Remove unnecessary variables in e1000_mac_info
> - Fix miscellaneous review comments
>   
> v2 changes
> - Fix compilation issue in Makefile for missed header file.
> - Consolidate internal and community review comments of v1 patch set.
>   
> The patch series introduce low-latency one-shot rx interrupt into DPDK with
> polling and interrupt mode switch control example.
>   
> DPDK userspace interrupt notification and handling mechanism is based on UIO
> with below limitation:
> 1) It is designed to handle LSC interrupt only with inefficient suspended
> pthread wakeup procedure (e.g. UIO wakes up LSC interrupt handling thread
> which then wakes up DPDK polling thread). In this way, it introduces
> non-deterministic wakeup latency for DPDK polling thread as well as packet
> latency if it is used to handle Rx interrupt.
> 2) UIO only supports a single interrupt vector which has to be shared by
> LSC interrupt and interrupts assigned to dedicated rx queues.
>   
> This patchset includes below features:
> 1) Enable one-shot rx queue interrupt in ixgbe PMD(PF & VF) and igb PMD(PF only).
> 2) Build on top of the VFIO mechanism instead of UIO, so it could support
> up to 64 interrupt vectors for rx queue interrupts.
> 3) Have 1 DPDK polling thread handle per Rx queue interrupt with a dedicated
> VFIO eventfd, which eliminates non-deterministic pthread wakeup latency in
> user space.
> 4) Demonstrate interrupts control APIs and userspace NAPI-like polling/interrupt
> switch algorithms in L3fwd-power example.
>   
> Known limitations:
> 1) It does not work for UIO due to a single interrupt eventfd shared by LSC
> and rx queue interrupt handlers causes a mess.
> 2) LSC interrupt is not supported by VF driver, so it is by default disabled
> in L3fwd-power now. Feel free to turn it on if you want to support both LSC
> and rx queue interrupts on a PF.
>
> Danny Zhou (5):
>    ethdev: add rx interrupt enable/disable functions
>    ixgbe: enable rx queue interrupts for both PF and VF
>    igb: enable rx queue interrupts for PF
>    eal: add per rx queue interrupt handling based on VFIO
>    l3fwd-power: enable one-shot rx interrupt and polling/interrupt mode
>        switch
>
>   examples/l3fwd-power/main.c                        | 153 ++++++---
>   lib/librte_eal/common/include/rte_eal.h            |  12 +
>   lib/librte_eal/linuxapp/eal/Makefile               |   1 +
>   lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 190 ++++++++---
>   lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         |  12 +-
>   .../linuxapp/eal/include/exec-env/rte_interrupts.h |   4 +
>   lib/librte_ether/rte_ethdev.c                      |  43 +++
>   lib/librte_ether/rte_ethdev.h                      |  59 ++++
>   lib/librte_ether/rte_ether_version.map             |   2 +
>   lib/librte_pmd_e1000/e1000_ethdev.h                |   3 +
>   lib/librte_pmd_e1000/igb_ethdev.c                  | 228 +++++++++++--
>   lib/librte_pmd_ixgbe/ixgbe_ethdev.c                | 365 ++++++++++++++++++++-
>   lib/librte_pmd_ixgbe/ixgbe_ethdev.h                |   6 +
>   13 files changed, 964 insertions(+), 114 deletions(-)
>
Series
Acked-by: Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>

^ permalink raw reply	[relevance 0%]

* [dpdk-dev] [PATCH v4 0/3] DPDK ethdev callback support
  2015-02-18 17:42  4% ` [dpdk-dev] [PATCH v3 " John McNamara
@ 2015-02-19 17:56  4%   ` John McNamara
  2015-02-20 17:03  4%   ` [dpdk-dev] [PATCH v5 " John McNamara
  2015-02-23 18:30  4%   ` [dpdk-dev] [PATCH v6 " John McNamara
  2 siblings, 0 replies; 200+ results
From: John McNamara @ 2015-02-19 17:56 UTC (permalink / raw)
  To: dev

This patchset is for a small optional addition to the ethdev library,
to add support for callbacks at the RX and TX stages. This allows
packet processing to be done on packets before they get returned
to applications using rte_eth_rx_burst call.

See the RFC cover letter for the use cases:

    http://dpdk.org/ml/archives/dev/2014-December/010491.html

For this version we spent some time investigating Stephen Hemminger's
suggestion of using the userspace RCU (read-copy-update) library for
SMP safety:

   http://urcu.so/

The default liburcu (which defaulted to liburcu-mb) requires the least
interaction from the end user but showed a 25% drop in packet throughput
in the callback sample app.

The liburcu-qsbr (quiescent state) variant showed a 1% drop in packet
throughput in the callback sample app. However it requires registered
RCU threads in the program to periodically announce quiescent states.
This makes it more difficult to implement for end user applications.

For this release we will document that adding and removing callbacks
is not thread safe.

Note: Sample application documentation to follow in a patch update.


Version 4 changes:
    * Make the callback feature a compile time option.

Version 3 changes:
    * Removed unnecessary header file from example folder
      (which included baremetal reference).
    * Renamed the interrupt, RX and TX callbacks to make their function
      clearer (using the names suggested in the mailing list comments).
    * Squashed ABI version update into the commit it relates to.
    * Fixed various checkpatch warnings.

Version 2 changes:
    * Added ABI versioning.
    * Doxygen clarifications.

Version 1 changes:
    * Added callback removal functions.
    * Minor fixes.


Richardson, Bruce (3):
  ethdev: rename callbacks field to link_intr_cbs
  ethdev: add optional rxtx callback support
  examples: example showing use of callbacks.

 MAINTAINERS                            |    4 +
 app/test/virtual_pmd.c                 |    2 +-
 config/common_bsdapp                   |    1 +
 config/common_linuxapp                 |    1 +
 examples/Makefile                      |    1 +
 examples/rxtx_callbacks/Makefile       |   57 ++++++++
 examples/rxtx_callbacks/main.c         |  228 ++++++++++++++++++++++++++++++++
 lib/librte_ether/rte_ethdev.c          |  204 +++++++++++++++++++++++++++--
 lib/librte_ether/rte_ethdev.h          |  204 ++++++++++++++++++++++++++++-
 lib/librte_ether/rte_ether_version.map |    4 +
 lib/librte_pmd_bond/rte_eth_bond_api.c |    2 +-
 lib/librte_pmd_ring/rte_eth_ring.c     |    2 +-
 12 files changed, 696 insertions(+), 14 deletions(-)
 create mode 100644 examples/rxtx_callbacks/Makefile
 create mode 100644 examples/rxtx_callbacks/main.c

-- 
1.7.4.1

^ permalink raw reply	[relevance 4%]

* [dpdk-dev] [PATCH v4 1/5] ethdev: add rx interrupt enable/disable functions
  2015-02-19 13:48  3% [dpdk-dev] [PATCH v4 0/5] Interrupt mode PMD Zhou Danny
@ 2015-02-19 13:48  3% ` Zhou Danny
  2015-02-20  8:50  0% ` [dpdk-dev] [PATCH v4 0/5] Interrupt mode PMD Gonzalez Monroy, Sergio
  1 sibling, 0 replies; 200+ results
From: Zhou Danny @ 2015-02-19 13:48 UTC (permalink / raw)
  To: dev

v4 changes
- Export interrupt enable/disable functions for shared libraries
- Put new functions at the end of eth_dev_ops to avoid breaking ABI

v3 changes
- Add return value for interrupt enable/disable functions

Add two dev_ops functions to enable and disable rx queue interrupts

Signed-off-by: Danny Zhou <danny.zhou@intel.com>
Tested-by: Yong Liu <yong.liu@intel.com>
---
 lib/librte_ether/rte_ethdev.c          | 43 +++++++++++++++++++++++++
 lib/librte_ether/rte_ethdev.h          | 59 ++++++++++++++++++++++++++++++++++
 lib/librte_ether/rte_ether_version.map |  2 ++
 3 files changed, 104 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index ea3a1fb..d27469a 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -2825,6 +2825,49 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 	}
 	rte_spinlock_unlock(&rte_eth_dev_cb_lock);
 }
+
+int
+rte_eth_dev_rx_queue_intr_enable(uint8_t port_id,
+				uint16_t queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	if (port_id >= nb_ports) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return (-ENODEV);
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (dev == NULL) {
+		PMD_DEBUG_TRACE("Invalid port device\n");
+		return (-ENODEV);
+	}
+
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP);
+	return (*dev->dev_ops->rx_queue_intr_enable)(dev, queue_id);
+}
+
+int
+rte_eth_dev_rx_queue_intr_disable(uint8_t port_id,
+				uint16_t queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	if (port_id >= nb_ports) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return (-ENODEV);
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (dev == NULL) {
+		PMD_DEBUG_TRACE("Invalid port device\n");
+		return (-ENODEV);
+	}
+
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP);
+	return (*dev->dev_ops->rx_queue_intr_disable)(dev, queue_id);
+}
+
 #ifdef RTE_NIC_BYPASS
 int rte_eth_dev_bypass_init(uint8_t port_id)
 {
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 84160c3..43035c2 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -848,6 +848,8 @@ struct rte_eth_fdir {
 struct rte_intr_conf {
 	/** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */
 	uint16_t lsc;
+	/** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */
+	uint16_t rxq;
 };
 
 /**
@@ -1109,6 +1111,14 @@ typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
 				    const struct rte_eth_txconf *tx_conf);
 /**< @internal Setup a transmit queue of an Ethernet device. */
 
+typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
+				    uint16_t rx_queue_id);
+/**< @internal Enable interrupt of a receive queue of an Ethernet device. */
+
+typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
+				    uint16_t rx_queue_id);
+/**< @internal Disable interrupt of a receive queue of an Ethernet device. */
+
 typedef void (*eth_queue_release_t)(void *queue);
 /**< @internal Release memory resources allocated by given RX/TX queue. */
 
@@ -1520,6 +1530,10 @@ struct eth_dev_ops {
 	eth_remove_flex_filter_t       remove_flex_filter;   /**< remove flex filter. */
 	eth_get_flex_filter_t          get_flex_filter;      /**< get flex filter. */
 	eth_filter_ctrl_t              filter_ctrl;          /**< common filter control*/
+
+	/** Enable/disable Rx queue interrupt. */
+	eth_rx_enable_intr_t       rx_queue_intr_enable; /**< Enable Rx queue interrupt. */
+	eth_rx_disable_intr_t      rx_queue_intr_disable; /**< Disable Rx queue interrupt.*/
 };
 
 /**
@@ -2811,6 +2825,51 @@ void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 				enum rte_eth_event_type event);
 
 /**
+ * When there is no rx packet coming in Rx Queue for a long time, we can
+ * sleep lcore related to RX Queue for power saving, and enable rx interrupt
+ * to be triggered when rx packect arrives.
+ *
+ * The rte_eth_dev_rx_queue_intr_enable() function enables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_queue_intr_enable(uint8_t port_id,
+				uint16_t queue_id);
+
+/**
+ * When lcore wakes up from rx interrupt indicating packet coming, disable rx
+ * interrupt and returns to polling mode.
+ *
+ * The rte_eth_dev_rx_queue_intr_disable() function disables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_queue_intr_disable(uint8_t port_id,
+				uint16_t queue_id);
+
+/**
  * Turn on the LED on the Ethernet device.
  * This function turns on the LED on the Ethernet device.
  *
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index 7316530..1e7af6e 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -57,6 +57,8 @@ DPDK_2.0 {
 	rte_eth_dev_rss_hash_update;
 	rte_eth_dev_rss_reta_query;
 	rte_eth_dev_rss_reta_update;
+	rte_eth_dev_rx_queue_intr_disable;
+	rte_eth_dev_rx_queue_intr_enable;
 	rte_eth_dev_rx_queue_start;
 	rte_eth_dev_rx_queue_stop;
 	rte_eth_dev_set_link_down;
-- 
1.8.1.4

^ permalink raw reply	[relevance 3%]

* [dpdk-dev] [PATCH v4 0/5] Interrupt mode PMD
@ 2015-02-19 13:48  3% Zhou Danny
  2015-02-19 13:48  3% ` [dpdk-dev] [PATCH v4 1/5] ethdev: add rx interrupt enable/disable functions Zhou Danny
  2015-02-20  8:50  0% ` [dpdk-dev] [PATCH v4 0/5] Interrupt mode PMD Gonzalez Monroy, Sergio
  0 siblings, 2 replies; 200+ results
From: Zhou Danny @ 2015-02-19 13:48 UTC (permalink / raw)
  To: dev

v4 changes
- Export interrupt enable/disable functions for shared libraries
- Adjust position of newly added structure fields and functions to
avoid breaking ABI

v3 changes
- Add return value for interrupt enable/disable functions
- Move spinlock from PMD to L3fwd-power
- Remove unnecessary variables in e1000_mac_info
- Fix miscellaneous review comments
 
v2 changes
- Fix compilation issue in Makefile for missed header file.
- Consolidate internal and community review comments of v1 patch set.
 
This patch series introduces low-latency one-shot rx interrupts into DPDK, along
with an example of polling/interrupt mode switch control.
 
The DPDK userspace interrupt notification and handling mechanism is based on UIO
with the following limitations:
1) It is designed to handle the LSC interrupt only, with an inefficient suspended
pthread wakeup procedure (e.g. UIO wakes up the LSC interrupt handling thread,
which then wakes up the DPDK polling thread). In this way, it introduces
non-deterministic wakeup latency for the DPDK polling thread as well as packet
latency if it is used to handle Rx interrupts.
2) UIO only supports a single interrupt vector, which has to be shared by the
LSC interrupt and the interrupts assigned to dedicated rx queues.
 
This patchset includes the following features:
1) Enable one-shot rx queue interrupt in ixgbe PMD (PF & VF) and igb PMD (PF only).
2) Build on top of the VFIO mechanism instead of UIO, so it could support
up to 64 interrupt vectors for rx queue interrupts.
3) Have one DPDK polling thread handle each Rx queue interrupt with a dedicated
VFIO eventfd, which eliminates non-deterministic pthread wakeup latency in
user space.
4) Demonstrate interrupt control APIs and userspace NAPI-like polling/interrupt
switch algorithms in the L3fwd-power example.
 
Known limitations:
1) It does not work for UIO, because a single interrupt eventfd shared by the LSC
and rx queue interrupt handlers causes a mess.
2) LSC interrupt is not supported by the VF driver, so it is disabled by default
in L3fwd-power now. Feel free to turn it on if you want to support both LSC
and rx queue interrupts on a PF.

Danny Zhou (5):
  ethdev: add rx interrupt enable/disable functions
  ixgbe: enable rx queue interrupts for both PF and VF
  igb: enable rx queue interrupts for PF
  eal: add per rx queue interrupt handling based on VFIO
  l3fwd-power: enable one-shot rx interrupt and polling/interrupt mode  
      switch

 examples/l3fwd-power/main.c                        | 153 ++++++---
 lib/librte_eal/common/include/rte_eal.h            |  12 +
 lib/librte_eal/linuxapp/eal/Makefile               |   1 +
 lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 190 ++++++++---
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         |  12 +-
 .../linuxapp/eal/include/exec-env/rte_interrupts.h |   4 +
 lib/librte_ether/rte_ethdev.c                      |  43 +++
 lib/librte_ether/rte_ethdev.h                      |  59 ++++
 lib/librte_ether/rte_ether_version.map             |   2 +
 lib/librte_pmd_e1000/e1000_ethdev.h                |   3 +
 lib/librte_pmd_e1000/igb_ethdev.c                  | 228 +++++++++++--
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c                | 365 ++++++++++++++++++++-
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h                |   6 +
 13 files changed, 964 insertions(+), 114 deletions(-)

-- 
1.8.1.4

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v3 4/5] eal: add per rx queue interrupt handling based on VFIO
  2015-02-19  8:10  3%     ` Zhou, Danny
@ 2015-02-19 13:04  3%       ` Neil Horman
  0 siblings, 0 replies; 200+ results
From: Neil Horman @ 2015-02-19 13:04 UTC (permalink / raw)
  To: Zhou, Danny; +Cc: dev

On Thu, Feb 19, 2015 at 08:10:47AM +0000, Zhou, Danny wrote:
> 
> 
> > -----Original Message-----
> > From: Neil Horman [mailto:nhorman@tuxdriver.com]
> > Sent: Tuesday, February 17, 2015 11:59 PM
> > To: Zhou, Danny
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v3 4/5] eal: add per rx queue interrupt handling based on VFIO
> > 
> > On Tue, Feb 17, 2015 at 09:47:18PM +0800, Zhou Danny wrote:
> > > v3 changes:
> > > - Fix review comments
> > >
> > > v2 changes:
> > > - Fix compilation issue for a missed header file
> > > - Bug fix: free unreleased resources on the exception path before return
> > > - Consolidate coding style related review comments
> > >
> > > This patch does below:
> > > - Create multiple VFIO eventfd for rx queues.
> > > - Handle per rx queue interrupt.
> > > - Eliminate unnecessary suspended DPDK polling thread wakeup mechanism
> > > for rx interrupt by allowing polling thread epoll_wait rx queue
> > > interrupt notification.
> > >
> > > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > > Tested-by: Yong Liu <yong.liu@intel.com>
> > > ---
> > >  lib/librte_eal/common/include/rte_eal.h            |  12 ++
> > >  lib/librte_eal/linuxapp/eal/Makefile               |   1 +
> > >  lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 190 ++++++++++++++++-----
> > >  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         |  12 +-
> > >  .../linuxapp/eal/include/exec-env/rte_interrupts.h |   4 +
> > >  5 files changed, 175 insertions(+), 44 deletions(-)
> > >
> > > diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
> > > index f4ecd2e..d81331f 100644
> > > --- a/lib/librte_eal/common/include/rte_eal.h
> > > +++ b/lib/librte_eal/common/include/rte_eal.h
> > > @@ -150,6 +150,18 @@ int rte_eal_iopl_init(void);
> > >   *   - On failure, a negative error value.
> > >   */
> > >  int rte_eal_init(int argc, char **argv);
> > > +
> > > +/**
> > > + * @param port_id
> > > + *   the port id
> > > + * @param queue_id
> > > + *   the queue id
> > > + * @return
> > > + *   - On success, return 0
> > > + *   - On failure, returns -1.
> > > + */
> > > +int rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id);
> > > +
> > >  /**
> > >   * Usage function typedef used by the application usage function.
> > >   *
> > > diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
> > > index e117cec..c593dfa 100644
> > > --- a/lib/librte_eal/linuxapp/eal/Makefile
> > > +++ b/lib/librte_eal/linuxapp/eal/Makefile
> > > @@ -43,6 +43,7 @@ CFLAGS += -I$(SRCDIR)/include
> > >  CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
> > >  CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
> > >  CFLAGS += -I$(RTE_SDK)/lib/librte_ring
> > > +CFLAGS += -I$(RTE_SDK)/lib/librte_mbuf
> > >  CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
> > >  CFLAGS += -I$(RTE_SDK)/lib/librte_malloc
> > >  CFLAGS += -I$(RTE_SDK)/lib/librte_ether
> > > diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
> > > index dc2668a..97215ad 100644
> > > --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
> > > +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
> > > @@ -64,6 +64,7 @@
> > >  #include <rte_malloc.h>
> > >  #include <rte_errno.h>
> > >  #include <rte_spinlock.h>
> > > +#include <rte_ethdev.h>
> > >
> > >  #include "eal_private.h"
> > >  #include "eal_vfio.h"
> > > @@ -127,6 +128,9 @@ static pthread_t intr_thread;
> > >  #ifdef VFIO_PRESENT
> > >
> > >  #define IRQ_SET_BUF_LEN  (sizeof(struct vfio_irq_set) + sizeof(int))
> > > +/* irq set buffer length for queue interrupts and LSC interrupt */
> > > +#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
> > > +				sizeof(int) * (VFIO_MAX_QUEUE_ID + 1))
> > >
> > >  /* enable legacy (INTx) interrupts */
> > >  static int
> > > @@ -218,10 +222,10 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) {
> > >  	return 0;
> > >  }
> > >
> > > -/* enable MSI-X interrupts */
> > > +/* enable MSI interrupts */
> > >  static int
> > >  vfio_enable_msi(struct rte_intr_handle *intr_handle) {
> > > -	int len, ret;
> > > +	int len, ret, max_intr;
> > >  	char irq_set_buf[IRQ_SET_BUF_LEN];
> > >  	struct vfio_irq_set *irq_set;
> > >  	int *fd_ptr;
> > > @@ -230,12 +234,19 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) {
> > >
> > >  	irq_set = (struct vfio_irq_set *) irq_set_buf;
> > >  	irq_set->argsz = len;
> > > -	irq_set->count = 1;
> > > +	if ((!intr_handle->max_intr) ||
> > > +		(intr_handle->max_intr > VFIO_MAX_QUEUE_ID))
> > > +		max_intr = VFIO_MAX_QUEUE_ID + 1;
> > > +	else
> > > +		max_intr = intr_handle->max_intr;
> > > +
> > > +	irq_set->count = max_intr;
> > >  	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
> > >  	irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
> > >  	irq_set->start = 0;
> > >  	fd_ptr = (int *) &irq_set->data;
> > > -	*fd_ptr = intr_handle->fd;
> > > +	memcpy(fd_ptr, intr_handle->queue_fd, sizeof(intr_handle->queue_fd));
> > > +	fd_ptr[max_intr - 1] = intr_handle->fd;
> > >
> > >  	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> > >
> > > @@ -244,27 +255,10 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) {
> > >  						intr_handle->fd);
> > >  		return -1;
> > >  	}
> > > -
> > > -	/* manually trigger interrupt to enable it */
> > > -	memset(irq_set, 0, len);
> > > -	len = sizeof(struct vfio_irq_set);
> > > -	irq_set->argsz = len;
> > > -	irq_set->count = 1;
> > > -	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
> > > -	irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
> > > -	irq_set->start = 0;
> > > -
> > > -	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> > > -
> > > -	if (ret) {
> > > -		RTE_LOG(ERR, EAL, "Error triggering MSI interrupts for fd %d\n",
> > > -						intr_handle->fd);
> > > -		return -1;
> > > -	}
> > >  	return 0;
> > >  }
> > >
> > > -/* disable MSI-X interrupts */
> > > +/* disable MSI interrupts */
> > >  static int
> > >  vfio_disable_msi(struct rte_intr_handle *intr_handle) {
> > >  	struct vfio_irq_set *irq_set;
> > > @@ -292,8 +286,8 @@ vfio_disable_msi(struct rte_intr_handle *intr_handle) {
> > >  /* enable MSI-X interrupts */
> > >  static int
> > >  vfio_enable_msix(struct rte_intr_handle *intr_handle) {
> > > -	int len, ret;
> > > -	char irq_set_buf[IRQ_SET_BUF_LEN];
> > > +	int len, ret, max_intr;
> > > +	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
> > >  	struct vfio_irq_set *irq_set;
> > >  	int *fd_ptr;
> > >
> > > @@ -301,12 +295,19 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
> > >
> > >  	irq_set = (struct vfio_irq_set *) irq_set_buf;
> > >  	irq_set->argsz = len;
> > > -	irq_set->count = 1;
> > > +	if ((!intr_handle->max_intr) ||
> > > +		(intr_handle->max_intr > VFIO_MAX_QUEUE_ID))
> > > +		max_intr = VFIO_MAX_QUEUE_ID + 1;
> > > +	else
> > > +		max_intr = intr_handle->max_intr;
> > > +
> > > +	irq_set->count = max_intr;
> > >  	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
> > >  	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
> > >  	irq_set->start = 0;
> > >  	fd_ptr = (int *) &irq_set->data;
> > > -	*fd_ptr = intr_handle->fd;
> > > +	memcpy(fd_ptr, intr_handle->queue_fd, sizeof(intr_handle->queue_fd));
> > > +	fd_ptr[max_intr - 1] = intr_handle->fd;
> > >
> > >  	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> > >
> > > @@ -316,22 +317,6 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
> > >  		return -1;
> > >  	}
> > >
> > > -	/* manually trigger interrupt to enable it */
> > > -	memset(irq_set, 0, len);
> > > -	len = sizeof(struct vfio_irq_set);
> > > -	irq_set->argsz = len;
> > > -	irq_set->count = 1;
> > > -	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
> > > -	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
> > > -	irq_set->start = 0;
> > > -
> > > -	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> > > -
> > > -	if (ret) {
> > > -		RTE_LOG(ERR, EAL, "Error triggering MSI-X interrupts for fd %d\n",
> > > -						intr_handle->fd);
> > > -		return -1;
> > > -	}
> > >  	return 0;
> > >  }
> > >
> > > @@ -339,7 +324,7 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
> > >  static int
> > >  vfio_disable_msix(struct rte_intr_handle *intr_handle) {
> > >  	struct vfio_irq_set *irq_set;
> > > -	char irq_set_buf[IRQ_SET_BUF_LEN];
> > > +	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
> > >  	int len, ret;
> > >
> > >  	len = sizeof(struct vfio_irq_set);
> > > @@ -824,3 +809,122 @@ rte_eal_intr_init(void)
> > >  	return -ret;
> > >  }
> > >
> > > +static void
> > > +eal_intr_process_rx_interrupts(uint8_t port_id,
> > > +			struct epoll_event *events, int nfds)
> > > +{
> > > +	int n, bytes_read;
> > > +	union rte_intr_read_buffer buf;
> > > +	struct rte_intr_handle intr_handle =
> > > +				rte_eth_devices[port_id].pci_dev->intr_handle;
> > > +
> > > +	for (n = 0; n < nfds; n++) {
> > > +		/* set the length to be read for different handle type */
> > > +		switch (intr_handle.type) {
> > > +		case RTE_INTR_HANDLE_UIO:
> > > +			bytes_read = sizeof(buf.uio_intr_count);
> > > +			break;
> > > +		case RTE_INTR_HANDLE_ALARM:
> > > +			bytes_read = sizeof(buf.timerfd_num);
> > > +			break;
> > > +#ifdef VFIO_PRESENT
> > > +		case RTE_INTR_HANDLE_VFIO_MSIX:
> > > +		case RTE_INTR_HANDLE_VFIO_MSI:
> > > +		case RTE_INTR_HANDLE_VFIO_LEGACY:
> > > +			bytes_read = sizeof(buf.vfio_intr_count);
> > > +			break;
> > > +#endif
> > > +		default:
> > > +			bytes_read = 1;
> > > +			break;
> > > +		}
> > > +
> > > +		/**
> > > +		* read out to clear the ready-to-be-read flag
> > > +		* for epoll_wait.
> > > +		*/
> > > +		bytes_read = read(events[n].data.fd, &buf, bytes_read);
> > > +		if (bytes_read < 0)
> > > +			RTE_LOG(ERR, EAL, "Error reading from file "
> > > +				"descriptor %d: %s\n", events[n].data.fd,
> > > +							strerror(errno));
> > > +		else if (bytes_read == 0)
> > > +			RTE_LOG(ERR, EAL, "Read nothing from file "
> > > +				"descriptor %d\n", events[n].data.fd);
> > > +	}
> > > +}
> > > +
> > > +static void
> > > +eal_intr_handle_rx_interrupts(uint8_t port_id, int pfd, unsigned totalfds)
> > > +{
> > > +	struct epoll_event events[totalfds];
> > > +	int nfds = 0;
> > > +
> > > +	do {
> > > +		nfds = epoll_wait(pfd, events, totalfds,
> > > +				EAL_INTR_EPOLL_WAIT_FOREVER);
> > > +		/* epoll_wait fail */
> > > +		if (nfds < 0) {
> > > +			RTE_LOG(ERR, EAL,
> > > +				"epoll_wait returns with fail\n");
> > > +			return;
> > > +		}
> > > +	} while (nfds == 0);
> > > +
> > > +	/* epoll_wait has at least one fd ready to read */
> > > +	eal_intr_process_rx_interrupts(port_id, events, nfds);
> > > +}
> > > +
> > > +int
> > > +rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id)
> > > +{
> > > +	struct rte_intr_handle intr_handle =
> > > +				rte_eth_devices[port_id].pci_dev->intr_handle;
> > > +	struct epoll_event ev;
> > > +	unsigned numfds = 0;
> > > +
> > > +	/* create epoll fd */
> > > +	int pfd = epoll_create(1);
> > > +	if (pfd < 0) {
> > > +		RTE_LOG(ERR, EAL, "Cannot create epoll instance\n");
> > > +		return -1;
> > > +	}
> > > +
> > > +	rte_spinlock_lock(&intr_lock);
> > > +
> > > +	ev.events = EPOLLIN | EPOLLPRI;
> > > +	switch (intr_handle.type) {
> > > +	case RTE_INTR_HANDLE_UIO:
> > > +		ev.data.fd = intr_handle.fd;
> > > +		break;
> > > +#ifdef VFIO_PRESENT
> > > +	case RTE_INTR_HANDLE_VFIO_MSIX:
> > > +	case RTE_INTR_HANDLE_VFIO_MSI:
> > > +	case RTE_INTR_HANDLE_VFIO_LEGACY:
> > > +		ev.data.fd = intr_handle.queue_fd[queue_id];
> > > +		break;
> > > +#endif
> > > +	default:
> > > +		rte_spinlock_unlock(&intr_lock);
> > > +		close(pfd);
> > > +		return -1;
> > > +	}
> > > +
> > > +	if (epoll_ctl(pfd, EPOLL_CTL_ADD, ev.data.fd, &ev) < 0) {
> > > +		RTE_LOG(ERR, EAL, "Error adding fd %d epoll_ctl, %s\n",
> > > +			intr_handle.queue_fd[queue_id], strerror(errno));
> > > +	} else
> > > +		numfds++;
> > > +
> > > +	rte_spinlock_unlock(&intr_lock);
> > > +	/* serve the interrupt */
> > > +	eal_intr_handle_rx_interrupts(port_id, pfd, numfds);
> > > +
> > > +	/**
> > > +	* when we return, we need to rebuild the
> > > +	* list of fds to monitor.
> > > +	*/
> > > +	close(pfd);
> > > +
> > > +	return 0;
> > > +}
> > > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > > index 20e0977..0e5fa76 100644
> > > --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > > @@ -283,11 +283,21 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
> > >
> > >  		dev->intr_handle.fd = fd;
> > >  		dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
> > > -
> > >  		switch (i) {
> > >  		case VFIO_PCI_MSIX_IRQ_INDEX:
> > >  			internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX;
> > >  			dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX;
> > > +			for (i = 0; i < VFIO_MAX_QUEUE_ID; i++) {
> > > +				fd = eventfd(0, 0);
> > > +				if (fd < 0) {
> > > +					RTE_LOG(ERR, EAL,
> > > +					"cannot setup eventfd,"
> > > +					"error %i (%s)\n",
> > > +					errno, strerror(errno));
> > > +					return -1;
> > > +				}
> > > +				dev->intr_handle.queue_fd[i] = fd;
> > > +			}
> > >  			break;
> > >  		case VFIO_PCI_MSI_IRQ_INDEX:
> > >  			internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI;
> > > diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > index 23eafd9..c6982cf 100644
> > > --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > > @@ -38,6 +38,8 @@
> > >  #ifndef _RTE_LINUXAPP_INTERRUPTS_H_
> > >  #define _RTE_LINUXAPP_INTERRUPTS_H_
> > >
> > > +#define VFIO_MAX_QUEUE_ID 32
> > > +
> > >  enum rte_intr_handle_type {
> > >  	RTE_INTR_HANDLE_UNKNOWN = 0,
> > >  	RTE_INTR_HANDLE_UIO,      /**< uio device handle */
> > > @@ -52,6 +54,8 @@ enum rte_intr_handle_type {
> > >  struct rte_intr_handle {
> > >  	int vfio_dev_fd;                 /**< VFIO device file descriptor */
> > >  	int fd;                          /**< file descriptor */
> > > +	int max_intr;                    /**< max interrupt requested */
> > > +	int queue_fd[VFIO_MAX_QUEUE_ID]; /**< rx and tx queue interrupt file descriptor */
> > This is used outside of this library, you need to move these new fields to the
> > end of the structure.
> > 
> > neil
> 
> Alright, I will move them to the end in V4 patch. 
> 
> Neil, do you have any simple writeup on guideline about how to add APIs and new fields to existing 
> structure in order to make sure new stuff does not break ABI? It might help all the developers to avoid
> making similar mistakes in the future.
> 
Not as such, but the ABI document in the release notes gives some examples.  We
can certainly start one if you like, though many of them are situationally
specific. 


> > 
> > >  	enum rte_intr_handle_type type;  /**< handle type */
> > >  };
> > >
> > > --
> > > 1.8.1.4
> > >
> > >
> 

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v3 1/5] ethdev: add rx interrupt enable/disable functions
  2015-02-19  7:58  3%     ` Zhou, Danny
@ 2015-02-19 13:02  3%       ` Neil Horman
  0 siblings, 0 replies; 200+ results
From: Neil Horman @ 2015-02-19 13:02 UTC (permalink / raw)
  To: Zhou, Danny; +Cc: dev

On Thu, Feb 19, 2015 at 07:58:38AM +0000, Zhou, Danny wrote:
> 
> 
> > -----Original Message-----
> > From: Neil Horman [mailto:nhorman@tuxdriver.com]
> > Sent: Tuesday, February 17, 2015 11:55 PM
> > To: Zhou, Danny
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v3 1/5] ethdev: add rx interrupt enable/disable functions
> > 
> > On Tue, Feb 17, 2015 at 09:47:15PM +0800, Zhou Danny wrote:
> > > v3 changes
> > > - Add return value for interrupt enable/disable functions
> > >
> > > Add two dev_ops functions to enable and disable rx queue interrupts
> > >
> > > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > > Tested-by: Yong Liu <yong.liu@intel.com>
> > > ---
> > >  lib/librte_ether/rte_ethdev.c | 43 ++++++++++++++++++++++++++++++++
> > >  lib/librte_ether/rte_ethdev.h | 57 +++++++++++++++++++++++++++++++++++++++++++
> > >  2 files changed, 100 insertions(+)
> > >
> > > diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> > > index ea3a1fb..d27469a 100644
> > > --- a/lib/librte_ether/rte_ethdev.c
> > > +++ b/lib/librte_ether/rte_ethdev.c
> > > @@ -2825,6 +2825,49 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
> > >  	}
> > >  	rte_spinlock_unlock(&rte_eth_dev_cb_lock);
> > >  }
> > > +
> > > +int
> > > +rte_eth_dev_rx_queue_intr_enable(uint8_t port_id,
> > > +				uint16_t queue_id)
> > > +{
> > > +	struct rte_eth_dev *dev;
> > > +
> > > +	if (port_id >= nb_ports) {
> > > +		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
> > > +		return (-ENODEV);
> > > +	}
> > > +
> > > +	dev = &rte_eth_devices[port_id];
> > > +	if (dev == NULL) {
> > > +		PMD_DEBUG_TRACE("Invalid port device\n");
> > > +		return (-ENODEV);
> > > +	}
> > > +
> > > +	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP);
> > > +	return (*dev->dev_ops->rx_queue_intr_enable)(dev, queue_id);
> > > +}
> > > +
> > > +int
> > > +rte_eth_dev_rx_queue_intr_disable(uint8_t port_id,
> > > +				uint16_t queue_id)
> > > +{
> > > +	struct rte_eth_dev *dev;
> > > +
> > > +	if (port_id >= nb_ports) {
> > > +		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
> > > +		return (-ENODEV);
> > > +	}
> > > +
> > > +	dev = &rte_eth_devices[port_id];
> > > +	if (dev == NULL) {
> > > +		PMD_DEBUG_TRACE("Invalid port device\n");
> > > +		return (-ENODEV);
> > > +	}
> > > +
> > > +	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP);
> > > +	return (*dev->dev_ops->rx_queue_intr_disable)(dev, queue_id);
> > > +}
> > > +
> > >  #ifdef RTE_NIC_BYPASS
> > >  int rte_eth_dev_bypass_init(uint8_t port_id)
> > >  {
> > > diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> > > index 84160c3..0f320a9 100644
> > > --- a/lib/librte_ether/rte_ethdev.h
> > > +++ b/lib/librte_ether/rte_ethdev.h
> > > @@ -848,6 +848,8 @@ struct rte_eth_fdir {
> > >  struct rte_intr_conf {
> > >  	/** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */
> > >  	uint16_t lsc;
> > > +	/** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */
> > > +	uint16_t rxq;
> > >  };
> > >
> > >  /**
> > > @@ -1109,6 +1111,14 @@ typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
> > >  				    const struct rte_eth_txconf *tx_conf);
> > >  /**< @internal Setup a transmit queue of an Ethernet device. */
> > >
> > > +typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
> > > +				    uint16_t rx_queue_id);
> > > +/**< @internal Enable interrupt of a receive queue of an Ethernet device. */
> > > +
> > > +typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
> > > +				    uint16_t rx_queue_id);
> > > +/**< @internal Disable interrupt of a receive queue of an Ethernet device. */
> > > +
> > >  typedef void (*eth_queue_release_t)(void *queue);
> > >  /**< @internal Release memory resources allocated by given RX/TX queue. */
> > >
> > > @@ -1445,6 +1455,8 @@ struct eth_dev_ops {
> > >  	eth_queue_start_t          tx_queue_start;/**< Start TX for a queue.*/
> > >  	eth_queue_stop_t           tx_queue_stop;/**< Stop TX for a queue.*/
> > >  	eth_rx_queue_setup_t       rx_queue_setup;/**< Set up device RX queue.*/
> > > +	eth_rx_enable_intr_t       rx_queue_intr_enable; /**< Enable Rx queue interrupt. */
> > > +	eth_rx_disable_intr_t      rx_queue_intr_disable; /**< Disable Rx queue interrupt.*/
> > Put these at the end of eth_dev_ops if you want to avoid breaking ABI
> 
> I purposely add those two APIs at current position to ensure all rxq related APIs are declared together
> in eth_dev_ops. Anyway, moving them to the end is ok to me for the reason of ABI, though the code looks
> a little bit ugly.
> 
Right, pretty isn't a reason to break ABI as noted in the release notes doc

Neil

> > 
> > >  	eth_queue_release_t        rx_queue_release;/**< Release RX queue.*/
> > >  	eth_rx_queue_count_t       rx_queue_count; /**< Get Rx queue count. */
> > >  	eth_rx_descriptor_done_t   rx_descriptor_done;  /**< Check rxd DD bit */
> > > @@ -2811,6 +2823,51 @@ void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
> > >  				enum rte_eth_event_type event);
> > >
> > >  /**
> > > + * When there is no rx packet coming in Rx Queue for a long time, we can
> > > + * sleep lcore related to RX Queue for power saving, and enable rx interrupt
> > > + * to be triggered when rx packect arrives.
> > > + *
> > > + * The rte_eth_dev_rx_queue_intr_enable() function enables rx queue
> > > + * interrupt on specific rx queue of a port.
> > > + *
> > > + * @param port_id
> > > + *   The port identifier of the Ethernet device.
> > > + * @param queue_id
> > > + *   The index of the receive queue from which to retrieve input packets.
> > > + *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
> > > + *   to rte_eth_dev_configure().
> > > + * @return
> > > + *   - (0) if successful.
> > > + *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
> > > + *     that operation.
> > > + *   - (-ENODEV) if *port_id* invalid.
> > > + */
> > > +int rte_eth_dev_rx_queue_intr_enable(uint8_t port_id,
> > > +				uint16_t queue_id);
> > > +
> > > +/**
> > > + * When lcore wakes up from rx interrupt indicating packet coming, disable rx
> > > + * interrupt and returns to polling mode.
> > > + *
> > > + * The rte_eth_dev_rx_queue_intr_disable() function disables rx queue
> > > + * interrupt on specific rx queue of a port.
> > > + *
> > > + * @param port_id
> > > + *   The port identifier of the Ethernet device.
> > > + * @param queue_id
> > > + *   The index of the receive queue from which to retrieve input packets.
> > > + *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
> > > + *   to rte_eth_dev_configure().
> > > + * @return
> > > + *   - (0) if successful.
> > > + *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
> > > + *     that operation.
> > > + *   - (-ENODEV) if *port_id* invalid.
> > > + */
> > > +int rte_eth_dev_rx_queue_intr_disable(uint8_t port_id,
> > > +				uint16_t queue_id);
> > > +
> > > +/**
> > >   * Turn on the LED on the Ethernet device.
> > >   * This function turns on the LED on the Ethernet device.
> > >   *
> > > --
> > > 1.8.1.4
> > >
> > >
> 

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v3 4/5] eal: add per rx queue interrupt handling based on VFIO
  @ 2015-02-19  8:10  3%     ` Zhou, Danny
  2015-02-19 13:04  3%       ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Zhou, Danny @ 2015-02-19  8:10 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev



> -----Original Message-----
> From: Neil Horman [mailto:nhorman@tuxdriver.com]
> Sent: Tuesday, February 17, 2015 11:59 PM
> To: Zhou, Danny
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 4/5] eal: add per rx queue interrupt handling based on VFIO
> 
> On Tue, Feb 17, 2015 at 09:47:18PM +0800, Zhou Danny wrote:
> > v3 changes:
> > - Fix review comments
> >
> > v2 changes:
> > - Fix compilation issue for a missed header file
> > - Bug fix: free unreleased resources on the exception path before return
> > - Consolidate coding style related review comments
> >
> > This patch does below:
> > - Create multiple VFIO eventfd for rx queues.
> > - Handle per rx queue interrupt.
> > - Eliminate unnecessary suspended DPDK polling thread wakeup mechanism
> > for rx interrupt by allowing polling thread epoll_wait rx queue
> > interrupt notification.
> >
> > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > Tested-by: Yong Liu <yong.liu@intel.com>
> > ---
> >  lib/librte_eal/common/include/rte_eal.h            |  12 ++
> >  lib/librte_eal/linuxapp/eal/Makefile               |   1 +
> >  lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 190 ++++++++++++++++-----
> >  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         |  12 +-
> >  .../linuxapp/eal/include/exec-env/rte_interrupts.h |   4 +
> >  5 files changed, 175 insertions(+), 44 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
> > index f4ecd2e..d81331f 100644
> > --- a/lib/librte_eal/common/include/rte_eal.h
> > +++ b/lib/librte_eal/common/include/rte_eal.h
> > @@ -150,6 +150,18 @@ int rte_eal_iopl_init(void);
> >   *   - On failure, a negative error value.
> >   */
> >  int rte_eal_init(int argc, char **argv);
> > +
> > +/**
> > + * @param port_id
> > + *   the port id
> > + * @param queue_id
> > + *   the queue id
> > + * @return
> > + *   - On success, return 0
> > + *   - On failure, returns -1.
> > + */
> > +int rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id);
> > +
> >  /**
> >   * Usage function typedef used by the application usage function.
> >   *
> > diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
> > index e117cec..c593dfa 100644
> > --- a/lib/librte_eal/linuxapp/eal/Makefile
> > +++ b/lib/librte_eal/linuxapp/eal/Makefile
> > @@ -43,6 +43,7 @@ CFLAGS += -I$(SRCDIR)/include
> >  CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
> >  CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
> >  CFLAGS += -I$(RTE_SDK)/lib/librte_ring
> > +CFLAGS += -I$(RTE_SDK)/lib/librte_mbuf
> >  CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
> >  CFLAGS += -I$(RTE_SDK)/lib/librte_malloc
> >  CFLAGS += -I$(RTE_SDK)/lib/librte_ether
> > diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
> > index dc2668a..97215ad 100644
> > --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
> > +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
> > @@ -64,6 +64,7 @@
> >  #include <rte_malloc.h>
> >  #include <rte_errno.h>
> >  #include <rte_spinlock.h>
> > +#include <rte_ethdev.h>
> >
> >  #include "eal_private.h"
> >  #include "eal_vfio.h"
> > @@ -127,6 +128,9 @@ static pthread_t intr_thread;
> >  #ifdef VFIO_PRESENT
> >
> >  #define IRQ_SET_BUF_LEN  (sizeof(struct vfio_irq_set) + sizeof(int))
> > +/* irq set buffer length for queue interrupts and LSC interrupt */
> > +#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
> > +				sizeof(int) * (VFIO_MAX_QUEUE_ID + 1))
> >
> >  /* enable legacy (INTx) interrupts */
> >  static int
> > @@ -218,10 +222,10 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) {
> >  	return 0;
> >  }
> >
> > -/* enable MSI-X interrupts */
> > +/* enable MSI interrupts */
> >  static int
> >  vfio_enable_msi(struct rte_intr_handle *intr_handle) {
> > -	int len, ret;
> > +	int len, ret, max_intr;
> >  	char irq_set_buf[IRQ_SET_BUF_LEN];
> >  	struct vfio_irq_set *irq_set;
> >  	int *fd_ptr;
> > @@ -230,12 +234,19 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) {
> >
> >  	irq_set = (struct vfio_irq_set *) irq_set_buf;
> >  	irq_set->argsz = len;
> > -	irq_set->count = 1;
> > +	if ((!intr_handle->max_intr) ||
> > +		(intr_handle->max_intr > VFIO_MAX_QUEUE_ID))
> > +		max_intr = VFIO_MAX_QUEUE_ID + 1;
> > +	else
> > +		max_intr = intr_handle->max_intr;
> > +
> > +	irq_set->count = max_intr;
> >  	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
> >  	irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
> >  	irq_set->start = 0;
> >  	fd_ptr = (int *) &irq_set->data;
> > -	*fd_ptr = intr_handle->fd;
> > +	memcpy(fd_ptr, intr_handle->queue_fd, sizeof(intr_handle->queue_fd));
> > +	fd_ptr[max_intr - 1] = intr_handle->fd;
> >
> >  	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> >
> > @@ -244,27 +255,10 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) {
> >  						intr_handle->fd);
> >  		return -1;
> >  	}
> > -
> > -	/* manually trigger interrupt to enable it */
> > -	memset(irq_set, 0, len);
> > -	len = sizeof(struct vfio_irq_set);
> > -	irq_set->argsz = len;
> > -	irq_set->count = 1;
> > -	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
> > -	irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
> > -	irq_set->start = 0;
> > -
> > -	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> > -
> > -	if (ret) {
> > -		RTE_LOG(ERR, EAL, "Error triggering MSI interrupts for fd %d\n",
> > -						intr_handle->fd);
> > -		return -1;
> > -	}
> >  	return 0;
> >  }
> >
> > -/* disable MSI-X interrupts */
> > +/* disable MSI interrupts */
> >  static int
> >  vfio_disable_msi(struct rte_intr_handle *intr_handle) {
> >  	struct vfio_irq_set *irq_set;
> > @@ -292,8 +286,8 @@ vfio_disable_msi(struct rte_intr_handle *intr_handle) {
> >  /* enable MSI-X interrupts */
> >  static int
> >  vfio_enable_msix(struct rte_intr_handle *intr_handle) {
> > -	int len, ret;
> > -	char irq_set_buf[IRQ_SET_BUF_LEN];
> > +	int len, ret, max_intr;
> > +	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
> >  	struct vfio_irq_set *irq_set;
> >  	int *fd_ptr;
> >
> > @@ -301,12 +295,19 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
> >
> >  	irq_set = (struct vfio_irq_set *) irq_set_buf;
> >  	irq_set->argsz = len;
> > -	irq_set->count = 1;
> > +	if ((!intr_handle->max_intr) ||
> > +		(intr_handle->max_intr > VFIO_MAX_QUEUE_ID))
> > +		max_intr = VFIO_MAX_QUEUE_ID + 1;
> > +	else
> > +		max_intr = intr_handle->max_intr;
> > +
> > +	irq_set->count = max_intr;
> >  	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
> >  	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
> >  	irq_set->start = 0;
> >  	fd_ptr = (int *) &irq_set->data;
> > -	*fd_ptr = intr_handle->fd;
> > +	memcpy(fd_ptr, intr_handle->queue_fd, sizeof(intr_handle->queue_fd));
> > +	fd_ptr[max_intr - 1] = intr_handle->fd;
> >
> >  	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> >
> > @@ -316,22 +317,6 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
> >  		return -1;
> >  	}
> >
> > -	/* manually trigger interrupt to enable it */
> > -	memset(irq_set, 0, len);
> > -	len = sizeof(struct vfio_irq_set);
> > -	irq_set->argsz = len;
> > -	irq_set->count = 1;
> > -	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
> > -	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
> > -	irq_set->start = 0;
> > -
> > -	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> > -
> > -	if (ret) {
> > -		RTE_LOG(ERR, EAL, "Error triggering MSI-X interrupts for fd %d\n",
> > -						intr_handle->fd);
> > -		return -1;
> > -	}
> >  	return 0;
> >  }
> >
> > @@ -339,7 +324,7 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
> >  static int
> >  vfio_disable_msix(struct rte_intr_handle *intr_handle) {
> >  	struct vfio_irq_set *irq_set;
> > -	char irq_set_buf[IRQ_SET_BUF_LEN];
> > +	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
> >  	int len, ret;
> >
> >  	len = sizeof(struct vfio_irq_set);
> > @@ -824,3 +809,122 @@ rte_eal_intr_init(void)
> >  	return -ret;
> >  }
> >
> > +static void
> > +eal_intr_process_rx_interrupts(uint8_t port_id,
> > +			struct epoll_event *events, int nfds)
> > +{
> > +	int n, bytes_read;
> > +	union rte_intr_read_buffer buf;
> > +	struct rte_intr_handle intr_handle =
> > +				rte_eth_devices[port_id].pci_dev->intr_handle;
> > +
> > +	for (n = 0; n < nfds; n++) {
> > +		/* set the length to be read for different handle type */
> > +		switch (intr_handle.type) {
> > +		case RTE_INTR_HANDLE_UIO:
> > +			bytes_read = sizeof(buf.uio_intr_count);
> > +			break;
> > +		case RTE_INTR_HANDLE_ALARM:
> > +			bytes_read = sizeof(buf.timerfd_num);
> > +			break;
> > +#ifdef VFIO_PRESENT
> > +		case RTE_INTR_HANDLE_VFIO_MSIX:
> > +		case RTE_INTR_HANDLE_VFIO_MSI:
> > +		case RTE_INTR_HANDLE_VFIO_LEGACY:
> > +			bytes_read = sizeof(buf.vfio_intr_count);
> > +			break;
> > +#endif
> > +		default:
> > +			bytes_read = 1;
> > +			break;
> > +		}
> > +
> > +		/**
> > +		* read out to clear the ready-to-be-read flag
> > +		* for epoll_wait.
> > +		*/
> > +		bytes_read = read(events[n].data.fd, &buf, bytes_read);
> > +		if (bytes_read < 0)
> > +			RTE_LOG(ERR, EAL, "Error reading from file "
> > +				"descriptor %d: %s\n", events[n].data.fd,
> > +							strerror(errno));
> > +		else if (bytes_read == 0)
> > +			RTE_LOG(ERR, EAL, "Read nothing from file "
> > +				"descriptor %d\n", events[n].data.fd);
> > +	}
> > +}
> > +
> > +static void
> > +eal_intr_handle_rx_interrupts(uint8_t port_id, int pfd, unsigned totalfds)
> > +{
> > +	struct epoll_event events[totalfds];
> > +	int nfds = 0;
> > +
> > +	do {
> > +		nfds = epoll_wait(pfd, events, totalfds,
> > +				EAL_INTR_EPOLL_WAIT_FOREVER);
> > +		/* epoll_wait fail */
> > +		if (nfds < 0) {
> > +			RTE_LOG(ERR, EAL,
> > +				"epoll_wait returns with fail\n");
> > +			return;
> > +		}
> > +	} while (nfds == 0);
> > +
> > +	/* epoll_wait has at least one fd ready to read */
> > +	eal_intr_process_rx_interrupts(port_id, events, nfds);
> > +}
> > +
> > +int
> > +rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id)
> > +{
> > +	struct rte_intr_handle intr_handle =
> > +				rte_eth_devices[port_id].pci_dev->intr_handle;
> > +	struct epoll_event ev;
> > +	unsigned numfds = 0;
> > +
> > +	/* create epoll fd */
> > +	int pfd = epoll_create(1);
> > +	if (pfd < 0) {
> > +		RTE_LOG(ERR, EAL, "Cannot create epoll instance\n");
> > +		return -1;
> > +	}
> > +
> > +	rte_spinlock_lock(&intr_lock);
> > +
> > +	ev.events = EPOLLIN | EPOLLPRI;
> > +	switch (intr_handle.type) {
> > +	case RTE_INTR_HANDLE_UIO:
> > +		ev.data.fd = intr_handle.fd;
> > +		break;
> > +#ifdef VFIO_PRESENT
> > +	case RTE_INTR_HANDLE_VFIO_MSIX:
> > +	case RTE_INTR_HANDLE_VFIO_MSI:
> > +	case RTE_INTR_HANDLE_VFIO_LEGACY:
> > +		ev.data.fd = intr_handle.queue_fd[queue_id];
> > +		break;
> > +#endif
> > +	default:
> > +		rte_spinlock_unlock(&intr_lock);
> > +		close(pfd);
> > +		return -1;
> > +	}
> > +
> > +	if (epoll_ctl(pfd, EPOLL_CTL_ADD, ev.data.fd, &ev) < 0) {
> > +		RTE_LOG(ERR, EAL, "Error adding fd %d epoll_ctl, %s\n",
> > +			intr_handle.queue_fd[queue_id], strerror(errno));
> > +	} else
> > +		numfds++;
> > +
> > +	rte_spinlock_unlock(&intr_lock);
> > +	/* serve the interrupt */
> > +	eal_intr_handle_rx_interrupts(port_id, pfd, numfds);
> > +
> > +	/**
> > +	* when we return, we need to rebuild the
> > +	* list of fds to monitor.
> > +	*/
> > +	close(pfd);
> > +
> > +	return 0;
> > +}
> > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > index 20e0977..0e5fa76 100644
> > --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > @@ -283,11 +283,21 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
> >
> >  		dev->intr_handle.fd = fd;
> >  		dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
> > -
> >  		switch (i) {
> >  		case VFIO_PCI_MSIX_IRQ_INDEX:
> >  			internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX;
> >  			dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX;
> > +			for (i = 0; i < VFIO_MAX_QUEUE_ID; i++) {
> > +				fd = eventfd(0, 0);
> > +				if (fd < 0) {
> > +					RTE_LOG(ERR, EAL,
> > +					"cannot setup eventfd,"
> > +					"error %i (%s)\n",
> > +					errno, strerror(errno));
> > +					return -1;
> > +				}
> > +				dev->intr_handle.queue_fd[i] = fd;
> > +			}
> >  			break;
> >  		case VFIO_PCI_MSI_IRQ_INDEX:
> >  			internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI;
> > diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > index 23eafd9..c6982cf 100644
> > --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > @@ -38,6 +38,8 @@
> >  #ifndef _RTE_LINUXAPP_INTERRUPTS_H_
> >  #define _RTE_LINUXAPP_INTERRUPTS_H_
> >
> > +#define VFIO_MAX_QUEUE_ID 32
> > +
> >  enum rte_intr_handle_type {
> >  	RTE_INTR_HANDLE_UNKNOWN = 0,
> >  	RTE_INTR_HANDLE_UIO,      /**< uio device handle */
> > @@ -52,6 +54,8 @@ enum rte_intr_handle_type {
> >  struct rte_intr_handle {
> >  	int vfio_dev_fd;                 /**< VFIO device file descriptor */
> >  	int fd;                          /**< file descriptor */
> > +	int max_intr;                    /**< max interrupt requested */
> > +	int queue_fd[VFIO_MAX_QUEUE_ID]; /**< rx and tx queue interrupt file descriptor */
> This is used outside of this library, you need to move these new fields to the
> end of the structure.
> 
> neil

Alright, I will move them to the end in V4 patch. 

Neil, do you have a simple writeup of guidelines on how to add APIs and new fields to existing
structures so that new additions do not break the ABI? It might help all the developers avoid
making similar mistakes in the future.

> 
> >  	enum rte_intr_handle_type type;  /**< handle type */
> >  };
> >
> > --
> > 1.8.1.4
> >
> >

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v3 1/5] ethdev: add rx interrupt enable/disable functions
  2015-02-17 15:54  3%   ` Neil Horman
@ 2015-02-19  7:58  3%     ` Zhou, Danny
  2015-02-19 13:02  3%       ` Neil Horman
  0 siblings, 1 reply; 200+ results
From: Zhou, Danny @ 2015-02-19  7:58 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev



> -----Original Message-----
> From: Neil Horman [mailto:nhorman@tuxdriver.com]
> Sent: Tuesday, February 17, 2015 11:55 PM
> To: Zhou, Danny
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 1/5] ethdev: add rx interrupt enable/disable functions
> 
> On Tue, Feb 17, 2015 at 09:47:15PM +0800, Zhou Danny wrote:
> > v3 changes
> > - Add return value for interrupt enable/disable functions
> >
> > Add two dev_ops functions to enable and disable rx queue interrupts
> >
> > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > Tested-by: Yong Liu <yong.liu@intel.com>
> > ---
> >  lib/librte_ether/rte_ethdev.c | 43 ++++++++++++++++++++++++++++++++
> >  lib/librte_ether/rte_ethdev.h | 57 +++++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 100 insertions(+)
> >
> > diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> > index ea3a1fb..d27469a 100644
> > --- a/lib/librte_ether/rte_ethdev.c
> > +++ b/lib/librte_ether/rte_ethdev.c
> > @@ -2825,6 +2825,49 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
> >  	}
> >  	rte_spinlock_unlock(&rte_eth_dev_cb_lock);
> >  }
> > +
> > +int
> > +rte_eth_dev_rx_queue_intr_enable(uint8_t port_id,
> > +				uint16_t queue_id)
> > +{
> > +	struct rte_eth_dev *dev;
> > +
> > +	if (port_id >= nb_ports) {
> > +		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
> > +		return (-ENODEV);
> > +	}
> > +
> > +	dev = &rte_eth_devices[port_id];
> > +	if (dev == NULL) {
> > +		PMD_DEBUG_TRACE("Invalid port device\n");
> > +		return (-ENODEV);
> > +	}
> > +
> > +	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP);
> > +	return (*dev->dev_ops->rx_queue_intr_enable)(dev, queue_id);
> > +}
> > +
> > +int
> > +rte_eth_dev_rx_queue_intr_disable(uint8_t port_id,
> > +				uint16_t queue_id)
> > +{
> > +	struct rte_eth_dev *dev;
> > +
> > +	if (port_id >= nb_ports) {
> > +		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
> > +		return (-ENODEV);
> > +	}
> > +
> > +	dev = &rte_eth_devices[port_id];
> > +	if (dev == NULL) {
> > +		PMD_DEBUG_TRACE("Invalid port device\n");
> > +		return (-ENODEV);
> > +	}
> > +
> > +	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP);
> > +	return (*dev->dev_ops->rx_queue_intr_disable)(dev, queue_id);
> > +}
> > +
> >  #ifdef RTE_NIC_BYPASS
> >  int rte_eth_dev_bypass_init(uint8_t port_id)
> >  {
> > diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> > index 84160c3..0f320a9 100644
> > --- a/lib/librte_ether/rte_ethdev.h
> > +++ b/lib/librte_ether/rte_ethdev.h
> > @@ -848,6 +848,8 @@ struct rte_eth_fdir {
> >  struct rte_intr_conf {
> >  	/** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */
> >  	uint16_t lsc;
> > +	/** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */
> > +	uint16_t rxq;
> >  };
> >
> >  /**
> > @@ -1109,6 +1111,14 @@ typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
> >  				    const struct rte_eth_txconf *tx_conf);
> >  /**< @internal Setup a transmit queue of an Ethernet device. */
> >
> > +typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
> > +				    uint16_t rx_queue_id);
> > +/**< @internal Enable interrupt of a receive queue of an Ethernet device. */
> > +
> > +typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
> > +				    uint16_t rx_queue_id);
> > +/**< @internal Disable interrupt of a receive queue of an Ethernet device. */
> > +
> >  typedef void (*eth_queue_release_t)(void *queue);
> >  /**< @internal Release memory resources allocated by given RX/TX queue. */
> >
> > @@ -1445,6 +1455,8 @@ struct eth_dev_ops {
> >  	eth_queue_start_t          tx_queue_start;/**< Start TX for a queue.*/
> >  	eth_queue_stop_t           tx_queue_stop;/**< Stop TX for a queue.*/
> >  	eth_rx_queue_setup_t       rx_queue_setup;/**< Set up device RX queue.*/
> > +	eth_rx_enable_intr_t       rx_queue_intr_enable; /**< Enable Rx queue interrupt. */
> > +	eth_rx_disable_intr_t      rx_queue_intr_disable; /**< Disable Rx queue interrupt.*/
> Put these at the end of eth_dev_ops if you want to avoid breaking ABI

I purposely added those two APIs at their current position to ensure all rxq-related APIs are declared together
in eth_dev_ops. Anyway, moving them to the end is OK with me for the sake of the ABI, though the code looks
a little bit ugly.

> 
> >  	eth_queue_release_t        rx_queue_release;/**< Release RX queue.*/
> >  	eth_rx_queue_count_t       rx_queue_count; /**< Get Rx queue count. */
> >  	eth_rx_descriptor_done_t   rx_descriptor_done;  /**< Check rxd DD bit */
> > @@ -2811,6 +2823,51 @@ void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
> >  				enum rte_eth_event_type event);
> >
> >  /**
> > + * When there is no rx packet coming in Rx Queue for a long time, we can
> > + * sleep lcore related to RX Queue for power saving, and enable rx interrupt
> > + * to be triggered when rx packect arrives.
> > + *
> > + * The rte_eth_dev_rx_queue_intr_enable() function enables rx queue
> > + * interrupt on specific rx queue of a port.
> > + *
> > + * @param port_id
> > + *   The port identifier of the Ethernet device.
> > + * @param queue_id
> > + *   The index of the receive queue from which to retrieve input packets.
> > + *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
> > + *   to rte_eth_dev_configure().
> > + * @return
> > + *   - (0) if successful.
> > + *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
> > + *     that operation.
> > + *   - (-ENODEV) if *port_id* invalid.
> > + */
> > +int rte_eth_dev_rx_queue_intr_enable(uint8_t port_id,
> > +				uint16_t queue_id);
> > +
> > +/**
> > + * When lcore wakes up from rx interrupt indicating packet coming, disable rx
> > + * interrupt and returns to polling mode.
> > + *
> > + * The rte_eth_dev_rx_queue_intr_disable() function disables rx queue
> > + * interrupt on specific rx queue of a port.
> > + *
> > + * @param port_id
> > + *   The port identifier of the Ethernet device.
> > + * @param queue_id
> > + *   The index of the receive queue from which to retrieve input packets.
> > + *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
> > + *   to rte_eth_dev_configure().
> > + * @return
> > + *   - (0) if successful.
> > + *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
> > + *     that operation.
> > + *   - (-ENODEV) if *port_id* invalid.
> > + */
> > +int rte_eth_dev_rx_queue_intr_disable(uint8_t port_id,
> > +				uint16_t queue_id);
> > +
> > +/**
> >   * Turn on the LED on the Ethernet device.
> >   * This function turns on the LED on the Ethernet device.
> >   *
> > --
> > 1.8.1.4
> >
> >

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v2] doc: Add requirements for x32 ABI
  2015-02-16 16:29  4%   ` De Lara Guarch, Pablo
@ 2015-02-18 19:33  4%     ` Thomas Monjalon
  0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2015-02-18 19:33 UTC (permalink / raw)
  To: Mrzyglod, DanielX T; +Cc: dev

> > This patch add requirements about compiler and distribution support.
> > 
> > v2:
> > spelling fixes
> > 
> > Signed-off-by: Daniel Mrzyglod <danielx.t.mrzyglod@intel.com>
> 
> Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
> 
> Thanks Daniel!

Applied, thanks

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [PATCH] x32 ABI support, first iteration
  @ 2015-02-18 19:32  4%     ` Thomas Monjalon
  0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2015-02-18 19:32 UTC (permalink / raw)
  To: Daniel Mrzyglod; +Cc: dev

> > > Signed-off-by: Konstantin Ananyev <konstantin.ananyev at intel.com>
> > > Signed-off-by: Daniel Mrzyglod <danielx.t.mrzyglod at intel.com>
> > 
> > Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> 
> Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
> 
> Just add that documentation should be updated for this.

Applied, thanks

^ permalink raw reply	[relevance 4%]

* [dpdk-dev] [PATCH v3 0/3] DPDK ethdev callback support
  @ 2015-02-18 17:42  4% ` John McNamara
  2015-02-19 17:56  4%   ` [dpdk-dev] [PATCH v4 " John McNamara
                     ` (2 more replies)
  0 siblings, 3 replies; 200+ results
From: John McNamara @ 2015-02-18 17:42 UTC (permalink / raw)
  To: dev

This patchset is for a small addition to the ethdev library, to
add in support for callbacks at the RX and TX stages. This allows
packet processing to be done on packets before they get returned
to applications using rte_eth_rx_burst call.

See the RFC cover letter for the use cases:

    http://dpdk.org/ml/archives/dev/2014-December/010491.html

For this version we spent some time investigating Stephen Hemminger's
suggestion of using the userspace RCU (read-copy-update) library for
SMP safety:

   http://urcu.so/

The default liburcu (which defaulted to liburcu-mb) requires the least
interaction from the end user but showed a 25% drop in packet throughput
in the callback sample app.

The liburcu-qsbr (quiescent state) variant showed a 1% drop in packet
throughput in the callback sample app. However it requires registered
RCU threads in the program to periodically announce quiescent states.
This makes it more difficult to implement for end user applications.

For this release we will document that adding and removing callbacks
is not thread safe.

Note: Sample application documentation to follow in a patch update.

Version 3 changes:
    * Removed unnecessary header file from example folder
      (which included baremetal reference).
    * Renamed the interrupt, RX and TX callbacks to make their function
      clearer (using the names suggested in the mailing list comments).
    * Squashed ABI version update into the commit it relates to.
    * Fixed various checkpatch warnings.

Version 2 changes:
    * Added ABI versioning.
    * Doxygen clarifications.

Version 1 changes:
    * Added callback removal functions.
    * Minor fixes.


Richardson, Bruce (3):
  ethdev: Rename callbacks field to link_intr_cbs
  ethdev: Add rxtx callback support
  examples: example showing use of callbacks.

 app/test/virtual_pmd.c                 |    2 +-
 examples/rxtx_callbacks/Makefile       |   57 ++++++++
 examples/rxtx_callbacks/main.c         |  228 ++++++++++++++++++++++++++++++++
 lib/librte_ether/rte_ethdev.c          |  183 ++++++++++++++++++++++++--
 lib/librte_ether/rte_ethdev.h          |  192 ++++++++++++++++++++++++++-
 lib/librte_ether/rte_ether_version.map |    4 +
 lib/librte_pmd_bond/rte_eth_bond_api.c |    2 +-
 7 files changed, 654 insertions(+), 14 deletions(-)
 create mode 100644 examples/rxtx_callbacks/Makefile
 create mode 100644 examples/rxtx_callbacks/main.c

-- 
1.7.4.1

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [RFC PATCH] lib/librte_ethdev: Expand port identifier
  2015-02-18 13:05  0%     ` Wodkowski, PawelX
@ 2015-02-18 14:10  0%       ` Bruce Richardson
  0 siblings, 0 replies; 200+ results
From: Bruce Richardson @ 2015-02-18 14:10 UTC (permalink / raw)
  To: Wodkowski, PawelX; +Cc: dev

On Wed, Feb 18, 2015 at 01:05:10PM +0000, Wodkowski, PawelX wrote:
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Bruce Richardson
> > Sent: Wednesday, February 18, 2015 1:32 PM
> > To: Tetsuya Mukawa
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [RFC PATCH] lib/librte_ethdev: Expand port identifier
> > 
> > On Wed, Feb 18, 2015 at 12:30:07PM +0000, Bruce Richardson wrote:
> > > On Wed, Feb 18, 2015 at 08:02:49PM +0900, Tetsuya Mukawa wrote:
> > > > Currently uint8_t is used for port identifier. This patch changes it,
> > > > and use uint16_t as port identifier.
> > > > This patch only changes ethdev library. ABI of the library will be
> > > > kept even after applying it.
> > > >
> > > > Also, this patch involves following fixes.
> > > > - Use "port_id" as variable name instead of "port".
> > > >
> > > >
> > > > Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
> > > > ---
> > > >  lib/librte_ether/rte_ethdev.c          |  212 +-
> > > >  lib/librte_ether/rte_ethdev_internal.h | 3672
> > ++++++++++++++++++++++++++++++++
> > > >  2 files changed, 3778 insertions(+), 106 deletions(-)
> > > >  create mode 100644 lib/librte_ether/rte_ethdev_internal.h
> > > >
> > > I'm not sure I follow why we need a new header file for this.
> > > Also, thinking about this change, a more fundamental problem is going to be
> > > the mbuf structure, which stores a port id inside it in an 8-bit value.
> > > Upgrading that to a 16-bit value requires some thought, and verification to
> > > ensure any adjustment of fields does not lead to serious performance issues.
> > >
> > > Therefore, I suggest we leave the port id values as 8-bits until such time
> > > as we need greater than 255 port values in a real-world use case.
> > > Out of interest - anyone have a DPDK app where they use >16 port id values? If
> > > so, how high does the port id value get?
> 
> Not real application but simple example of setup:
> 4 Niantic x 2 ports x 64 VF = 512 port id

However, I'd find it hard to see why you would split a single port into 64
within a *single* application, let alone do that with 8 of them :-)

> 
> I don't know what would be the real need/advantage of such setup (bonding?) but 
> you see, in theory it is already insufficient.

Well, in theory any number of ports is insufficient, as you can create thousands
and thousands of pcap or ring ethdevs in an app if you really want. Hence the
question about any actual use cases which use a large number of ports. :-)

/Bruce

> 
> > >
> > > Regards,
> > > /Bruce
> > >
> > 
> > Resending with correct email addr for Neil.
> > 
> > /Bruce

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [RFC PATCH] lib/librte_ethdev: Expand port identifier
  2015-02-18 13:10  0%     ` Marc Sune
@ 2015-02-18 13:49  0%       ` Bruce Richardson
  0 siblings, 0 replies; 200+ results
From: Bruce Richardson @ 2015-02-18 13:49 UTC (permalink / raw)
  To: Marc Sune; +Cc: dev

On Wed, Feb 18, 2015 at 02:10:48PM +0100, Marc Sune wrote:
> 
> On 18/02/15 13:31, Bruce Richardson wrote:
> >On Wed, Feb 18, 2015 at 12:30:07PM +0000, Bruce Richardson wrote:
> >>On Wed, Feb 18, 2015 at 08:02:49PM +0900, Tetsuya Mukawa wrote:
> >>>Currently uint8_t is used for port identifier. This patch changes it,
> >>>and use uint16_t as port identifier.
> >>>This patch only changes ethdev library. ABI of the library will be
> >>>kept even after applying it.
> >>>
> >>>Also, this patch involves following fixes.
> >>>- Use "port_id" as variable name instead of "port".
> >>>
> >>>
> >>>Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
> >>>---
> >>>  lib/librte_ether/rte_ethdev.c          |  212 +-
> >>>  lib/librte_ether/rte_ethdev_internal.h | 3672 ++++++++++++++++++++++++++++++++
> >>>  2 files changed, 3778 insertions(+), 106 deletions(-)
> >>>  create mode 100644 lib/librte_ether/rte_ethdev_internal.h
> >>>
> >>I'm not sure I follow why we need a new header file for this.
> >>Also, thinking about this change, a more fundamental problem is going to be
> >>the mbuf structure, which stores a port id inside it in an 8-bit value.
> >>Upgrading that to a 16-bit value requires some thought, and verification to
> >>ensure any adjustment of fields does not lead to serious performance issues.
> >>
> >>Therefore, I suggest we leave the port id values as 8-bits until such time
> >>as we need greater than 255 port values in a real-world use case.
> >>Out of interest - anyone have a DPDK app where they use >16 port id values? If
> >>so, how high does the port id value get?
> 
> Just a though on port_id in general; I wouldn't see why other type of ports
> could fall into the same abstraction of using port_ids as we do for PHY
> ports, if eventually we could create a unified API TX/RX routines he same
> regardless of the port (I know KNI deprecated this approach in the past). Of
> course initialization routines should be different for each type of port.
> 
> I see quite a bit of code duplicity, basically in TX/RX routines for PHY
> ports, KNI ports, SHMEM (ring) ports like ivshmem etc.., which are very
> similar, and we put into the shoulders of all users of DPDK to have to do
> the "switch() - case" based on the type of port (which is state that they
> have to store themselves too). This seems to me it could be improved from a
> DPDK user's point of view.
> 
> By no means I am saying lower level APIs should not be exposed (current
> APIs)... There is the need to, since users using one type of ports only
> should be able to by-pass that (small?) extra overhead of this higher level
> APIs.
> 
> If the implementation would eventually would go into this direction, there
> would be more pressure in the port_id identifier; e.g. KNI interfaces and
> other SW like interfaces can be created and destroyed quite frequently (e.g.
> VMs), so more than 8 bits for addressing would probably be needed.
> 
> I know it not helping in the short-term, but let's see if someone thinks
> this makes any sense at all.
> 
> Marc
> 

Yes, this makes complete sense, Marc, and it's something I (and some others
here at Intel) have certainly been thinking about - and go on thinking about.
The ability to have multiple objects of different types accessible under a
generic rx_burst/tx_burst interface is a very powerful one. 
[I actually tried something a bit like this before, with
patches to allow a form of type-casting from rings to ethdevs, but it didn't
go further as it was felt to overlap too much with the rings pmd.
http://dpdk.org/ml/archives/dev/2014-May/002505.html ].

Overall, at this point we believe that the ethdev itself is a bit of overkill
for a common API, since it's got a lot of NIC specific baggage in its APIs.
We are thinking that perhaps a "higher-level", more minimal abstraction, which
just has rx_burst or tx_burst functions and not a lot else, might be more
useful, as it can then be "sub-classed" [to use object-oriented terminology]
into device-type specific versions such as ethdev.
At this stage, we're just thinking about what such a thing might look like, and trying
a few things out to see if such an idea is workable. If we think it's something
worth pursuing, we hope to have something to share very soon, to get input
from the whole community to see if it's worth doing for future releases. However,
it appears your thinking closely aligns with what we are thinking, which is good
to see!

Regards,
/Bruce

> >>
> >>Regards,
> >>/Bruce
> >>
> >Resending with correct email addr for Neil.
> >
> >/Bruce
> 

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [RFC PATCH] lib/librte_ethdev: Expand port identifier
  2015-02-18 12:31  0%   ` Bruce Richardson
  2015-02-18 13:05  0%     ` Wodkowski, PawelX
@ 2015-02-18 13:10  0%     ` Marc Sune
  2015-02-18 13:49  0%       ` Bruce Richardson
  1 sibling, 1 reply; 200+ results
From: Marc Sune @ 2015-02-18 13:10 UTC (permalink / raw)
  To: dev


On 18/02/15 13:31, Bruce Richardson wrote:
> On Wed, Feb 18, 2015 at 12:30:07PM +0000, Bruce Richardson wrote:
>> On Wed, Feb 18, 2015 at 08:02:49PM +0900, Tetsuya Mukawa wrote:
>>> Currently uint8_t is used for port identifier. This patch changes it,
>>> and use uint16_t as port identifier.
>>> This patch only changes ethdev library. ABI of the library will be
>>> kept even after applying it.
>>>
>>> Also, this patch involves following fixes.
>>> - Use "port_id" as variable name instead of "port".
>>>
>>>
>>> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
>>> ---
>>>   lib/librte_ether/rte_ethdev.c          |  212 +-
>>>   lib/librte_ether/rte_ethdev_internal.h | 3672 ++++++++++++++++++++++++++++++++
>>>   2 files changed, 3778 insertions(+), 106 deletions(-)
>>>   create mode 100644 lib/librte_ether/rte_ethdev_internal.h
>>>
>> I'm not sure I follow why we need a new header file for this.
>> Also, thinking about this change, a more fundamental problem is going to be
>> the mbuf structure, which stores a port id inside it in an 8-bit value.
>> Upgrading that to a 16-bit value requires some thought, and verification to
>> ensure any adjustment of fields does not lead to serious performance issues.
>>
>> Therefore, I suggest we leave the port id values as 8-bits until such time
>> as we need greater than 255 port values in a real-world use case.
>> Out of interest - anyone have a DPDK app where they use >16 port id values? If
>> so, how high does the port id value get?

Just a thought on port_id in general; I don't see why other types of
ports couldn't fall into the same abstraction of using port_ids as we do
for PHY ports, if eventually we could create a unified API with TX/RX
routines that are the same regardless of the port (I know KNI deprecated this
approach in the past). Of course initialization routines should be
different for each type of port.

I see quite a bit of code duplication, basically in TX/RX routines for PHY
ports, KNI ports, SHMEM (ring) ports like ivshmem etc., which are very
similar, and we put on the shoulders of all users of DPDK the burden of
doing the "switch() - case" based on the type of port (which is state that
they have to store themselves too). It seems to me this could be
improved from a DPDK user's point of view.

By no means am I saying lower-level APIs (the current APIs) should not be
exposed... There is a need for them, since users using only one type of port
should be able to bypass the (small?) extra overhead of these higher-level
APIs.

If the implementation eventually went in this direction,
there would be more pressure on the port_id identifier; e.g. KNI
interfaces and other SW-like interfaces can be created and destroyed
quite frequently (e.g. VMs), so more than 8 bits for addressing would
probably be needed.

I know it's not helping in the short term, but let's see if someone thinks
this makes any sense at all.

Marc

>>
>> Regards,
>> /Bruce
>>
> Resending with correct email addr for Neil.
>
> /Bruce

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [RFC PATCH] lib/librte_ethdev: Expand port identifier
  2015-02-18 12:31  0%   ` Bruce Richardson
@ 2015-02-18 13:05  0%     ` Wodkowski, PawelX
  2015-02-18 14:10  0%       ` Bruce Richardson
  2015-02-18 13:10  0%     ` Marc Sune
  1 sibling, 1 reply; 200+ results
From: Wodkowski, PawelX @ 2015-02-18 13:05 UTC (permalink / raw)
  To: Richardson, Bruce, Tetsuya Mukawa; +Cc: dev

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Bruce Richardson
> Sent: Wednesday, February 18, 2015 1:32 PM
> To: Tetsuya Mukawa
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC PATCH] lib/librte_ethdev: Expand port identifier
> 
> On Wed, Feb 18, 2015 at 12:30:07PM +0000, Bruce Richardson wrote:
> > On Wed, Feb 18, 2015 at 08:02:49PM +0900, Tetsuya Mukawa wrote:
> > > Currently uint8_t is used for port identifier. This patch changes it,
> > > and use uint16_t as port identifier.
> > > This patch only changes ethdev library. ABI of the library will be
> > > kept even after applying it.
> > >
> > > Also, this patch involves following fixes.
> > > - Use "port_id" as variable name instead of "port".
> > >
> > >
> > > Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
> > > ---
> > >  lib/librte_ether/rte_ethdev.c          |  212 +-
> > >  lib/librte_ether/rte_ethdev_internal.h | 3672
> ++++++++++++++++++++++++++++++++
> > >  2 files changed, 3778 insertions(+), 106 deletions(-)
> > >  create mode 100644 lib/librte_ether/rte_ethdev_internal.h
> > >
> > I'm not sure I follow why we need a new header file for this.
> > Also, thinking about this change, a more fundamental problem is going to be
> > the mbuf structure, which stores a port id inside it in an 8-bit value.
> > Upgrading that to a 16-bit value requires some thought, and verification to
> > ensure any adjustment of fields does not lead to serious performance issues.
> >
> > Therefore, I suggest we leave the port id values as 8-bits until such time
> > as we need greater than 255 port values in a real-world use case.
> > Out of interest - anyone have a DPDK app where they use >16 port id values? If
> > so, how high does the port id value get?

Not a real application, but a simple example of a setup:
4 Niantic x 2 ports x 64 VFs = 512 port ids

I don't know what the real need/advantage of such a setup would be (bonding?), but
you see, in theory it is already insufficient.

> >
> > Regards,
> > /Bruce
> >
> 
> Resending with correct email addr for Neil.
> 
> /Bruce

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v8 03/14] eal/pci, ethdev: Remove assumption that port will not be detached
  2015-02-18 12:33  0%                 ` Iremonger, Bernard
@ 2015-02-18 12:41  0%                   ` Tetsuya Mukawa
  0 siblings, 0 replies; 200+ results
From: Tetsuya Mukawa @ 2015-02-18 12:41 UTC (permalink / raw)
  To: Iremonger, Bernard, Richardson, Bruce, Thomas Monjalon; +Cc: dev

On 2015/02/18 21:33, Iremonger, Bernard wrote:
>
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Tetsuya Mukawa
>> Sent: Wednesday, February 18, 2015 10:58 AM
>> To: Richardson, Bruce; Thomas Monjalon
>> Cc: dev@dpdk.org; Neil Horman
>> Subject: Re: [dpdk-dev] [PATCH v8 03/14] eal/pci, ethdev: Remove assumption that port will not be
>> detached
>>
>> On 2015/02/18 19:03, Bruce Richardson wrote:
>>> On Wed, Feb 18, 2015 at 10:57:25AM +0100, Thomas Monjalon wrote:
>>>> 2015-02-18 15:10, Tetsuya Mukawa:
>>>>> On 2015/02/18 10:54, Tetsuya Mukawa wrote:
>>>>>> On 2015/02/18 9:31, Thomas Monjalon wrote:
>>>>>>> 2015-02-17 15:14, Tetsuya Mukawa:
>>>>>>>> On 2015/02/17 9:36, Thomas Monjalon wrote:
>>>>>>>>> 2015-02-16 13:14, Tetsuya Mukawa:
>>>>>>>>> Is uint8_t sill a good size for hotpluggable virtual device ids?
>>>>>>>> I am not sure it's enough, but uint8_t is widely used in "rte_ethdev.c"
>>>>>>>> as port id.
>>>>>>>> If someone reports it doesn't enough, I guess it will be the time
>>>>>>>> to write a patch to change all uint_8 in one patch.
>>>>>>> It's a big ABI breakage. So if we feel it's going to be required,
>>>>>>> it's better to do it now in 2.0 release I think.
>>>>>>>
>>>>>>> Any opinion?
>>>>>>>
>>>>>> Hi Thomas,
>>>>>>
>>>>>> I agree with it.
>>>>>> I will add an one more patch to change uint8_t to uint16_t.
>>>>>>
>>>>>> Thanks,
>>>>>> Tetsuya
>>>>>>
>>>>> Hi Thomas,
>>>>>
>>>>> Could I make sure.
>>>>> After changing uint8_t to uint16_t in "rte_ethdev.[ch]", must I also
>>>>> need to change other applications and libraries that call ethdev APIs?
>>>>> If so, I would not finish it by 23rd.
>>>>>
>>>>> I've counted how many lines call ethdev APIs that are related to port_id.
>>>>> Could you please check an attached file?
>>>>> It's over 1200 lines. Probably to fix  one of caller, I will need to
>>>>> check how port_id is used, and fix more related lines. So probably
>>>>> thousands lines may need to be fixed.
>>>>>
>>>>> When is deadline for fixing this changing?
>>>>> Also, if you have a good idea to fix it easier, could you please let
>>>>> me know?
>>>> It was an open question.
>>>> If everybody is fine with 255 ports maximum, let's keep it as is.
>>>>
>>> I think we are probably ok for now (and forseeable future) with 255 max.
>>>
>>> However, if we did change it, I agree that in 2.0 is a very good time to do so.
>>> Since we are expanding the field, rather than shrinking it, I don't
>>> see why we can't just make the change at the ethdev level (and in libs
>>> API) in 2.0 and then in later releases (e.g. 2.1) update the apps and
>>> examples to match. That way the ABI stays the same from 2.0 onwards,
>>> and we don't have a huge amount of churn changing it everywhere late in the 2.0 release cycle.
>> Hi Bruce,
>>
>> Could you please check my RFC patch I will send soon?
>> I wrote the patch like below.
>>
>> 1. Copy header file like below.
>> $ cp lib/librte_ether/rte_ethdev.h lib/librte_ether/rte_ethdev_internal.h
>> 2. Change "rte_ethdev.c" to include "rte_ethdev_internal.h"
>> 3. Change type of port id in "rte_ethdev.c" and "rte_ethdev_internal.h".
>>
>> If the patch is OK, I wll send it with hotplug patches.
>>
>> Thanks,
>> Tetsuya
>>
>>
>>> /Bruce
> Hi Tetsuya,
>
> After this change there will be two header files with a lot of the same information.
> lib/librte_ether/rte_ethdev.h
> lib/librte_ether/rte_ethdev_internal.h
> I don't think this is a good idea for maintenance in the future.
> If 255 is ok for the foreseeable future, why change it now.

Hi Bernard,

I appreciate your checking.
Agreed, it would not be good to have almost identical headers.

Thanks,
Tetsuya

> Regards,
>
> Bernard.
>  
>

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v8 03/14] eal/pci, ethdev: Remove assumption that port will not be detached
  2015-02-18 12:23  0%                 ` Bruce Richardson
@ 2015-02-18 12:38  0%                   ` Tetsuya Mukawa
  0 siblings, 0 replies; 200+ results
From: Tetsuya Mukawa @ 2015-02-18 12:38 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: dev, Neil Horman

On 2015/02/18 21:23, Bruce Richardson wrote:
> On Wed, Feb 18, 2015 at 07:58:06PM +0900, Tetsuya Mukawa wrote:
>> On 2015/02/18 19:03, Bruce Richardson wrote:
>>> On Wed, Feb 18, 2015 at 10:57:25AM +0100, Thomas Monjalon wrote:
>>>> 2015-02-18 15:10, Tetsuya Mukawa:
>>>>> On 2015/02/18 10:54, Tetsuya Mukawa wrote:
>>>>>> On 2015/02/18 9:31, Thomas Monjalon wrote:
>>>>>>> 2015-02-17 15:14, Tetsuya Mukawa:
>>>>>>>> On 2015/02/17 9:36, Thomas Monjalon wrote:
>>>>>>>>> 2015-02-16 13:14, Tetsuya Mukawa:
>>>>>>>>> Is uint8_t sill a good size for hotpluggable virtual device ids?
>>>>>>>> I am not sure it's enough, but uint8_t is widely used in "rte_ethdev.c"
>>>>>>>> as port id.
>>>>>>>> If someone reports it doesn't enough, I guess it will be the time to
>>>>>>>> write a patch to change all uint_8 in one patch.
>>>>>>> It's a big ABI breakage. So if we feel it's going to be required,
>>>>>>> it's better to do it now in 2.0 release I think.
>>>>>>>
>>>>>>> Any opinion?
>>>>>>>
>>>>>> Hi Thomas,
>>>>>>
>>>>>> I agree with it.
>>>>>> I will add an one more patch to change uint8_t to uint16_t.
>>>>>>
>>>>>> Thanks,
>>>>>> Tetsuya
>>>>>>
>>>>> Hi Thomas,
>>>>>
>>>>> Could I make sure.
>>>>> After changing uint8_t to uint16_t in "rte_ethdev.[ch]", must I also
>>>>> need to change other applications and libraries that call ethdev APIs?
>>>>> If so, I would not finish it by 23rd.
>>>>>
>>>>> I've counted how many lines call ethdev APIs that are related to port_id.
>>>>> Could you please check an attached file?
>>>>> It's over 1200 lines. Probably to fix  one of caller, I will need to
>>>>> check how port_id is used, and fix more related lines. So probably
>>>>> thousands lines may need to be fixed.
>>>>>
>>>>> When is deadline for fixing this changing?
>>>>> Also, if you have a good idea to fix it easier, could you please let me
>>>>> know?
>>>> It was an open question.
>>>> If everybody is fine with 255 ports maximum, let's keep it as is.
>>>>
>>> I think we are probably ok for now (and forseeable future) with 255 max.
>>>
>>> However, if we did change it, I agree that in 2.0 is a very good time to do so.
>>> Since we are expanding the field, rather than shrinking it, I don't see why we
>>> can't just make the change at the ethdev level (and in libs API) in 2.0 and then in
>>> later releases (e.g. 2.1) update the apps and examples to match. That way the
>>> ABI stays the same from 2.0 onwards, and we don't have a huge amount of churn
>>> changing it everywhere late in the 2.0 release cycle.
>> Hi Bruce,
>>
>> Could you please check my RFC patch I will send soon?
>> I wrote the patch like below.
>>
>> 1. Copy header file like below.
>> $ cp lib/librte_ether/rte_ethdev.h lib/librte_ether/rte_ethdev_internal.h
>> 2. Change "rte_ethdev.c" to include "rte_ethdev_internal.h"
>> 3. Change type of port id in "rte_ethdev.c" and "rte_ethdev_internal.h".
>>
>> If the patch is OK, I wll send it with hotplug patches.
>>
>> Thanks,
>> Tetsuya
>>
>>
> Why the new ethdev internal file? 

I guess some libraries include "rte_ethdev.h". To compile these
libraries, I thought such a header was needed.
But it seems it's not the time to change the type of port_id.
I appreciate your checking.

Tetsuya

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v8 03/14] eal/pci, ethdev: Remove assumption that port will not be detached
  2015-02-18 10:58  0%               ` Tetsuya Mukawa
  2015-02-18 12:23  0%                 ` Bruce Richardson
@ 2015-02-18 12:33  0%                 ` Iremonger, Bernard
  2015-02-18 12:41  0%                   ` Tetsuya Mukawa
  1 sibling, 1 reply; 200+ results
From: Iremonger, Bernard @ 2015-02-18 12:33 UTC (permalink / raw)
  To: Tetsuya Mukawa, Richardson, Bruce, Thomas Monjalon; +Cc: dev, Neil Horman



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Tetsuya Mukawa
> Sent: Wednesday, February 18, 2015 10:58 AM
> To: Richardson, Bruce; Thomas Monjalon
> Cc: dev@dpdk.org; Neil Horman
> Subject: Re: [dpdk-dev] [PATCH v8 03/14] eal/pci, ethdev: Remove assumption that port will not be
> detached
> 
> On 2015/02/18 19:03, Bruce Richardson wrote:
> > On Wed, Feb 18, 2015 at 10:57:25AM +0100, Thomas Monjalon wrote:
> >> 2015-02-18 15:10, Tetsuya Mukawa:
> >>> On 2015/02/18 10:54, Tetsuya Mukawa wrote:
> >>>> On 2015/02/18 9:31, Thomas Monjalon wrote:
> >>>>> 2015-02-17 15:14, Tetsuya Mukawa:
> >>>>>> On 2015/02/17 9:36, Thomas Monjalon wrote:
> >>>>>>> 2015-02-16 13:14, Tetsuya Mukawa:
> >>>>>>> Is uint8_t sill a good size for hotpluggable virtual device ids?
> >>>>>> I am not sure it's enough, but uint8_t is widely used in "rte_ethdev.c"
> >>>>>> as port id.
> >>>>>> If someone reports it doesn't enough, I guess it will be the time
> >>>>>> to write a patch to change all uint_8 in one patch.
> >>>>> It's a big ABI breakage. So if we feel it's going to be required,
> >>>>> it's better to do it now in 2.0 release I think.
> >>>>>
> >>>>> Any opinion?
> >>>>>
> >>>> Hi Thomas,
> >>>>
> >>>> I agree with it.
> >>>> I will add an one more patch to change uint8_t to uint16_t.
> >>>>
> >>>> Thanks,
> >>>> Tetsuya
> >>>>
> >>> Hi Thomas,
> >>>
> >>> Could I make sure.
> >>> After changing uint8_t to uint16_t in "rte_ethdev.[ch]", must I also
> >>> need to change other applications and libraries that call ethdev APIs?
> >>> If so, I would not finish it by 23rd.
> >>>
> >>> I've counted how many lines call ethdev APIs that are related to port_id.
> >>> Could you please check an attached file?
> >>> It's over 1200 lines. Probably to fix  one of caller, I will need to
> >>> check how port_id is used, and fix more related lines. So probably
> >>> thousands lines may need to be fixed.
> >>>
> >>> When is deadline for fixing this changing?
> >>> Also, if you have a good idea to fix it easier, could you please let
> >>> me know?
> >> It was an open question.
> >> If everybody is fine with 255 ports maximum, let's keep it as is.
> >>
> > I think we are probably ok for now (and forseeable future) with 255 max.
> >
> > However, if we did change it, I agree that in 2.0 is a very good time to do so.
> > Since we are expanding the field, rather than shrinking it, I don't
> > see why we can't just make the change at the ethdev level (and in libs
> > API) in 2.0 and then in later releases (e.g. 2.1) update the apps and
> > examples to match. That way the ABI stays the same from 2.0 onwards,
> > and we don't have a huge amount of churn changing it everywhere late in the 2.0 release cycle.
> 
> Hi Bruce,
> 
> Could you please check my RFC patch I will send soon?
> I wrote the patch like below.
> 
> 1. Copy header file like below.
> $ cp lib/librte_ether/rte_ethdev.h lib/librte_ether/rte_ethdev_internal.h
> 2. Change "rte_ethdev.c" to include "rte_ethdev_internal.h"
> 3. Change type of port id in "rte_ethdev.c" and "rte_ethdev_internal.h".
> 
> If the patch is OK, I wll send it with hotplug patches.
> 
> Thanks,
> Tetsuya
> 
> 
> > /Bruce
> 
Hi Tetsuya,

After this change there will be two header files with a lot of the same information.
lib/librte_ether/rte_ethdev.h
lib/librte_ether/rte_ethdev_internal.h
I don't think this is a good idea for maintenance in the future.
If 255 is ok for the foreseeable future, why change it now.

Regards,

Bernard.
 

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [RFC PATCH] lib/librte_ethdev: Expand port identifier
  2015-02-18 12:30  0% ` Bruce Richardson
@ 2015-02-18 12:31  0%   ` Bruce Richardson
  2015-02-18 13:05  0%     ` Wodkowski, PawelX
  2015-02-18 13:10  0%     ` Marc Sune
  0 siblings, 2 replies; 200+ results
From: Bruce Richardson @ 2015-02-18 12:31 UTC (permalink / raw)
  To: Tetsuya Mukawa; +Cc: dev

On Wed, Feb 18, 2015 at 12:30:07PM +0000, Bruce Richardson wrote:
> On Wed, Feb 18, 2015 at 08:02:49PM +0900, Tetsuya Mukawa wrote:
> > Currently uint8_t is used for port identifier. This patch changes it,
> > and use uint16_t as port identifier.
> > This patch only changes ethdev library. ABI of the library will be
> > kept even after applying it.
> > 
> > Also, this patch involves following fixes.
> > - Use "port_id" as variable name instead of "port".
> > 
> >
> > Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
> > ---
> >  lib/librte_ether/rte_ethdev.c          |  212 +-
> >  lib/librte_ether/rte_ethdev_internal.h | 3672 ++++++++++++++++++++++++++++++++
> >  2 files changed, 3778 insertions(+), 106 deletions(-)
> >  create mode 100644 lib/librte_ether/rte_ethdev_internal.h
> > 
> I'm not sure I follow why we need a new header file for this.
> Also, thinking about this change, a more fundamental problem is going to be
> the mbuf structure, which stores a port id inside it in an 8-bit value.
> Upgrading that to a 16-bit value requires some thought, and verification to
> ensure any adjustment of fields does not lead to serious performance issues.
> 
> Therefore, I suggest we leave the port id values as 8-bits until such time
> as we need greater than 255 port values in a real-world use case.
> Out of interest - anyone have a DPDK app where they use >16 port id values? If
> so, how high does the port id value get?
> 
> Regards,
> /Bruce
> 

Resending with correct email addr for Neil.

/Bruce

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [RFC PATCH] lib/librte_ethdev: Expand port identifier
  2015-02-18 11:02  1% [dpdk-dev] [RFC PATCH] lib/librte_ethdev: Expand port identifier Tetsuya Mukawa
@ 2015-02-18 12:30  0% ` Bruce Richardson
  2015-02-18 12:31  0%   ` Bruce Richardson
  0 siblings, 1 reply; 200+ results
From: Bruce Richardson @ 2015-02-18 12:30 UTC (permalink / raw)
  To: Tetsuya Mukawa; +Cc: dev, nhroman

On Wed, Feb 18, 2015 at 08:02:49PM +0900, Tetsuya Mukawa wrote:
> Currently uint8_t is used for port identifier. This patch changes it,
> and use uint16_t as port identifier.
> This patch only changes ethdev library. ABI of the library will be
> kept even after applying it.
> 
> Also, this patch involves following fixes.
> - Use "port_id" as variable name instead of "port".
> 
>
> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
> ---
>  lib/librte_ether/rte_ethdev.c          |  212 +-
>  lib/librte_ether/rte_ethdev_internal.h | 3672 ++++++++++++++++++++++++++++++++
>  2 files changed, 3778 insertions(+), 106 deletions(-)
>  create mode 100644 lib/librte_ether/rte_ethdev_internal.h
> 
I'm not sure I follow why we need a new header file for this.
Also, thinking about this change, a more fundamental problem is going to be
the mbuf structure, which stores a port id inside it in an 8-bit value.
Upgrading that to a 16-bit value requires some thought, and verification to
ensure any adjustment of fields does not lead to serious performance issues.

Therefore, I suggest we leave the port id values as 8-bits until such time
as we need greater than 255 port values in a real-world use case.
Out of interest - anyone have a DPDK app where they use >16 port id values? If
so, how high does the port id value get?

Regards,
/Bruce

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v8 03/14] eal/pci, ethdev: Remove assumption that port will not be detached
  2015-02-18 10:58  0%               ` Tetsuya Mukawa
@ 2015-02-18 12:23  0%                 ` Bruce Richardson
  2015-02-18 12:38  0%                   ` Tetsuya Mukawa
  2015-02-18 12:33  0%                 ` Iremonger, Bernard
  1 sibling, 1 reply; 200+ results
From: Bruce Richardson @ 2015-02-18 12:23 UTC (permalink / raw)
  To: Tetsuya Mukawa; +Cc: dev, Neil Horman

On Wed, Feb 18, 2015 at 07:58:06PM +0900, Tetsuya Mukawa wrote:
> On 2015/02/18 19:03, Bruce Richardson wrote:
> > On Wed, Feb 18, 2015 at 10:57:25AM +0100, Thomas Monjalon wrote:
> >> 2015-02-18 15:10, Tetsuya Mukawa:
> >>> On 2015/02/18 10:54, Tetsuya Mukawa wrote:
> >>>> On 2015/02/18 9:31, Thomas Monjalon wrote:
> >>>>> 2015-02-17 15:14, Tetsuya Mukawa:
> >>>>>> On 2015/02/17 9:36, Thomas Monjalon wrote:
> >>>>>>> 2015-02-16 13:14, Tetsuya Mukawa:
> >>>>>>> Is uint8_t sill a good size for hotpluggable virtual device ids?
> >>>>>> I am not sure it's enough, but uint8_t is widely used in "rte_ethdev.c"
> >>>>>> as port id.
> >>>>>> If someone reports it doesn't enough, I guess it will be the time to
> >>>>>> write a patch to change all uint_8 in one patch.
> >>>>> It's a big ABI breakage. So if we feel it's going to be required,
> >>>>> it's better to do it now in 2.0 release I think.
> >>>>>
> >>>>> Any opinion?
> >>>>>
> >>>> Hi Thomas,
> >>>>
> >>>> I agree with it.
> >>>> I will add an one more patch to change uint8_t to uint16_t.
> >>>>
> >>>> Thanks,
> >>>> Tetsuya
> >>>>
> >>> Hi Thomas,
> >>>
> >>> Could I make sure.
> >>> After changing uint8_t to uint16_t in "rte_ethdev.[ch]", must I also
> >>> need to change other applications and libraries that call ethdev APIs?
> >>> If so, I would not finish it by 23rd.
> >>>
> >>> I've counted how many lines call ethdev APIs that are related to port_id.
> >>> Could you please check an attached file?
> >>> It's over 1200 lines. Probably to fix  one of caller, I will need to
> >>> check how port_id is used, and fix more related lines. So probably
> >>> thousands lines may need to be fixed.
> >>>
> >>> When is deadline for fixing this changing?
> >>> Also, if you have a good idea to fix it easier, could you please let me
> >>> know?
> >> It was an open question.
> >> If everybody is fine with 255 ports maximum, let's keep it as is.
> >>
> > I think we are probably ok for now (and forseeable future) with 255 max.
> >
> > However, if we did change it, I agree that in 2.0 is a very good time to do so.
> > Since we are expanding the field, rather than shrinking it, I don't see why we
> > can't just make the change at the ethdev level (and in libs API) in 2.0 and then in
> > later releases (e.g. 2.1) update the apps and examples to match. That way the
> > ABI stays the same from 2.0 onwards, and we don't have a huge amount of churn
> > changing it everywhere late in the 2.0 release cycle.
> 
> Hi Bruce,
> 
> Could you please check my RFC patch I will send soon?
> I wrote the patch like below.
> 
> 1. Copy header file like below.
> $ cp lib/librte_ether/rte_ethdev.h lib/librte_ether/rte_ethdev_internal.h
> 2. Change "rte_ethdev.c" to include "rte_ethdev_internal.h"
> 3. Change type of port id in "rte_ethdev.c" and "rte_ethdev_internal.h".
> 
> If the patch is OK, I wll send it with hotplug patches.
> 
> Thanks,
> Tetsuya
> 
>
Why the new ethdev internal file? 

^ permalink raw reply	[relevance 0%]

* [dpdk-dev] [RFC PATCH] lib/librte_ethdev: Expand port identifier
@ 2015-02-18 11:02  1% Tetsuya Mukawa
  2015-02-18 12:30  0% ` Bruce Richardson
  0 siblings, 1 reply; 200+ results
From: Tetsuya Mukawa @ 2015-02-18 11:02 UTC (permalink / raw)
  To: dev, bruce.richardson, thomas.monjalon; +Cc: nhroman

Currently uint8_t is used for the port identifier. This patch changes it
to use uint16_t as the port identifier.
This patch only changes the ethdev library. The ABI of the library will be
kept even after applying it.

Also, this patch includes the following fixes.
- Use "port_id" as variable name instead of "port".

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 lib/librte_ether/rte_ethdev.c          |  212 +-
 lib/librte_ether/rte_ethdev_internal.h | 3672 ++++++++++++++++++++++++++++++++
 2 files changed, 3778 insertions(+), 106 deletions(-)
 create mode 100644 lib/librte_ether/rte_ethdev_internal.h

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index ea3a1fb..3568e4a 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -68,7 +68,7 @@
 #include <rte_string_fns.h>
 
 #include "rte_ether.h"
-#include "rte_ethdev.h"
+#include "rte_ethdev_internal.h"
 
 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
 #define PMD_DEBUG_TRACE(fmt, args...) do {                        \
@@ -109,7 +109,7 @@
 static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
 struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
 static struct rte_eth_dev_data *rte_eth_dev_data = NULL;
-static uint8_t nb_ports = 0;
+static uint16_t nb_ports = 0;
 
 /* spinlock for eth device callbacks */
 static rte_spinlock_t rte_eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER;
@@ -309,14 +309,14 @@ rte_eth_driver_register(struct eth_driver *eth_drv)
 }
 
 int
-rte_eth_dev_socket_id(uint8_t port_id)
+rte_eth_dev_socket_id(uint16_t port_id)
 {
 	if (port_id >= nb_ports)
 		return -1;
 	return rte_eth_devices[port_id].pci_dev->numa_node;
 }
 
-uint8_t
+uint16_t
 rte_eth_dev_count(void)
 {
 	return (nb_ports);
@@ -361,7 +361,7 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
 }
 
 int
-rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id)
+rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -387,7 +387,7 @@ rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id)
 }
 
 int
-rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id)
+rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -413,7 +413,7 @@ rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id)
 }
 
 int
-rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t tx_queue_id)
+rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -439,7 +439,7 @@ rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t tx_queue_id)
 }
 
 int
-rte_eth_dev_tx_queue_stop(uint8_t port_id, uint16_t tx_queue_id)
+rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -503,7 +503,7 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
 }
 
 static int
-rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q)
+rte_eth_dev_check_vf_rss_rxq_num(uint16_t port_id, uint16_t nb_rx_q)
 {
 	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
 	switch (nb_rx_q) {
@@ -528,7 +528,7 @@ rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q)
 }
 
 static int
-rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
+rte_eth_dev_check_mq_mode(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		      const struct rte_eth_conf *dev_conf)
 {
 	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
@@ -692,7 +692,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 }
 
 int
-rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
+rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		      const struct rte_eth_conf *dev_conf)
 {
 	struct rte_eth_dev *dev;
@@ -830,7 +830,7 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 }
 
 static void
-rte_eth_dev_config_restore(uint8_t port_id)
+rte_eth_dev_config_restore(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 	struct rte_eth_dev_info dev_info;
@@ -879,7 +879,7 @@ rte_eth_dev_config_restore(uint8_t port_id)
 }
 
 int
-rte_eth_dev_start(uint8_t port_id)
+rte_eth_dev_start(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 	int diag;
@@ -915,7 +915,7 @@ rte_eth_dev_start(uint8_t port_id)
 }
 
 void
-rte_eth_dev_stop(uint8_t port_id)
+rte_eth_dev_stop(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -943,7 +943,7 @@ rte_eth_dev_stop(uint8_t port_id)
 }
 
 int
-rte_eth_dev_set_link_up(uint8_t port_id)
+rte_eth_dev_set_link_up(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -962,7 +962,7 @@ rte_eth_dev_set_link_up(uint8_t port_id)
 }
 
 int
-rte_eth_dev_set_link_down(uint8_t port_id)
+rte_eth_dev_set_link_down(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -981,7 +981,7 @@ rte_eth_dev_set_link_down(uint8_t port_id)
 }
 
 void
-rte_eth_dev_close(uint8_t port_id)
+rte_eth_dev_close(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -1002,7 +1002,7 @@ rte_eth_dev_close(uint8_t port_id)
 }
 
 int
-rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id,
+rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		       uint16_t nb_rx_desc, unsigned int socket_id,
 		       const struct rte_eth_rxconf *rx_conf,
 		       struct rte_mempool *mp)
@@ -1079,7 +1079,7 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id,
 }
 
 int
-rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id,
+rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
 		       uint16_t nb_tx_desc, unsigned int socket_id,
 		       const struct rte_eth_txconf *tx_conf)
 {
@@ -1119,7 +1119,7 @@ rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id,
 }
 
 void
-rte_eth_promiscuous_enable(uint8_t port_id)
+rte_eth_promiscuous_enable(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -1135,7 +1135,7 @@ rte_eth_promiscuous_enable(uint8_t port_id)
 }
 
 void
-rte_eth_promiscuous_disable(uint8_t port_id)
+rte_eth_promiscuous_disable(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -1151,7 +1151,7 @@ rte_eth_promiscuous_disable(uint8_t port_id)
 }
 
 int
-rte_eth_promiscuous_get(uint8_t port_id)
+rte_eth_promiscuous_get(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -1165,7 +1165,7 @@ rte_eth_promiscuous_get(uint8_t port_id)
 }
 
 void
-rte_eth_allmulticast_enable(uint8_t port_id)
+rte_eth_allmulticast_enable(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -1181,7 +1181,7 @@ rte_eth_allmulticast_enable(uint8_t port_id)
 }
 
 void
-rte_eth_allmulticast_disable(uint8_t port_id)
+rte_eth_allmulticast_disable(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -1197,7 +1197,7 @@ rte_eth_allmulticast_disable(uint8_t port_id)
 }
 
 int
-rte_eth_allmulticast_get(uint8_t port_id)
+rte_eth_allmulticast_get(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -1225,7 +1225,7 @@ rte_eth_dev_atomic_read_link_status(struct rte_eth_dev *dev,
 }
 
 void
-rte_eth_link_get(uint8_t port_id, struct rte_eth_link *eth_link)
+rte_eth_link_get(uint16_t port_id, struct rte_eth_link *eth_link)
 {
 	struct rte_eth_dev *dev;
 
@@ -1245,7 +1245,7 @@ rte_eth_link_get(uint8_t port_id, struct rte_eth_link *eth_link)
 }
 
 void
-rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *eth_link)
+rte_eth_link_get_nowait(uint16_t port_id, struct rte_eth_link *eth_link)
 {
 	struct rte_eth_dev *dev;
 
@@ -1265,7 +1265,7 @@ rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *eth_link)
 }
 
 void
-rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats)
+rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats)
 {
 	struct rte_eth_dev *dev;
 
@@ -1282,7 +1282,7 @@ rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats)
 }
 
 void
-rte_eth_stats_reset(uint8_t port_id)
+rte_eth_stats_reset(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -1298,7 +1298,7 @@ rte_eth_stats_reset(uint8_t port_id)
 
 /* retrieve ethdev extended statistics */
 int
-rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstats *xstats,
+rte_eth_xstats_get(uint16_t port_id, struct rte_eth_xstats *xstats,
 	unsigned n)
 {
 	struct rte_eth_stats eth_stats;
@@ -1372,7 +1372,7 @@ rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstats *xstats,
 
 /* reset ethdev extended statistics */
 void
-rte_eth_xstats_reset(uint8_t port_id)
+rte_eth_xstats_reset(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -1393,7 +1393,7 @@ rte_eth_xstats_reset(uint8_t port_id)
 }
 
 static int
-set_queue_stats_mapping(uint8_t port_id, uint16_t queue_id, uint8_t stat_idx,
+set_queue_stats_mapping(uint16_t port_id, uint16_t queue_id, uint8_t stat_idx,
 		uint8_t is_rx)
 {
 	struct rte_eth_dev *dev;
@@ -1411,7 +1411,7 @@ set_queue_stats_mapping(uint8_t port_id, uint16_t queue_id, uint8_t stat_idx,
 
 
 int
-rte_eth_dev_set_tx_queue_stats_mapping(uint8_t port_id, uint16_t tx_queue_id,
+rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id, uint16_t tx_queue_id,
 		uint8_t stat_idx)
 {
 	return set_queue_stats_mapping(port_id, tx_queue_id, stat_idx,
@@ -1420,7 +1420,7 @@ rte_eth_dev_set_tx_queue_stats_mapping(uint8_t port_id, uint16_t tx_queue_id,
 
 
 int
-rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, uint16_t rx_queue_id,
+rte_eth_dev_set_rx_queue_stats_mapping(uint16_t port_id, uint16_t rx_queue_id,
 		uint8_t stat_idx)
 {
 	return set_queue_stats_mapping(port_id, rx_queue_id, stat_idx,
@@ -1429,7 +1429,7 @@ rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, uint16_t rx_queue_id,
 
 
 void
-rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info)
+rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info)
 {
 	struct rte_eth_dev *dev;
 
@@ -1449,7 +1449,7 @@ rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info)
 }
 
 void
-rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr)
+rte_eth_macaddr_get(uint16_t port_id, struct ether_addr *mac_addr)
 {
 	struct rte_eth_dev *dev;
 
@@ -1463,7 +1463,7 @@ rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr)
 
 
 int
-rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu)
+rte_eth_dev_get_mtu(uint16_t port_id, uint16_t *mtu)
 {
 	struct rte_eth_dev *dev;
 
@@ -1478,7 +1478,7 @@ rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu)
 }
 
 int
-rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu)
+rte_eth_dev_set_mtu(uint16_t port_id, uint16_t mtu)
 {
 	int ret;
 	struct rte_eth_dev *dev;
@@ -1499,7 +1499,7 @@ rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu)
 }
 
 int
-rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
+rte_eth_dev_vlan_filter(uint16_t port_id, uint16_t vlan_id, int on)
 {
 	struct rte_eth_dev *dev;
 
@@ -1524,7 +1524,7 @@ rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
 }
 
 int
-rte_eth_dev_set_vlan_strip_on_queue(uint8_t port_id, uint16_t rx_queue_id, int on)
+rte_eth_dev_set_vlan_strip_on_queue(uint16_t port_id, uint16_t rx_queue_id, int on)
 {
 	struct rte_eth_dev *dev;
 
@@ -1546,7 +1546,7 @@ rte_eth_dev_set_vlan_strip_on_queue(uint8_t port_id, uint16_t rx_queue_id, int o
 }
 
 int
-rte_eth_dev_set_vlan_ether_type(uint8_t port_id, uint16_t tpid)
+rte_eth_dev_set_vlan_ether_type(uint16_t port_id, uint16_t tpid)
 {
 	struct rte_eth_dev *dev;
 
@@ -1563,7 +1563,7 @@ rte_eth_dev_set_vlan_ether_type(uint8_t port_id, uint16_t tpid)
 }
 
 int
-rte_eth_dev_set_vlan_offload(uint8_t port_id, int offload_mask)
+rte_eth_dev_set_vlan_offload(uint16_t port_id, int offload_mask)
 {
 	struct rte_eth_dev *dev;
 	int ret = 0;
@@ -1610,7 +1610,7 @@ rte_eth_dev_set_vlan_offload(uint8_t port_id, int offload_mask)
 }
 
 int
-rte_eth_dev_get_vlan_offload(uint8_t port_id)
+rte_eth_dev_get_vlan_offload(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 	int ret = 0;
@@ -1635,7 +1635,7 @@ rte_eth_dev_get_vlan_offload(uint8_t port_id)
 }
 
 int
-rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t pvid, int on)
+rte_eth_dev_set_vlan_pvid(uint16_t port_id, uint16_t pvid, int on)
 {
 	struct rte_eth_dev *dev;
 
@@ -1651,7 +1651,7 @@ rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t pvid, int on)
 }
 
 int
-rte_eth_dev_fdir_add_signature_filter(uint8_t port_id,
+rte_eth_dev_fdir_add_signature_filter(uint16_t port_id,
 				      struct rte_fdir_filter *fdir_filter,
 				      uint8_t queue)
 {
@@ -1685,7 +1685,7 @@ rte_eth_dev_fdir_add_signature_filter(uint8_t port_id,
 }
 
 int
-rte_eth_dev_fdir_update_signature_filter(uint8_t port_id,
+rte_eth_dev_fdir_update_signature_filter(uint16_t port_id,
 					 struct rte_fdir_filter *fdir_filter,
 					 uint8_t queue)
 {
@@ -1720,7 +1720,7 @@ rte_eth_dev_fdir_update_signature_filter(uint8_t port_id,
 }
 
 int
-rte_eth_dev_fdir_remove_signature_filter(uint8_t port_id,
+rte_eth_dev_fdir_remove_signature_filter(uint16_t port_id,
 					 struct rte_fdir_filter *fdir_filter)
 {
 	struct rte_eth_dev *dev;
@@ -1752,7 +1752,7 @@ rte_eth_dev_fdir_remove_signature_filter(uint8_t port_id,
 }
 
 int
-rte_eth_dev_fdir_get_infos(uint8_t port_id, struct rte_eth_fdir *fdir)
+rte_eth_dev_fdir_get_infos(uint16_t port_id, struct rte_eth_fdir *fdir)
 {
 	struct rte_eth_dev *dev;
 
@@ -1774,7 +1774,7 @@ rte_eth_dev_fdir_get_infos(uint8_t port_id, struct rte_eth_fdir *fdir)
 }
 
 int
-rte_eth_dev_fdir_add_perfect_filter(uint8_t port_id,
+rte_eth_dev_fdir_add_perfect_filter(uint16_t port_id,
 				    struct rte_fdir_filter *fdir_filter,
 				    uint16_t soft_id, uint8_t queue,
 				    uint8_t drop)
@@ -1814,7 +1814,7 @@ rte_eth_dev_fdir_add_perfect_filter(uint8_t port_id,
 }
 
 int
-rte_eth_dev_fdir_update_perfect_filter(uint8_t port_id,
+rte_eth_dev_fdir_update_perfect_filter(uint16_t port_id,
 				       struct rte_fdir_filter *fdir_filter,
 				       uint16_t soft_id, uint8_t queue,
 				       uint8_t drop)
@@ -1853,7 +1853,7 @@ rte_eth_dev_fdir_update_perfect_filter(uint8_t port_id,
 }
 
 int
-rte_eth_dev_fdir_remove_perfect_filter(uint8_t port_id,
+rte_eth_dev_fdir_remove_perfect_filter(uint16_t port_id,
 				       struct rte_fdir_filter *fdir_filter,
 				       uint16_t soft_id)
 {
@@ -1891,7 +1891,7 @@ rte_eth_dev_fdir_remove_perfect_filter(uint8_t port_id,
 }
 
 int
-rte_eth_dev_fdir_set_masks(uint8_t port_id, struct rte_fdir_masks *fdir_mask)
+rte_eth_dev_fdir_set_masks(uint16_t port_id, struct rte_fdir_masks *fdir_mask)
 {
 	struct rte_eth_dev *dev;
 
@@ -1911,7 +1911,7 @@ rte_eth_dev_fdir_set_masks(uint8_t port_id, struct rte_fdir_masks *fdir_mask)
 }
 
 int
-rte_eth_dev_flow_ctrl_get(uint8_t port_id, struct rte_eth_fc_conf *fc_conf)
+rte_eth_dev_flow_ctrl_get(uint16_t port_id, struct rte_eth_fc_conf *fc_conf)
 {
 	struct rte_eth_dev *dev;
 
@@ -1927,7 +1927,7 @@ rte_eth_dev_flow_ctrl_get(uint8_t port_id, struct rte_eth_fc_conf *fc_conf)
 }
 
 int
-rte_eth_dev_flow_ctrl_set(uint8_t port_id, struct rte_eth_fc_conf *fc_conf)
+rte_eth_dev_flow_ctrl_set(uint16_t port_id, struct rte_eth_fc_conf *fc_conf)
 {
 	struct rte_eth_dev *dev;
 
@@ -1947,7 +1947,7 @@ rte_eth_dev_flow_ctrl_set(uint8_t port_id, struct rte_eth_fc_conf *fc_conf)
 }
 
 int
-rte_eth_dev_priority_flow_ctrl_set(uint8_t port_id, struct rte_eth_pfc_conf *pfc_conf)
+rte_eth_dev_priority_flow_ctrl_set(uint16_t port_id, struct rte_eth_pfc_conf *pfc_conf)
 {
 	struct rte_eth_dev *dev;
 
@@ -2023,7 +2023,7 @@ rte_eth_check_reta_entry(struct rte_eth_rss_reta_entry64 *reta_conf,
 }
 
 int
-rte_eth_dev_rss_reta_update(uint8_t port_id,
+rte_eth_dev_rss_reta_update(uint16_t port_id,
 			    struct rte_eth_rss_reta_entry64 *reta_conf,
 			    uint16_t reta_size)
 {
@@ -2053,7 +2053,7 @@ rte_eth_dev_rss_reta_update(uint8_t port_id,
 }
 
 int
-rte_eth_dev_rss_reta_query(uint8_t port_id,
+rte_eth_dev_rss_reta_query(uint16_t port_id,
 			   struct rte_eth_rss_reta_entry64 *reta_conf,
 			   uint16_t reta_size)
 {
@@ -2076,7 +2076,7 @@ rte_eth_dev_rss_reta_query(uint8_t port_id,
 }
 
 int
-rte_eth_dev_rss_hash_update(uint8_t port_id, struct rte_eth_rss_conf *rss_conf)
+rte_eth_dev_rss_hash_update(uint16_t port_id, struct rte_eth_rss_conf *rss_conf)
 {
 	struct rte_eth_dev *dev;
 	uint16_t rss_hash_protos;
@@ -2098,7 +2098,7 @@ rte_eth_dev_rss_hash_update(uint8_t port_id, struct rte_eth_rss_conf *rss_conf)
 }
 
 int
-rte_eth_dev_rss_hash_conf_get(uint8_t port_id,
+rte_eth_dev_rss_hash_conf_get(uint16_t port_id,
 			      struct rte_eth_rss_conf *rss_conf)
 {
 	struct rte_eth_dev *dev;
@@ -2113,7 +2113,7 @@ rte_eth_dev_rss_hash_conf_get(uint8_t port_id,
 }
 
 int
-rte_eth_dev_udp_tunnel_add(uint8_t port_id,
+rte_eth_dev_udp_tunnel_add(uint16_t port_id,
 			   struct rte_eth_udp_tunnel *udp_tunnel)
 {
 	struct rte_eth_dev *dev;
@@ -2139,7 +2139,7 @@ rte_eth_dev_udp_tunnel_add(uint8_t port_id,
 }
 
 int
-rte_eth_dev_udp_tunnel_delete(uint8_t port_id,
+rte_eth_dev_udp_tunnel_delete(uint16_t port_id,
 			      struct rte_eth_udp_tunnel *udp_tunnel)
 {
 	struct rte_eth_dev *dev;
@@ -2165,7 +2165,7 @@ rte_eth_dev_udp_tunnel_delete(uint8_t port_id,
 }
 
 int
-rte_eth_led_on(uint8_t port_id)
+rte_eth_led_on(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -2180,7 +2180,7 @@ rte_eth_led_on(uint8_t port_id)
 }
 
 int
-rte_eth_led_off(uint8_t port_id)
+rte_eth_led_off(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -2199,7 +2199,7 @@ rte_eth_led_off(uint8_t port_id)
  * an empty spot.
  */
 static inline int
-get_mac_addr_index(uint8_t port_id, struct ether_addr *addr)
+get_mac_addr_index(uint16_t port_id, struct ether_addr *addr)
 {
 	struct rte_eth_dev_info dev_info;
 	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
@@ -2217,7 +2217,7 @@ get_mac_addr_index(uint8_t port_id, struct ether_addr *addr)
 static struct ether_addr null_mac_addr = {{0, 0, 0, 0, 0, 0}};
 
 int
-rte_eth_dev_mac_addr_add(uint8_t port_id, struct ether_addr *addr,
+rte_eth_dev_mac_addr_add(uint16_t port_id, struct ether_addr *addr,
 			uint32_t pool)
 {
 	struct rte_eth_dev *dev;
@@ -2270,7 +2270,7 @@ rte_eth_dev_mac_addr_add(uint8_t port_id, struct ether_addr *addr,
 }
 
 int
-rte_eth_dev_mac_addr_remove(uint8_t port_id, struct ether_addr *addr)
+rte_eth_dev_mac_addr_remove(uint16_t port_id, struct ether_addr *addr)
 {
 	struct rte_eth_dev *dev;
 	int index;
@@ -2302,7 +2302,7 @@ rte_eth_dev_mac_addr_remove(uint8_t port_id, struct ether_addr *addr)
 }
 
 int
-rte_eth_dev_set_vf_rxmode(uint8_t port_id,  uint16_t vf,
+rte_eth_dev_set_vf_rxmode(uint16_t port_id,  uint16_t vf,
 				uint16_t rx_mode, uint8_t on)
 {
 	uint16_t num_vfs;
@@ -2338,7 +2338,7 @@ rte_eth_dev_set_vf_rxmode(uint8_t port_id,  uint16_t vf,
  * an empty spot.
  */
 static inline int
-get_hash_mac_addr_index(uint8_t port_id, struct ether_addr *addr)
+get_hash_mac_addr_index(uint16_t port_id, struct ether_addr *addr)
 {
 	struct rte_eth_dev_info dev_info;
 	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
@@ -2357,7 +2357,7 @@ get_hash_mac_addr_index(uint8_t port_id, struct ether_addr *addr)
 }
 
 int
-rte_eth_dev_uc_hash_table_set(uint8_t port_id, struct ether_addr *addr,
+rte_eth_dev_uc_hash_table_set(uint16_t port_id, struct ether_addr *addr,
 				uint8_t on)
 {
 	int index;
@@ -2413,7 +2413,7 @@ rte_eth_dev_uc_hash_table_set(uint8_t port_id, struct ether_addr *addr,
 }
 
 int
-rte_eth_dev_uc_all_hash_table_set(uint8_t port_id, uint8_t on)
+rte_eth_dev_uc_all_hash_table_set(uint16_t port_id, uint8_t on)
 {
 	struct rte_eth_dev *dev;
 
@@ -2430,7 +2430,7 @@ rte_eth_dev_uc_all_hash_table_set(uint8_t port_id, uint8_t on)
 }
 
 int
-rte_eth_dev_set_vf_rx(uint8_t port_id,uint16_t vf, uint8_t on)
+rte_eth_dev_set_vf_rx(uint16_t port_id,uint16_t vf, uint8_t on)
 {
 	uint16_t num_vfs;
 	struct rte_eth_dev *dev;
@@ -2456,7 +2456,7 @@ rte_eth_dev_set_vf_rx(uint8_t port_id,uint16_t vf, uint8_t on)
 }
 
 int
-rte_eth_dev_set_vf_tx(uint8_t port_id,uint16_t vf, uint8_t on)
+rte_eth_dev_set_vf_tx(uint16_t port_id,uint16_t vf, uint8_t on)
 {
 	uint16_t num_vfs;
 	struct rte_eth_dev *dev;
@@ -2482,7 +2482,7 @@ rte_eth_dev_set_vf_tx(uint8_t port_id,uint16_t vf, uint8_t on)
 }
 
 int
-rte_eth_dev_set_vf_vlan_filter(uint8_t port_id, uint16_t vlan_id,
+rte_eth_dev_set_vf_vlan_filter(uint16_t port_id, uint16_t vlan_id,
 				 uint64_t vf_mask,uint8_t vlan_on)
 {
 	struct rte_eth_dev *dev;
@@ -2511,7 +2511,7 @@ rte_eth_dev_set_vf_vlan_filter(uint8_t port_id, uint16_t vlan_id,
 						vf_mask,vlan_on);
 }
 
-int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx,
+int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
 					uint16_t tx_rate)
 {
 	struct rte_eth_dev *dev;
@@ -2545,7 +2545,7 @@ int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx,
 	return (*dev->dev_ops->set_queue_rate_limit)(dev, queue_idx, tx_rate);
 }
 
-int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf, uint16_t tx_rate,
+int rte_eth_set_vf_rate_limit(uint16_t port_id, uint16_t vf, uint16_t tx_rate,
 				uint64_t q_msk)
 {
 	struct rte_eth_dev *dev;
@@ -2583,7 +2583,7 @@ int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf, uint16_t tx_rate,
 }
 
 int
-rte_eth_mirror_rule_set(uint8_t port_id,
+rte_eth_mirror_rule_set(uint16_t port_id,
 			struct rte_eth_vmdq_mirror_conf *mirror_conf,
 			uint8_t rule_id, uint8_t on)
 {
@@ -2626,7 +2626,7 @@ rte_eth_mirror_rule_set(uint8_t port_id,
 }
 
 int
-rte_eth_mirror_rule_reset(uint8_t port_id, uint8_t rule_id)
+rte_eth_mirror_rule_reset(uint16_t port_id, uint8_t rule_id)
 {
 	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
 
@@ -2650,7 +2650,7 @@ rte_eth_mirror_rule_reset(uint8_t port_id, uint8_t rule_id)
 
 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
 uint16_t
-rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
+rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id,
 		 struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
 	struct rte_eth_dev *dev;
@@ -2670,7 +2670,7 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
 }
 
 uint16_t
-rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
+rte_eth_tx_burst(uint16_t port_id, uint16_t queue_id,
 		 struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
 	struct rte_eth_dev *dev;
@@ -2691,7 +2691,7 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
 }
 
 uint32_t
-rte_eth_rx_queue_count(uint8_t port_id, uint16_t queue_id)
+rte_eth_rx_queue_count(uint16_t port_id, uint16_t queue_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -2705,7 +2705,7 @@ rte_eth_rx_queue_count(uint8_t port_id, uint16_t queue_id)
 }
 
 int
-rte_eth_rx_descriptor_done(uint8_t port_id, uint16_t queue_id, uint16_t offset)
+rte_eth_rx_descriptor_done(uint16_t port_id, uint16_t queue_id, uint16_t offset)
 {
 	struct rte_eth_dev *dev;
 
@@ -2721,7 +2721,7 @@ rte_eth_rx_descriptor_done(uint8_t port_id, uint16_t queue_id, uint16_t offset)
 #endif
 
 int
-rte_eth_dev_callback_register(uint8_t port_id,
+rte_eth_dev_callback_register(uint16_t port_id,
 			enum rte_eth_event_type event,
 			rte_eth_dev_cb_fn cb_fn, void *cb_arg)
 {
@@ -2760,7 +2760,7 @@ rte_eth_dev_callback_register(uint8_t port_id,
 }
 
 int
-rte_eth_dev_callback_unregister(uint8_t port_id,
+rte_eth_dev_callback_unregister(uint16_t port_id,
 			enum rte_eth_event_type event,
 			rte_eth_dev_cb_fn cb_fn, void *cb_arg)
 {
@@ -2826,7 +2826,7 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 	rte_spinlock_unlock(&rte_eth_dev_cb_lock);
 }
 #ifdef RTE_NIC_BYPASS
-int rte_eth_dev_bypass_init(uint8_t port_id)
+int rte_eth_dev_bypass_init(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -2846,7 +2846,7 @@ int rte_eth_dev_bypass_init(uint8_t port_id)
 }
 
 int
-rte_eth_dev_bypass_state_show(uint8_t port_id, uint32_t *state)
+rte_eth_dev_bypass_state_show(uint16_t port_id, uint32_t *state)
 {
 	struct rte_eth_dev *dev;
 
@@ -2865,7 +2865,7 @@ rte_eth_dev_bypass_state_show(uint8_t port_id, uint32_t *state)
 }
 
 int
-rte_eth_dev_bypass_state_set(uint8_t port_id, uint32_t *new_state)
+rte_eth_dev_bypass_state_set(uint16_t port_id, uint32_t *new_state)
 {
 	struct rte_eth_dev *dev;
 
@@ -2885,7 +2885,7 @@ rte_eth_dev_bypass_state_set(uint8_t port_id, uint32_t *new_state)
 }
 
 int
-rte_eth_dev_bypass_event_show(uint8_t port_id, uint32_t event, uint32_t *state)
+rte_eth_dev_bypass_event_show(uint16_t port_id, uint32_t event, uint32_t *state)
 {
 	struct rte_eth_dev *dev;
 
@@ -2905,7 +2905,7 @@ rte_eth_dev_bypass_event_show(uint8_t port_id, uint32_t event, uint32_t *state)
 }
 
 int
-rte_eth_dev_bypass_event_store(uint8_t port_id, uint32_t event, uint32_t state)
+rte_eth_dev_bypass_event_store(uint16_t port_id, uint32_t event, uint32_t state)
 {
 	struct rte_eth_dev *dev;
 
@@ -2925,7 +2925,7 @@ rte_eth_dev_bypass_event_store(uint8_t port_id, uint32_t event, uint32_t state)
 }
 
 int
-rte_eth_dev_wd_timeout_store(uint8_t port_id, uint32_t timeout)
+rte_eth_dev_wd_timeout_store(uint16_t port_id, uint32_t timeout)
 {
 	struct rte_eth_dev *dev;
 
@@ -2945,7 +2945,7 @@ rte_eth_dev_wd_timeout_store(uint8_t port_id, uint32_t timeout)
 }
 
 int
-rte_eth_dev_bypass_ver_show(uint8_t port_id, uint32_t *ver)
+rte_eth_dev_bypass_ver_show(uint16_t port_id, uint32_t *ver)
 {
 	struct rte_eth_dev *dev;
 
@@ -2965,7 +2965,7 @@ rte_eth_dev_bypass_ver_show(uint8_t port_id, uint32_t *ver)
 }
 
 int
-rte_eth_dev_bypass_wd_timeout_show(uint8_t port_id, uint32_t *wd_timeout)
+rte_eth_dev_bypass_wd_timeout_show(uint16_t port_id, uint32_t *wd_timeout)
 {
 	struct rte_eth_dev *dev;
 
@@ -2985,7 +2985,7 @@ rte_eth_dev_bypass_wd_timeout_show(uint8_t port_id, uint32_t *wd_timeout)
 }
 
 int
-rte_eth_dev_bypass_wd_reset(uint8_t port_id)
+rte_eth_dev_bypass_wd_reset(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -3006,7 +3006,7 @@ rte_eth_dev_bypass_wd_reset(uint8_t port_id)
 #endif
 
 int
-rte_eth_dev_add_syn_filter(uint8_t port_id,
+rte_eth_dev_add_syn_filter(uint16_t port_id,
 			struct rte_syn_filter *filter, uint16_t rx_queue)
 {
 	struct rte_eth_dev *dev;
@@ -3022,7 +3022,7 @@ rte_eth_dev_add_syn_filter(uint8_t port_id,
 }
 
 int
-rte_eth_dev_remove_syn_filter(uint8_t port_id)
+rte_eth_dev_remove_syn_filter(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
 
@@ -3037,7 +3037,7 @@ rte_eth_dev_remove_syn_filter(uint8_t port_id)
 }
 
 int
-rte_eth_dev_get_syn_filter(uint8_t port_id,
+rte_eth_dev_get_syn_filter(uint16_t port_id,
 			struct rte_syn_filter *filter, uint16_t *rx_queue)
 {
 	struct rte_eth_dev *dev;
@@ -3056,7 +3056,7 @@ rte_eth_dev_get_syn_filter(uint8_t port_id,
 }
 
 int
-rte_eth_dev_add_2tuple_filter(uint8_t port_id, uint16_t index,
+rte_eth_dev_add_2tuple_filter(uint16_t port_id, uint16_t index,
 			struct rte_2tuple_filter *filter, uint16_t rx_queue)
 {
 	struct rte_eth_dev *dev;
@@ -3079,7 +3079,7 @@ rte_eth_dev_add_2tuple_filter(uint8_t port_id, uint16_t index,
 }
 
 int
-rte_eth_dev_remove_2tuple_filter(uint8_t port_id, uint16_t index)
+rte_eth_dev_remove_2tuple_filter(uint16_t port_id, uint16_t index)
 {
 	struct rte_eth_dev *dev;
 
@@ -3094,7 +3094,7 @@ rte_eth_dev_remove_2tuple_filter(uint8_t port_id, uint16_t index)
 }
 
 int
-rte_eth_dev_get_2tuple_filter(uint8_t port_id, uint16_t index,
+rte_eth_dev_get_2tuple_filter(uint16_t port_id, uint16_t index,
 			struct rte_2tuple_filter *filter, uint16_t *rx_queue)
 {
 	struct rte_eth_dev *dev;
@@ -3113,7 +3113,7 @@ rte_eth_dev_get_2tuple_filter(uint8_t port_id, uint16_t index,
 }
 
 int
-rte_eth_dev_add_5tuple_filter(uint8_t port_id, uint16_t index,
+rte_eth_dev_add_5tuple_filter(uint16_t port_id, uint16_t index,
 			struct rte_5tuple_filter *filter, uint16_t rx_queue)
 {
 	struct rte_eth_dev *dev;
@@ -3137,7 +3137,7 @@ rte_eth_dev_add_5tuple_filter(uint8_t port_id, uint16_t index,
 }
 
 int
-rte_eth_dev_remove_5tuple_filter(uint8_t port_id, uint16_t index)
+rte_eth_dev_remove_5tuple_filter(uint16_t port_id, uint16_t index)
 {
 	struct rte_eth_dev *dev;
 
@@ -3152,7 +3152,7 @@ rte_eth_dev_remove_5tuple_filter(uint8_t port_id, uint16_t index)
 }
 
 int
-rte_eth_dev_get_5tuple_filter(uint8_t port_id, uint16_t index,
+rte_eth_dev_get_5tuple_filter(uint16_t port_id, uint16_t index,
 			struct rte_5tuple_filter *filter, uint16_t *rx_queue)
 {
 	struct rte_eth_dev *dev;
@@ -3172,7 +3172,7 @@ rte_eth_dev_get_5tuple_filter(uint8_t port_id, uint16_t index,
 }
 
 int
-rte_eth_dev_add_flex_filter(uint8_t port_id, uint16_t index,
+rte_eth_dev_add_flex_filter(uint16_t port_id, uint16_t index,
 			struct rte_flex_filter *filter, uint16_t rx_queue)
 {
 	struct rte_eth_dev *dev;
@@ -3188,7 +3188,7 @@ rte_eth_dev_add_flex_filter(uint8_t port_id, uint16_t index,
 }
 
 int
-rte_eth_dev_remove_flex_filter(uint8_t port_id, uint16_t index)
+rte_eth_dev_remove_flex_filter(uint16_t port_id, uint16_t index)
 {
 	struct rte_eth_dev *dev;
 
@@ -3203,7 +3203,7 @@ rte_eth_dev_remove_flex_filter(uint8_t port_id, uint16_t index)
 }
 
 int
-rte_eth_dev_get_flex_filter(uint8_t port_id, uint16_t index,
+rte_eth_dev_get_flex_filter(uint16_t port_id, uint16_t index,
 			struct rte_flex_filter *filter, uint16_t *rx_queue)
 {
 	struct rte_eth_dev *dev;
@@ -3223,7 +3223,7 @@ rte_eth_dev_get_flex_filter(uint8_t port_id, uint16_t index,
 }
 
 int
-rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_type)
+rte_eth_dev_filter_supported(uint16_t port_id, enum rte_filter_type filter_type)
 {
 	struct rte_eth_dev *dev;
 
@@ -3239,7 +3239,7 @@ rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_type)
 }
 
 int
-rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
+rte_eth_dev_filter_ctrl(uint16_t port_id, enum rte_filter_type filter_type,
 		       enum rte_filter_op filter_op, void *arg)
 {
 	struct rte_eth_dev *dev;
diff --git a/lib/librte_ether/rte_ethdev_internal.h b/lib/librte_ether/rte_ethdev_internal.h
new file mode 100644
index 0000000..06068ad
--- /dev/null
+++ b/lib/librte_ether/rte_ethdev_internal.h
@@ -0,0 +1,3672 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETHDEV_H_
+#define _RTE_ETHDEV_H_
+
+/**
+ * @file
+ *
+ * RTE Ethernet Device API
+ *
+ * The Ethernet Device API is composed of two parts:
+ *
+ * - The application-oriented Ethernet API that includes functions to setup
+ *   an Ethernet device (configure it, setup its RX and TX queues and start it),
+ *   to get its MAC address, the speed and the status of its physical link,
+ *   to receive and to transmit packets, and so on.
+ *
+ * - The driver-oriented Ethernet API that exports a function allowing
+ *   an Ethernet Poll Mode Driver (PMD) to simultaneously register itself as
+ *   an Ethernet device driver and as a PCI driver for a set of matching PCI
+ *   [Ethernet] devices classes.
+ *
+ * By default, all the functions of the Ethernet Device API exported by a PMD
+ * are lock-free functions which are assumed not to be invoked in parallel on
+ * different logical cores to work on the same target object.  For instance,
+ * the receive function of a PMD cannot be invoked in parallel on two logical
+ * cores to poll the same RX queue [of the same port]. Of course, this function
+ * can be invoked in parallel by different logical cores on different RX queues.
+ * It is the responsibility of the upper level application to enforce this rule.
+ *
+ * If needed, parallel accesses by multiple logical cores to shared queues
+ * shall be explicitly protected by dedicated inline lock-aware functions
+ * built on top of their corresponding lock-free functions of the PMD API.
+ *
+ * In all functions of the Ethernet API, the Ethernet device is
+ * designated by an integer >= 0 named the device port identifier.
+ *
+ * At the Ethernet driver level, Ethernet devices are represented by a generic
+ * data structure of type *rte_eth_dev*.
+ *
+ * Ethernet devices are dynamically registered during the PCI probing phase
+ * performed at EAL initialization time.
+ * When an Ethernet device is being probed, an *rte_eth_dev* structure and
+ * a new port identifier are allocated for that device. Then, the eth_dev_init()
+ * function supplied by the Ethernet driver matching the probed PCI
+ * device is invoked to properly initialize the device.
+ *
+ * The role of the device init function consists of resetting the hardware,
+ * checking access to Non-volatile Memory (NVM), reading the MAC address
+ * from NVM etc.
+ *
+ * If the device init operation is successful, the correspondence between
+ * the port identifier assigned to the new device and its associated
+ * *rte_eth_dev* structure is effectively registered.
+ * Otherwise, both the *rte_eth_dev* structure and the port identifier are
+ * freed.
+ *
+ * The functions exported by the application Ethernet API to setup a device
+ * designated by its port identifier must be invoked in the following order:
+ *     - rte_eth_dev_configure()
+ *     - rte_eth_tx_queue_setup()
+ *     - rte_eth_rx_queue_setup()
+ *     - rte_eth_dev_start()
+ *
+ * Then, the network application can invoke, in any order, the functions
+ * exported by the Ethernet API to get the MAC address of a given device, to
+ * get the speed and the status of a device physical link, to receive/transmit
+ * [burst of] packets, and so on.
+ *
+ * If the application wants to change the configuration (i.e. call
+ * rte_eth_dev_configure(), rte_eth_tx_queue_setup(), or
+ * rte_eth_rx_queue_setup()), it must call rte_eth_dev_stop() first to stop the
+ * device and then do the reconfiguration before calling rte_eth_dev_start()
+ * again. The transmit and receive functions should not be invoked when the
+ * device is stopped.
+ *
+ * Please note that some configuration is not stored between calls to
+ * rte_eth_dev_stop()/rte_eth_dev_start(). The following configuration will
+ * be retained:
+ *
+ *     - flow control settings
+ *     - receive mode configuration (promiscuous mode, hardware checksum mode,
+ *       RSS/VMDQ settings etc.)
+ *     - VLAN filtering configuration
+ *     - MAC addresses supplied to MAC address array
+ *     - flow director filtering mode (but not filtering rules)
+ *     - NIC queue statistics mappings
+ *
+ * Any other configuration will not be stored and will need to be re-entered
+ * after a call to rte_eth_dev_start().
+ *
+ * Finally, a network application can close an Ethernet device by invoking the
+ * rte_eth_dev_close() function.
+ *
+ * Each function of the application Ethernet API invokes a specific function
+ * of the PMD that controls the target device designated by its port
+ * identifier.
+ * For this purpose, all device-specific functions of an Ethernet driver are
+ * supplied through a set of pointers contained in a generic structure of type
+ * *eth_dev_ops*.
+ * The address of the *eth_dev_ops* structure is stored in the *rte_eth_dev*
+ * structure by the device init function of the Ethernet driver, which is
+ * invoked during the PCI probing phase, as explained earlier.
+ *
+ * In other words, each function of the Ethernet API simply retrieves the
+ * *rte_eth_dev* structure associated with the device port identifier and
+ * performs an indirect invocation of the corresponding driver function
+ * supplied in the *eth_dev_ops* structure of the *rte_eth_dev* structure.
+ *
+ * For performance reasons, the address of the burst-oriented RX and TX
+ * functions of the Ethernet driver are not contained in the *eth_dev_ops*
+ * structure. Instead, they are directly stored at the beginning of the
+ * *rte_eth_dev* structure to avoid an extra indirect memory access during
+ * their invocation.
+ *
+ * RTE ethernet device drivers do not use interrupts for transmitting or
+ * receiving. Instead, Ethernet drivers export Poll-Mode receive and transmit
+ * functions to applications.
+ * Both receive and transmit functions are packet-burst oriented to minimize
+ * their cost per packet through the following optimizations:
+ *
+ * - Sharing among multiple packets the incompressible cost of the
+ *   invocation of receive/transmit functions.
+ *
+ * - Enabling receive/transmit functions to take advantage of burst-oriented
+ *   hardware features (L1 cache, prefetch instructions, NIC head/tail
+ *   registers) to minimize the number of CPU cycles per packet, for instance,
+ *   by avoiding useless read memory accesses to ring descriptors, or by
+ *   systematically using arrays of pointers that exactly fit L1 cache line
+ *   boundaries and sizes.
+ *
+ * The burst-oriented receive function does not provide any error notification,
+ * to avoid the corresponding overhead. As a hint, the upper-level application
+ * might check the status of the device link once being systematically returned
+ * a 0 value by the receive function of the driver for a given number of tries.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#include <rte_log.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_mbuf.h>
+#include "rte_ether.h"
+#include "rte_eth_ctrl.h"
+
+/**
+ * A structure used to retrieve statistics for an Ethernet port.
+ */
+struct rte_eth_stats {
+	uint64_t ipackets;  /**< Total number of successfully received packets. */
+	uint64_t opackets;  /**< Total number of successfully transmitted packets.*/
+	uint64_t ibytes;    /**< Total number of successfully received bytes. */
+	uint64_t obytes;    /**< Total number of successfully transmitted bytes. */
+	uint64_t imissed;   /**< Total of RX missed packets (e.g full FIFO). */
+	uint64_t ibadcrc;   /**< Total of RX packets with CRC error. */
+	uint64_t ibadlen;   /**< Total of RX packets with bad length. */
+	uint64_t ierrors;   /**< Total number of erroneous received packets. */
+	uint64_t oerrors;   /**< Total number of failed transmitted packets. */
+	uint64_t imcasts;   /**< Total number of multicast received packets. */
+	uint64_t rx_nombuf; /**< Total number of RX mbuf allocation failures. */
+	uint64_t fdirmatch; /**< Total number of RX packets matching a filter. */
+	uint64_t fdirmiss;  /**< Total number of RX packets not matching any filter. */
+	uint64_t tx_pause_xon;  /**< Total nb. of XON pause frame sent. */
+	uint64_t rx_pause_xon;  /**< Total nb. of XON pause frame received. */
+	uint64_t tx_pause_xoff; /**< Total nb. of XOFF pause frame sent. */
+	uint64_t rx_pause_xoff; /**< Total nb. of XOFF pause frame received. */
+	uint64_t q_ipackets[RTE_ETHDEV_QUEUE_STAT_CNTRS];
+	/**< Total number of queue RX packets. */
+	uint64_t q_opackets[RTE_ETHDEV_QUEUE_STAT_CNTRS];
+	/**< Total number of queue TX packets. */
+	uint64_t q_ibytes[RTE_ETHDEV_QUEUE_STAT_CNTRS];
+	/**< Total number of successfully received queue bytes. */
+	uint64_t q_obytes[RTE_ETHDEV_QUEUE_STAT_CNTRS];
+	/**< Total number of successfully transmitted queue bytes. */
+	uint64_t q_errors[RTE_ETHDEV_QUEUE_STAT_CNTRS];
+	/**< Total number of queue packets received that are dropped. */
+	uint64_t ilbpackets;
+	/**< Total number of good packets received from loopback,VF Only */
+	uint64_t olbpackets;
+	/**< Total number of good packets transmitted to loopback,VF Only */
+	uint64_t ilbbytes;
+	/**< Total number of good bytes received from loopback,VF Only */
+	uint64_t olbbytes;
+	/**< Total number of good bytes transmitted to loopback,VF Only */
+};
+
+/**
+ * A structure used to retrieve link-level information of an Ethernet port.
+ */
+struct rte_eth_link {
+	uint16_t link_speed;      /**< ETH_LINK_SPEED_[10, 100, 1000, 10000] */
+	uint16_t link_duplex;     /**< ETH_LINK_[HALF_DUPLEX, FULL_DUPLEX] */
+	uint8_t  link_status : 1; /**< 1 -> link up, 0 -> link down */
+}__attribute__((aligned(8)));     /**< aligned for atomic64 read/write */
+
+#define ETH_LINK_SPEED_AUTONEG  0       /**< Auto-negotiate link speed. */
+#define ETH_LINK_SPEED_10       10      /**< 10 megabits/second. */
+#define ETH_LINK_SPEED_100      100     /**< 100 megabits/second. */
+#define ETH_LINK_SPEED_1000     1000    /**< 1 gigabits/second. */
+#define ETH_LINK_SPEED_10000    10000   /**< 10 gigabits/second. */
+#define ETH_LINK_SPEED_10G      10000   /**< alias of 10 gigabits/second. */
+#define ETH_LINK_SPEED_20G      20000   /**< 20 gigabits/second. */
+#define ETH_LINK_SPEED_40G      40000   /**< 40 gigabits/second. */
+
+#define ETH_LINK_AUTONEG_DUPLEX 0       /**< Auto-negotiate duplex. */
+#define ETH_LINK_HALF_DUPLEX    1       /**< Half-duplex connection. */
+#define ETH_LINK_FULL_DUPLEX    2       /**< Full-duplex connection. */
+
+/**
+ * A structure used to configure the ring threshold registers of an RX/TX
+ * queue for an Ethernet port.
+ */
+struct rte_eth_thresh {
+	uint8_t pthresh; /**< Ring prefetch threshold. */
+	uint8_t hthresh; /**< Ring host threshold. */
+	uint8_t wthresh; /**< Ring writeback threshold. */
+};
+
+/**
+ *  Simple flags are used for rte_eth_conf.rxmode.mq_mode.
+ */
+#define ETH_MQ_RX_RSS_FLAG  0x1
+#define ETH_MQ_RX_DCB_FLAG  0x2
+#define ETH_MQ_RX_VMDQ_FLAG 0x4
+
+/**
+ *  A set of values to identify what method is to be used to route
+ *  packets to multiple queues.
+ */
+enum rte_eth_rx_mq_mode {
+	/** None of DCB, RSS or VMDQ mode */
+	ETH_MQ_RX_NONE = 0,
+
+	/** For RX side, only RSS is on */
+	ETH_MQ_RX_RSS = ETH_MQ_RX_RSS_FLAG,
+	/** For RX side, only DCB is on. */
+	ETH_MQ_RX_DCB = ETH_MQ_RX_DCB_FLAG,
+	/** Both DCB and RSS enabled */
+	ETH_MQ_RX_DCB_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_DCB_FLAG,
+
+	/** Only VMDQ, no RSS nor DCB */
+	ETH_MQ_RX_VMDQ_ONLY = ETH_MQ_RX_VMDQ_FLAG,
+	/** RSS mode with VMDQ */
+	ETH_MQ_RX_VMDQ_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_VMDQ_FLAG,
+	/** Use VMDQ+DCB to route traffic to queues */
+	ETH_MQ_RX_VMDQ_DCB = ETH_MQ_RX_VMDQ_FLAG | ETH_MQ_RX_DCB_FLAG,
+	/** Enable VMDQ, DCB and RSS together (all three flags set) */
+	ETH_MQ_RX_VMDQ_DCB_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_DCB_FLAG |
+				 ETH_MQ_RX_VMDQ_FLAG,
+};
+
+/**
+ * Aliases kept for backward compatibility with older RX mq mode names.
+ */
+#define ETH_RSS                       ETH_MQ_RX_RSS
+#define VMDQ_DCB                      ETH_MQ_RX_VMDQ_DCB
+#define ETH_DCB_RX                    ETH_MQ_RX_DCB
+
+/**
+ * A set of values to identify what method is to be used to transmit
+ * packets using multi-TCs.
+ */
+enum rte_eth_tx_mq_mode {
+	ETH_MQ_TX_NONE    = 0,  /**< It is in neither DCB nor VT mode. */
+	ETH_MQ_TX_DCB,          /**< For TX side,only DCB is on. */
+	ETH_MQ_TX_VMDQ_DCB,	/**< For TX side,both DCB and VT is on. */
+	ETH_MQ_TX_VMDQ_ONLY,    /**< Only VT on, no DCB */
+};
+
+/**
+ * Aliases kept for backward compatibility with older TX mq mode names.
+ */
+#define ETH_DCB_NONE                ETH_MQ_TX_NONE
+#define ETH_VMDQ_DCB_TX             ETH_MQ_TX_VMDQ_DCB
+#define ETH_DCB_TX                  ETH_MQ_TX_DCB
+
+/**
+ * A structure used to configure the RX features of an Ethernet port.
+ */
+struct rte_eth_rxmode {
+	/** The multi-queue packet distribution mode to be used, e.g. RSS. */
+	enum rte_eth_rx_mq_mode mq_mode;
+	uint32_t max_rx_pkt_len;  /**< Only used if jumbo_frame enabled. */
+	uint16_t split_hdr_size;  /**< hdr buf size (header_split enabled).*/
+	uint8_t header_split : 1, /**< Header Split enable. */
+		hw_ip_checksum   : 1, /**< IP/UDP/TCP checksum offload enable. */
+		hw_vlan_filter   : 1, /**< VLAN filter enable. */
+		hw_vlan_strip    : 1, /**< VLAN strip enable. */
+		hw_vlan_extend   : 1, /**< Extended VLAN enable. */
+		jumbo_frame      : 1, /**< Jumbo Frame Receipt enable. */
+		hw_strip_crc     : 1, /**< Enable CRC stripping by hardware. */
+		enable_scatter   : 1; /**< Enable scatter packets rx handler */
+};
+
+/**
+ * A structure used to configure the Receive Side Scaling (RSS) feature
+ * of an Ethernet port.
+ * If not NULL, the *rss_key* pointer of the *rss_conf* structure points
+ * to an array holding the RSS key to use for hashing specific header
+ * fields of received packets. The length of this array should be indicated
+ * by *rss_key_len* below. Otherwise, a default random hash key is used by
+ * the device driver.
+ *
+ * The *rss_key_len* field of the *rss_conf* structure indicates the length
+ * in bytes of the array pointed by *rss_key*. To be compatible, this length
+ * will be checked in i40e only. Others assume 40 bytes to be used as before.
+ *
+ * The *rss_hf* field of the *rss_conf* structure indicates the different
+ * types of IPv4/IPv6 packets to which the RSS hashing must be applied.
+ * Supplying an *rss_hf* equal to zero disables the RSS feature.
+ */
+struct rte_eth_rss_conf {
+	uint8_t *rss_key;    /**< If not NULL, 40-byte hash key. */
+	uint8_t rss_key_len; /**< hash key length in bytes. */
+	uint64_t rss_hf;     /**< Hash functions to apply - see below. */
+};
+
+/* Supported RSS offloads */
+/* for 1G & 10G */
+#define ETH_RSS_IPV4_SHIFT                    0
+#define ETH_RSS_IPV4_TCP_SHIFT                1
+#define ETH_RSS_IPV6_SHIFT                    2
+#define ETH_RSS_IPV6_EX_SHIFT                 3
+#define ETH_RSS_IPV6_TCP_SHIFT                4
+#define ETH_RSS_IPV6_TCP_EX_SHIFT             5
+#define ETH_RSS_IPV4_UDP_SHIFT                6
+#define ETH_RSS_IPV6_UDP_SHIFT                7
+#define ETH_RSS_IPV6_UDP_EX_SHIFT             8
+/* for 40G only */
+#define ETH_RSS_NONF_IPV4_UDP_SHIFT           31
+#define ETH_RSS_NONF_IPV4_TCP_SHIFT           33
+#define ETH_RSS_NONF_IPV4_SCTP_SHIFT          34
+#define ETH_RSS_NONF_IPV4_OTHER_SHIFT         35
+#define ETH_RSS_FRAG_IPV4_SHIFT               36
+#define ETH_RSS_NONF_IPV6_UDP_SHIFT           41
+#define ETH_RSS_NONF_IPV6_TCP_SHIFT           43
+#define ETH_RSS_NONF_IPV6_SCTP_SHIFT          44
+#define ETH_RSS_NONF_IPV6_OTHER_SHIFT         45
+#define ETH_RSS_FRAG_IPV6_SHIFT               46
+#define ETH_RSS_FCOE_OX_SHIFT                 48
+#define ETH_RSS_FCOE_RX_SHIFT                 49
+#define ETH_RSS_FCOE_OTHER_SHIFT              50
+#define ETH_RSS_L2_PAYLOAD_SHIFT              63
+
+/* for 1G & 10G */
+#define ETH_RSS_IPV4                    (1 << ETH_RSS_IPV4_SHIFT)
+#define ETH_RSS_IPV4_TCP                (1 << ETH_RSS_IPV4_TCP_SHIFT)
+#define ETH_RSS_IPV6                    (1 << ETH_RSS_IPV6_SHIFT)
+#define ETH_RSS_IPV6_EX                 (1 << ETH_RSS_IPV6_EX_SHIFT)
+#define ETH_RSS_IPV6_TCP                (1 << ETH_RSS_IPV6_TCP_SHIFT)
+#define ETH_RSS_IPV6_TCP_EX             (1 << ETH_RSS_IPV6_TCP_EX_SHIFT)
+#define ETH_RSS_IPV4_UDP                (1 << ETH_RSS_IPV4_UDP_SHIFT)
+#define ETH_RSS_IPV6_UDP                (1 << ETH_RSS_IPV6_UDP_SHIFT)
+#define ETH_RSS_IPV6_UDP_EX             (1 << ETH_RSS_IPV6_UDP_EX_SHIFT)
+/* for 40G only */
+#define ETH_RSS_NONF_IPV4_UDP           (1ULL << ETH_RSS_NONF_IPV4_UDP_SHIFT)
+#define ETH_RSS_NONF_IPV4_TCP           (1ULL << ETH_RSS_NONF_IPV4_TCP_SHIFT)
+#define ETH_RSS_NONF_IPV4_SCTP          (1ULL << ETH_RSS_NONF_IPV4_SCTP_SHIFT)
+#define ETH_RSS_NONF_IPV4_OTHER         (1ULL << ETH_RSS_NONF_IPV4_OTHER_SHIFT)
+#define ETH_RSS_FRAG_IPV4               (1ULL << ETH_RSS_FRAG_IPV4_SHIFT)
+#define ETH_RSS_NONF_IPV6_UDP           (1ULL << ETH_RSS_NONF_IPV6_UDP_SHIFT)
+#define ETH_RSS_NONF_IPV6_TCP           (1ULL << ETH_RSS_NONF_IPV6_TCP_SHIFT)
+#define ETH_RSS_NONF_IPV6_SCTP          (1ULL << ETH_RSS_NONF_IPV6_SCTP_SHIFT)
+#define ETH_RSS_NONF_IPV6_OTHER         (1ULL << ETH_RSS_NONF_IPV6_OTHER_SHIFT)
+#define ETH_RSS_FRAG_IPV6               (1ULL << ETH_RSS_FRAG_IPV6_SHIFT)
+/* FCOE relevant should not be used */
+#define ETH_RSS_FCOE_OX                 (1ULL << ETH_RSS_FCOE_OX_SHIFT)
+#define ETH_RSS_FCOE_RX                 (1ULL << ETH_RSS_FCOE_RX_SHIFT)
+#define ETH_RSS_FCOE_OTHER              (1ULL << ETH_RSS_FCOE_OTHER_SHIFT)
+#define ETH_RSS_L2_PAYLOAD              (1ULL << ETH_RSS_L2_PAYLOAD_SHIFT)
+
+#define ETH_RSS_IP ( \
+		ETH_RSS_IPV4 | \
+		ETH_RSS_IPV6 | \
+		ETH_RSS_NONF_IPV4_OTHER | \
+		ETH_RSS_FRAG_IPV4 | \
+		ETH_RSS_NONF_IPV6_OTHER | \
+		ETH_RSS_FRAG_IPV6)
+#define ETH_RSS_UDP ( \
+		ETH_RSS_IPV4 | \
+		ETH_RSS_IPV6 | \
+		ETH_RSS_IPV4_UDP | \
+		ETH_RSS_IPV6_UDP | \
+		ETH_RSS_IPV6_UDP_EX | \
+		ETH_RSS_NONF_IPV4_UDP | \
+		ETH_RSS_NONF_IPV6_UDP)
+/** Mask of valid RSS hash protocols */
+#define ETH_RSS_PROTO_MASK ( \
+		ETH_RSS_IPV4 | \
+		ETH_RSS_IPV4_TCP | \
+		ETH_RSS_IPV6 | \
+		ETH_RSS_IPV6_EX | \
+		ETH_RSS_IPV6_TCP | \
+		ETH_RSS_IPV6_TCP_EX | \
+		ETH_RSS_IPV4_UDP | \
+		ETH_RSS_IPV6_UDP | \
+		ETH_RSS_IPV6_UDP_EX | \
+		ETH_RSS_NONF_IPV4_UDP | \
+		ETH_RSS_NONF_IPV4_TCP | \
+		ETH_RSS_NONF_IPV4_SCTP | \
+		ETH_RSS_NONF_IPV4_OTHER | \
+		ETH_RSS_FRAG_IPV4 | \
+		ETH_RSS_NONF_IPV6_UDP | \
+		ETH_RSS_NONF_IPV6_TCP | \
+		ETH_RSS_NONF_IPV6_SCTP | \
+		ETH_RSS_NONF_IPV6_OTHER | \
+		ETH_RSS_FRAG_IPV6 | \
+		ETH_RSS_L2_PAYLOAD)
+
+/*
+ * Definitions used for redirection table entry size.
+ * Some RSS RETA sizes may not be supported by some drivers, check the
+ * documentation or the description of relevant functions for more details.
+ */
+#define ETH_RSS_RETA_SIZE_64  64
+#define ETH_RSS_RETA_SIZE_128 128
+#define ETH_RSS_RETA_SIZE_512 512
+#define RTE_RETA_GROUP_SIZE   64
+
+/* Definitions used for VMDQ and DCB functionality */
+#define ETH_VMDQ_MAX_VLAN_FILTERS   64 /**< Maximum nb. of VMDQ vlan filters. */
+#define ETH_DCB_NUM_USER_PRIORITIES 8  /**< Maximum nb. of DCB priorities. */
+#define ETH_VMDQ_DCB_NUM_QUEUES     128 /**< Maximum nb. of VMDQ DCB queues. */
+#define ETH_DCB_NUM_QUEUES          128 /**< Maximum nb. of DCB queues. */
+
+/* DCB capability defines */
+#define ETH_DCB_PG_SUPPORT      0x00000001 /**< Priority Group(ETS) support. */
+#define ETH_DCB_PFC_SUPPORT     0x00000002 /**< Priority Flow Control support. */
+
+/* Definitions used for VLAN Offload functionality */
+#define ETH_VLAN_STRIP_OFFLOAD   0x0001 /**< VLAN Strip  On/Off */
+#define ETH_VLAN_FILTER_OFFLOAD  0x0002 /**< VLAN Filter On/Off */
+#define ETH_VLAN_EXTEND_OFFLOAD  0x0004 /**< VLAN Extend On/Off */
+
+/* Definitions used for mask VLAN setting */
+#define ETH_VLAN_STRIP_MASK   0x0001 /**< VLAN Strip  setting mask */
+#define ETH_VLAN_FILTER_MASK  0x0002 /**< VLAN Filter  setting mask*/
+#define ETH_VLAN_EXTEND_MASK  0x0004 /**< VLAN Extend  setting mask*/
+#define ETH_VLAN_ID_MAX       0x0FFF /**< VLAN ID is in lower 12 bits*/
+
+/* Definitions used for receive MAC address   */
+#define ETH_NUM_RECEIVE_MAC_ADDR  128 /**< Maximum nb. of receive mac addr. */
+
+/* Definitions used for unicast hash  */
+#define ETH_VMDQ_NUM_UC_HASH_ARRAY  128 /**< Maximum nb. of UC hash array. */
+
+/* Definitions used for VMDQ pool rx mode setting */
+#define ETH_VMDQ_ACCEPT_UNTAG   0x0001 /**< accept untagged packets. */
+#define ETH_VMDQ_ACCEPT_HASH_MC 0x0002 /**< accept packets in multicast table. */
+#define ETH_VMDQ_ACCEPT_HASH_UC 0x0004 /**< accept packets in unicast table. */
+#define ETH_VMDQ_ACCEPT_BROADCAST   0x0008 /**< accept broadcast packets. */
+#define ETH_VMDQ_ACCEPT_MULTICAST   0x0010 /**< multicast promiscuous. */
+
+/* Definitions used for VMDQ mirror rules setting */
+#define ETH_VMDQ_NUM_MIRROR_RULE     4 /**< Maximum nb. of mirror rules. */
+
+#define ETH_VMDQ_POOL_MIRROR    0x0001 /**< Virtual Pool Mirroring. */
+#define ETH_VMDQ_UPLINK_MIRROR  0x0002 /**< Uplink Port Mirroring. */
+#define ETH_VMDQ_DOWNLIN_MIRROR 0x0004 /**< Downlink Port Mirroring. */
+#define ETH_VMDQ_VLAN_MIRROR    0x0008 /**< VLAN Mirroring. */
+
+/**
+ * A structure used to configure VLAN traffic mirror of an Ethernet port.
+ */
+struct rte_eth_vlan_mirror {
+	uint64_t vlan_mask; /**< mask for valid VLAN ID. */
+	uint16_t vlan_id[ETH_VMDQ_MAX_VLAN_FILTERS];
+	/**< VLAN ID list for vlan mirror. */
+};
+
+/**
+ * A structure used to configure traffic mirror of an Ethernet port.
+ */
+struct rte_eth_vmdq_mirror_conf {
+	uint8_t rule_type_mask; /**< Mirroring rule type mask we want to set */
+	uint8_t dst_pool; /**< Destination pool for this mirror rule. */
+	uint64_t pool_mask; /**< Bitmap of pool for pool mirroring */
+	struct rte_eth_vlan_mirror vlan; /**< VLAN ID setting for VLAN mirroring */
+};
+
+/**
+ * A structure used to configure 64 entries of Redirection Table of the
+ * Receive Side Scaling (RSS) feature of an Ethernet port. To configure
+ * more than 64 entries supported by hardware, an array of this structure
+ * is needed.
+ */
+struct rte_eth_rss_reta_entry64 {
+	uint64_t mask;
+	/**< Mask bits indicate which entries need to be updated/queried. */
+	uint8_t reta[RTE_RETA_GROUP_SIZE];
+	/**< Group of 64 redirection table entries. */
+};
+
+/**
+ * This enum indicates the possible number of traffic classes
+ * in DCB configurations.
+ */
+enum rte_eth_nb_tcs {
+	ETH_4_TCS = 4, /**< 4 TCs with DCB. */
+	ETH_8_TCS = 8  /**< 8 TCs with DCB. */
+};
+
+/**
+ * This enum indicates the possible number of queue pools
+ * in VMDQ configurations.
+ */
+enum rte_eth_nb_pools {
+	ETH_8_POOLS = 8,    /**< 8 VMDq pools. */
+	ETH_16_POOLS = 16,  /**< 16 VMDq pools. */
+	ETH_32_POOLS = 32,  /**< 32 VMDq pools. */
+	ETH_64_POOLS = 64   /**< 64 VMDq pools. */
+};
+
+/* This structure may be extended in future. */
+struct rte_eth_dcb_rx_conf {
+	enum rte_eth_nb_tcs nb_tcs; /**< Possible DCB TCs, 4 or 8 TCs */
+	uint8_t dcb_queue[ETH_DCB_NUM_USER_PRIORITIES];
+	/**< Possible DCB queue,4 or 8. */
+};
+
+struct rte_eth_vmdq_dcb_tx_conf {
+	enum rte_eth_nb_pools nb_queue_pools; /**< With DCB, 16 or 32 pools. */
+	uint8_t dcb_queue[ETH_DCB_NUM_USER_PRIORITIES];
+	/**< Possible DCB queue,4 or 8. */
+};
+
+struct rte_eth_dcb_tx_conf {
+	enum rte_eth_nb_tcs nb_tcs; /**< Possible DCB TCs, 4 or 8 TCs. */
+	uint8_t dcb_queue[ETH_DCB_NUM_USER_PRIORITIES];
+	/**< Possible DCB queue,4 or 8. */
+};
+
+struct rte_eth_vmdq_tx_conf {
+	enum rte_eth_nb_pools nb_queue_pools; /**< VMDq mode, 64 pools. */
+};
+
+/**
+ * A structure used to configure the VMDQ+DCB feature
+ * of an Ethernet port.
+ *
+ * Using this feature, packets are routed to a pool of queues, based
+ * on the vlan id in the vlan tag, and then to a specific queue within
+ * that pool, using the user priority vlan tag field.
+ *
+ * A default pool may be used, if desired, to route all traffic which
+ * does not match the vlan filter rules.
+ */
+struct rte_eth_vmdq_dcb_conf {
+	enum rte_eth_nb_pools nb_queue_pools; /**< With DCB, 16 or 32 pools */
+	uint8_t enable_default_pool; /**< If non-zero, use a default pool */
+	uint8_t default_pool; /**< The default pool, if applicable */
+	uint8_t nb_pool_maps; /**< We can have up to 64 filters/mappings */
+	struct {
+		uint16_t vlan_id; /**< The vlan id of the received frame */
+		uint64_t pools;   /**< Bitmask of pools for packet rx */
+	} pool_map[ETH_VMDQ_MAX_VLAN_FILTERS]; /**< VMDq vlan pool maps. */
+	uint8_t dcb_queue[ETH_DCB_NUM_USER_PRIORITIES];
+	/**< Selects a queue in a pool */
+};
+
+struct rte_eth_vmdq_rx_conf {
+	enum rte_eth_nb_pools nb_queue_pools; /**< VMDq only mode, 8 or 64 pools */
+	uint8_t enable_default_pool; /**< If non-zero, use a default pool */
+	uint8_t default_pool; /**< The default pool, if applicable */
+	uint8_t enable_loop_back; /**< Enable VT loop back */
+	uint8_t nb_pool_maps; /**< We can have up to 64 filters/mappings */
+	uint32_t rx_mode; /**< Flags from ETH_VMDQ_ACCEPT_* */
+	struct {
+		uint16_t vlan_id; /**< The vlan id of the received frame */
+		uint64_t pools;   /**< Bitmask of pools for packet rx */
+	} pool_map[ETH_VMDQ_MAX_VLAN_FILTERS]; /**< VMDq vlan pool maps. */
+};
+
+/**
+ * A structure used to configure the TX features of an Ethernet port.
+ */
+struct rte_eth_txmode {
+	enum rte_eth_tx_mq_mode mq_mode; /**< TX multi-queues mode. */
+
+	/* For i40e specifically */
+	uint16_t pvid;
+	uint8_t hw_vlan_reject_tagged : 1,
+		/**< If set, reject sending out tagged pkts */
+		hw_vlan_reject_untagged : 1,
+		/**< If set, reject sending out untagged pkts */
+		hw_vlan_insert_pvid : 1;
+		/**< If set, enable port based VLAN insertion */
+};
+
+/**
+ * A structure used to configure an RX ring of an Ethernet port.
+ */
+struct rte_eth_rxconf {
+	struct rte_eth_thresh rx_thresh; /**< RX ring threshold registers. */
+	uint16_t rx_free_thresh; /**< Drives the freeing of RX descriptors. */
+	uint8_t rx_drop_en; /**< Drop packets if no descriptors are available. */
+	uint8_t rx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */
+};
+
+#define ETH_TXQ_FLAGS_NOMULTSEGS 0x0001 /**< nb_segs=1 for all mbufs */
+#define ETH_TXQ_FLAGS_NOREFCOUNT 0x0002 /**< refcnt can be ignored */
+#define ETH_TXQ_FLAGS_NOMULTMEMP 0x0004 /**< all bufs come from same mempool */
+#define ETH_TXQ_FLAGS_NOVLANOFFL 0x0100 /**< disable VLAN offload */
+#define ETH_TXQ_FLAGS_NOXSUMSCTP 0x0200 /**< disable SCTP checksum offload */
+#define ETH_TXQ_FLAGS_NOXSUMUDP  0x0400 /**< disable UDP checksum offload */
+#define ETH_TXQ_FLAGS_NOXSUMTCP  0x0800 /**< disable TCP checksum offload */
+#define ETH_TXQ_FLAGS_NOOFFLOADS \
+		(ETH_TXQ_FLAGS_NOVLANOFFL | ETH_TXQ_FLAGS_NOXSUMSCTP | \
+		 ETH_TXQ_FLAGS_NOXSUMUDP  | ETH_TXQ_FLAGS_NOXSUMTCP)
+/**
+ * A structure used to configure a TX ring of an Ethernet port.
+ */
+struct rte_eth_txconf {
+	struct rte_eth_thresh tx_thresh; /**< TX ring threshold registers. */
+	uint16_t tx_rs_thresh; /**< Drives the setting of RS bit on TXDs. */
+	uint16_t tx_free_thresh; /**< Drives the freeing of TX buffers. */
+	uint32_t txq_flags; /**< Set flags for the Tx queue */
+	uint8_t tx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */
+};
+
+/**
+ * This enum indicates the flow control mode
+ */
+enum rte_eth_fc_mode {
+	RTE_FC_NONE = 0, /**< Disable flow control. */
+	RTE_FC_RX_PAUSE, /**< RX pause frame, enable flowctrl on TX side. */
+	RTE_FC_TX_PAUSE, /**< TX pause frame, enable flowctrl on RX side. */
+	RTE_FC_FULL      /**< Enable flow control on both side. */
+};
+
+/**
+ * A structure used to configure Ethernet flow control parameter.
+ * These parameters will be configured into the register of the NIC.
+ * Please refer to the corresponding data sheet for proper value.
+ */
+struct rte_eth_fc_conf {
+	uint32_t high_water;  /**< High threshold value to trigger XOFF */
+	uint32_t low_water;   /**< Low threshold value to trigger XON */
+	uint16_t pause_time;  /**< Pause quota in the Pause frame */
+	uint16_t send_xon;    /**< Whether an XON frame needs to be sent */
+	enum rte_eth_fc_mode mode;  /**< Link flow control mode */
+	uint8_t mac_ctrl_frame_fwd; /**< Forward MAC control frames */
+	uint8_t autoneg;      /**< Use Pause autoneg */
+};
+
+/**
+ * A structure used to configure Ethernet priority flow control parameter.
+ * These parameters will be configured into the register of the NIC.
+ * Please refer to the corresponding data sheet for proper value.
+ */
+struct rte_eth_pfc_conf {
+	struct rte_eth_fc_conf fc; /**< General flow control parameter. */
+	uint8_t priority;          /**< VLAN User Priority. */
+};
+
+/**
+ *  Memory space that can be configured to store Flow Director filters
+ *  in the board memory.
+ */
+enum rte_fdir_pballoc_type {
+	RTE_FDIR_PBALLOC_64K = 0,  /**< 64k. */
+	RTE_FDIR_PBALLOC_128K,     /**< 128k. */
+	RTE_FDIR_PBALLOC_256K,     /**< 256k. */
+};
+
+/**
+ *  Select report mode of FDIR hash information in RX descriptors.
+ */
+enum rte_fdir_status_mode {
+	RTE_FDIR_NO_REPORT_STATUS = 0, /**< Never report FDIR hash. */
+	RTE_FDIR_REPORT_STATUS, /**< Only report FDIR hash for matching pkts. */
+	RTE_FDIR_REPORT_STATUS_ALWAYS, /**< Always report FDIR hash. */
+};
+
+/**
+ * A structure used to configure the Flow Director (FDIR) feature
+ * of an Ethernet port.
+ *
+ * If mode is RTE_FDIR_DISABLE, the pballoc value is ignored.
+ */
+struct rte_fdir_conf {
+	enum rte_fdir_mode mode; /**< Flow Director mode. */
+	enum rte_fdir_pballoc_type pballoc; /**< Space for FDIR filters. */
+	enum rte_fdir_status_mode status;  /**< How to report FDIR hash. */
+	/** Offset of flexbytes field in RX packets (in 16-bit word units). */
+	uint8_t flexbytes_offset;
+	/** RX queue of packets matching a "drop" filter in perfect mode. */
+	uint8_t drop_queue;
+	struct rte_eth_fdir_flex_conf flex_conf;
+	/**< Flex payload configuration. */
+};
+
+/**
+ * UDP tunneling configuration.
+ */
+struct rte_eth_udp_tunnel {
+	uint16_t udp_port;
+	uint8_t prot_type;
+};
+
+/**
+ *  Possible l4type of FDIR filters.
+ */
+enum rte_l4type {
+	RTE_FDIR_L4TYPE_NONE = 0,       /**< None. */
+	RTE_FDIR_L4TYPE_UDP,            /**< UDP. */
+	RTE_FDIR_L4TYPE_TCP,            /**< TCP. */
+	RTE_FDIR_L4TYPE_SCTP,           /**< SCTP. */
+};
+
+/**
+ *  Select IPv4 or IPv6 FDIR filters.
+ */
+enum rte_iptype {
+	RTE_FDIR_IPTYPE_IPV4 = 0,     /**< IPv4. */
+	RTE_FDIR_IPTYPE_IPV6 ,        /**< IPv6. */
+};
+
+/**
+ *  A structure used to define a FDIR packet filter.
+ */
+struct rte_fdir_filter {
+	uint16_t flex_bytes; /**< Flex bytes value to match. */
+	uint16_t vlan_id; /**< VLAN ID value to match, 0 otherwise. */
+	uint16_t port_src; /**< Source port to match, 0 otherwise. */
+	uint16_t port_dst; /**< Destination port to match, 0 otherwise. */
+	union {
+		uint32_t ipv4_addr; /**< IPv4 source address to match. */
+		uint32_t ipv6_addr[4]; /**< IPv6 source address to match. */
+	} ip_src; /**< IPv4/IPv6 source address to match (union of above). */
+	union {
+		uint32_t ipv4_addr; /**< IPv4 destination address to match. */
+		uint32_t ipv6_addr[4]; /**< IPv6 destination address to match */
+	} ip_dst; /**< IPv4/IPv6 destination address to match (union of above). */
+	enum rte_l4type l4type; /**< l4type to match: NONE/UDP/TCP/SCTP. */
+	enum rte_iptype iptype; /**< IP packet type to match: IPv4 or IPv6. */
+};
+
+/**
+ *  A structure used to configure FDIR masks that are used by the device
+ *  to match the various fields of RX packet headers.
+ *  @note The only_ip_flow field has the opposite meaning compared to other
+ *  masks!
+ */
+struct rte_fdir_masks {
+	/** When set to 1, packet l4type is \b NOT relevant in filters, and
+	   source and destination port masks must be set to zero. */
+	uint8_t only_ip_flow;
+	/** If set to 1, vlan_id is relevant in filters. */
+	uint8_t vlan_id;
+	/** If set to 1, vlan_prio is relevant in filters. */
+	uint8_t vlan_prio;
+	/** If set to 1, flexbytes is relevant in filters. */
+	uint8_t flexbytes;
+	/** If set to 1, set the IPv6 masks. Otherwise set the IPv4 masks. */
+	uint8_t set_ipv6_mask;
+	/** When set to 1, comparison of destination IPv6 address with IP6AT
+	    registers is meaningful. */
+	uint8_t comp_ipv6_dst;
+	/** Mask of Destination IPv4 Address. All bits set to 1 define the
+	    relevant bits to use in the destination address of an IPv4 packet
+	    when matching it against FDIR filters. */
+	uint32_t dst_ipv4_mask;
+	/** Mask of Source IPv4 Address. All bits set to 1 define
+	    the relevant bits to use in the source address of an IPv4 packet
+	    when matching it against FDIR filters. */
+	uint32_t src_ipv4_mask;
+	/** Mask of Destination IPv6 Address. All bits set to 1 define the
+	    relevant BYTES to use in the destination address of an IPv6 packet
+	    when matching it against FDIR filters. */
+	uint16_t dst_ipv6_mask;
+	/** Mask of Source IPv6 Address. All bits set to 1 define the
+	    relevant BYTES to use in the source address of an IPv6 packet
+	    when matching it against FDIR filters. */
+	uint16_t src_ipv6_mask;
+	/** Mask of Source Port. All bits set to 1 define the relevant
+	    bits to use in the source port of an IP packet when matching it
+	    against FDIR filters. */
+	uint16_t src_port_mask;
+	/** Mask of Destination Port. All bits set to 1 define the relevant
+	    bits to use in the destination port of an IP packet when matching it
+	    against FDIR filters. */
+	uint16_t dst_port_mask;
+};
+
+/**
+ *  A structure used to report the status of the flow director filters in use.
+ */
+struct rte_eth_fdir {
+	/** Number of filters with collision indication. */
+	uint16_t collision;
+	/** Number of free (non programmed) filters. */
+	uint16_t free;
+	/** The Lookup hash value of the added filter that updated the value
+	   of the MAXLEN field */
+	uint16_t maxhash;
+	/** Longest linked list of filters in the table. */
+	uint8_t maxlen;
+	/** Number of added filters. */
+	uint64_t add;
+	/** Number of removed filters. */
+	uint64_t remove;
+	/** Number of failed added filters (no more space in device). */
+	uint64_t f_add;
+	/** Number of failed removed filters. */
+	uint64_t f_remove;
+};
+
+/**
+ * A structure used to enable/disable specific device interrupts.
+ */
+struct rte_intr_conf {
+	/** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */
+	uint16_t lsc;
+};
+
+/**
+ * A structure used to configure an Ethernet port.
+ * Depending upon the RX multi-queue mode, extra advanced
+ * configuration settings may be needed.
+ */
+struct rte_eth_conf {
+	uint16_t link_speed;
+	/**< ETH_LINK_SPEED_10[0|00|000], or 0 for autonegotiation */
+	uint16_t link_duplex;
+	/**< ETH_LINK_[HALF_DUPLEX|FULL_DUPLEX], or 0 for autonegotiation */
+	struct rte_eth_rxmode rxmode; /**< Port RX configuration. */
+	struct rte_eth_txmode txmode; /**< Port TX configuration. */
+	uint32_t lpbk_mode; /**< Loopback operation mode. By default the value
+				 is 0, meaning the loopback mode is disabled.
+				 Read the datasheet of given ethernet controller
+				 for details. The possible values of this field
+				 are defined in implementation of each driver. */
+	struct {
+		struct rte_eth_rss_conf rss_conf; /**< Port RSS configuration */
+		struct rte_eth_vmdq_dcb_conf vmdq_dcb_conf;
+		/**< Port vmdq+dcb configuration. */
+		struct rte_eth_dcb_rx_conf dcb_rx_conf;
+		/**< Port dcb RX configuration. */
+		struct rte_eth_vmdq_rx_conf vmdq_rx_conf;
+		/**< Port vmdq RX configuration. */
+	} rx_adv_conf; /**< Port RX filtering configuration (struct). */
+	union {
+		struct rte_eth_vmdq_dcb_tx_conf vmdq_dcb_tx_conf;
+		/**< Port vmdq+dcb TX configuration. */
+		struct rte_eth_dcb_tx_conf dcb_tx_conf;
+		/**< Port dcb TX configuration. */
+		struct rte_eth_vmdq_tx_conf vmdq_tx_conf;
+		/**< Port vmdq TX configuration. */
+	} tx_adv_conf; /**< Port TX DCB configuration (union). */
+	/** Priority Flow Control (PFC) is currently supported; if DCB with
+	    PFC is needed, this variable must be set to ETH_DCB_PFC_SUPPORT. */
+	uint32_t dcb_capability_en;
+	struct rte_fdir_conf fdir_conf; /**< FDIR configuration. */
+	struct rte_intr_conf intr_conf; /**< Interrupt mode configuration. */
+};
+
+/**
+ * A structure used to retrieve the contextual information of
+ * an Ethernet device, such as the controlling driver of the device,
+ * its PCI context, etc...
+ */
+
+/**
+ * RX offload capabilities of a device.
+ */
+#define DEV_RX_OFFLOAD_VLAN_STRIP  0x00000001
+#define DEV_RX_OFFLOAD_IPV4_CKSUM  0x00000002
+#define DEV_RX_OFFLOAD_UDP_CKSUM   0x00000004
+#define DEV_RX_OFFLOAD_TCP_CKSUM   0x00000008
+#define DEV_RX_OFFLOAD_TCP_LRO     0x00000010
+
+/**
+ * TX offload capabilities of a device.
+ */
+#define DEV_TX_OFFLOAD_VLAN_INSERT 0x00000001
+#define DEV_TX_OFFLOAD_IPV4_CKSUM  0x00000002
+#define DEV_TX_OFFLOAD_UDP_CKSUM   0x00000004
+#define DEV_TX_OFFLOAD_TCP_CKSUM   0x00000008
+#define DEV_TX_OFFLOAD_SCTP_CKSUM  0x00000010
+#define DEV_TX_OFFLOAD_TCP_TSO     0x00000020
+#define DEV_TX_OFFLOAD_UDP_TSO     0x00000040
+#define DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000080 /**< Used for tunneling packet. */
+
+struct rte_eth_dev_info {
+	struct rte_pci_device *pci_dev; /**< Device PCI information. */
+	const char *driver_name; /**< Device Driver name. */
+	unsigned int if_index; /**< Index to bound host interface, or 0 if none.
+		Use if_indextoname() to translate into an interface name. */
+	uint32_t min_rx_bufsize; /**< Minimum size of RX buffer. */
+	uint32_t max_rx_pktlen; /**< Maximum configurable length of RX pkt. */
+	uint16_t max_rx_queues; /**< Maximum number of RX queues. */
+	uint16_t max_tx_queues; /**< Maximum number of TX queues. */
+	uint32_t max_mac_addrs; /**< Maximum number of MAC addresses. */
+	uint32_t max_hash_mac_addrs;
+	/**< Maximum number of hash MAC addresses for MTA and UTA. */
+	uint16_t max_vfs; /**< Maximum number of VFs. */
+	uint16_t max_vmdq_pools; /**< Maximum number of VMDq pools. */
+	uint32_t rx_offload_capa; /**< Device RX offload capabilities. */
+	uint32_t tx_offload_capa; /**< Device TX offload capabilities. */
+	uint16_t reta_size;
+	/**< Device redirection table size, the total number of entries. */
+	struct rte_eth_rxconf default_rxconf; /**< Default RX configuration */
+	struct rte_eth_txconf default_txconf; /**< Default TX configuration */
+	uint16_t vmdq_queue_base; /**< First queue ID for VMDQ pools. */
+	uint16_t vmdq_queue_num;  /**< Queue number for VMDQ pools. */
+	uint16_t vmdq_pool_base;  /**< First ID of VMDQ pools. */
+};
+
+/** Maximum name length for extended statistics counters */
+#define RTE_ETH_XSTATS_NAME_SIZE 64
+
+/**
+ * An Ethernet device extended statistic structure
+ *
+ * This structure is used by ethdev->eth_xstats_get() to provide
+ * statistics that are not provided in the generic rte_eth_stats
+ * structure.
+ */
+struct rte_eth_xstats {
+	char name[RTE_ETH_XSTATS_NAME_SIZE];
+	uint64_t value;
+};
+
+struct rte_eth_dev;
+
+struct rte_eth_dev_callback;
+/** @internal Structure to keep track of registered callbacks */
+TAILQ_HEAD(rte_eth_dev_cb_list, rte_eth_dev_callback);
+
+#define TCP_UGR_FLAG 0x20
+#define TCP_ACK_FLAG 0x10
+#define TCP_PSH_FLAG 0x08
+#define TCP_RST_FLAG 0x04
+#define TCP_SYN_FLAG 0x02
+#define TCP_FIN_FLAG 0x01
+#define TCP_FLAG_ALL 0x3F
+
+/**
+ *  A structure used to define an syn filter.
+ */
+struct rte_syn_filter {
+	uint8_t hig_pri; /**< 1 means higher pri than 2tuple, 5tuple,
+			      and flex filter, 0 means lower pri. */
+};
+
+/**
+ *  A structure used to define a 2tuple filter.
+ */
+struct rte_2tuple_filter {
+	uint16_t dst_port;        /**< big endian. */
+	uint8_t protocol;
+	uint8_t tcp_flags;
+	uint16_t priority;        /**< used when more than one filter matches. */
+	uint8_t dst_port_mask:1,  /**< if mask is 1b, means not compare. */
+		protocol_mask:1;
+};
+
+/**
+ *  A structure used to define a flex filter.
+ */
+struct rte_flex_filter {
+	uint16_t len;
+	uint32_t dwords[32];  /**< flex bytes in big endian. */
+	uint8_t mask[16];     /**< if mask bit is 1b, do not compare
+				   corresponding byte in dwords. */
+	uint8_t priority;
+};
+
+/**
+ *  A structure used to define a 5tuple filter.
+ */
+struct rte_5tuple_filter {
+	uint32_t dst_ip;         /**< destination IP address in big endian. */
+	uint32_t src_ip;         /**< source IP address in big endian. */
+	uint16_t dst_port;       /**< destination port in big endian. */
+	uint16_t src_port;       /**< source port in big endian. */
+	uint8_t protocol;        /**< l4 protocol. */
+	uint8_t tcp_flags;       /**< tcp flags. */
+	uint16_t priority;       /**< seven levels (001b-111b), 111b is highest,
+				      used when more than one filter matches. */
+	uint8_t dst_ip_mask:1,   /**< if mask is 1b, do not compare dst ip. */
+		src_ip_mask:1,   /**< if mask is 1b, do not compare src ip. */
+		dst_port_mask:1, /**< if mask is 1b, do not compare dst port. */
+		src_port_mask:1, /**< if mask is 1b, do not compare src port. */
+		protocol_mask:1; /**< if mask is 1b, do not compare protocol. */
+};
+
+/*
+ * Definitions of all functions exported by an Ethernet driver through the
+ * generic structure of type *eth_dev_ops* supplied in the *rte_eth_dev*
+ * structure associated with an Ethernet device.
+ */
+
+typedef int  (*eth_dev_configure_t)(struct rte_eth_dev *dev);
+/**< @internal Ethernet device configuration. */
+
+typedef int  (*eth_dev_start_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to start a configured Ethernet device. */
+
+typedef void (*eth_dev_stop_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to stop a configured Ethernet device. */
+
+typedef int  (*eth_dev_set_link_up_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to link up a configured Ethernet device. */
+
+typedef int  (*eth_dev_set_link_down_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to link down a configured Ethernet device. */
+
+typedef void (*eth_dev_close_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to close a configured Ethernet device. */
+
+typedef void (*eth_promiscuous_enable_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to enable the RX promiscuous mode of an Ethernet device. */
+
+typedef void (*eth_promiscuous_disable_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to disable the RX promiscuous mode of an Ethernet device. */
+
+typedef void (*eth_allmulticast_enable_t)(struct rte_eth_dev *dev);
+/**< @internal Enable the receipt of all multicast packets by an Ethernet device. */
+
+typedef void (*eth_allmulticast_disable_t)(struct rte_eth_dev *dev);
+/**< @internal Disable the receipt of all multicast packets by an Ethernet device. */
+
+typedef int (*eth_link_update_t)(struct rte_eth_dev *dev,
+				int wait_to_complete);
+/**< @internal Get link speed, duplex mode and state (up/down) of an Ethernet device. */
+
+typedef void (*eth_stats_get_t)(struct rte_eth_dev *dev,
+				struct rte_eth_stats *igb_stats);
+/**< @internal Get global I/O statistics of an Ethernet device. */
+
+typedef void (*eth_stats_reset_t)(struct rte_eth_dev *dev);
+/**< @internal Reset global I/O statistics of an Ethernet device to 0. */
+
+typedef int (*eth_xstats_get_t)(struct rte_eth_dev *dev,
+	struct rte_eth_xstats *stats, unsigned n);
+/**< @internal Get extended statistics of an Ethernet device. */
+
+typedef void (*eth_xstats_reset_t)(struct rte_eth_dev *dev);
+/**< @internal Reset extended statistics of an Ethernet device. */
+
+typedef int (*eth_queue_stats_mapping_set_t)(struct rte_eth_dev *dev,
+					     uint16_t queue_id,
+					     uint8_t stat_idx,
+					     uint8_t is_rx);
+/**< @internal Set a queue statistics mapping for a TX/RX queue of an Ethernet device. */
+
+typedef void (*eth_dev_infos_get_t)(struct rte_eth_dev *dev,
+				    struct rte_eth_dev_info *dev_info);
+/**< @internal Get device-specific information of an Ethernet device. */
+
+typedef int (*eth_queue_start_t)(struct rte_eth_dev *dev,
+				    uint16_t queue_id);
+/**< @internal Start RX or TX on a queue of an Ethernet device. */
+
+typedef int (*eth_queue_stop_t)(struct rte_eth_dev *dev,
+				    uint16_t queue_id);
+/**< @internal Stop RX or TX on a queue of an Ethernet device. */
+
+typedef int (*eth_rx_queue_setup_t)(struct rte_eth_dev *dev,
+				    uint16_t rx_queue_id,
+				    uint16_t nb_rx_desc,
+				    unsigned int socket_id,
+				    const struct rte_eth_rxconf *rx_conf,
+				    struct rte_mempool *mb_pool);
+/**< @internal Set up a receive queue of an Ethernet device. */
+
+typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
+				    uint16_t tx_queue_id,
+				    uint16_t nb_tx_desc,
+				    unsigned int socket_id,
+				    const struct rte_eth_txconf *tx_conf);
+/**< @internal Set up a transmit queue of an Ethernet device. */
+
+typedef void (*eth_queue_release_t)(void *queue);
+/**< @internal Release memory resources allocated by given RX/TX queue. */
+
+typedef uint32_t (*eth_rx_queue_count_t)(struct rte_eth_dev *dev,
+					 uint16_t rx_queue_id);
+/**< @internal Get number of available descriptors on a receive queue of an Ethernet device. */
+
+typedef int (*eth_rx_descriptor_done_t)(void *rxq, uint16_t offset);
+/**< @internal Check DD bit of specific RX descriptor */
+
+typedef int (*mtu_set_t)(struct rte_eth_dev *dev, uint16_t mtu);
+/**< @internal Set MTU. */
+
+typedef int (*vlan_filter_set_t)(struct rte_eth_dev *dev,
+				  uint16_t vlan_id,
+				  int on);
+/**< @internal Set filtering of a VLAN Tag Identifier by an Ethernet device. */
+
+typedef void (*vlan_tpid_set_t)(struct rte_eth_dev *dev,
+				  uint16_t tpid);
+/**< @internal Set the outer VLAN TPID of an Ethernet device. */
+
+typedef void (*vlan_offload_set_t)(struct rte_eth_dev *dev, int mask);
+/**< @internal Set the VLAN offload functions of an Ethernet device. */
+
+typedef int (*vlan_pvid_set_t)(struct rte_eth_dev *dev,
+			       uint16_t vlan_id,
+			       int on);
+/**< @internal Set port-based TX VLAN insertion on an Ethernet device. */
+
+typedef void (*vlan_strip_queue_set_t)(struct rte_eth_dev *dev,
+				  uint16_t rx_queue_id,
+				  int on);
+/**< @internal Enable/disable VLAN stripping on a queue of an Ethernet device. */
+
+typedef uint16_t (*eth_rx_burst_t)(void *rxq,
+				   struct rte_mbuf **rx_pkts,
+				   uint16_t nb_pkts);
+/**< @internal Retrieve input packets from a receive queue of an Ethernet device. */
+
+typedef uint16_t (*eth_tx_burst_t)(void *txq,
+				   struct rte_mbuf **tx_pkts,
+				   uint16_t nb_pkts);
+/**< @internal Send output packets on a transmit queue of an Ethernet device. */
+
+typedef int (*fdir_add_signature_filter_t)(struct rte_eth_dev *dev,
+					   struct rte_fdir_filter *fdir_ftr,
+					   uint8_t rx_queue);
+/**< @internal Set up a new signature filter rule on an Ethernet device */
+
+typedef int (*fdir_update_signature_filter_t)(struct rte_eth_dev *dev,
+					      struct rte_fdir_filter *fdir_ftr,
+					      uint8_t rx_queue);
+/**< @internal Update a signature filter rule on an Ethernet device */
+
+typedef int (*fdir_remove_signature_filter_t)(struct rte_eth_dev *dev,
+					      struct rte_fdir_filter *fdir_ftr);
+/**< @internal Remove a signature filter rule on an Ethernet device */
+
+typedef void (*fdir_infos_get_t)(struct rte_eth_dev *dev,
+				 struct rte_eth_fdir *fdir);
+/**< @internal Get information about fdir status */
+
+typedef int (*fdir_add_perfect_filter_t)(struct rte_eth_dev *dev,
+					 struct rte_fdir_filter *fdir_ftr,
+					 uint16_t soft_id, uint8_t rx_queue,
+					 uint8_t drop);
+/**< @internal Set up a new perfect filter rule on an Ethernet device */
+
+typedef int (*fdir_update_perfect_filter_t)(struct rte_eth_dev *dev,
+					    struct rte_fdir_filter *fdir_ftr,
+					    uint16_t soft_id, uint8_t rx_queue,
+					    uint8_t drop);
+/**< @internal Update a perfect filter rule on an Ethernet device */
+
+typedef int (*fdir_remove_perfect_filter_t)(struct rte_eth_dev *dev,
+					    struct rte_fdir_filter *fdir_ftr,
+					    uint16_t soft_id);
+/**< @internal Remove a perfect filter rule on an Ethernet device */
+
+typedef int (*fdir_set_masks_t)(struct rte_eth_dev *dev,
+				struct rte_fdir_masks *fdir_masks);
+/**< @internal Set up flow director masks on an Ethernet device */
+
+typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
+			       struct rte_eth_fc_conf *fc_conf);
+/**< @internal Get current flow control parameters of an Ethernet device */
+
+typedef int (*flow_ctrl_set_t)(struct rte_eth_dev *dev,
+			       struct rte_eth_fc_conf *fc_conf);
+/**< @internal Set up flow control parameters on an Ethernet device */
+
+typedef int (*priority_flow_ctrl_set_t)(struct rte_eth_dev *dev,
+				struct rte_eth_pfc_conf *pfc_conf);
+/**< @internal Set up priority flow control parameters on an Ethernet device */
+
+typedef int (*reta_update_t)(struct rte_eth_dev *dev,
+			     struct rte_eth_rss_reta_entry64 *reta_conf,
+			     uint16_t reta_size);
+/**< @internal Update RSS redirection table on an Ethernet device */
+
+typedef int (*reta_query_t)(struct rte_eth_dev *dev,
+			    struct rte_eth_rss_reta_entry64 *reta_conf,
+			    uint16_t reta_size);
+/**< @internal Query RSS redirection table on an Ethernet device */
+
+typedef int (*rss_hash_update_t)(struct rte_eth_dev *dev,
+				 struct rte_eth_rss_conf *rss_conf);
+/**< @internal Update RSS hash configuration of an Ethernet device */
+
+typedef int (*rss_hash_conf_get_t)(struct rte_eth_dev *dev,
+				   struct rte_eth_rss_conf *rss_conf);
+/**< @internal Get current RSS hash configuration of an Ethernet device */
+
+typedef int (*eth_dev_led_on_t)(struct rte_eth_dev *dev);
+/**< @internal Turn on SW controllable LED on an Ethernet device */
+
+typedef int (*eth_dev_led_off_t)(struct rte_eth_dev *dev);
+/**< @internal Turn off SW controllable LED on an Ethernet device */
+
+typedef void (*eth_mac_addr_remove_t)(struct rte_eth_dev *dev, uint32_t index);
+/**< @internal Remove MAC address from receive address register */
+
+typedef void (*eth_mac_addr_add_t)(struct rte_eth_dev *dev,
+				  struct ether_addr *mac_addr,
+				  uint32_t index,
+				  uint32_t vmdq);
+/**< @internal Set a MAC address into Receive Address Register */
+
+typedef int (*eth_uc_hash_table_set_t)(struct rte_eth_dev *dev,
+				  struct ether_addr *mac_addr,
+				  uint8_t on);
+/**< @internal Set a Unicast Hash bitmap */
+
+typedef int (*eth_uc_all_hash_table_set_t)(struct rte_eth_dev *dev,
+				  uint8_t on);
+/**< @internal Set all Unicast Hash bitmap */
+
+typedef int (*eth_set_vf_rx_mode_t)(struct rte_eth_dev *dev,
+				  uint16_t vf,
+				  uint16_t rx_mode,
+				  uint8_t on);
+/**< @internal Set a VF receive mode */
+
+typedef int (*eth_set_vf_rx_t)(struct rte_eth_dev *dev,
+				uint16_t vf,
+				uint8_t on);
+/**< @internal Enable or disable a VF receive */
+
+typedef int (*eth_set_vf_tx_t)(struct rte_eth_dev *dev,
+				uint16_t vf,
+				uint8_t on);
+/**< @internal Enable or disable a VF transmit */
+
+typedef int (*eth_set_vf_vlan_filter_t)(struct rte_eth_dev *dev,
+				  uint16_t vlan,
+				  uint64_t vf_mask,
+				  uint8_t vlan_on);
+/**< @internal Set VF VLAN pool filter */
+
+typedef int (*eth_set_queue_rate_limit_t)(struct rte_eth_dev *dev,
+				uint16_t queue_idx,
+				uint16_t tx_rate);
+/**< @internal Set queue TX rate */
+
+typedef int (*eth_set_vf_rate_limit_t)(struct rte_eth_dev *dev,
+				uint16_t vf,
+				uint16_t tx_rate,
+				uint64_t q_msk);
+/**< @internal Set VF TX rate */
+
+typedef int (*eth_mirror_rule_set_t)(struct rte_eth_dev *dev,
+				  struct rte_eth_vmdq_mirror_conf *mirror_conf,
+				  uint8_t rule_id,
+				  uint8_t on);
+/**< @internal Add a traffic mirroring rule on an Ethernet device */
+
+typedef int (*eth_mirror_rule_reset_t)(struct rte_eth_dev *dev,
+				  uint8_t rule_id);
+/**< @internal Remove a traffic mirroring rule on an Ethernet device */
+
+typedef int (*eth_udp_tunnel_add_t)(struct rte_eth_dev *dev,
+				    struct rte_eth_udp_tunnel *tunnel_udp);
+/**< @internal Add tunneling UDP info */
+
+typedef int (*eth_udp_tunnel_del_t)(struct rte_eth_dev *dev,
+				    struct rte_eth_udp_tunnel *tunnel_udp);
+/**< @internal Delete tunneling UDP info */
+
+
+#ifdef RTE_NIC_BYPASS
+
+enum {
+	RTE_BYPASS_MODE_NONE,    /* rejected by RTE_BYPASS_MODE_VALID */
+	RTE_BYPASS_MODE_NORMAL,
+	RTE_BYPASS_MODE_BYPASS,
+	RTE_BYPASS_MODE_ISOLATE,
+	RTE_BYPASS_MODE_NUM,     /* count sentinel, not a mode */
+};
+
+#define	RTE_BYPASS_MODE_VALID(x)	\
+	((x) > RTE_BYPASS_MODE_NONE && (x) < RTE_BYPASS_MODE_NUM) /* true for NORMAL..ISOLATE */
+
+enum {
+	RTE_BYPASS_EVENT_NONE,   /* rejected by RTE_BYPASS_EVENT_VALID */
+	RTE_BYPASS_EVENT_START,
+	RTE_BYPASS_EVENT_OS_ON = RTE_BYPASS_EVENT_START, /* first valid event */
+	RTE_BYPASS_EVENT_POWER_ON,
+	RTE_BYPASS_EVENT_OS_OFF,
+	RTE_BYPASS_EVENT_POWER_OFF,
+	RTE_BYPASS_EVENT_TIMEOUT,
+	RTE_BYPASS_EVENT_NUM     /* count sentinel, not an event */
+};
+
+#define	RTE_BYPASS_EVENT_VALID(x)	\
+	((x) > RTE_BYPASS_EVENT_NONE && (x) < RTE_BYPASS_EVENT_NUM) /* bounded by the EVENT count (not MODE) so POWER_OFF and TIMEOUT are accepted */
+
+enum {
+	RTE_BYPASS_TMT_OFF,     /* timeout disabled. */
+	RTE_BYPASS_TMT_1_5_SEC, /* timeout for 1.5 seconds */
+	RTE_BYPASS_TMT_2_SEC,   /* timeout for 2 seconds */
+	RTE_BYPASS_TMT_3_SEC,   /* timeout for 3 seconds */
+	RTE_BYPASS_TMT_4_SEC,   /* timeout for 4 seconds */
+	RTE_BYPASS_TMT_8_SEC,   /* timeout for 8 seconds */
+	RTE_BYPASS_TMT_16_SEC,  /* timeout for 16 seconds */
+	RTE_BYPASS_TMT_32_SEC,  /* timeout for 32 seconds */
+	RTE_BYPASS_TMT_NUM      /* count sentinel, not a timeout value */
+};
+
+#define	RTE_BYPASS_TMT_VALID(x)	\
+	((x) == RTE_BYPASS_TMT_OFF || \
+	((x) > RTE_BYPASS_TMT_OFF && (x) < RTE_BYPASS_TMT_NUM)) /* i.e. TMT_OFF <= x < TMT_NUM; unlike MODE/EVENT, the first enumerator is valid */
+
+typedef void (*bypass_init_t)(struct rte_eth_dev *dev);
+typedef int32_t (*bypass_state_set_t)(struct rte_eth_dev *dev, uint32_t *new_state);
+typedef int32_t (*bypass_state_show_t)(struct rte_eth_dev *dev, uint32_t *state);
+typedef int32_t (*bypass_event_set_t)(struct rte_eth_dev *dev, uint32_t state, uint32_t event);
+typedef int32_t (*bypass_event_show_t)(struct rte_eth_dev *dev, uint32_t event_shift, uint32_t *event);
+typedef int32_t (*bypass_wd_timeout_set_t)(struct rte_eth_dev *dev, uint32_t timeout);
+typedef int32_t (*bypass_wd_timeout_show_t)(struct rte_eth_dev *dev, uint32_t *wd_timeout);
+typedef int32_t (*bypass_ver_show_t)(struct rte_eth_dev *dev, uint32_t *ver);
+typedef int32_t (*bypass_wd_reset_t)(struct rte_eth_dev *dev);
+#endif
+
+typedef int (*eth_add_syn_filter_t)(struct rte_eth_dev *dev,
+			struct rte_syn_filter *filter, uint16_t rx_queue);
+/**< @internal Add a syn filter rule on an Ethernet device */
+
+typedef int (*eth_remove_syn_filter_t)(struct rte_eth_dev *dev);
+/**< @internal Remove the syn filter rule on an Ethernet device */
+
+typedef int (*eth_get_syn_filter_t)(struct rte_eth_dev *dev,
+			struct rte_syn_filter *filter, uint16_t *rx_queue);
+/**< @internal Get the syn filter rule on an Ethernet device */
+
+typedef int (*eth_add_2tuple_filter_t)(struct rte_eth_dev *dev,
+			uint16_t index, struct rte_2tuple_filter *filter,
+			uint16_t rx_queue);
+/**< @internal Set up a new 2tuple filter rule on an Ethernet device */
+
+typedef int (*eth_remove_2tuple_filter_t)(struct rte_eth_dev *dev,
+			uint16_t index);
+/**< @internal Remove a 2tuple filter rule on an Ethernet device */
+
+typedef int (*eth_get_2tuple_filter_t)(struct rte_eth_dev *dev,
+			uint16_t index, struct rte_2tuple_filter *filter,
+			uint16_t *rx_queue);
+/**< @internal Get a 2tuple filter rule on an Ethernet device */
+
+typedef int (*eth_add_5tuple_filter_t)(struct rte_eth_dev *dev,
+			uint16_t index, struct rte_5tuple_filter *filter,
+			uint16_t rx_queue);
+/**< @internal Set up a new 5tuple filter rule on an Ethernet device */
+
+typedef int (*eth_remove_5tuple_filter_t)(struct rte_eth_dev *dev,
+			uint16_t index);
+/**< @internal Remove a 5tuple filter rule on an Ethernet device */
+
+typedef int (*eth_get_5tuple_filter_t)(struct rte_eth_dev *dev,
+			uint16_t index, struct rte_5tuple_filter *filter,
+			uint16_t *rx_queue);
+/**< @internal Get a 5tuple filter rule on an Ethernet device */
+
+typedef int (*eth_add_flex_filter_t)(struct rte_eth_dev *dev,
+			uint16_t index, struct rte_flex_filter *filter,
+			uint16_t rx_queue);
+/**< @internal Set up a new flex filter rule on an Ethernet device */
+
+typedef int (*eth_remove_flex_filter_t)(struct rte_eth_dev *dev,
+			uint16_t index);
+/**< @internal Remove a flex filter rule on an Ethernet device */
+
+typedef int (*eth_get_flex_filter_t)(struct rte_eth_dev *dev,
+			uint16_t index, struct rte_flex_filter *filter,
+			uint16_t *rx_queue);
+/**< @internal Get a flex filter rule on an Ethernet device */
+
+typedef int (*eth_filter_ctrl_t)(struct rte_eth_dev *dev,
+				 enum rte_filter_type filter_type,
+				 enum rte_filter_op filter_op,
+				 void *arg);
+/**< @internal Perform an operation on the given filter type on an Ethernet device */
+
+/**
+ * @internal A structure containing the functions exported by an Ethernet driver.
+ */
+struct eth_dev_ops {
+	eth_dev_configure_t        dev_configure; /**< Configure device. */
+	eth_dev_start_t            dev_start;     /**< Start device. */
+	eth_dev_stop_t             dev_stop;      /**< Stop device. */
+	eth_dev_set_link_up_t      dev_set_link_up;   /**< Device link up. */
+	eth_dev_set_link_down_t    dev_set_link_down; /**< Device link down. */
+	eth_dev_close_t            dev_close;     /**< Close device. */
+	eth_promiscuous_enable_t   promiscuous_enable; /**< Promiscuous ON. */
+	eth_promiscuous_disable_t  promiscuous_disable;/**< Promiscuous OFF. */
+	eth_allmulticast_enable_t  allmulticast_enable;/**< RX multicast ON. */
+	eth_allmulticast_disable_t allmulticast_disable;/**< RX multicast OFF. */
+	eth_link_update_t          link_update;   /**< Get device link state. */
+	eth_stats_get_t            stats_get;     /**< Get generic device statistics. */
+	eth_stats_reset_t          stats_reset;   /**< Reset generic device statistics. */
+	eth_xstats_get_t           xstats_get;    /**< Get extended device statistics. */
+	eth_xstats_reset_t         xstats_reset;  /**< Reset extended device statistics. */
+	eth_queue_stats_mapping_set_t queue_stats_mapping_set;
+	/**< Configure per queue stat counter mapping. */
+	eth_dev_infos_get_t        dev_infos_get; /**< Get device info. */
+	mtu_set_t                  mtu_set; /**< Set MTU. */
+	vlan_filter_set_t          vlan_filter_set;  /**< Filter VLAN Setup. */
+	vlan_tpid_set_t            vlan_tpid_set;      /**< Outer VLAN TPID Setup. */
+	vlan_strip_queue_set_t     vlan_strip_queue_set; /**< VLAN Stripping on queue. */
+	vlan_offload_set_t         vlan_offload_set; /**< Set VLAN Offload. */
+	vlan_pvid_set_t            vlan_pvid_set; /**< Set port based TX VLAN insertion */
+	eth_queue_start_t          rx_queue_start;/**< Start RX for a queue.*/
+	eth_queue_stop_t           rx_queue_stop;/**< Stop RX for a queue.*/
+	eth_queue_start_t          tx_queue_start;/**< Start TX for a queue.*/
+	eth_queue_stop_t           tx_queue_stop;/**< Stop TX for a queue.*/
+	eth_rx_queue_setup_t       rx_queue_setup;/**< Set up device RX queue.*/
+	eth_queue_release_t        rx_queue_release;/**< Release RX queue.*/
+	eth_rx_queue_count_t       rx_queue_count; /**< Get Rx queue count. */
+	eth_rx_descriptor_done_t   rx_descriptor_done;  /**< Check rxd DD bit */
+	eth_tx_queue_setup_t       tx_queue_setup;/**< Set up device TX queue.*/
+	eth_queue_release_t        tx_queue_release;/**< Release TX queue.*/
+	eth_dev_led_on_t           dev_led_on;    /**< Turn on LED. */
+	eth_dev_led_off_t          dev_led_off;   /**< Turn off LED. */
+	flow_ctrl_get_t            flow_ctrl_get; /**< Get flow control. */
+	flow_ctrl_set_t            flow_ctrl_set; /**< Setup flow control. */
+	priority_flow_ctrl_set_t   priority_flow_ctrl_set; /**< Setup priority flow control.*/
+	eth_mac_addr_remove_t      mac_addr_remove; /**< Remove MAC address */
+	eth_mac_addr_add_t         mac_addr_add;  /**< Add a MAC address */
+	eth_uc_hash_table_set_t    uc_hash_table_set;  /**< Set Unicast Table Array */
+	eth_uc_all_hash_table_set_t uc_all_hash_table_set;  /**< Set Unicast hash bitmap */
+	eth_mirror_rule_set_t	   mirror_rule_set;  /**< Add a traffic mirror rule.*/
+	eth_mirror_rule_reset_t	   mirror_rule_reset;  /**< Reset a traffic mirror rule.*/
+	eth_set_vf_rx_mode_t       set_vf_rx_mode;   /**< Set VF RX mode */
+	eth_set_vf_rx_t            set_vf_rx;  /**< Enable/disable a VF receive */
+	eth_set_vf_tx_t            set_vf_tx;  /**< Enable/disable a VF transmit */
+	eth_set_vf_vlan_filter_t   set_vf_vlan_filter;  /**< Set VF VLAN filter */
+	eth_udp_tunnel_add_t       udp_tunnel_add; /**< Add tunneling UDP info */
+	eth_udp_tunnel_del_t       udp_tunnel_del; /**< Delete tunneling UDP info */
+	eth_set_queue_rate_limit_t set_queue_rate_limit;   /**< Set queue rate limit */
+	eth_set_vf_rate_limit_t    set_vf_rate_limit;   /**< Set VF rate limit */
+
+	/** Add a signature filter. */
+	fdir_add_signature_filter_t fdir_add_signature_filter;
+	/** Update a signature filter. */
+	fdir_update_signature_filter_t fdir_update_signature_filter;
+	/** Remove a signature filter. */
+	fdir_remove_signature_filter_t fdir_remove_signature_filter;
+	/** Get information about FDIR status. */
+	fdir_infos_get_t fdir_infos_get;
+	/** Add a perfect filter. */
+	fdir_add_perfect_filter_t fdir_add_perfect_filter;
+	/** Update a perfect filter. */
+	fdir_update_perfect_filter_t fdir_update_perfect_filter;
+	/** Remove a perfect filter. */
+	fdir_remove_perfect_filter_t fdir_remove_perfect_filter;
+	/** Setup masks for FDIR filtering. */
+	fdir_set_masks_t fdir_set_masks;
+	/** Update redirection table. */
+	reta_update_t reta_update;
+	/** Query redirection table. */
+	reta_query_t reta_query;
+  /* Bypass control; these members exist only when RTE_NIC_BYPASS is defined, so the struct layout depends on that option. */
+#ifdef RTE_NIC_BYPASS
+  bypass_init_t bypass_init;
+  bypass_state_set_t bypass_state_set;
+  bypass_state_show_t bypass_state_show;
+  bypass_event_set_t bypass_event_set;
+  bypass_event_show_t bypass_event_show;
+  bypass_wd_timeout_set_t bypass_wd_timeout_set;
+  bypass_wd_timeout_show_t bypass_wd_timeout_show;
+  bypass_ver_show_t bypass_ver_show;
+  bypass_wd_reset_t bypass_wd_reset;
+#endif
+
+	/** Configure RSS hash protocols. */
+	rss_hash_update_t rss_hash_update;
+	/** Get current RSS hash configuration. */
+	rss_hash_conf_get_t rss_hash_conf_get;
+	eth_add_syn_filter_t           add_syn_filter;       /**< add syn filter. */
+	eth_remove_syn_filter_t        remove_syn_filter;    /**< remove syn filter. */
+	eth_get_syn_filter_t           get_syn_filter;       /**< get syn filter. */
+	eth_add_2tuple_filter_t        add_2tuple_filter;    /**< add 2tuple filter. */
+	eth_remove_2tuple_filter_t     remove_2tuple_filter; /**< remove 2tuple filter. */
+	eth_get_2tuple_filter_t        get_2tuple_filter;    /**< get 2tuple filter. */
+	eth_add_5tuple_filter_t        add_5tuple_filter;    /**< add 5tuple filter. */
+	eth_remove_5tuple_filter_t     remove_5tuple_filter; /**< remove 5tuple filter. */
+	eth_get_5tuple_filter_t        get_5tuple_filter;    /**< get 5tuple filter. */
+	eth_add_flex_filter_t          add_flex_filter;      /**< add flex filter. */
+	eth_remove_flex_filter_t       remove_flex_filter;   /**< remove flex filter. */
+	eth_get_flex_filter_t          get_flex_filter;      /**< get flex filter. */
+	eth_filter_ctrl_t              filter_ctrl;          /**< common filter control*/
+};
+
+/**
+ * @internal
+ * The generic data structure associated with each ethernet device.
+ *
+ * Pointers to burst-oriented packet receive and transmit functions are
+ * located at the beginning of the structure, along with the pointer to
+ * where all the data elements for the particular device are stored in shared
+ * memory. This split allows the function pointers and driver data to be
+ * per-process, while the actual configuration data for the device is shared.
+ */
+struct rte_eth_dev {
+	eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
+	eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+	struct rte_eth_dev_data *data;  /**< Pointer to device data. */
+	const struct eth_driver *driver;/**< Driver for this device. */
+	struct eth_dev_ops *dev_ops;    /**< Functions exported by PMD. */
+	struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing. */
+	struct rte_eth_dev_cb_list callbacks; /**< User application callbacks. */
+};
+
+struct rte_eth_dev_sriov {
+	uint8_t active;               /**< SRIOV is active with 16, 32 or 64 pools */
+	uint8_t nb_q_per_pool;        /**< Number of RX queues per pool */
+	uint16_t def_vmdq_idx;        /**< Default pool number used for PF */
+	uint16_t def_pool_q_idx;      /**< Default pool queue start register index */
+};
+#define RTE_ETH_DEV_SRIOV(dev)         ((dev)->data->sriov) /* SRIOV data of an rte_eth_dev pointer */
+
+#define RTE_ETH_NAME_MAX_LEN (32)
+
+/**
+ * @internal
+ * The data part, with no function pointers, associated with each ethernet device.
+ *
+ * This structure is safe to place in shared memory to be common among different
+ * processes in a multi-process configuration.
+ */
+struct rte_eth_dev_data {
+	char name[RTE_ETH_NAME_MAX_LEN]; /**< Unique identifier name */
+
+	void **rx_queues; /**< Array of pointers to RX queues. */
+	void **tx_queues; /**< Array of pointers to TX queues. */
+	uint16_t nb_rx_queues; /**< Number of RX queues. */
+	uint16_t nb_tx_queues; /**< Number of TX queues. */
+
+	struct rte_eth_dev_sriov sriov;    /**< SRIOV data */
+
+	void *dev_private;              /**< PMD-specific private data */
+
+	struct rte_eth_link dev_link;
+	/**< Link-level information & status */
+
+	struct rte_eth_conf dev_conf;   /**< Configuration applied to device. */
+	uint16_t mtu;                   /**< Maximum Transmission Unit. */
+
+	uint32_t min_rx_buf_size;
+	/**< Common rx buffer size handled by all queues */
+
+	uint64_t rx_mbuf_alloc_failed; /**< RX ring mbuf allocation failures. */
+	struct ether_addr* mac_addrs;/**< Device Ethernet Link address. */
+	uint64_t mac_pool_sel[ETH_NUM_RECEIVE_MAC_ADDR];
+	/**< Bitmap array associating Ethernet MAC addresses to pools. */
+	struct ether_addr* hash_mac_addrs;
+	/**< Device Ethernet MAC addresses of hash filtering. */
+	uint16_t port_id;           /**< Device [external] port identifier. */
+	uint8_t promiscuous   : 1, /**< RX promiscuous mode ON(1) / OFF(0). */
+		scattered_rx : 1,  /**< RX of scattered packets is ON(1) / OFF(0) */
+		all_multicast : 1, /**< RX all multicast mode ON(1) / OFF(0). */
+		dev_started : 1;   /**< Device state: STARTED(1) / STOPPED(0). */
+};
+
+/**
+ * @internal
+ * The pool of *rte_eth_dev* structures. The size of the pool
+ * is configured at compile-time in the <rte_ethdev.c> file.
+ */
+extern struct rte_eth_dev rte_eth_devices[];
+
+/**
+ * Get the total number of Ethernet devices that have been successfully
+ * initialized by the [matching] Ethernet driver during the PCI probing phase.
+ * All devices whose port identifier is in the range
+ * [0, rte_eth_dev_count() - 1] can be operated on by network applications.
+ *
+ * @return
+ *   - The total number of usable Ethernet devices.
+ */
+extern uint16_t rte_eth_dev_count(void);
+
+/**
+ * Function for internal use by dummy drivers primarily, e.g. ring-based
+ * driver.
+ * Allocates a new ethdev slot for an ethernet device and returns the pointer
+ * to that slot for the driver to use.
+ *
+ * @param	name	Unique identifier name for each Ethernet device
+ * @return
+ *   - Slot in the rte_eth_devices array for a new device.
+ */
+struct rte_eth_dev *rte_eth_dev_allocate(const char *name);
+
+struct eth_driver;
+/**
+ * @internal
+ * Initialization function of an Ethernet driver invoked for each matching
+ * Ethernet PCI device detected during the PCI probing phase.
+ *
+ * @param eth_drv
+ *   The pointer to the [matching] Ethernet driver structure supplied by
+ *   the PMD when it registered itself.
+ * @param eth_dev
+ *   The *eth_dev* pointer is the address of the *rte_eth_dev* structure
+ *   associated with the matching device and which has been [automatically]
+ *   allocated in the *rte_eth_devices* array.
+ *   The *eth_dev* structure is supplied to the driver initialization function
+ *   with the following fields already initialized:
+ *
+ *   - *pci_dev*: Holds the pointer to the *rte_pci_device* structure which
+ *     contains the generic PCI information of the matching device.
+ *
+ *   - *dev_private*: Holds a pointer to the device private data structure.
+ *
+ *   - *mtu*: Contains the default Ethernet maximum frame length (1500).
+ *
+ *   - *port_id*: Contains the port index of the device (actually the index
+ *     of the *eth_dev* structure in the *rte_eth_devices* array).
+ *
+ * @return
+ *   - 0: Success, the device is properly initialized by the driver.
+ *        In particular, the driver MUST have set up the *dev_ops* pointer
+ *        of the *eth_dev* structure.
+ *   - <0: Error code of the device initialization failure.
+ */
+typedef int (*eth_dev_init_t)(struct eth_driver  *eth_drv,
+			      struct rte_eth_dev *eth_dev);
+
+/**
+ * @internal
+ * The structure associated with a PMD Ethernet driver.
+ *
+ * Each Ethernet driver acts as a PCI driver and is represented by a generic
+ * *eth_driver* structure that holds:
+ *
+ * - An *rte_pci_driver* structure (which must be the first field).
+ *
+ * - The *eth_dev_init* function invoked for each matching PCI device.
+ *
+ * - The size of the private data to allocate for each matching device.
+ */
+struct eth_driver {
+	struct rte_pci_driver pci_drv;    /**< The PMD is also a PCI driver. */
+	eth_dev_init_t eth_dev_init;      /**< Device init function. */
+	unsigned int dev_private_size;    /**< Size of device private data. */
+};
+
+/**
+ * @internal
+ * A function invoked by the initialization function of an Ethernet driver
+ * to simultaneously register itself as a PCI driver and as an Ethernet
+ * Poll Mode Driver (PMD).
+ *
+ * @param eth_drv
+ *   The pointer to the *eth_driver* structure associated with
+ *   the Ethernet driver.
+ */
+extern void rte_eth_driver_register(struct eth_driver *eth_drv);
+
+/**
+ * Configure an Ethernet device.
+ * This function must be invoked first before any other function in the
+ * Ethernet API. This function can also be re-invoked when a device is in the
+ * stopped state.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device to configure.
+ * @param nb_rx_queue
+ *   The number of receive queues to set up for the Ethernet device.
+ * @param nb_tx_queue
+ *   The number of transmit queues to set up for the Ethernet device.
+ * @param eth_conf
+ *   The pointer to the configuration data to be used for the Ethernet device.
+ *   The *rte_eth_conf* structure includes:
+ *     - the hardware offload features to activate, with dedicated fields for
+ *       each statically configurable offload hardware feature provided by
+ *       Ethernet devices, such as IP checksum or VLAN tag stripping for
+ *       example.
+ *     - the Receive Side Scaling (RSS) configuration when using multiple RX
+ *       queues per port.
+ *
+ *   Embedding all configuration information in a single data structure
+ *   is the most flexible method, as it allows the addition of new features
+ *   without changing the syntax of the API.
+ * @return
+ *   - 0: Success, device configured.
+ *   - <0: Error code returned by the driver configuration function.
+ */
+extern int rte_eth_dev_configure(uint16_t port_id,
+				 uint16_t nb_rx_queue,
+				 uint16_t nb_tx_queue,
+				 const struct rte_eth_conf *eth_conf);
+
+/**
+ * Allocate and set up a receive queue for an Ethernet device.
+ *
+ * The function allocates a contiguous block of memory for *nb_rx_desc*
+ * receive descriptors from a memory zone associated with *socket_id*
+ * and initializes each receive descriptor with a network buffer allocated
+ * from the memory pool *mb_pool*.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param rx_queue_id
+ *   The index of the receive queue to set up.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param nb_rx_desc
+ *   The number of receive descriptors to allocate for the receive ring.
+ * @param socket_id
+ *   The *socket_id* argument is the socket identifier in case of NUMA.
+ *   The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for
+ *   the DMA memory allocated for the receive descriptors of the ring.
+ * @param rx_conf
+ *   The pointer to the configuration data to be used for the receive queue.
+ *   NULL value is allowed, in which case default RX configuration
+ *   will be used.
+ *   The *rx_conf* structure contains an *rx_thresh* structure with the values
+ *   of the Prefetch, Host, and Write-Back threshold registers of the receive
+ *   ring.
+ * @param mb_pool
+ *   The pointer to the memory pool from which to allocate *rte_mbuf* network
+ *   memory buffers to populate each descriptor of the receive ring.
+ * @return
+ *   - 0: Success, receive queue correctly set up.
+ *   - -EINVAL: The size of network buffers which can be allocated from the
+ *      memory pool does not fit the various buffer sizes allowed by the
+ *      device controller.
+ *   - -ENOMEM: Unable to allocate the receive ring descriptors or to
+ *      allocate network memory buffers from the memory pool when
+ *      initializing receive descriptors.
+ */
+extern int rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
+				  uint16_t nb_rx_desc, unsigned int socket_id,
+				  const struct rte_eth_rxconf *rx_conf,
+				  struct rte_mempool *mb_pool);
+
+/**
+ * Allocate and set up a transmit queue for an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param tx_queue_id
+ *   The index of the transmit queue to set up.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param nb_tx_desc
+ *   The number of transmit descriptors to allocate for the transmit ring.
+ * @param socket_id
+ *   The *socket_id* argument is the socket identifier in case of NUMA.
+ *   Its value can be *SOCKET_ID_ANY* if there is no NUMA constraint for
+ *   the DMA memory allocated for the transmit descriptors of the ring.
+ * @param tx_conf
+ *   The pointer to the configuration data to be used for the transmit queue.
+ *   NULL value is allowed, in which case default RX configuration
+ *   will be used.
+ *   The *tx_conf* structure contains the following data:
+ *   - The *tx_thresh* structure with the values of the Prefetch, Host, and
+ *     Write-Back threshold registers of the transmit ring.
+ *     When setting Write-Back threshold to the value greater then zero,
+ *     *tx_rs_thresh* value should be explicitly set to one.
+ *   - The *tx_free_thresh* value indicates the [minimum] number of network
+ *     buffers that must be pending in the transmit ring to trigger their
+ *     [implicit] freeing by the driver transmit function.
+ *   - The *tx_rs_thresh* value indicates the [minimum] number of transmit
+ *     descriptors that must be pending in the transmit ring before setting the
+ *     RS bit on a descriptor by the driver transmit function.
+ *     The *tx_rs_thresh* value should be less than or equal to the
+ *     *tx_free_thresh* value, and both of them should be less than
+ *     *nb_tx_desc* - 3.
+ *   - The *txq_flags* member contains flags to pass to the TX queue setup
+ *     function to configure the behavior of the TX queue. This should be set
+ *     to 0 if no special configuration is required.
+ *
+ *     Note that setting *tx_free_thresh* or *tx_rs_thresh* value to 0 forces
+ *     the transmit function to use default values.
+ * @return
+ *   - 0: Success, the transmit queue is correctly set up.
+ *   - -ENOMEM: Unable to allocate the transmit ring descriptors.
+ */
+extern int rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
+				  uint16_t nb_tx_desc, unsigned int socket_id,
+				  const struct rte_eth_txconf *tx_conf);
+
+/**
+ * Return the NUMA socket to which an Ethernet device is connected
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device
+ * @return
+ *   The NUMA socket id to which the Ethernet device is connected or
+ *   a default of zero if the socket could not be determined.
+ *   -1 is returned if the port_id value is out of range.
+ */
+extern int rte_eth_dev_socket_id(uint16_t port_id);
+
+/**
+ * Allocate mbuf from mempool, setup the DMA physical address
+ * and then start RX for specified queue of a port. It is used
+ * when rx_deferred_start flag of the specified queue is true.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device
+ * @param rx_queue_id
+ *   The index of the rx queue to update the ring.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - 0: Success, the receive queue is started.
+ *   - -EINVAL: The port_id or the queue_id out of range.
+ *   - -ENOTSUP: The function not supported in PMD driver.
+ */
+extern int rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id);
+
+/**
+ * Stop specified RX queue of a port
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device
+ * @param rx_queue_id
+ *   The index of the rx queue to update the ring.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - 0: Success, the receive queue is stopped.
+ *   - -EINVAL: The port_id or the queue_id out of range.
+ *   - -ENOTSUP: The function not supported in PMD driver.
+ */
+extern int rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id);
+
+/**
+ * Start TX for specified queue of a port. It is used when tx_deferred_start
+ * flag of the specified queue is true.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device
+ * @param tx_queue_id
+ *   The index of the tx queue to update the ring.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - 0: Success, the transmit queue is started.
+ *   - -EINVAL: The port_id or the queue_id out of range.
+ *   - -ENOTSUP: The function not supported in PMD driver.
+ */
+extern int rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id);
+
+/**
+ * Stop specified TX queue of a port
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device
+ * @param tx_queue_id
+ *   The index of the tx queue to update the ring.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - 0: Success, the transmit queue is stopped.
+ *   - -EINVAL: The port_id or the queue_id out of range.
+ *   - -ENOTSUP: The function not supported in PMD driver.
+ */
+extern int rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id);
+
+
+
+/**
+ * Start an Ethernet device.
+ *
+ * The device start step is the last one and consists of setting the configured
+ * offload features and in starting the transmit and the receive units of the
+ * device.
+ * On success, all basic functions exported by the Ethernet API (link status,
+ * receive/transmit, and so on) can be invoked.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - 0: Success, Ethernet device started.
+ *   - <0: Error code of the driver device start function.
+ */
+extern int rte_eth_dev_start(uint16_t port_id);
+
+/**
+ * Stop an Ethernet device. The device can be restarted with a call to
+ * rte_eth_dev_start()
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+extern void rte_eth_dev_stop(uint16_t port_id);
+
+
+/**
+ * Link up an Ethernet device.
+ *
+ * Setting the device link up re-enables the device rx/tx
+ * functionality after the link was previously set down.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - 0: Success, Ethernet device linked up.
+ *   - <0: Error code of the driver device link up function.
+ */
+extern int rte_eth_dev_set_link_up(uint16_t port_id);
+
+/**
+ * Link down an Ethernet device.
+ * The device rx/tx functionality will be disabled on success,
+ * and it can be re-enabled with a call to
+ * rte_eth_dev_set_link_up()
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+extern int rte_eth_dev_set_link_down(uint16_t port_id);
+
+/**
+ * Close an Ethernet device. The device cannot be restarted!
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+extern void rte_eth_dev_close(uint16_t port_id);
+
+/**
+ * Enable receipt in promiscuous mode for an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+extern void rte_eth_promiscuous_enable(uint16_t port_id);
+
+/**
+ * Disable receipt in promiscuous mode for an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+extern void rte_eth_promiscuous_disable(uint16_t port_id);
+
+/**
+ * Return the value of promiscuous mode for an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (1) if promiscuous is enabled
+ *   - (0) if promiscuous is disabled.
+ *   - (-1) on error
+ */
+extern int rte_eth_promiscuous_get(uint16_t port_id);
+
+/**
+ * Enable the receipt of any multicast frame by an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+extern void rte_eth_allmulticast_enable(uint16_t port_id);
+
+/**
+ * Disable the receipt of all multicast frames by an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+extern void rte_eth_allmulticast_disable(uint16_t port_id);
+
+/**
+ * Return the value of allmulticast mode for an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (1) if allmulticast is enabled
+ *   - (0) if allmulticast is disabled.
+ *   - (-1) on error
+ */
+extern int rte_eth_allmulticast_get(uint16_t port_id);
+
+/**
+ * Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX
+ * or FULL-DUPLEX) of the physical link of an Ethernet device. This call
+ * might need to wait up to 9 seconds before returning.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param link
+ *   A pointer to an *rte_eth_link* structure to be filled with
+ *   the status, the speed and the mode of the Ethernet device link.
+ */
+extern void rte_eth_link_get(uint16_t port_id, struct rte_eth_link *link);
+
+/**
+ * Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX
+ * or FULL-DUPLEX) of the physical link of an Ethernet device. It is a no-wait
+ * version of rte_eth_link_get().
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param link
+ *   A pointer to an *rte_eth_link* structure to be filled with
+ *   the status, the speed and the mode of the Ethernet device link.
+ */
+extern void rte_eth_link_get_nowait(uint16_t port_id,
+				struct rte_eth_link *link);
+
+/**
+ * Retrieve the general I/O statistics of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param stats
+ *   A pointer to a structure of type *rte_eth_stats* to be filled with
+ *   the values of device counters for the following set of statistics:
+ *   - *ipackets* with the total of successfully received packets.
+ *   - *opackets* with the total of successfully transmitted packets.
+ *   - *ibytes*   with the total of successfully received bytes.
+ *   - *obytes*   with the total of successfully transmitted bytes.
+ *   - *ierrors*  with the total of erroneous received packets.
+ *   - *oerrors*  with the total of failed transmitted packets.
+ */
+extern void rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats);
+
+/**
+ * Reset the general I/O statistics of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+extern void rte_eth_stats_reset(uint16_t port_id);
+
+/**
+ * Retrieve extended statistics of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param xstats
+ *   A pointer to a table of structure of type *rte_eth_xstats*
+ *   to be filled with device statistics names and values.
+ *   This parameter can be set to NULL if n is 0.
+ * @param n
+ *   The size of the stats table, which should be large enough to store
+ *   all the statistics of the device.
+ * @return
+ *   - positive value lower or equal to n: success. The return value
+ *     is the number of entries filled in the stats table.
+ *   - positive value higher than n: error, the given statistics table
+ *     is too small. The return value corresponds to the size that should
+ *     be given to succeed. The entries in the table are not valid and
+ *     shall not be used by the caller.
+ *   - negative value on error (invalid port id)
+ */
+extern int rte_eth_xstats_get(uint16_t port_id,
+	struct rte_eth_xstats *xstats, unsigned n);
+
+/**
+ * Reset extended statistics of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+extern void rte_eth_xstats_reset(uint16_t port_id);
+
+/**
+ *  Set a mapping for the specified transmit queue to the specified per-queue
+ *  statistics counter.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param tx_queue_id
+ *   The index of the transmit queue for which a queue stats mapping is required.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param stat_idx
+ *   The per-queue packet statistics functionality number that the transmit
+ *   queue is to be assigned.
+ *   The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1].
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+extern int rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id,
+						  uint16_t tx_queue_id,
+						  uint8_t stat_idx);
+
+/**
+ *  Set a mapping for the specified receive queue to the specified per-queue
+ *  statistics counter.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param rx_queue_id
+ *   The index of the receive queue for which a queue stats mapping is required.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param stat_idx
+ *   The per-queue packet statistics functionality number that the receive
+ *   queue is to be assigned.
+ *   The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1].
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+extern int rte_eth_dev_set_rx_queue_stats_mapping(uint16_t port_id,
+						  uint16_t rx_queue_id,
+						  uint8_t stat_idx);
+
+/**
+ * Retrieve the Ethernet address of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param mac_addr
+ *   A pointer to a structure of type *ether_addr* to be filled with
+ *   the Ethernet address of the Ethernet device.
+ */
+extern void rte_eth_macaddr_get(uint16_t port_id, struct ether_addr *mac_addr);
+
+/**
+ * Retrieve the contextual information of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param dev_info
+ *   A pointer to a structure of type *rte_eth_dev_info* to be filled with
+ *   the contextual information of the Ethernet device.
+ */
+extern void rte_eth_dev_info_get(uint16_t port_id,
+				 struct rte_eth_dev_info *dev_info);
+
+/**
+ * Retrieve the MTU of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param mtu
+ *   A pointer to a uint16_t where the retrieved MTU is to be stored.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+extern int rte_eth_dev_get_mtu(uint16_t port_id, uint16_t *mtu);
+
+/**
+ * Change the MTU of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param mtu
+ *   A uint16_t for the MTU to be applied.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if operation is not supported.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if *mtu* invalid.
+ */
+extern int rte_eth_dev_set_mtu(uint16_t port_id, uint16_t mtu);
+
+/**
+ * Enable/Disable hardware filtering by an Ethernet device of received
+ * VLAN packets tagged with a given VLAN Tag Identifier.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param vlan_id
+ *   The VLAN Tag Identifier whose filtering must be enabled or disabled.
+ * @param on
+ *   If > 0, enable VLAN filtering of VLAN packets tagged with *vlan_id*.
+ *   Otherwise, disable VLAN filtering of VLAN packets tagged with *vlan_id*.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware-assisted VLAN filtering not configured.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-ENOSYS) if VLAN filtering on *port_id* disabled.
+ *   - (-EINVAL) if *vlan_id* > 4095.
+ */
+extern int rte_eth_dev_vlan_filter(uint16_t port_id, uint16_t vlan_id , int on);
+
+/**
+ * Enable/Disable hardware VLAN Strip by a rx queue of an Ethernet device.
+ * 82599/X540/X550 can support VLAN stripping at the rx queue level
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param rx_queue_id
+ *   The index of the receive queue on which VLAN stripping is enabled or disabled.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param on
+ *   If 1, Enable VLAN Stripping of the receive queue of the Ethernet port.
+ *   If 0, Disable VLAN Stripping of the receive queue of the Ethernet port.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware-assisted VLAN stripping not configured.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if *rx_queue_id* invalid.
+ */
+extern int rte_eth_dev_set_vlan_strip_on_queue(uint16_t port_id,
+		uint16_t rx_queue_id, int on);
+
+/**
+ * Set the Outer VLAN Ether Type by an Ethernet device, it can be inserted to
+ * the VLAN Header. This is a register setup available on some Intel NICs, but
+ * not all; please check the data sheet for availability.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param tag_type
+ *   The Tag Protocol ID
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware-assisted VLAN TPID setup is not supported.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+extern int rte_eth_dev_set_vlan_ether_type(uint16_t port_id, uint16_t tag_type);
+
+/**
+ * Set VLAN offload configuration on an Ethernet device
+ * Enable/Disable Extended VLAN by an Ethernet device. This is a register setup
+ * available on some Intel NICs, but not all; please check the data sheet for
+ * availability.
+ * Enable/Disable VLAN Strip can be done on rx queue for certain NIC, but here
+ * the configuration is applied on the port level.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param offload_mask
+ *   The VLAN Offload bit mask can be mixed use with "OR"
+ *       ETH_VLAN_STRIP_OFFLOAD
+ *       ETH_VLAN_FILTER_OFFLOAD
+ *       ETH_VLAN_EXTEND_OFFLOAD
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware-assisted VLAN filtering not configured.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+extern int rte_eth_dev_set_vlan_offload(uint16_t port_id, int offload_mask);
+
+/**
+ * Read VLAN Offload configuration from an Ethernet device
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (>0) if successful. Bit mask to indicate
+ *       ETH_VLAN_STRIP_OFFLOAD
+ *       ETH_VLAN_FILTER_OFFLOAD
+ *       ETH_VLAN_EXTEND_OFFLOAD
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+extern int rte_eth_dev_get_vlan_offload(uint16_t port_id);
+
+/**
+ * Set port based TX VLAN insertion on or off.
+ *
+ * @param port_id
+ *  The port identifier of the Ethernet device.
+ * @param pvid
+ *  Port based TX VLAN identifier together with user priority.
+ * @param on
+ *  Turn on or off the port based TX VLAN insertion.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - negative if failed.
+ */
+extern int rte_eth_dev_set_vlan_pvid(uint16_t port_id, uint16_t pvid, int on);
+
+/**
+ *
+ * Retrieve a burst of input packets from a receive queue of an Ethernet
+ * device. The retrieved packets are stored in *rte_mbuf* structures whose
+ * pointers are supplied in the *rx_pkts* array.
+ *
+ * The rte_eth_rx_burst() function loops, parsing the RX ring of the
+ * receive queue, up to *nb_pkts* packets, and for each completed RX
+ * descriptor in the ring, it performs the following operations:
+ *
+ * - Initialize the *rte_mbuf* data structure associated with the
+ *   RX descriptor according to the information provided by the NIC into
+ *   that RX descriptor.
+ *
+ * - Store the *rte_mbuf* data structure into the next entry of the
+ *   *rx_pkts* array.
+ *
+ * - Replenish the RX descriptor with a new *rte_mbuf* buffer
+ *   allocated from the memory pool associated with the receive queue at
+ *   initialization time.
+ *
+ * When retrieving an input packet that was scattered by the controller
+ * into multiple receive descriptors, the rte_eth_rx_burst() function
+ * appends the associated *rte_mbuf* buffers to the first buffer of the
+ * packet.
+ *
+ * The rte_eth_rx_burst() function returns the number of packets
+ * actually retrieved, which is the number of *rte_mbuf* data structures
+ * effectively supplied into the *rx_pkts* array.
+ * A return value equal to *nb_pkts* indicates that the RX queue contained
+ * at least *nb_pkts* packets, and this is likely to signify that other
+ * received packets remain in the input queue. Applications implementing
+ * a "retrieve as many received packets as possible" policy can check this
+ * specific case and keep invoking the rte_eth_rx_burst() function until
+ * a value less than *nb_pkts* is returned.
+ *
+ * This receive method has the following advantages:
+ *
+ * - It allows a run-to-completion network stack engine to retrieve and
+ *   to immediately process received packets in a fast burst-oriented
+ *   approach, avoiding the overhead of unnecessary intermediate packet
+ *   queue/dequeue operations.
+ *
+ * - Conversely, it also allows an asynchronous-oriented processing
+ *   method to retrieve bursts of received packets and to immediately
+ *   queue them for further parallel processing by another logical core,
+ *   for instance. However, instead of having received packets being
+ *   individually queued by the driver, this approach allows the invoker
+ *   of the rte_eth_rx_burst() function to queue a burst of retrieved
+ *   packets at a time and therefore dramatically reduce the cost of
+ *   enqueue/dequeue operations per packet.
+ *
+ * - It allows the rte_eth_rx_burst() function of the driver to take
+ *   advantage of burst-oriented hardware features (CPU cache,
+ *   prefetch instructions, and so on) to minimize the number of CPU
+ *   cycles per packet.
+ *
+ * To summarize, the proposed receive API enables many
+ * burst-oriented optimizations in both synchronous and asynchronous
+ * packet processing environments with no overhead in both cases.
+ *
+ * The rte_eth_rx_burst() function does not provide any error
+ * notification to avoid the corresponding overhead. As a hint, the
+ * upper-level application might check the status of the device link once
+ * being systematically returned a 0 value for a given number of tries.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param rx_pkts
+ *   The address of an array of pointers to *rte_mbuf* structures that
+ *   must be large enough to store *nb_pkts* pointers in it.
+ * @param nb_pkts
+ *   The maximum number of packets to retrieve.
+ * @return
+ *   The number of packets actually retrieved, which is the number
+ *   of pointers to *rte_mbuf* structures effectively supplied to the
+ *   *rx_pkts* array.
+ */
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+extern uint16_t rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id,
+				 struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+#else
+static inline uint16_t
+rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id,
+		 struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+	return dev->rx_pkt_burst(dev->data->rx_queues[queue_id],
+				 rx_pkts, nb_pkts);
+}
+#endif
+
+/**
+ * Get the number of used descriptors in a specific queue
+ *
+ * @param port_id
+ *  The port identifier of the Ethernet device.
+ * @param queue_id
+ *  The queue id on the specific port.
+ * @return
+ *  The number of used descriptors in the specific queue.
+ */
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+extern uint32_t rte_eth_rx_queue_count(uint16_t port_id, uint16_t queue_id);
+#else
+static inline uint32_t
+rte_eth_rx_queue_count(uint16_t port_id, uint16_t queue_id)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+	/* port_id is used as-is to index the device table; no validation here. */
+	return dev->dev_ops->rx_queue_count(dev, queue_id);
+}
+#endif
+
+/**
+ * Check if the DD bit of the specific RX descriptor in the queue has been set
+ *
+ * @param port_id
+ *  The port identifier of the Ethernet device.
+ * @param queue_id
+ *  The queue id on the specific port.
+ * @param offset
+ *  The offset of the descriptor ID from tail.
+ * @return
+ *  - (1) if the specific DD bit is set.
+ *  - (0) if the specific DD bit is not set.
+ *  - (-ENODEV) if *port_id* invalid.
+ */
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+extern int rte_eth_rx_descriptor_done(uint16_t port_id,
+				      uint16_t queue_id,
+				      uint16_t offset);
+#else
+static inline int
+rte_eth_rx_descriptor_done(uint16_t port_id, uint16_t queue_id, uint16_t offset)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+	/* port_id and queue_id are used as-is; no range check is done here. */
+	return dev->dev_ops->rx_descriptor_done(
+		dev->data->rx_queues[queue_id], offset);
+}
+#endif
+
+/**
+ * Send a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_burst() function is invoked to transmit output packets
+ * on the output queue *queue_id* of the Ethernet device designated by its
+ * *port_id*.
+ * The *nb_pkts* parameter is the number of packets to send which are
+ * supplied in the *tx_pkts* array of *rte_mbuf* structures.
+ * The rte_eth_tx_burst() function loops, sending *nb_pkts* packets,
+ * up to the number of transmit descriptors available in the TX ring of the
+ * transmit queue.
+ * For each packet to send, the rte_eth_tx_burst() function performs
+ * the following operations:
+ *
+ * - Pick up the next available descriptor in the transmit ring.
+ *
+ * - Free the network buffer previously sent with that descriptor, if any.
+ *
+ * - Initialize the transmit descriptor with the information provided
+ *   in the *rte_mbuf* data structure.
+ *
+ * In the case of a segmented packet composed of a list of *rte_mbuf* buffers,
+ * the rte_eth_tx_burst() function uses several transmit descriptors
+ * of the ring.
+ *
+ * The rte_eth_tx_burst() function returns the number of packets it
+ * actually sent. A return value equal to *nb_pkts* means that all packets
+ * have been sent, and this is likely to signify that other output packets
+ * could be immediately transmitted again. Applications that implement a
+ * "send as many packets to transmit as possible" policy can check this
+ * specific case and keep invoking the rte_eth_tx_burst() function until
+ * a value less than *nb_pkts* is returned.
+ *
+ * It is the responsibility of the rte_eth_tx_burst() function to
+ * transparently free the memory buffers of packets previously sent.
+ * This feature is driven by the *tx_free_thresh* value supplied to the
+ * rte_eth_tx_queue_setup() function at transmit queue setup time.
+ * When the number of previously sent packets reached the "minimum transmit
+ * packets to free" threshold, the rte_eth_tx_burst() function must
+ * [attempt to] free the *rte_mbuf*  buffers of those packets whose
+ * transmission was effectively completed.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the transmit queue through which output packets must be
+ *   sent.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param tx_pkts
+ *   The address of an array of *nb_pkts* pointers to *rte_mbuf* structures
+ *   which contain the output packets.
+ * @param nb_pkts
+ *   The maximum number of packets to transmit.
+ * @return
+ *   The number of output packets actually stored in transmit descriptors of
+ *   the transmit ring. The return value can be less than the value of the
+ *   *nb_pkts* parameter when the transmit ring is full or has been filled up.
+ */
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+extern uint16_t rte_eth_tx_burst(uint16_t port_id, uint16_t queue_id,
+				 struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+#else
+static inline uint16_t
+rte_eth_tx_burst(uint16_t port_id, uint16_t queue_id,
+		 struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+	return dev->tx_pkt_burst(dev->data->tx_queues[queue_id],
+				 tx_pkts, nb_pkts);
+}
+#endif
+
+/**
+ * Setup a new signature filter rule on an Ethernet device
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param fdir_filter
+ *   The pointer to the fdir filter structure describing the signature filter
+ *   rule.
+ *   The *rte_fdir_filter* structure includes the values of the different fields
+ *   to match: source and destination IP addresses, vlan id, flexbytes, source
+ *   and destination ports, and so on.
+ * @param rx_queue
+ *   The index of the RX queue where to store RX packets matching the added
+ *   signature filter defined in fdir_filter.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support flow director mode.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-ENOSYS) if the FDIR mode is not configured in signature mode
+ *               on *port_id*.
+ *   - (-EINVAL) if the fdir_filter information is not correct.
+ */
+int rte_eth_dev_fdir_add_signature_filter(uint16_t port_id,
+					  struct rte_fdir_filter *fdir_filter,
+					  uint8_t rx_queue);
+
+/**
+ * Update a signature filter rule on an Ethernet device.
+ * If the rule doesn't exist, it is created.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param fdir_ftr
+ *   The pointer to the structure describing the signature filter rule.
+ *   The *rte_fdir_filter* structure includes the values of the different fields
+ *   to match: source and destination IP addresses, vlan id, flexbytes, source
+ *   and destination ports, and so on.
+ * @param rx_queue
+ *   The index of the RX queue where to store RX packets matching the added
+ *   signature filter defined in fdir_ftr.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support flow director mode.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-ENOSYS) if the flow director mode is not configured in signature mode
+ *     on *port_id*.
+ *   - (-EINVAL) if the fdir_filter information is not correct.
+ */
+int rte_eth_dev_fdir_update_signature_filter(uint16_t port_id,
+					     struct rte_fdir_filter *fdir_ftr,
+					     uint8_t rx_queue);
+
+/**
+ * Remove a signature filter rule on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param fdir_ftr
+ *   The pointer to the structure describing the signature filter rule.
+ *   The *rte_fdir_filter* structure includes the values of the different fields
+ *   to match: source and destination IP addresses, vlan id, flexbytes, source
+ *   and destination ports, and so on.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support flow director mode.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-ENOSYS) if the flow director mode is not configured in signature mode
+ *     on *port_id*.
+ *   - (-EINVAL) if the fdir_filter information is not correct.
+ */
+int rte_eth_dev_fdir_remove_signature_filter(uint16_t port_id,
+					     struct rte_fdir_filter *fdir_ftr);
+
+/**
+ * Retrieve the flow director information of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param fdir
+ *   A pointer to a structure of type *rte_eth_dev_fdir* to be filled with
+ *   the flow director information of the Ethernet device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support flow director mode.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-ENOSYS) if the flow director mode is not configured on *port_id*.
+ */
+int rte_eth_dev_fdir_get_infos(uint16_t port_id, struct rte_eth_fdir *fdir);
+
+/**
+ * Add a new perfect filter rule on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param fdir_filter
+ *   The pointer to the structure describing the perfect filter rule.
+ *   The *rte_fdir_filter* structure includes the values of the different fields
+ *   to match: source and destination IP addresses, vlan id, flexbytes, source
+ *   and destination ports, and so on.
+ *   IPv6 is not supported.
+ * @param soft_id
+ *    The 16-bit value supplied in the field hash.fdir.id of mbuf for RX
+ *    packets matching the perfect filter.
+ * @param rx_queue
+ *   The index of the RX queue where to store RX packets matching the added
+ *   perfect filter defined in fdir_filter.
+ * @param drop
+ *    If drop is set to 1, matching RX packets are stored into the RX drop
+ *    queue defined in the rte_fdir_conf.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support flow director mode.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-ENOSYS) if the flow director mode is not configured in perfect mode
+ *               on *port_id*.
+ *   - (-EINVAL) if the fdir_filter information is not correct.
+ */
+int rte_eth_dev_fdir_add_perfect_filter(uint16_t port_id,
+					struct rte_fdir_filter *fdir_filter,
+					uint16_t soft_id, uint8_t rx_queue,
+					uint8_t drop);
+
+/**
+ * Update a perfect filter rule on an Ethernet device.
+ * If the rule doesn't exist, it is created.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param fdir_filter
+ *   The pointer to the structure describing the perfect filter rule.
+ *   The *rte_fdir_filter* structure includes the values of the different fields
+ *   to match: source and destination IP addresses, vlan id, flexbytes, source
+ *   and destination ports, and so on.
+ *   IPv6 are not supported.
+ * @param soft_id
+ *    The 16-bit value supplied in the field hash.fdir.id of mbuf for RX
+ *    packets matching the perfect filter.
+ * @param rx_queue
+ *   The index of the RX queue where to store RX packets matching the added
+ *   perfect filter defined in fdir_filter.
+ * @param drop
+ *    If drop is set to 1, matching RX packets are stored into the RX drop
+ *    queue defined in the rte_fdir_conf.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support flow director mode.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-ENOSYS) if the flow director mode is not configured in perfect mode
+ *      on *port_id*.
+ *   - (-EINVAL) if the fdir_filter information is not correct.
+ */
+int rte_eth_dev_fdir_update_perfect_filter(uint16_t port_id,
+					   struct rte_fdir_filter *fdir_filter,
+					   uint16_t soft_id, uint8_t rx_queue,
+					   uint8_t drop);
+
+/**
+ * Remove a perfect filter rule on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param fdir_filter
+ *   The pointer to the structure describing the perfect filter rule.
+ *   The *rte_fdir_filter* structure includes the values of the different fields
+ *   to match: source and destination IP addresses, vlan id, flexbytes, source
+ *   and destination ports, and so on.
+ *   IPv6 are not supported.
+ * @param soft_id
+ *    The soft_id value provided when adding/updating the removed filter.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support flow director mode.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-ENOSYS) if the flow director mode is not configured in perfect mode
+ *      on *port_id*.
+ *   - (-EINVAL) if the fdir_filter information is not correct.
+ */
+int rte_eth_dev_fdir_remove_perfect_filter(uint16_t port_id,
+					   struct rte_fdir_filter *fdir_filter,
+					   uint16_t soft_id);
+/**
+ * Configure globally the masks for flow director mode for an Ethernet device.
+ * For example, the device can match packets with only the first 24 bits of
+ * the IPv4 source address.
+ *
+ * The following fields can be masked: IPv4 addresses and L4 port numbers.
+ * The following fields can be either enabled or disabled completely for the
+ * matching functionality: VLAN ID tag; VLAN Priority + CFI bit; Flexible 2-byte
+ * tuple.
+ * IPv6 masks are not supported.
+ *
+ * All filters must comply with the masks previously configured.
+ * For example, with a mask equal to 255.255.255.0 for the source IPv4 address,
+ * all IPv4 filters must be created with a source IPv4 address that fits the
+ * "X.X.X.0" format.
+ *
+ * This function flushes all filters that have been previously added in
+ * the device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param fdir_mask
+ *   The pointer to the fdir mask structure describing relevant headers fields
+ *   and relevant bits to use when matching packets addresses and ports.
+ *   IPv6 masks are not supported.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support flow director mode.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-ENOSYS) if the flow director mode is not configured in perfect
+ *      mode on *port_id*.
+ *   - (-EINVAL) if the fdir_mask information is not correct.
+ */
+int rte_eth_dev_fdir_set_masks(uint16_t port_id,
+			       struct rte_fdir_masks *fdir_mask);
+
+/**
+ * The eth device event type for interrupt, and maybe others in the future.
+ */
+enum rte_eth_event_type {
+	RTE_ETH_EVENT_UNKNOWN,  /**< unknown event type */
+	RTE_ETH_EVENT_INTR_LSC, /**< lsc interrupt event */
+	RTE_ETH_EVENT_MAX       /**< max value of this enum */
+};
+
+typedef void (*rte_eth_dev_cb_fn)(uint16_t port_id, \
+		enum rte_eth_event_type event, void *cb_arg);
+/**< user application callback to be registered for interrupts */
+
+
+
+/**
+ * Register a callback function for specific port id.
+ *
+ * @param port_id
+ *  Port id.
+ * @param event
+ *  Event interested.
+ * @param cb_fn
+ *  User supplied callback function to be called.
+ * @param cb_arg
+ *  Pointer to the parameters for the registered callback.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+int rte_eth_dev_callback_register(uint16_t port_id,
+			enum rte_eth_event_type event,
+		rte_eth_dev_cb_fn cb_fn, void *cb_arg);
+
+/**
+ * Unregister a callback function for specific port id.
+ *
+ * @param port_id
+ *  Port id.
+ * @param event
+ *  Event interested.
+ * @param cb_fn
+ *  User supplied callback function to be called.
+ * @param cb_arg
+ *  Pointer to the parameters for the registered callback. -1 means to
+ *  remove all for the same callback address and same event.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+int rte_eth_dev_callback_unregister(uint16_t port_id,
+			enum rte_eth_event_type event,
+		rte_eth_dev_cb_fn cb_fn, void *cb_arg);
+
+/**
+ * @internal Executes all the user application registered callbacks for
+ * the specific device. It is for DPDK internal use only. User
+ * application should not call it directly.
+ *
+ * @param dev
+ *  Pointer to struct rte_eth_dev.
+ * @param event
+ *  Eth device interrupt event type.
+ *
+ * @return
+ *  void
+ */
+void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
+				enum rte_eth_event_type event);
+
+/**
+ * Turn on the LED on the Ethernet device.
+ * This function turns on the LED on the Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int  rte_eth_led_on(uint16_t port_id);
+
+/**
+ * Turn off the LED on the Ethernet device.
+ * This function turns off the LED on the Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int  rte_eth_led_off(uint16_t port_id);
+
+/**
+ * Get current status of the Ethernet link flow control for Ethernet device
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param fc_conf
+ *   The pointer to the structure where to store the flow control parameters.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support flow control.
+ *   - (-ENODEV)  if *port_id* invalid.
+ */
+int rte_eth_dev_flow_ctrl_get(uint16_t port_id,
+			      struct rte_eth_fc_conf *fc_conf);
+
+/**
+ * Configure the Ethernet link flow control for Ethernet device
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param fc_conf
+ *   The pointer to the structure of the flow control parameters.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support flow control mode.
+ *   - (-ENODEV)  if *port_id* invalid.
+ *   - (-EINVAL)  if bad parameter
+ *   - (-EIO)     if flow control setup failure
+ */
+int rte_eth_dev_flow_ctrl_set(uint16_t port_id,
+			      struct rte_eth_fc_conf *fc_conf);
+
+/**
+ * Configure the Ethernet priority flow control under DCB environment
+ * for Ethernet device.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param pfc_conf
+ * The pointer to the structure of the priority flow control parameters.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support priority flow control mode.
+ *   - (-ENODEV)  if *port_id* invalid.
+ *   - (-EINVAL)  if bad parameter
+ *   - (-EIO)     if flow control setup failure
+ */
+int rte_eth_dev_priority_flow_ctrl_set(uint16_t port_id,
+				struct rte_eth_pfc_conf *pfc_conf);
+
+/**
+ * Add a MAC address to an internal array of addresses used to enable whitelist
+ * filtering to accept packets only if the destination MAC address matches.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param mac_addr
+ *   The MAC address to add.
+ * @param pool
+ *   VMDq pool index to associate address with (if VMDq is enabled). If VMDq is
+ *   not enabled, this should be set to 0.
+ * @return
+ *   - (0) if successfully added or *mac_addr* was already added.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ *   - (-ENODEV) if *port_id* is invalid.
+ *   - (-ENOSPC) if no more MAC addresses can be added.
+ *   - (-EINVAL) if MAC address is invalid.
+ */
+int rte_eth_dev_mac_addr_add(uint16_t port_id, struct ether_addr *mac_addr,
+				uint32_t pool);
+
+/**
+ * Remove a MAC address from the internal array of addresses.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param mac_addr
+ *   MAC address to remove.
+ * @return
+ *   - (0) if successful, or *mac_addr* didn't exist.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EADDRINUSE) if attempting to remove the default MAC address
+ */
+int rte_eth_dev_mac_addr_remove(uint16_t port_id, struct ether_addr *mac_addr);
+
+/**
+ * Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param reta_conf
+ *   RETA to update.
+ * @param reta_size
+ *   Redirection table size. The table size can be queried by
+ *   rte_eth_dev_info_get().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_rss_reta_update(uint16_t port_id,
+				struct rte_eth_rss_reta_entry64 *reta_conf,
+				uint16_t reta_size);
+
+ /**
+ * Query Redirection Table(RETA) of Receive Side Scaling of Ethernet device.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param reta_conf
+ *   RETA to query.
+ * @param reta_size
+ *   Redirection table size. The table size can be queried by
+ *   rte_eth_dev_info_get().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_rss_reta_query(uint16_t port_id,
+			       struct rte_eth_rss_reta_entry64 *reta_conf,
+			       uint16_t reta_size);
+
+ /**
+ * Updates unicast hash table for receiving packet with the given destination
+ * MAC address, and the packet is routed to all VFs for which the RX mode is
+ * accept packets that match the unicast hash table.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param addr
+ *   Unicast MAC address.
+ * @param on
+ *    1 - Set an unicast hash bit for receiving packets with the MAC address.
+ *    0 - Clear an unicast hash bit.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+  *  - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_uc_hash_table_set(uint16_t port_id,struct ether_addr *addr,
+					uint8_t on);
+
+ /**
+ * Updates all unicast hash bitmaps for receiving packet with any Unicast
+ * Ethernet MAC addresses,the packet is routed to all VFs for which the RX
+ * mode is accept packets that match the unicast hash table.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param on
+ *    1 - Set all unicast hash bitmaps for receiving all the Ethernet
+ *         MAC addresses
+ *    0 - Clear all unicast hash bitmaps
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+  *  - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_uc_all_hash_table_set(uint16_t port_id,uint8_t on);
+
+ /**
+ * Set RX L2 Filtering mode of a VF of an Ethernet device.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param vf
+ *   VF id.
+ * @param rx_mode
+ *    The RX mode mask, which  is one or more of  accepting Untagged Packets,
+ *    packets that match the PFUTA table, Broadcast and Multicast Promiscuous.
+ *    ETH_VMDQ_ACCEPT_UNTAG,ETH_VMDQ_ACCEPT_HASH_UC,
+ *    ETH_VMDQ_ACCEPT_BROADCAST and ETH_VMDQ_ACCEPT_MULTICAST will be used
+ *    in rx_mode.
+ * @param on
+ *    1 - Enable a VF RX mode.
+ *    0 - Disable a VF RX mode.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_set_vf_rxmode(uint16_t port_id, uint16_t vf, uint16_t rx_mode,
+				uint8_t on);
+
+/**
+* Enable or disable a VF traffic transmit of the Ethernet device.
+*
+* @param port
+*   The port identifier of the Ethernet device.
+* @param vf
+*   VF id.
+* @param on
+*    1 - Enable a VF traffic transmit.
+*    0 - Disable a VF traffic transmit.
+* @return
+*   - (0) if successful.
+*   - (-ENODEV) if *port_id* invalid.
+*   - (-ENOTSUP) if hardware doesn't support.
+*   - (-EINVAL) if bad parameter.
+*/
+int
+rte_eth_dev_set_vf_tx(uint16_t port_id,uint16_t vf, uint8_t on);
+
+/**
+* Enable or disable a VF traffic receive of an Ethernet device.
+*
+* @param port
+*   The port identifier of the Ethernet device.
+* @param vf
+*   VF id.
+* @param on
+*    1 - Enable a VF traffic receive.
+*    0 - Disable a VF traffic receive.
+* @return
+*   - (0) if successful.
+*   - (-ENOTSUP) if hardware doesn't support.
+*   - (-ENODEV) if *port_id* invalid.
+*   - (-EINVAL) if bad parameter.
+*/
+int
+rte_eth_dev_set_vf_rx(uint16_t port_id,uint16_t vf, uint8_t on);
+
+/**
+* Enable/Disable hardware VF VLAN filtering by an Ethernet device of
+* received VLAN packets tagged with a given VLAN Tag Identifier.
+*
+* @param port_id
+*   The port identifier of the Ethernet device.
+* @param vlan_id
+*   The VLAN Tag Identifier whose filtering must be enabled or disabled.
+* @param vf_mask
+*    Bitmap listing which VFs participate in the VLAN filtering.
+* @param vlan_on
+*    1 - Enable VFs VLAN filtering.
+*    0 - Disable VFs VLAN filtering.
+* @return
+*   - (0) if successful.
+*   - (-ENOTSUP) if hardware doesn't support.
+*   - (-ENODEV) if *port_id* invalid.
+*   - (-EINVAL) if bad parameter.
+*/
+int
+rte_eth_dev_set_vf_vlan_filter(uint16_t port_id, uint16_t vlan_id,
+				uint64_t vf_mask,
+				uint8_t vlan_on);
+
+/**
+ * Set a traffic mirroring rule on an Ethernet device
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param mirror_conf
+ *   The pointer to the traffic mirroring structure describing the mirroring rule.
+ *   The *rte_eth_vmdq_mirror_conf* structure includes the type of mirroring rule,
+ *   destination pool and the value of rule if enable vlan or pool mirroring.
+ *
+ * @param rule_id
+ *   The index of traffic mirroring rule, we support four separated rules.
+ * @param on
+ *   1 - Enable a mirroring rule.
+ *   0 - Disable a mirroring rule.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if the mirror_conf information is not correct.
+ */
+int rte_eth_mirror_rule_set(uint16_t port_id,
+			struct rte_eth_vmdq_mirror_conf *mirror_conf,
+			uint8_t rule_id,
+			uint8_t on);
+
+/**
+ * Reset a traffic mirroring rule on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param rule_id
+ *   The index of traffic mirroring rule, we support four separated rules.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_mirror_rule_reset(uint16_t port_id,
+					 uint8_t rule_id);
+
+/**
+ * Set the rate limitation for a queue on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_idx
+ *   The queue id.
+ * @param tx_rate
+ *   The tx rate allocated from the total link speed for this queue.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
+			uint16_t tx_rate);
+
+/**
+ * Set the rate limitation for a vf on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param vf
+ *   VF id.
+ * @param tx_rate
+ *   The tx rate allocated from the total link speed for this VF id.
+ * @param q_msk
+ *   The queue mask which need to set the rate.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_set_vf_rate_limit(uint16_t port_id, uint16_t vf,
+			uint16_t tx_rate, uint64_t q_msk);
+
+/**
+ * Initialize bypass logic. This function needs to be called before
+ * executing any other bypass API.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_bypass_init(uint16_t port_id);
+
+/**
+ * Return bypass state.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param state
+ *   The return bypass state.
+ *   - (1) Normal mode
+ *   - (2) Bypass mode
+ *   - (3) Isolate mode
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_bypass_state_show(uint16_t port_id, uint32_t *state);
+
+/**
+ * Set bypass state
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param new_state
+ *   Pointer to the bypass state to set.
+ *   - (1) Normal mode
+ *   - (2) Bypass mode
+ *   - (3) Isolate mode
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_bypass_state_set(uint16_t port_id, uint32_t *new_state);
+
+/**
+ * Return bypass state when given event occurs.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param event
+ *   The bypass event
+ *   - (1) Main power on (power button is pushed)
+ *   - (2) Auxiliary power on (power supply is being plugged)
+ *   - (3) Main power off (system shutdown and power supply is left plugged in)
+ *   - (4) Auxiliary power off (power supply is being unplugged)
+ *   - (5) Display or set the watchdog timer
+ * @param state
+ *   The bypass state when given event occurred.
+ *   - (1) Normal mode
+ *   - (2) Bypass mode
+ *   - (3) Isolate mode
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_bypass_event_show(uint16_t port_id, uint32_t event, uint32_t *state);
+
+/**
+ * Set bypass state when given event occurs.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param event
+ *   The bypass event
+ *   - (1) Main power on (power button is pushed)
+ *   - (2) Auxiliary power on (power supply is being plugged)
+ *   - (3) Main power off (system shutdown and power supply is left plugged in)
+ *   - (4) Auxiliary power off (power supply is being unplugged)
+ *   - (5) Display or set the watchdog timer
+ * @param state
+ *   The assigned state when given event occurs.
+ *   - (1) Normal mode
+ *   - (2) Bypass mode
+ *   - (3) Isolate mode
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_bypass_event_store(uint16_t port_id, uint32_t event, uint32_t state);
+
+/**
+ * Set bypass watchdog timeout count.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param timeout
+ *   The timeout to be set.
+ *   - (0) 0 seconds (timer is off)
+ *   - (1) 1.5 seconds
+ *   - (2) 2 seconds
+ *   - (3) 3 seconds
+ *   - (4) 4 seconds
+ *   - (5) 8 seconds
+ *   - (6) 16 seconds
+ *   - (7) 32 seconds
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_wd_timeout_store(uint16_t port_id, uint32_t timeout);
+
+/**
+ * Get bypass firmware version.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param ver
+ *   The firmware version
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_bypass_ver_show(uint16_t port_id, uint32_t *ver);
+
+/**
+ * Return bypass watchdog timeout in seconds
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param wd_timeout
+ *   The return watchdog timeout. "0" represents timer expired
+ *   - (0) 0 seconds (timer is off)
+ *   - (1) 1.5 seconds
+ *   - (2) 2 seconds
+ *   - (3) 3 seconds
+ *   - (4) 4 seconds
+ *   - (5) 8 seconds
+ *   - (6) 16 seconds
+ *   - (7) 32 seconds
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_bypass_wd_timeout_show(uint16_t port_id, uint32_t *wd_timeout);
+
+/**
+ * Reset bypass watchdog timer
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_bypass_wd_reset(uint16_t port_id);
+
+ /**
+ * Configuration of Receive Side Scaling hash computation of Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param rss_conf
+ *   The new configuration to use for RSS hash computation on the port.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if port identifier is invalid.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_rss_hash_update(uint16_t port_id,
+				struct rte_eth_rss_conf *rss_conf);
+
+ /**
+ * Retrieve current configuration of Receive Side Scaling hash computation
+ * of Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param rss_conf
+ *   Where to store the current RSS hash configuration of the Ethernet device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if port identifier is invalid.
+ *   - (-ENOTSUP) if hardware doesn't support RSS.
+ */
+int
+rte_eth_dev_rss_hash_conf_get(uint16_t port_id,
+			      struct rte_eth_rss_conf *rss_conf);
+
+ /**
+ * Add UDP tunneling port of an Ethernet device for filtering a specific
+ * tunneling packet by UDP port number.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param tunnel_udp
+ *   UDP tunneling configuration.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if port identifier is invalid.
+ *   - (-ENOTSUP) if hardware doesn't support tunnel type.
+ */
+int
+rte_eth_dev_udp_tunnel_add(uint16_t port_id,
+			   struct rte_eth_udp_tunnel *tunnel_udp);
+
+ /**
+ * Delete UDP tunneling port configuration of Ethernet device
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param tunnel_udp
+ *   UDP tunneling configuration.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if port identifier is invalid.
+ *   - (-ENOTSUP) if hardware doesn't support tunnel type.
+ */
+int
+rte_eth_dev_udp_tunnel_delete(uint16_t port_id,
+			      struct rte_eth_udp_tunnel *tunnel_udp);
+
+/**
+ * add syn filter
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param rx_queue
+ *   The index of RX queue where to store RX packets matching the syn filter.
+ * @param filter
+ *   The pointer to the structure describing the syn filter rule.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_add_syn_filter(uint16_t port_id,
+			struct rte_syn_filter *filter, uint16_t rx_queue);
+
+/**
+ * remove syn filter
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_remove_syn_filter(uint16_t port_id);
+
+/**
+ * get syn filter
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param filter
+ *   The pointer to the structure describing the syn filter.
+ * @param rx_queue
+ *   A pointer to get the queue index of syn filter.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_get_syn_filter(uint16_t port_id,
+			struct rte_syn_filter *filter, uint16_t *rx_queue);
+
+/**
+ * Add a new 2tuple filter rule on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param index
+ *   The identifier of 2tuple filter.
+ * @param filter
+ *   The pointer to the structure describing the 2tuple filter rule.
+ *   The *rte_2tuple_filter* structure includes the values of the different
+ *   fields to match: protocol, dst_port and
+ *   tcp_flags if the protocol is tcp type.
+ * @param rx_queue
+ *   The index of the RX queue where to store RX packets matching the added
+ *   2tuple filter.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support 2tuple filter.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if the filter information is not correct.
+ */
+int rte_eth_dev_add_2tuple_filter(uint16_t port_id, uint16_t index,
+			struct rte_2tuple_filter *filter, uint16_t rx_queue);
+
+/**
+ * remove a 2tuple filter rule on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param index
+ *   The identifier of 2tuple filter.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support 2tuple filter.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if the filter information is not correct.
+ */
+int rte_eth_dev_remove_2tuple_filter(uint16_t port_id, uint16_t index);
+
+/**
+ * Get a 2tuple filter rule on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param index
+ *   The identifier of 2tuple filter.
+ * @param filter
+ *   A pointer to a structure of type *rte_2tuple_filter* to be filled with
+ *   the information of the 2tuple filter.
+ * @param rx_queue
+ *   A pointer to get the queue index.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support 2tuple filter.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if the filter information is not correct.
+ *   - (-ENOENT) if no enabled filter in this index.
+ */
+int rte_eth_dev_get_2tuple_filter(uint16_t port_id, uint16_t index,
+			struct rte_2tuple_filter *filter, uint16_t *rx_queue);
+
+/**
+ * Add a new 5tuple filter rule on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param index
+ *   The identifier of 5tuple filter.
+ * @param filter
+ *   The pointer to the structure describing the 5tuple filter rule.
+ *   The *rte_5tuple_filter* structure includes the values of the different
+ *   fields to match: dst src IP, dst src port, protocol and relative masks
+ * @param rx_queue
+ *   The index of the RX queue where to store RX packets matching the added
+ *   5tuple filter.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support 5tuple filter.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if the filter information is not correct.
+ */
+int rte_eth_dev_add_5tuple_filter(uint16_t port_id, uint16_t index,
+			struct rte_5tuple_filter *filter, uint16_t rx_queue);
+
+/**
+ * remove a 5tuple filter rule on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param index
+ *   The identifier of 5tuple filter.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support 5tuple filter.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if the filter information is not correct.
+ */
+int rte_eth_dev_remove_5tuple_filter(uint16_t port_id, uint16_t index);
+
+/**
+ * Get a 5tuple filter rule on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param index
+ *   The identifier of 5tuple filter.
+ * @param filter
+ *   A pointer to a structure of type *rte_5tuple_filter* to be filled with
+ *   the information of the 5tuple filter.
+ * @param rx_queue
+ *   A pointer to get the queue index.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support 5tuple filter.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if the filter information is not correct.
+ */
+int rte_eth_dev_get_5tuple_filter(uint16_t port_id, uint16_t index,
+			struct rte_5tuple_filter *filter, uint16_t *rx_queue);
+
+/**
+ * Add a new flex filter rule on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param index
+ *   The identifier of flex filter.
+ * @param filter
+ *   The pointer to the structure describing the flex filter rule.
+ *   The *rte_flex_filter* structure includes the values of the different fields
+ *   to match: the dwords (first len bytes of packet ) and relative masks.
+ * @param rx_queue
+ *   The index of the RX queue where to store RX packets matching the added
+ *   flex filter.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support flex filter.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if the filter information is not correct.
+ *   - (-ENOENT) if no enabled filter in this index.
+ */
+int rte_eth_dev_add_flex_filter(uint16_t port_id, uint16_t index,
+			struct rte_flex_filter *filter, uint16_t rx_queue);
+
+/**
+ * remove a flex filter rule on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param index
+ *   The identifier of flex filter.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support flex filter.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if the filter information is not correct.
+ */
+int rte_eth_dev_remove_flex_filter(uint16_t port_id, uint16_t index);
+
+/**
+ * Get a flex filter rule on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param index
+ *   The identifier of flex filter.
+ * @param filter
+ *   A pointer to a structure of type *rte_flex_filter* to be filled with
+ *   the information of the flex filter.
+ * @param rx_queue
+ *   A pointer to get the queue index.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support flex filter.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if the filter information is not correct.
+ *   - (-ENOENT) if no enabled filter in this index.
+ */
+int rte_eth_dev_get_flex_filter(uint16_t port_id, uint16_t index,
+			struct rte_flex_filter *filter, uint16_t *rx_queue);
+
+/**
+ * Check whether the filter type is supported on an Ethernet device.
+ * All the supported filter types are defined in 'rte_eth_ctrl.h'.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param filter_type
+ *   Filter type.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support this filter type.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_filter_supported(uint16_t port_id, enum rte_filter_type filter_type);
+
+/**
+ * Take operations to assigned filter type on an Ethernet device.
+ * All the supported operations and filter types are defined in 'rte_eth_ctrl.h'.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param filter_type
+ *   Filter type.
+ * @param filter_op
+ *   Type of operation.
+ * @param arg
+ *   A pointer to arguments defined specifically for the operation.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_eth_dev_filter_ctrl(uint16_t port_id, enum rte_filter_type filter_type,
+			enum rte_filter_op filter_op, void *arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHDEV_H_ */
-- 
1.9.1

^ permalink raw reply	[relevance 1%]

* Re: [dpdk-dev] [PATCH v8 03/14] eal/pci, ethdev: Remove assumption that port will not be detached
  2015-02-18 10:03  3%             ` Bruce Richardson
@ 2015-02-18 10:58  0%               ` Tetsuya Mukawa
  2015-02-18 12:23  0%                 ` Bruce Richardson
  2015-02-18 12:33  0%                 ` Iremonger, Bernard
  0 siblings, 2 replies; 200+ results
From: Tetsuya Mukawa @ 2015-02-18 10:58 UTC (permalink / raw)
  To: Bruce Richardson, Thomas Monjalon; +Cc: dev, Neil Horman

On 2015/02/18 19:03, Bruce Richardson wrote:
> On Wed, Feb 18, 2015 at 10:57:25AM +0100, Thomas Monjalon wrote:
>> 2015-02-18 15:10, Tetsuya Mukawa:
>>> On 2015/02/18 10:54, Tetsuya Mukawa wrote:
>>>> On 2015/02/18 9:31, Thomas Monjalon wrote:
>>>>> 2015-02-17 15:14, Tetsuya Mukawa:
>>>>>> On 2015/02/17 9:36, Thomas Monjalon wrote:
>>>>>>> 2015-02-16 13:14, Tetsuya Mukawa:
>>>>>>> Is uint8_t still a good size for hotpluggable virtual device ids?
>>>>>> I am not sure it's enough, but uint8_t is widely used in "rte_ethdev.c"
>>>>>> as port id.
>>>>>> If someone reports it isn't enough, I guess it will be the time to
>>>>>> write a patch to change all uint8_t in one patch.
>>>>> It's a big ABI breakage. So if we feel it's going to be required,
>>>>> it's better to do it now in 2.0 release I think.
>>>>>
>>>>> Any opinion?
>>>>>
>>>> Hi Thomas,
>>>>
>>>> I agree with it.
>>>> I will add an one more patch to change uint8_t to uint16_t.
>>>>
>>>> Thanks,
>>>> Tetsuya
>>>>
>>> Hi Thomas,
>>>
>>> Could I make sure.
>>> After changing uint8_t to uint16_t in "rte_ethdev.[ch]", must I also
>>> need to change other applications and libraries that call ethdev APIs?
>>> If so, I would not finish it by 23rd.
>>>
>>> I've counted how many lines call ethdev APIs that are related to port_id.
>>> Could you please check an attached file?
>>> It's over 1200 lines. Probably to fix  one of caller, I will need to
>>> check how port_id is used, and fix more related lines. So probably
>>> thousands lines may need to be fixed.
>>>
>>> When is deadline for fixing this changing?
>>> Also, if you have a good idea to fix it easier, could you please let me
>>> know?
>> It was an open question.
>> If everybody is fine with 255 ports maximum, let's keep it as is.
>>
> I think we are probably ok for now (and foreseeable future) with 255 max.
>
> However, if we did change it, I agree that in 2.0 is a very good time to do so.
> Since we are expanding the field, rather than shrinking it, I don't see why we
> can't just make the change at the ethdev level (and in libs API) in 2.0 and then in
> later releases (e.g. 2.1) update the apps and examples to match. That way the
> ABI stays the same from 2.0 onwards, and we don't have a huge amount of churn
> changing it everywhere late in the 2.0 release cycle.

Hi Bruce,

Could you please check my RFC patch I will send soon?
I wrote the patch like below.

1. Copy header file like below.
$ cp lib/librte_ether/rte_ethdev.h lib/librte_ether/rte_ethdev_internal.h
2. Change "rte_ethdev.c" to include "rte_ethdev_internal.h"
3. Change type of port id in "rte_ethdev.c" and "rte_ethdev_internal.h".

If the patch is OK, I will send it with hotplug patches.

Thanks,
Tetsuya


> /Bruce

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v8 03/14] eal/pci, ethdev: Remove assumption that port will not be detached
  2015-02-18  9:57  0%           ` Thomas Monjalon
@ 2015-02-18 10:03  3%             ` Bruce Richardson
  2015-02-18 10:58  0%               ` Tetsuya Mukawa
  0 siblings, 1 reply; 200+ results
From: Bruce Richardson @ 2015-02-18 10:03 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev, Neil Horman

On Wed, Feb 18, 2015 at 10:57:25AM +0100, Thomas Monjalon wrote:
> 2015-02-18 15:10, Tetsuya Mukawa:
> > On 2015/02/18 10:54, Tetsuya Mukawa wrote:
> > > On 2015/02/18 9:31, Thomas Monjalon wrote:
> > >> 2015-02-17 15:14, Tetsuya Mukawa:
> > >>> On 2015/02/17 9:36, Thomas Monjalon wrote:
> > >>>> 2015-02-16 13:14, Tetsuya Mukawa:
> > >>>> Is uint8_t still a good size for hotpluggable virtual device ids?
> > >>> I am not sure it's enough, but uint8_t is widely used in "rte_ethdev.c"
> > >>> as port id.
> > >>> If someone reports it isn't enough, I guess it will be the time to
> > >>> write a patch to change all uint8_t in one patch.
> > >> It's a big ABI breakage. So if we feel it's going to be required,
> > >> it's better to do it now in 2.0 release I think.
> > >>
> > >> Any opinion?
> > >>
> > > Hi Thomas,
> > >
> > > I agree with it.
> > > I will add an one more patch to change uint8_t to uint16_t.
> > >
> > > Thanks,
> > > Tetsuya
> > >
> > 
> > Hi Thomas,
> > 
> > Could I make sure.
> > After changing uint8_t to uint16_t in "rte_ethdev.[ch]", must I also
> > need to change other applications and libraries that call ethdev APIs?
> > If so, I would not finish it by 23rd.
> > 
> > I've counted how many lines call ethdev APIs that are related to port_id.
> > Could you please check an attached file?
> > It's over 1200 lines. Probably to fix  one of caller, I will need to
> > check how port_id is used, and fix more related lines. So probably
> > thousands lines may need to be fixed.
> > 
> > When is deadline for fixing this changing?
> > Also, if you have a good idea to fix it easier, could you please let me
> > know?
> 
> It was an open question.
> If everybody is fine with 255 ports maximum, let's keep it as is.
> 
I think we are probably ok for now (and foreseeable future) with 255 max.

However, if we did change it, I agree that in 2.0 is a very good time to do so.
Since we are expanding the field, rather than shrinking it, I don't see why we
can't just make the change at the ethdev level (and in libs API) in 2.0 and then in
later releases (e.g. 2.1) update the apps and examples to match. That way the
ABI stays the same from 2.0 onwards, and we don't have a huge amount of churn
changing it everywhere late in the 2.0 release cycle.

/Bruce

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v8 03/14] eal/pci, ethdev: Remove assumption that port will not be detached
  2015-02-18  6:10  0%         ` Tetsuya Mukawa
  2015-02-18  9:27  0%           ` Iremonger, Bernard
@ 2015-02-18  9:57  0%           ` Thomas Monjalon
  2015-02-18 10:03  3%             ` Bruce Richardson
  1 sibling, 1 reply; 200+ results
From: Thomas Monjalon @ 2015-02-18  9:57 UTC (permalink / raw)
  To: Tetsuya Mukawa; +Cc: dev, Neil Horman

2015-02-18 15:10, Tetsuya Mukawa:
> On 2015/02/18 10:54, Tetsuya Mukawa wrote:
> > On 2015/02/18 9:31, Thomas Monjalon wrote:
> >> 2015-02-17 15:14, Tetsuya Mukawa:
> >>> On 2015/02/17 9:36, Thomas Monjalon wrote:
> >>>> 2015-02-16 13:14, Tetsuya Mukawa:
> >>>> Is uint8_t still a good size for hotpluggable virtual device ids?
> >>> I am not sure it's enough, but uint8_t is widely used in "rte_ethdev.c"
> >>> as port id.
> >>> If someone reports it isn't enough, I guess it will be the time to
> >>> write a patch to change all uint8_t in one patch.
> >> It's a big ABI breakage. So if we feel it's going to be required,
> >> it's better to do it now in 2.0 release I think.
> >>
> >> Any opinion?
> >>
> > Hi Thomas,
> >
> > I agree with it.
> > I will add an one more patch to change uint8_t to uint16_t.
> >
> > Thanks,
> > Tetsuya
> >
> 
> Hi Thomas,
> 
> Could I make sure.
> After changing uint8_t to uint16_t in "rte_ethdev.[ch]", must I also
> need to change other applications and libraries that call ethdev APIs?
> If so, I would not finish it by 23rd.
> 
> I've counted how many lines call ethdev APIs that are related to port_id.
> Could you please check an attached file?
> It's over 1200 lines. Probably to fix  one of caller, I will need to
> check how port_id is used, and fix more related lines. So probably
> thousands lines may need to be fixed.
> 
> When is deadline for fixing this changing?
> Also, if you have a good idea to fix it easier, could you please let me
> know?

It was an open question.
If everybody is fine with 255 ports maximum, let's keep it as is.

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v8 03/14] eal/pci, ethdev: Remove assumption that port will not be detached
  2015-02-18  6:10  0%         ` Tetsuya Mukawa
@ 2015-02-18  9:27  0%           ` Iremonger, Bernard
  2015-02-18  9:57  0%           ` Thomas Monjalon
  1 sibling, 0 replies; 200+ results
From: Iremonger, Bernard @ 2015-02-18  9:27 UTC (permalink / raw)
  To: Tetsuya Mukawa, Thomas Monjalon; +Cc: dev, Neil Horman



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Tetsuya Mukawa
> Sent: Wednesday, February 18, 2015 6:10 AM
> To: Thomas Monjalon
> Cc: dev@dpdk.org; Neil Horman
> Subject: Re: [dpdk-dev] [PATCH v8 03/14] eal/pci, ethdev: Remove assumption that port will not be
> detached
> 
> On 2015/02/18 10:54, Tetsuya Mukawa wrote:
> > On 2015/02/18 9:31, Thomas Monjalon wrote:
> >> 2015-02-17 15:14, Tetsuya Mukawa:
> >>> On 2015/02/17 9:36, Thomas Monjalon wrote:
> >>>> 2015-02-16 13:14, Tetsuya Mukawa:
> >>>> Is uint8_t still a good size for hotpluggable virtual device ids?
> >>> I am not sure it's enough, but uint8_t is widely used in "rte_ethdev.c"
> >>> as port id.
> >>> If someone reports it isn't enough, I guess it will be the time to
> >>> write a patch to change all uint8_t in one patch.
> >> It's a big ABI breakage. So if we feel it's going to be required,
> >> it's better to do it now in 2.0 release I think.
> >>
> >> Any opinion?
> >>
> > Hi Thomas,
> >
> > I agree with it.
> > I will add an one more patch to change uint8_t to uint16_t.
> >
> > Thanks,
> > Tetsuya
> >
> 
> Hi Thomas,
> 
> Could I make sure.
> After changing uint8_t to uint16_t in "rte_ethdev.[ch]", must I also need to change other applications
> and libraries that call ethdev APIs?
> If so, I would not finish it by 23rd.
> 
> I've counted how many lines call ethdev APIs that are related to port_id.
> Could you please check an attached file?
> It's over 1200 lines. Probably to fix  one of caller, I will need to check how port_id is used, and fix more
> related lines. So probably thousands lines may need to be fixed.
> 
> When is deadline for fixing this changing?
> Also, if you have a good idea to fix it easier, could you please let me know?
> 
> Thanks,
> Tetsuya

Hi Tetsuya, Thomas,

As uint8_t is already widely used for port_id, I don't think it should be changed in this patchset.
If it is to be changed to uint16_t it should be done as a separate task (in a new patchset).

Regards,

Bernard.

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v8 03/14] eal/pci, ethdev: Remove assumption that port will not be detached
  2015-02-18  1:54  0%       ` Tetsuya Mukawa
@ 2015-02-18  6:10  0%         ` Tetsuya Mukawa
  2015-02-18  9:27  0%           ` Iremonger, Bernard
  2015-02-18  9:57  0%           ` Thomas Monjalon
  0 siblings, 2 replies; 200+ results
From: Tetsuya Mukawa @ 2015-02-18  6:10 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev, Neil Horman

[-- Attachment #1: Type: text/plain, Size: 1446 bytes --]

On 2015/02/18 10:54, Tetsuya Mukawa wrote:
> On 2015/02/18 9:31, Thomas Monjalon wrote:
>> 2015-02-17 15:14, Tetsuya Mukawa:
>>> On 2015/02/17 9:36, Thomas Monjalon wrote:
>>>> 2015-02-16 13:14, Tetsuya Mukawa:
>>>> Is uint8_t still a good size for hotpluggable virtual device ids?
>>> I am not sure it's enough, but uint8_t is widely used in "rte_ethdev.c"
>>> as port id.
>>> If someone reports it isn't enough, I guess it will be the time to
>>> write a patch to change all uint8_t in one patch.
>> It's a big ABI breakage. So if we feel it's going to be required,
>> it's better to do it now in 2.0 release I think.
>>
>> Any opinion?
>>
> Hi Thomas,
>
> I agree with it.
> I will add an one more patch to change uint8_t to uint16_t.
>
> Thanks,
> Tetsuya
>

Hi Thomas,

Could I make sure.
After changing uint8_t to uint16_t in "rte_ethdev.[ch]", must I also
need to change other applications and libraries that call ethdev APIs?
If so, I would not finish it by 23rd.

I've counted how many lines call ethdev APIs that are related to port_id.
Could you please check an attached file?
It's over 1200 lines. Probably to fix  one of caller, I will need to
check how port_id is used, and fix more related lines. So probably
thousands lines may need to be fixed.

When is deadline for fixing this changing?
Also, if you have a good idea to fix it easier, could you please let me
know?

Thanks,
Tetsuya


[-- Attachment #2: caller.txt --]
[-- Type: text/plain, Size: 72070 bytes --]

rte_eth_dev_configure	app/test-pipeline/init.c	240
rte_eth_dev_configure	app/test-pmd/testpmd.c	1304
rte_eth_dev_configure	app/test/test_kni.c	523
rte_eth_dev_configure	app/test/test_link_bonding.c	238
rte_eth_dev_configure	app/test/test_link_bonding.c	240
rte_eth_dev_configure	app/test/test_pmd_perf.c	751
rte_eth_dev_configure	app/test/test_pmd_ring.c	67
rte_eth_dev_configure	app/test/test_pmd_ring.c	73
rte_eth_dev_configure	app/test/test_pmd_ring.c	77
rte_eth_dev_configure	app/test/test_pmd_ring.c	81
rte_eth_dev_configure	app/test/test_pmd_ring.c	256
rte_eth_dev_configure	app/test/test_pmd_ring.c	257
rte_eth_dev_configure	examples/distributor/main.c	125
rte_eth_dev_configure	examples/dpdk_qat/main.c	726
rte_eth_dev_configure	examples/exception_path/main.c	433
rte_eth_dev_configure	examples/ip_fragmentation/main.c	890
rte_eth_dev_configure	examples/ip_pipeline/init.c	486
rte_eth_dev_configure	examples/ip_reassembly/main.c	1095
rte_eth_dev_configure	examples/ipv4_multicast/main.c	755
rte_eth_dev_configure	examples/kni/main.c	617
rte_eth_dev_configure	examples/kni/main.c	725
rte_eth_dev_configure	examples/l2fwd-ivshmem/host/host.c	745
rte_eth_dev_configure	examples/l2fwd/main.c	650
rte_eth_dev_configure	examples/l3fwd-acl/main.c	1991
rte_eth_dev_configure	examples/l3fwd-power/main.c	1534
rte_eth_dev_configure	examples/l3fwd-vf/main.c	1013
rte_eth_dev_configure	examples/l3fwd/main.c	2457
rte_eth_dev_configure	examples/link_status_interrupt/main.c	696
rte_eth_dev_configure	examples/link_status_interrupt/main.c	702
rte_eth_dev_configure	examples/load_balancer/init.c	446
rte_eth_dev_configure	examples/multi_process/client_server_mp/mp_server/init.c	144
rte_eth_dev_configure	examples/multi_process/l2fwd_fork/main.c	1121
rte_eth_dev_configure	examples/multi_process/symmetric_mp/main.c	248
rte_eth_dev_configure	examples/netmap_compat/lib/compat_netmap.c	706
rte_eth_dev_configure	examples/qos_meter/main.c	370
rte_eth_dev_configure	examples/qos_meter/main.c	386
rte_eth_dev_configure	examples/qos_sched/init.c	129
rte_eth_dev_configure	examples/quota_watermark/qw/init.c	82
rte_eth_dev_configure	examples/skeleton/basicfwd.c	69
rte_eth_dev_configure	examples/vhost/main.c	442
rte_eth_dev_configure	examples/vhost_xen/main.c	306
rte_eth_dev_configure	examples/vmdq/main.c	254
rte_eth_dev_configure	examples/vmdq_dcb/main.c	177
rte_eth_dev_configure	lib/librte_ether/rte_ethdev.c	728
rte_eth_dev_configure	lib/librte_ether/rte_ethdev.h	91
rte_eth_dev_configure	lib/librte_ether/rte_ethdev.h	102
rte_eth_dev_configure	lib/librte_ether/rte_ethdev.h	1726
rte_eth_dev_configure	lib/librte_ether/rte_ethdev.h	1744
rte_eth_dev_configure	lib/librte_ether/rte_ethdev.h	1783
rte_eth_dev_configure	lib/librte_ether/rte_ethdev.h	1844
rte_eth_dev_configure	lib/librte_ether/rte_ethdev.h	1860
rte_eth_dev_configure	lib/librte_ether/rte_ethdev.h	1877
rte_eth_dev_configure	lib/librte_ether/rte_ethdev.h	1893
rte_eth_dev_configure	lib/librte_ether/rte_ethdev.h	2112
rte_eth_dev_configure	lib/librte_ether/rte_ethdev.h	2133
rte_eth_dev_configure	lib/librte_ether/rte_ethdev.h	2225
rte_eth_dev_configure	lib/librte_ether/rte_ethdev.h	2377
rte_eth_dev_configure	lib/librte_ether/rte_ethdev.h	2492
rte_eth_dev_configure	lib/librte_ether/rte_ethdev.h	2504
rte_eth_dev_configure	lib/librte_pmd_bond/rte_eth_bond_pmd.c	950
rte_eth_rx_queue_setup	app/test-pipeline/init.c	251
rte_eth_rx_queue_setup	app/test-pmd/testpmd.c	1359
rte_eth_rx_queue_setup	app/test-pmd/testpmd.c	1364
rte_eth_rx_queue_setup	app/test/test_kni.c	529
rte_eth_rx_queue_setup	app/test/test_link_bonding.c	243
rte_eth_rx_queue_setup	app/test/test_link_bonding.c	246
rte_eth_rx_queue_setup	app/test/test_pmd_perf.c	772
rte_eth_rx_queue_setup	app/test/test_pmd_perf.c	776
rte_eth_rx_queue_setup	app/test/test_pmd_ring.c	90
rte_eth_rx_queue_setup	app/test/test_pmd_ring.c	99
rte_eth_rx_queue_setup	app/test/test_pmd_ring.c	268
rte_eth_rx_queue_setup	app/test/test_pmd_ring.c	269
rte_eth_rx_queue_setup	examples/distributor/main.c	130
rte_eth_rx_queue_setup	examples/dpdk_qat/main.c	768
rte_eth_rx_queue_setup	examples/dpdk_qat/main.c	773
rte_eth_rx_queue_setup	examples/exception_path/main.c	438
rte_eth_rx_queue_setup	examples/ip_fragmentation/main.c	900
rte_eth_rx_queue_setup	examples/ip_fragmentation/main.c	905
rte_eth_rx_queue_setup	examples/ip_pipeline/init.c	496
rte_eth_rx_queue_setup	examples/ip_reassembly/main.c	1105
rte_eth_rx_queue_setup	examples/ip_reassembly/main.c	1110
rte_eth_rx_queue_setup	examples/ipv4_multicast/main.c	769
rte_eth_rx_queue_setup	examples/kni/main.c	622
rte_eth_rx_queue_setup	examples/l2fwd-ivshmem/host/host.c	754
rte_eth_rx_queue_setup	examples/l2fwd-ivshmem/host/host.c	759
rte_eth_rx_queue_setup	examples/l2fwd/main.c	659
rte_eth_rx_queue_setup	examples/l2fwd/main.c	664
rte_eth_rx_queue_setup	examples/l3fwd-acl/main.c	2060
rte_eth_rx_queue_setup	examples/l3fwd-acl/main.c	2065
rte_eth_rx_queue_setup	examples/l3fwd-power/main.c	1616
rte_eth_rx_queue_setup	examples/l3fwd-power/main.c	1621
rte_eth_rx_queue_setup	examples/l3fwd-vf/main.c	1066
rte_eth_rx_queue_setup	examples/l3fwd-vf/main.c	1070
rte_eth_rx_queue_setup	examples/l3fwd/main.c	2530
rte_eth_rx_queue_setup	examples/l3fwd/main.c	2535
rte_eth_rx_queue_setup	examples/link_status_interrupt/main.c	714
rte_eth_rx_queue_setup	examples/link_status_interrupt/main.c	719
rte_eth_rx_queue_setup	examples/load_balancer/init.c	469
rte_eth_rx_queue_setup	examples/multi_process/client_server_mp/mp_server/init.c	149
rte_eth_rx_queue_setup	examples/multi_process/l2fwd_fork/main.c	1130
rte_eth_rx_queue_setup	examples/multi_process/l2fwd_fork/main.c	1135
rte_eth_rx_queue_setup	examples/multi_process/symmetric_mp/main.c	253
rte_eth_rx_queue_setup	examples/netmap_compat/lib/compat_netmap.c	726
rte_eth_rx_queue_setup	examples/qos_meter/main.c	374
rte_eth_rx_queue_setup	examples/qos_meter/main.c	390
rte_eth_rx_queue_setup	examples/qos_sched/init.c	136
rte_eth_rx_queue_setup	examples/quota_watermark/qw/init.c	88
rte_eth_rx_queue_setup	examples/skeleton/basicfwd.c	74
rte_eth_rx_queue_setup	examples/vhost/main.c	448
rte_eth_rx_queue_setup	examples/vhost_xen/main.c	315
rte_eth_rx_queue_setup	examples/vmdq/main.c	262
rte_eth_rx_queue_setup	examples/vmdq_dcb/main.c	182
rte_eth_rx_queue_setup	lib/librte_ether/rte_ethdev.c	1043
rte_eth_rx_queue_setup	lib/librte_ether/rte_ethdev.h	93
rte_eth_rx_queue_setup	lib/librte_ether/rte_ethdev.h	103
rte_eth_rx_queue_setup	lib/librte_ether/rte_ethdev.h	1770
rte_eth_rx_queue_setup	lib/librte_pmd_bond/rte_eth_bond_pmd.c	964
rte_eth_rx_queue_setup	lib/librte_pmd_bond/rte_eth_bond_pmd.c	970
rte_eth_tx_queue_setup	app/test-pipeline/init.c	263
rte_eth_tx_queue_setup	app/test-pmd/testpmd.c	1323
rte_eth_tx_queue_setup	app/test-pmd/testpmd.c	1327
rte_eth_tx_queue_setup	app/test/test_kni.c	535
rte_eth_tx_queue_setup	app/test/test_link_bonding.c	249
rte_eth_tx_queue_setup	app/test/test_link_bonding.c	251
rte_eth_tx_queue_setup	app/test/test_pmd_perf.c	764
rte_eth_tx_queue_setup	app/test/test_pmd_perf.c	768
rte_eth_tx_queue_setup	app/test/test_pmd_ring.c	86
rte_eth_tx_queue_setup	app/test/test_pmd_ring.c	95
rte_eth_tx_queue_setup	app/test/test_pmd_ring.c	262
rte_eth_tx_queue_setup	app/test/test_pmd_ring.c	263
rte_eth_tx_queue_setup	examples/distributor/main.c	138
rte_eth_tx_queue_setup	examples/dpdk_qat/main.c	742
rte_eth_tx_queue_setup	examples/dpdk_qat/main.c	746
rte_eth_tx_queue_setup	examples/exception_path/main.c	445
rte_eth_tx_queue_setup	examples/ip_fragmentation/main.c	927
rte_eth_tx_queue_setup	examples/ip_fragmentation/main.c	931
rte_eth_tx_queue_setup	examples/ip_pipeline/init.c	508
rte_eth_tx_queue_setup	examples/ip_reassembly/main.c	1134
rte_eth_tx_queue_setup	examples/ip_reassembly/main.c	1137
rte_eth_tx_queue_setup	examples/ipv4_multicast/main.c	774
rte_eth_tx_queue_setup	examples/ipv4_multicast/main.c	789
rte_eth_tx_queue_setup	examples/ipv4_multicast/main.c	792
rte_eth_tx_queue_setup	examples/kni/main.c	628
rte_eth_tx_queue_setup	examples/l2fwd-ivshmem/host/host.c	764
rte_eth_tx_queue_setup	examples/l2fwd-ivshmem/host/host.c	768
rte_eth_tx_queue_setup	examples/l2fwd/main.c	669
rte_eth_tx_queue_setup	examples/l2fwd/main.c	673
rte_eth_tx_queue_setup	examples/l3fwd-acl/main.c	2026
rte_eth_tx_queue_setup	examples/l3fwd-acl/main.c	2030
rte_eth_tx_queue_setup	examples/l3fwd-power/main.c	1568
rte_eth_tx_queue_setup	examples/l3fwd-power/main.c	1572
rte_eth_tx_queue_setup	examples/l3fwd-vf/main.c	1036
rte_eth_tx_queue_setup	examples/l3fwd-vf/main.c	1039
rte_eth_tx_queue_setup	examples/l3fwd/main.c	2498
rte_eth_tx_queue_setup	examples/l3fwd/main.c	2501
rte_eth_tx_queue_setup	examples/link_status_interrupt/main.c	724
rte_eth_tx_queue_setup	examples/link_status_interrupt/main.c	728
rte_eth_tx_queue_setup	examples/load_balancer/init.c	490
rte_eth_tx_queue_setup	examples/multi_process/client_server_mp/mp_server/init.c	156
rte_eth_tx_queue_setup	examples/multi_process/l2fwd_fork/main.c	1140
rte_eth_tx_queue_setup	examples/multi_process/l2fwd_fork/main.c	1144
rte_eth_tx_queue_setup	examples/multi_process/symmetric_mp/main.c	262
rte_eth_tx_queue_setup	examples/netmap_compat/lib/compat_netmap.c	715
rte_eth_tx_queue_setup	examples/qos_meter/main.c	380
rte_eth_tx_queue_setup	examples/qos_meter/main.c	396
rte_eth_tx_queue_setup	examples/qos_sched/init.c	139
rte_eth_tx_queue_setup	examples/qos_sched/init.c	144
rte_eth_tx_queue_setup	examples/qos_sched/init.c	147
rte_eth_tx_queue_setup	examples/quota_watermark/qw/init.c	97
rte_eth_tx_queue_setup	examples/skeleton/basicfwd.c	81
rte_eth_tx_queue_setup	examples/vhost/main.c	456
rte_eth_tx_queue_setup	examples/vhost_xen/main.c	322
rte_eth_tx_queue_setup	examples/vmdq/main.c	273
rte_eth_tx_queue_setup	examples/vmdq_dcb/main.c	191
rte_eth_tx_queue_setup	lib/librte_ether/rte_ethdev.c	1121
rte_eth_tx_queue_setup	lib/librte_ether/rte_ethdev.h	92
rte_eth_tx_queue_setup	lib/librte_ether/rte_ethdev.h	102
rte_eth_tx_queue_setup	lib/librte_ether/rte_ethdev.h	1818
rte_eth_tx_queue_setup	lib/librte_pmd_bond/rte_eth_bond_pmd.c	980
rte_eth_tx_queue_setup	lib/librte_pmd_bond/rte_eth_bond_pmd.c	986
rte_eth_dev_socket_id	app/test-pipeline/init.c	255
rte_eth_dev_socket_id	app/test-pipeline/init.c	267
rte_eth_dev_socket_id	app/test-pmd/testpmd.c	571
rte_eth_dev_socket_id	app/test-pmd/testpmd.c	672
rte_eth_dev_socket_id	app/test/test_link_bonding.c	244
rte_eth_dev_socket_id	app/test/test_link_bonding.c	250
rte_eth_dev_socket_id	app/test/test_pmd_perf.c	735
rte_eth_dev_socket_id	app/test/test_pmd_perf.c	745
rte_eth_dev_socket_id	app/test/test_pmd_perf.c	826
rte_eth_dev_socket_id	examples/distributor/main.c	131
rte_eth_dev_socket_id	examples/distributor/main.c	139
rte_eth_dev_socket_id	examples/distributor/main.c	212
rte_eth_dev_socket_id	examples/distributor/main.c	213
rte_eth_dev_socket_id	examples/distributor/main.c	312
rte_eth_dev_socket_id	examples/distributor/main.c	313
rte_eth_dev_socket_id	examples/exception_path/main.c	438
rte_eth_dev_socket_id	examples/exception_path/main.c	445
rte_eth_dev_socket_id	examples/ip_pipeline/init.c	500
rte_eth_dev_socket_id	examples/ip_pipeline/init.c	512
rte_eth_dev_socket_id	examples/ipv4_multicast/main.c	770
rte_eth_dev_socket_id	examples/kni/main.c	623
rte_eth_dev_socket_id	examples/kni/main.c	629
rte_eth_dev_socket_id	examples/l2fwd-ivshmem/host/host.c	755
rte_eth_dev_socket_id	examples/l2fwd-ivshmem/host/host.c	765
rte_eth_dev_socket_id	examples/l2fwd/main.c	660
rte_eth_dev_socket_id	examples/l2fwd/main.c	670
rte_eth_dev_socket_id	examples/link_status_interrupt/main.c	715
rte_eth_dev_socket_id	examples/link_status_interrupt/main.c	725
rte_eth_dev_socket_id	examples/multi_process/client_server_mp/mp_server/init.c	150
rte_eth_dev_socket_id	examples/multi_process/client_server_mp/mp_server/init.c	157
rte_eth_dev_socket_id	examples/multi_process/l2fwd_fork/main.c	1131
rte_eth_dev_socket_id	examples/multi_process/l2fwd_fork/main.c	1141
rte_eth_dev_socket_id	examples/multi_process/symmetric_mp/main.c	254
rte_eth_dev_socket_id	examples/multi_process/symmetric_mp/main.c	263
rte_eth_dev_socket_id	examples/qos_meter/main.c	375
rte_eth_dev_socket_id	examples/qos_meter/main.c	381
rte_eth_dev_socket_id	examples/qos_meter/main.c	391
rte_eth_dev_socket_id	examples/qos_meter/main.c	397
rte_eth_dev_socket_id	examples/qos_sched/init.c	137
rte_eth_dev_socket_id	examples/qos_sched/init.c	145
rte_eth_dev_socket_id	examples/qos_sched/init.c	343
rte_eth_dev_socket_id	examples/quota_watermark/qw/init.c	89
rte_eth_dev_socket_id	examples/quota_watermark/qw/init.c	98
rte_eth_dev_socket_id	examples/skeleton/basicfwd.c	75
rte_eth_dev_socket_id	examples/skeleton/basicfwd.c	82
rte_eth_dev_socket_id	examples/skeleton/basicfwd.c	115
rte_eth_dev_socket_id	examples/skeleton/basicfwd.c	116
rte_eth_dev_socket_id	examples/vhost/main.c	449
rte_eth_dev_socket_id	examples/vhost/main.c	457
rte_eth_dev_socket_id	examples/vhost_xen/main.c	316
rte_eth_dev_socket_id	examples/vhost_xen/main.c	323
rte_eth_dev_socket_id	examples/vmdq/main.c	263
rte_eth_dev_socket_id	examples/vmdq/main.c	274
rte_eth_dev_socket_id	examples/vmdq_dcb/main.c	183
rte_eth_dev_socket_id	examples/vmdq_dcb/main.c	192
rte_eth_dev_socket_id	lib/librte_ether/rte_ethdev.c	345
rte_eth_dev_socket_id	lib/librte_ether/rte_ethdev.h	1832
rte_eth_dev_socket_id	lib/librte_pmd_bond/rte_eth_bond_pmd.c	966
rte_eth_dev_socket_id	lib/librte_pmd_bond/rte_eth_bond_pmd.c	982
rte_eth_dev_rx_queue_start	app/test-pmd/cmdline.c	1671
rte_eth_dev_rx_queue_start	examples/vhost/main.c	2684
rte_eth_dev_rx_queue_start	lib/librte_ether/rte_ethdev.c	397
rte_eth_dev_rx_queue_start	lib/librte_ether/rte_ethdev.h	1850
rte_eth_dev_rx_queue_stop	app/test-pmd/cmdline.c	1673
rte_eth_dev_rx_queue_stop	examples/vhost/main.c	2377
rte_eth_dev_rx_queue_stop	lib/librte_ether/rte_ethdev.c	423
rte_eth_dev_rx_queue_stop	lib/librte_ether/rte_ethdev.h	1866
rte_eth_dev_tx_queue_start	app/test-pmd/cmdline.c	1675
rte_eth_dev_tx_queue_start	examples/vhost/main.c	2670
rte_eth_dev_tx_queue_start	lib/librte_ether/rte_ethdev.c	449
rte_eth_dev_tx_queue_start	lib/librte_ether/rte_ethdev.h	1883
rte_eth_dev_tx_queue_stop	app/test-pmd/cmdline.c	1677
rte_eth_dev_tx_queue_stop	examples/vhost/main.c	2393
rte_eth_dev_tx_queue_stop	examples/vhost/main.c	2693
rte_eth_dev_tx_queue_stop	lib/librte_ether/rte_ethdev.c	475
rte_eth_dev_tx_queue_stop	lib/librte_ether/rte_ethdev.h	1899
rte_eth_dev_start	app/test-pipeline/init.c	274
rte_eth_dev_start	app/test-pmd/testpmd.c	1386
rte_eth_dev_start	app/test/test_kni.c	541
rte_eth_dev_start	app/test/test_link_bonding.c	254
rte_eth_dev_start	app/test/test_link_bonding.c	255
rte_eth_dev_start	app/test/test_link_bonding.c	619
rte_eth_dev_start	app/test/test_link_bonding.c	808
rte_eth_dev_start	app/test/test_link_bonding.c	1008
rte_eth_dev_start	app/test/test_link_bonding.c	1049
rte_eth_dev_start	app/test/test_link_bonding.c	1140
rte_eth_dev_start	app/test/test_link_bonding.c	1764
rte_eth_dev_start	app/test/test_link_bonding.c	2362
rte_eth_dev_start	app/test/test_link_bonding.c	3264
rte_eth_dev_start	app/test/test_link_bonding.c	3843
rte_eth_dev_start	app/test/test_link_bonding.c	4344
rte_eth_dev_start	app/test/test_pmd_perf.c	781
rte_eth_dev_start	app/test/test_pmd_perf.c	784
rte_eth_dev_start	app/test/test_pmd_ring.c	105
rte_eth_dev_start	app/test/test_pmd_ring.c	109
rte_eth_dev_start	app/test/test_pmd_ring.c	113
rte_eth_dev_start	app/test/test_pmd_ring.c	274
rte_eth_dev_start	app/test/test_pmd_ring.c	275
rte_eth_dev_start	examples/distributor/main.c	145
rte_eth_dev_start	examples/dpdk_qat/main.c	785
rte_eth_dev_start	examples/dpdk_qat/main.c	787
rte_eth_dev_start	examples/exception_path/main.c	451
rte_eth_dev_start	examples/ip_fragmentation/main.c	951
rte_eth_dev_start	examples/ip_fragmentation/main.c	953
rte_eth_dev_start	examples/ip_pipeline/init.c	519
rte_eth_dev_start	examples/ip_reassembly/main.c	1156
rte_eth_dev_start	examples/ip_reassembly/main.c	1158
rte_eth_dev_start	examples/ipv4_multicast/main.c	801
rte_eth_dev_start	examples/ipv4_multicast/main.c	803
rte_eth_dev_start	examples/kni/main.c	634
rte_eth_dev_start	examples/kni/main.c	732
rte_eth_dev_start	examples/kni/main.c	757
rte_eth_dev_start	examples/l2fwd-ivshmem/host/host.c	772
rte_eth_dev_start	examples/l2fwd-ivshmem/host/host.c	774
rte_eth_dev_start	examples/l2fwd/main.c	677
rte_eth_dev_start	examples/l2fwd/main.c	679
rte_eth_dev_start	examples/l3fwd-acl/main.c	2078
rte_eth_dev_start	examples/l3fwd-acl/main.c	2081
rte_eth_dev_start	examples/l3fwd-power/main.c	1634
rte_eth_dev_start	examples/l3fwd-power/main.c	1636
rte_eth_dev_start	examples/l3fwd-vf/main.c	1082
rte_eth_dev_start	examples/l3fwd-vf/main.c	1084
rte_eth_dev_start	examples/l3fwd/main.c	2548
rte_eth_dev_start	examples/l3fwd/main.c	2550
rte_eth_dev_start	examples/link_status_interrupt/main.c	732
rte_eth_dev_start	examples/link_status_interrupt/main.c	734
rte_eth_dev_start	examples/load_balancer/init.c	504
rte_eth_dev_start	examples/multi_process/client_server_mp/mp_server/init.c	164
rte_eth_dev_start	examples/multi_process/l2fwd_fork/main.c	450
rte_eth_dev_start	examples/multi_process/l2fwd_fork/main.c	1148
rte_eth_dev_start	examples/multi_process/l2fwd_fork/main.c	1150
rte_eth_dev_start	examples/multi_process/symmetric_mp/main.c	271
rte_eth_dev_start	examples/netmap_compat/lib/compat_netmap.c	371
rte_eth_dev_start	examples/qos_meter/main.c	402
rte_eth_dev_start	examples/qos_meter/main.c	406
rte_eth_dev_start	examples/qos_sched/init.c	151
rte_eth_dev_start	examples/quota_watermark/qw/init.c	111
rte_eth_dev_start	examples/skeleton/basicfwd.c	87
rte_eth_dev_start	examples/vhost/main.c	464
rte_eth_dev_start	examples/vhost_xen/main.c	330
rte_eth_dev_start	examples/vmdq/main.c	282
rte_eth_dev_start	examples/vmdq_dcb/main.c	198
rte_eth_dev_start	lib/librte_ether/rte_ethdev.c	916
rte_eth_dev_start	lib/librte_ether/rte_ethdev.h	94
rte_eth_dev_start	lib/librte_ether/rte_ethdev.h	104
rte_eth_dev_start	lib/librte_ether/rte_ethdev.h	109
rte_eth_dev_start	lib/librte_ether/rte_ethdev.h	121
rte_eth_dev_start	lib/librte_ether/rte_ethdev.h	633
rte_eth_dev_start	lib/librte_ether/rte_ethdev.h	654
rte_eth_dev_start	lib/librte_ether/rte_ethdev.h	1918
rte_eth_dev_start	lib/librte_ether/rte_ethdev.h	1922
rte_eth_dev_start	lib/librte_pmd_bond/rte_eth_bond_pmd.c	993
rte_eth_dev_start	lib/librte_pmd_bond/rte_eth_bond_pmd.c	995
rte_eth_dev_stop	app/test-pmd/testpmd.c	1446
rte_eth_dev_stop	app/test/test_kni.c	675
rte_eth_dev_stop	app/test/test_link_bonding.c	666
rte_eth_dev_stop	app/test/test_link_bonding.c	696
rte_eth_dev_stop	app/test/test_link_bonding.c	806
rte_eth_dev_stop	app/test/test_link_bonding.c	1048
rte_eth_dev_stop	app/test/test_link_bonding.c	1079
rte_eth_dev_stop	app/test/test_link_bonding.c	1762
rte_eth_dev_stop	app/test/test_link_bonding.c	2360
rte_eth_dev_stop	app/test/test_link_bonding.c	3262
rte_eth_dev_stop	app/test/test_link_bonding.c	3841
rte_eth_dev_stop	app/test/test_link_bonding.c	4342
rte_eth_dev_stop	app/test/test_pmd_perf.c	829
rte_eth_dev_stop	app/test/test_pmd_ring.c	400
rte_eth_dev_stop	app/test/test_pmd_ring.c	401
rte_eth_dev_stop	app/test/test_pmd_ring.c	436
rte_eth_dev_stop	app/test/test_pmd_ring.c	437
rte_eth_dev_stop	app/test/test_pmd_ring.c	438
rte_eth_dev_stop	examples/kni/main.c	713
rte_eth_dev_stop	examples/kni/main.c	756
rte_eth_dev_stop	examples/kni/main.c	759
rte_eth_dev_stop	examples/kni/main.c	839
rte_eth_dev_stop	examples/multi_process/l2fwd_fork/main.c	440
rte_eth_dev_stop	examples/netmap_compat/lib/compat_netmap.c	417
rte_eth_dev_stop	examples/quota_watermark/qw/init.c	80
rte_eth_dev_stop	lib/librte_ether/rte_ethdev.c	953
rte_eth_dev_stop	lib/librte_ether/rte_ethdev.h	103
rte_eth_dev_stop	lib/librte_ether/rte_ethdev.h	109
rte_eth_dev_stop	lib/librte_ether/rte_ethdev.h	1927
rte_eth_dev_stop	lib/librte_pmd_bond/rte_eth_bond_pmd.c	943
rte_eth_dev_set_link_up	app/test-pmd/testpmd.c	1238
rte_eth_dev_set_link_up	lib/librte_ether/rte_ethdev.c	982
rte_eth_dev_set_link_up	lib/librte_ether/rte_ethdev.h	1942
rte_eth_dev_set_link_up	lib/librte_ether/rte_ethdev.h	1948
rte_eth_dev_set_link_down	app/test-pmd/testpmd.c	1245
rte_eth_dev_set_link_down	lib/librte_ether/rte_ethdev.c	1002
rte_eth_dev_set_link_down	lib/librte_ether/rte_ethdev.h	1953
rte_eth_dev_close	app/test-pmd/testpmd.c	1483
rte_eth_dev_close	app/test-pmd/testpmd.c	1528
rte_eth_dev_close	app/test/test_link_bonding.c	4028
rte_eth_dev_close	examples/l3fwd-vf/main.c	694
rte_eth_dev_close	lib/librte_ether/rte_ethdev.c	1022
rte_eth_dev_close	lib/librte_ether/rte_ethdev.h	124
rte_eth_dev_close	lib/librte_ether/rte_ethdev.h	1961
rte_eth_promiscuous_enable	app/test-pipeline/init.c	248
rte_eth_promiscuous_enable	app/test-pmd/cmdline.c	4043
rte_eth_promiscuous_enable	app/test-pmd/cmdline.c	4334
rte_eth_promiscuous_enable	app/test-pmd/cmdline.c	4341
rte_eth_promiscuous_enable	app/test-pmd/testpmd.c	1922
rte_eth_promiscuous_enable	app/test/test_kni.c	546
rte_eth_promiscuous_enable	app/test/test_link_bonding.c	1813
rte_eth_promiscuous_enable	app/test/test_link_bonding.c	2261
rte_eth_promiscuous_enable	app/test/test_link_bonding.c	3172
rte_eth_promiscuous_enable	app/test/test_link_bonding.c	3767
rte_eth_promiscuous_enable	app/test/test_link_bonding.c	4242
rte_eth_promiscuous_enable	app/test/test_pmd_perf.c	788
rte_eth_promiscuous_enable	examples/distributor/main.c	170
rte_eth_promiscuous_enable	examples/dpdk_qat/main.c	808
rte_eth_promiscuous_enable	examples/exception_path/main.c	455
rte_eth_promiscuous_enable	examples/ip_fragmentation/main.c	956
rte_eth_promiscuous_enable	examples/ip_pipeline/init.c	493
rte_eth_promiscuous_enable	examples/ip_reassembly/main.c	1161
rte_eth_promiscuous_enable	examples/kni/main.c	640
rte_eth_promiscuous_enable	examples/l2fwd-ivshmem/host/host.c	779
rte_eth_promiscuous_enable	examples/l2fwd/main.c	684
rte_eth_promiscuous_enable	examples/l3fwd-acl/main.c	2091
rte_eth_promiscuous_enable	examples/l3fwd-power/main.c	1646
rte_eth_promiscuous_enable	examples/l3fwd/main.c	2560
rte_eth_promiscuous_enable	examples/load_balancer/init.c	454
rte_eth_promiscuous_enable	examples/multi_process/client_server_mp/mp_server/init.c	162
rte_eth_promiscuous_enable	examples/multi_process/l2fwd_fork/main.c	1155
rte_eth_promiscuous_enable	examples/multi_process/symmetric_mp/main.c	269
rte_eth_promiscuous_enable	examples/netmap_compat/bridge/bridge.c	302
rte_eth_promiscuous_enable	examples/qos_meter/main.c	410
rte_eth_promiscuous_enable	examples/qos_meter/main.c	412
rte_eth_promiscuous_enable	examples/qos_sched/init.c	168
rte_eth_promiscuous_enable	examples/quota_watermark/qw/init.c	117
rte_eth_promiscuous_enable	examples/skeleton/basicfwd.c	100
rte_eth_promiscuous_enable	examples/vhost/main.c	471
rte_eth_promiscuous_enable	lib/librte_ether/rte_ethdev.c	904
rte_eth_promiscuous_enable	lib/librte_ether/rte_ethdev.c	1162
rte_eth_promiscuous_enable	lib/librte_ether/rte_ethdev.h	1969
rte_eth_promiscuous_enable	lib/librte_pmd_bond/rte_eth_bond_8023ad.c	873
rte_eth_promiscuous_enable	lib/librte_pmd_bond/rte_eth_bond_pmd.c	1417
rte_eth_promiscuous_enable	lib/librte_pmd_bond/rte_eth_bond_pmd.c	1426
rte_eth_promiscuous_disable	app/test-pmd/cmdline.c	4336
rte_eth_promiscuous_disable	app/test-pmd/cmdline.c	4343
rte_eth_promiscuous_disable	app/test/test_link_bonding.c	1828
rte_eth_promiscuous_disable	app/test/test_link_bonding.c	2282
rte_eth_promiscuous_disable	app/test/test_link_bonding.c	3185
rte_eth_promiscuous_disable	app/test/test_link_bonding.c	3781
rte_eth_promiscuous_disable	app/test/test_link_bonding.c	4263
rte_eth_promiscuous_disable	lib/librte_ether/rte_ethdev.c	906
rte_eth_promiscuous_disable	lib/librte_ether/rte_ethdev.c	1179
rte_eth_promiscuous_disable	lib/librte_ether/rte_ethdev.h	1977
rte_eth_promiscuous_disable	lib/librte_pmd_bond/rte_eth_bond_pmd.c	1446
rte_eth_promiscuous_disable	lib/librte_pmd_bond/rte_eth_bond_pmd.c	1455
rte_eth_promiscuous_get	app/test-pmd/config.c	331
rte_eth_promiscuous_get	app/test/test_link_bonding.c	1815
rte_eth_promiscuous_get	app/test/test_link_bonding.c	1821
rte_eth_promiscuous_get	app/test/test_link_bonding.c	1830
rte_eth_promiscuous_get	app/test/test_link_bonding.c	1836
rte_eth_promiscuous_get	app/test/test_link_bonding.c	2263
rte_eth_promiscuous_get	app/test/test_link_bonding.c	2268
rte_eth_promiscuous_get	app/test/test_link_bonding.c	2284
rte_eth_promiscuous_get	app/test/test_link_bonding.c	2289
rte_eth_promiscuous_get	app/test/test_link_bonding.c	3174
rte_eth_promiscuous_get	app/test/test_link_bonding.c	3179
rte_eth_promiscuous_get	app/test/test_link_bonding.c	3187
rte_eth_promiscuous_get	app/test/test_link_bonding.c	3192
rte_eth_promiscuous_get	app/test/test_link_bonding.c	3770
rte_eth_promiscuous_get	app/test/test_link_bonding.c	3775
rte_eth_promiscuous_get	app/test/test_link_bonding.c	3783
rte_eth_promiscuous_get	app/test/test_link_bonding.c	3788
rte_eth_promiscuous_get	app/test/test_link_bonding.c	4244
rte_eth_promiscuous_get	app/test/test_link_bonding.c	4249
rte_eth_promiscuous_get	app/test/test_link_bonding.c	4265
rte_eth_promiscuous_get	app/test/test_link_bonding.c	4271
rte_eth_promiscuous_get	lib/librte_ether/rte_ethdev.c	903
rte_eth_promiscuous_get	lib/librte_ether/rte_ethdev.c	905
rte_eth_promiscuous_get	lib/librte_ether/rte_ethdev.c	1196
rte_eth_promiscuous_get	lib/librte_ether/rte_ethdev.h	1989
rte_eth_allmulticast_enable	app/test-pmd/cmdline.c	4414
rte_eth_allmulticast_enable	app/test-pmd/cmdline.c	4421
rte_eth_allmulticast_enable	lib/librte_ether/rte_ethdev.c	910
rte_eth_allmulticast_enable	lib/librte_ether/rte_ethdev.c	1210
rte_eth_allmulticast_enable	lib/librte_ether/rte_ethdev.h	1997
rte_eth_allmulticast_disable	app/test-pmd/cmdline.c	4416
rte_eth_allmulticast_disable	app/test-pmd/cmdline.c	4423
rte_eth_allmulticast_disable	lib/librte_ether/rte_ethdev.c	912
rte_eth_allmulticast_disable	lib/librte_ether/rte_ethdev.c	1227
rte_eth_allmulticast_disable	lib/librte_ether/rte_ethdev.h	2005
rte_eth_allmulticast_get	app/test-pmd/config.c	333
rte_eth_allmulticast_get	lib/librte_ether/rte_ethdev.c	909
rte_eth_allmulticast_get	lib/librte_ether/rte_ethdev.c	911
rte_eth_allmulticast_get	lib/librte_ether/rte_ethdev.c	1244
rte_eth_allmulticast_get	lib/librte_ether/rte_ethdev.h	2017
rte_eth_link_get	app/test-pipeline/init.c	212
rte_eth_link_get	app/test-pmd/config.c	311
rte_eth_link_get	app/test-pmd/config.c	2134
rte_eth_link_get	app/test-pmd/config.c	2159
rte_eth_link_get	app/test-pmd/testpmd.c	1559
rte_eth_link_get	app/test/test_link_bonding.c	650
rte_eth_link_get	app/test/test_link_bonding.c	668
rte_eth_link_get	app/test/test_pmd_perf.c	179
rte_eth_link_get	app/test/test_pmd_ring.c	118
rte_eth_link_get	app/test/test_pmd_ring.c	119
rte_eth_link_get	app/test/test_pmd_ring.c	120
rte_eth_link_get	examples/distributor/main.c	150
rte_eth_link_get	examples/distributor/main.c	153
rte_eth_link_get	examples/dpdk_qat/main.c	793
rte_eth_link_get	examples/exception_path/main.c	475
rte_eth_link_get	examples/ip_fragmentation/main.c	623
rte_eth_link_get	examples/ip_pipeline/init.c	458
rte_eth_link_get	examples/ip_reassembly/main.c	752
rte_eth_link_get	examples/ipv4_multicast/main.c	631
rte_eth_link_get	examples/kni/main.c	660
rte_eth_link_get	examples/l2fwd-ivshmem/host/host.c	361
rte_eth_link_get	examples/l2fwd/main.c	501
rte_eth_link_get	examples/l3fwd-acl/main.c	1890
rte_eth_link_get	examples/l3fwd-power/main.c	1429
rte_eth_link_get	examples/l3fwd/main.c	2360
rte_eth_link_get	examples/link_status_interrupt/main.c	176
rte_eth_link_get	examples/link_status_interrupt/main.c	528
rte_eth_link_get	examples/link_status_interrupt/main.c	555
rte_eth_link_get	examples/load_balancer/init.c	386
rte_eth_link_get	examples/multi_process/client_server_mp/mp_server/init.c	220
rte_eth_link_get	examples/multi_process/l2fwd_fork/main.c	924
rte_eth_link_get	examples/multi_process/symmetric_mp/main.c	378
rte_eth_link_get	examples/qos_sched/init.c	159
rte_eth_link_get	examples/qos_sched/init.c	247
rte_eth_link_get	lib/librte_ether/rte_ethdev.c	1272
rte_eth_link_get	lib/librte_ether/rte_ethdev.c	1293
rte_eth_link_get	lib/librte_ether/rte_ethdev.h	2030
rte_eth_link_get	lib/librte_ether/rte_ethdev.h	2035
rte_eth_link_get	lib/librte_ether/rte_ethdev.h	2043
rte_eth_link_get	lib/librte_pmd_bond/rte_eth_bond_8023ad.c	758
rte_eth_link_get	lib/librte_pmd_bond/rte_eth_bond_api.c	416
rte_eth_link_get	lib/librte_pmd_bond/rte_eth_bond_pmd.c	425
rte_eth_link_get	lib/librte_pmd_bond/rte_eth_bond_pmd.c	1511
rte_eth_link_get_nowait	app/test-pipeline/init.c	212
rte_eth_link_get_nowait	app/test-pmd/config.c	311
rte_eth_link_get_nowait	app/test-pmd/config.c	2134
rte_eth_link_get_nowait	app/test-pmd/config.c	2159
rte_eth_link_get_nowait	app/test-pmd/testpmd.c	1559
rte_eth_link_get_nowait	app/test/test_pmd_perf.c	179
rte_eth_link_get_nowait	examples/distributor/main.c	150
rte_eth_link_get_nowait	examples/distributor/main.c	153
rte_eth_link_get_nowait	examples/exception_path/main.c	475
rte_eth_link_get_nowait	examples/ip_fragmentation/main.c	623
rte_eth_link_get_nowait	examples/ip_pipeline/init.c	458
rte_eth_link_get_nowait	examples/ip_reassembly/main.c	752
rte_eth_link_get_nowait	examples/ipv4_multicast/main.c	631
rte_eth_link_get_nowait	examples/kni/main.c	660
rte_eth_link_get_nowait	examples/l2fwd-ivshmem/host/host.c	361
rte_eth_link_get_nowait	examples/l2fwd/main.c	501
rte_eth_link_get_nowait	examples/l3fwd-acl/main.c	1890
rte_eth_link_get_nowait	examples/l3fwd-power/main.c	1429
rte_eth_link_get_nowait	examples/l3fwd/main.c	2360
rte_eth_link_get_nowait	examples/link_status_interrupt/main.c	176
rte_eth_link_get_nowait	examples/link_status_interrupt/main.c	528
rte_eth_link_get_nowait	examples/link_status_interrupt/main.c	555
rte_eth_link_get_nowait	examples/load_balancer/init.c	386
rte_eth_link_get_nowait	examples/multi_process/client_server_mp/mp_server/init.c	220
rte_eth_link_get_nowait	examples/multi_process/l2fwd_fork/main.c	924
rte_eth_link_get_nowait	examples/multi_process/symmetric_mp/main.c	378
rte_eth_link_get_nowait	lib/librte_ether/rte_ethdev.c	1293
rte_eth_link_get_nowait	lib/librte_ether/rte_ethdev.h	2043
rte_eth_link_get_nowait	lib/librte_pmd_bond/rte_eth_bond_api.c	416
rte_eth_link_get_nowait	lib/librte_pmd_bond/rte_eth_bond_pmd.c	1511
rte_eth_stats_get	app/test-pmd/config.c	134
rte_eth_stats_get	app/test-pmd/testpmd.c	1052
rte_eth_stats_get	app/test-pmd/testpmd.c	1182
rte_eth_stats_get	app/test/test_link_bonding.c	1397
rte_eth_stats_get	app/test/test_link_bonding.c	1405
rte_eth_stats_get	app/test/test_link_bonding.c	1518
rte_eth_stats_get	app/test/test_link_bonding.c	1532
rte_eth_stats_get	app/test/test_link_bonding.c	1593
rte_eth_stats_get	app/test/test_link_bonding.c	1604
rte_eth_stats_get	app/test/test_link_bonding.c	1679
rte_eth_stats_get	app/test/test_link_bonding.c	1687
rte_eth_stats_get	app/test/test_link_bonding.c	1693
rte_eth_stats_get	app/test/test_link_bonding.c	1699
rte_eth_stats_get	app/test/test_link_bonding.c	1705
rte_eth_stats_get	app/test/test_link_bonding.c	1917
rte_eth_stats_get	app/test/test_link_bonding.c	1923
rte_eth_stats_get	app/test/test_link_bonding.c	1928
rte_eth_stats_get	app/test/test_link_bonding.c	1933
rte_eth_stats_get	app/test/test_link_bonding.c	1938
rte_eth_stats_get	app/test/test_link_bonding.c	1967
rte_eth_stats_get	app/test/test_link_bonding.c	2119
rte_eth_stats_get	app/test/test_link_bonding.c	2129
rte_eth_stats_get	app/test/test_link_bonding.c	2199
rte_eth_stats_get	app/test/test_link_bonding.c	2207
rte_eth_stats_get	app/test/test_link_bonding.c	2222
rte_eth_stats_get	app/test/test_link_bonding.c	2491
rte_eth_stats_get	app/test/test_link_bonding.c	2496
rte_eth_stats_get	app/test/test_link_bonding.c	2501
rte_eth_stats_get	app/test/test_link_bonding.c	2506
rte_eth_stats_get	app/test/test_link_bonding.c	2527
rte_eth_stats_get	app/test/test_link_bonding.c	2532
rte_eth_stats_get	app/test/test_link_bonding.c	2537
rte_eth_stats_get	app/test/test_link_bonding.c	2542
rte_eth_stats_get	app/test/test_link_bonding.c	2547
rte_eth_stats_get	app/test/test_link_bonding.c	2671
rte_eth_stats_get	app/test/test_link_bonding.c	2680
rte_eth_stats_get	app/test/test_link_bonding.c	2686
rte_eth_stats_get	app/test/test_link_bonding.c	2753
rte_eth_stats_get	app/test/test_link_bonding.c	2760
rte_eth_stats_get	app/test/test_link_bonding.c	2766
rte_eth_stats_get	app/test/test_link_bonding.c	2866
rte_eth_stats_get	app/test/test_link_bonding.c	2873
rte_eth_stats_get	app/test/test_link_bonding.c	2879
rte_eth_stats_get	app/test/test_link_bonding.c	3022
rte_eth_stats_get	app/test/test_link_bonding.c	3036
rte_eth_stats_get	app/test/test_link_bonding.c	3050
rte_eth_stats_get	app/test/test_link_bonding.c	3115
rte_eth_stats_get	app/test/test_link_bonding.c	3124
rte_eth_stats_get	app/test/test_link_bonding.c	3130
rte_eth_stats_get	app/test/test_link_bonding.c	3136
rte_eth_stats_get	app/test/test_link_bonding.c	3142
rte_eth_stats_get	app/test/test_link_bonding.c	3382
rte_eth_stats_get	app/test/test_link_bonding.c	3388
rte_eth_stats_get	app/test/test_link_bonding.c	3394
rte_eth_stats_get	app/test/test_link_bonding.c	3418
rte_eth_stats_get	app/test/test_link_bonding.c	3425
rte_eth_stats_get	app/test/test_link_bonding.c	3455
rte_eth_stats_get	app/test/test_link_bonding.c	3517
rte_eth_stats_get	app/test/test_link_bonding.c	3526
rte_eth_stats_get	app/test/test_link_bonding.c	3624
rte_eth_stats_get	app/test/test_link_bonding.c	3635
rte_eth_stats_get	app/test/test_link_bonding.c	3645
rte_eth_stats_get	app/test/test_link_bonding.c	3710
rte_eth_stats_get	app/test/test_link_bonding.c	3719
rte_eth_stats_get	app/test/test_link_bonding.c	3725
rte_eth_stats_get	app/test/test_link_bonding.c	3731
rte_eth_stats_get	app/test/test_link_bonding.c	3737
rte_eth_stats_get	app/test/test_link_bonding.c	3942
rte_eth_stats_get	app/test/test_link_bonding.c	3948
rte_eth_stats_get	app/test/test_link_bonding.c	3953
rte_eth_stats_get	app/test/test_link_bonding.c	3958
rte_eth_stats_get	app/test/test_link_bonding.c	3964
rte_eth_stats_get	app/test/test_link_bonding.c	3986
rte_eth_stats_get	app/test/test_link_bonding.c	4097
rte_eth_stats_get	app/test/test_link_bonding.c	4110
rte_eth_stats_get	app/test/test_link_bonding.c	4184
rte_eth_stats_get	app/test/test_link_bonding.c	4192
rte_eth_stats_get	app/test/test_link_bonding.c	4207
rte_eth_stats_get	app/test/test_link_bonding.c	4475
rte_eth_stats_get	app/test/test_link_bonding.c	4480
rte_eth_stats_get	app/test/test_link_bonding.c	4485
rte_eth_stats_get	app/test/test_link_bonding.c	4490
rte_eth_stats_get	app/test/test_link_bonding.c	4515
rte_eth_stats_get	app/test/test_pmd_perf.c	346
rte_eth_stats_get	app/test/test_pmd_ring.c	165
rte_eth_stats_get	app/test/test_pmd_ring.c	183
rte_eth_stats_get	app/test/test_pmd_ring.c	204
rte_eth_stats_get	app/test/test_pmd_ring.c	223
rte_eth_stats_get	app/test/test_pmd_ring.c	234
rte_eth_stats_get	app/test/test_pmd_ring.c	294
rte_eth_stats_get	app/test/test_pmd_ring.c	295
rte_eth_stats_get	app/test/test_pmd_ring.c	324
rte_eth_stats_get	app/test/test_pmd_ring.c	325
rte_eth_stats_get	app/test/test_pmd_ring.c	354
rte_eth_stats_get	app/test/test_pmd_ring.c	355
rte_eth_stats_get	app/test/test_pmd_ring.c	384
rte_eth_stats_get	app/test/test_pmd_ring.c	385
rte_eth_stats_get	examples/distributor/main.c	390
rte_eth_stats_get	examples/load_balancer/runtime.c	213
rte_eth_stats_get	examples/qos_sched/main.c	188
rte_eth_stats_get	examples/qos_sched/main.c	197
rte_eth_stats_get	lib/librte_ether/rte_ethdev.c	1314
rte_eth_stats_get	lib/librte_ether/rte_ethdev.c	1380
rte_eth_stats_get	lib/librte_ether/rte_ethdev.h	2061
rte_eth_stats_get	lib/librte_pmd_bond/rte_eth_bond_pmd.c	454
rte_eth_stats_get	lib/librte_pmd_bond/rte_eth_bond_pmd.c	1372
rte_eth_stats_reset	app/test-pmd/config.c	208
rte_eth_stats_reset	app/test/test_link_bonding.c	465
rte_eth_stats_reset	app/test/test_link_bonding.c	697
rte_eth_stats_reset	app/test/test_link_bonding.c	1169
rte_eth_stats_reset	app/test/test_link_bonding.c	1619
rte_eth_stats_reset	app/test/test_link_bonding.c	1622
rte_eth_stats_reset	app/test/test_link_bonding.c	1910
rte_eth_stats_reset	app/test/test_link_bonding.c	2239
rte_eth_stats_reset	app/test/test_link_bonding.c	3929
rte_eth_stats_reset	app/test/test_link_bonding.c	4220
rte_eth_stats_reset	app/test/test_pmd_ring.c	201
rte_eth_stats_reset	app/test/test_pmd_ring.c	231
rte_eth_stats_reset	lib/librte_ether/rte_ethdev.c	1332
rte_eth_stats_reset	lib/librte_ether/rte_ethdev.c	1442
rte_eth_stats_reset	lib/librte_ether/rte_ethdev.h	2069
rte_eth_stats_reset	lib/librte_pmd_bond/rte_eth_bond_pmd.c	1398
rte_eth_xstats_get	app/test-pmd/config.c	220
rte_eth_xstats_get	app/test-pmd/config.c	230
rte_eth_xstats_get	lib/librte_ether/rte_ethdev.c	1349
rte_eth_xstats_get	lib/librte_ether/rte_ethdev.h	2092
rte_eth_xstats_reset	app/test-pmd/config.c	244
rte_eth_xstats_reset	lib/librte_ether/rte_ethdev.c	1424
rte_eth_xstats_reset	lib/librte_ether/rte_ethdev.h	2101
rte_eth_dev_set_tx_queue_stats_mapping	app/test-pmd/testpmd.c	1605
rte_eth_dev_set_tx_queue_stats_mapping	lib/librte_ether/rte_ethdev.c	1465
rte_eth_dev_set_tx_queue_stats_mapping	lib/librte_ether/rte_ethdev.h	2120
rte_eth_dev_set_rx_queue_stats_mapping	app/test-pmd/testpmd.c	1628
rte_eth_dev_set_rx_queue_stats_mapping	lib/librte_ether/rte_ethdev.c	1474
rte_eth_dev_set_rx_queue_stats_mapping	lib/librte_ether/rte_ethdev.h	2141
rte_eth_macaddr_get	app/test-pmd/config.c	314
rte_eth_macaddr_get	app/test-pmd/testpmd.c	1401
rte_eth_macaddr_get	app/test-pmd/testpmd.c	1752
rte_eth_macaddr_get	app/test-pmd/testpmd.c	1879
rte_eth_macaddr_get	app/test/test_link_bonding.c	459
rte_eth_macaddr_get	app/test/test_link_bonding.c	816
rte_eth_macaddr_get	app/test/test_link_bonding.c	822
rte_eth_macaddr_get	app/test/test_link_bonding.c	830
rte_eth_macaddr_get	app/test/test_link_bonding.c	896
rte_eth_macaddr_get	app/test/test_link_bonding.c	902
rte_eth_macaddr_get	app/test/test_link_bonding.c	1017
rte_eth_macaddr_get	app/test/test_link_bonding.c	1022
rte_eth_macaddr_get	app/test/test_link_bonding.c	1028
rte_eth_macaddr_get	app/test/test_link_bonding.c	1034
rte_eth_macaddr_get	app/test/test_link_bonding.c	1053
rte_eth_macaddr_get	app/test/test_link_bonding.c	1059
rte_eth_macaddr_get	app/test/test_link_bonding.c	1065
rte_eth_macaddr_get	app/test/test_link_bonding.c	1070
rte_eth_macaddr_get	app/test/test_link_bonding.c	1096
rte_eth_macaddr_get	app/test/test_link_bonding.c	1102
rte_eth_macaddr_get	app/test/test_link_bonding.c	1108
rte_eth_macaddr_get	app/test/test_link_bonding.c	1728
rte_eth_macaddr_get	app/test/test_link_bonding.c	1729
rte_eth_macaddr_get	app/test/test_link_bonding.c	1738
rte_eth_macaddr_get	app/test/test_link_bonding.c	1752
rte_eth_macaddr_get	app/test/test_link_bonding.c	1767
rte_eth_macaddr_get	app/test/test_link_bonding.c	1774
rte_eth_macaddr_get	app/test/test_link_bonding.c	1786
rte_eth_macaddr_get	app/test/test_link_bonding.c	1793
rte_eth_macaddr_get	app/test/test_link_bonding.c	2305
rte_eth_macaddr_get	app/test/test_link_bonding.c	2306
rte_eth_macaddr_get	app/test/test_link_bonding.c	2315
rte_eth_macaddr_get	app/test/test_link_bonding.c	2321
rte_eth_macaddr_get	app/test/test_link_bonding.c	2327
rte_eth_macaddr_get	app/test/test_link_bonding.c	2339
rte_eth_macaddr_get	app/test/test_link_bonding.c	2345
rte_eth_macaddr_get	app/test/test_link_bonding.c	2351
rte_eth_macaddr_get	app/test/test_link_bonding.c	2365
rte_eth_macaddr_get	app/test/test_link_bonding.c	2371
rte_eth_macaddr_get	app/test/test_link_bonding.c	2377
rte_eth_macaddr_get	app/test/test_link_bonding.c	2388
rte_eth_macaddr_get	app/test/test_link_bonding.c	2394
rte_eth_macaddr_get	app/test/test_link_bonding.c	2400
rte_eth_macaddr_get	app/test/test_link_bonding.c	3207
rte_eth_macaddr_get	app/test/test_link_bonding.c	3208
rte_eth_macaddr_get	app/test/test_link_bonding.c	3217
rte_eth_macaddr_get	app/test/test_link_bonding.c	3223
rte_eth_macaddr_get	app/test/test_link_bonding.c	3229
rte_eth_macaddr_get	app/test/test_link_bonding.c	3241
rte_eth_macaddr_get	app/test/test_link_bonding.c	3247
rte_eth_macaddr_get	app/test/test_link_bonding.c	3253
rte_eth_macaddr_get	app/test/test_link_bonding.c	3267
rte_eth_macaddr_get	app/test/test_link_bonding.c	3273
rte_eth_macaddr_get	app/test/test_link_bonding.c	3279
rte_eth_macaddr_get	app/test/test_link_bonding.c	3290
rte_eth_macaddr_get	app/test/test_link_bonding.c	3296
rte_eth_macaddr_get	app/test/test_link_bonding.c	3302
rte_eth_macaddr_get	app/test/test_link_bonding.c	3805
rte_eth_macaddr_get	app/test/test_link_bonding.c	3806
rte_eth_macaddr_get	app/test/test_link_bonding.c	3816
rte_eth_macaddr_get	app/test/test_link_bonding.c	3830
rte_eth_macaddr_get	app/test/test_link_bonding.c	3846
rte_eth_macaddr_get	app/test/test_link_bonding.c	3853
rte_eth_macaddr_get	app/test/test_link_bonding.c	3865
rte_eth_macaddr_get	app/test/test_link_bonding.c	3873
rte_eth_macaddr_get	app/test/test_link_bonding.c	4287
rte_eth_macaddr_get	app/test/test_link_bonding.c	4288
rte_eth_macaddr_get	app/test/test_link_bonding.c	4297
rte_eth_macaddr_get	app/test/test_link_bonding.c	4303
rte_eth_macaddr_get	app/test/test_link_bonding.c	4309
rte_eth_macaddr_get	app/test/test_link_bonding.c	4321
rte_eth_macaddr_get	app/test/test_link_bonding.c	4327
rte_eth_macaddr_get	app/test/test_link_bonding.c	4333
rte_eth_macaddr_get	app/test/test_link_bonding.c	4347
rte_eth_macaddr_get	app/test/test_link_bonding.c	4353
rte_eth_macaddr_get	app/test/test_link_bonding.c	4359
rte_eth_macaddr_get	app/test/test_link_bonding.c	4371
rte_eth_macaddr_get	app/test/test_link_bonding.c	4377
rte_eth_macaddr_get	app/test/test_link_bonding.c	4383
rte_eth_macaddr_get	app/test/test_pmd_perf.c	758
rte_eth_macaddr_get	examples/distributor/main.c	162
rte_eth_macaddr_get	examples/dpdk_qat/main.c	732
rte_eth_macaddr_get	examples/ip_fragmentation/main.c	910
rte_eth_macaddr_get	examples/ip_reassembly/main.c	1115
rte_eth_macaddr_get	examples/ipv4_multicast/main.c	761
rte_eth_macaddr_get	examples/l2fwd-ivshmem/host/host.c	750
rte_eth_macaddr_get	examples/l2fwd-ivshmem/host/host.c	819
rte_eth_macaddr_get	examples/l2fwd/main.c	655
rte_eth_macaddr_get	examples/l3fwd-acl/main.c	1998
rte_eth_macaddr_get	examples/l3fwd-power/main.c	1540
rte_eth_macaddr_get	examples/l3fwd-vf/main.c	1018
rte_eth_macaddr_get	examples/l3fwd/main.c	2463
rte_eth_macaddr_get	examples/link_status_interrupt/main.c	709
rte_eth_macaddr_get	examples/multi_process/client_server_mp/mp_server/main.c	104
rte_eth_macaddr_get	examples/multi_process/l2fwd_fork/main.c	1126
rte_eth_macaddr_get	examples/quota_watermark/qw/main.c	96
rte_eth_macaddr_get	examples/skeleton/basicfwd.c	92
rte_eth_macaddr_get	examples/vhost/main.c	473
rte_eth_macaddr_get	examples/vhost_xen/main.c	334
rte_eth_macaddr_get	examples/vmdq/main.c	288
rte_eth_macaddr_get	examples/vmdq_dcb/main.c	203
rte_eth_macaddr_get	lib/librte_ether/rte_ethdev.c	1504
rte_eth_macaddr_get	lib/librte_ether/rte_ethdev.h	2154
rte_eth_macaddr_get	lib/librte_pmd_bond/rte_eth_bond_8023ad.c	598
rte_eth_macaddr_get	lib/librte_pmd_bond/rte_eth_bond_8023ad.c	759
rte_eth_macaddr_get	lib/librte_pmd_bond/rte_eth_bond_8023ad.c	979
rte_eth_macaddr_get	lib/librte_pmd_bond/rte_eth_bond_8023ad.c	1112
rte_eth_macaddr_get	lib/librte_pmd_bond/rte_eth_bond_pmd.c	124
rte_eth_dev_info_get	app/test-pmd/cmdline.c	1787
rte_eth_dev_info_get	app/test-pmd/cmdline.c	1904
rte_eth_dev_info_get	app/test-pmd/cmdline.c	3032
rte_eth_dev_info_get	app/test-pmd/cmdline.c	3131
rte_eth_dev_info_get	app/test-pmd/config.c	359
rte_eth_dev_info_get	app/test-pmd/config.c	675
rte_eth_dev_info_get	app/test-pmd/testpmd.c	565
rte_eth_dev_info_get	app/test-pmd/testpmd.c	635
rte_eth_dev_info_get	app/test/test_kni.c	389
rte_eth_dev_info_get	app/test/test_kni.c	557
rte_eth_dev_info_get	app/test/test_kni.c	586
rte_eth_dev_info_get	examples/dpdk_qat/main.c	361
rte_eth_dev_info_get	examples/dpdk_qat/main.c	370
rte_eth_dev_info_get	examples/ip_fragmentation/main.c	924
rte_eth_dev_info_get	examples/ip_reassembly/main.c	1130
rte_eth_dev_info_get	examples/ipv4_multicast/main.c	786
rte_eth_dev_info_get	examples/kni/main.c	804
rte_eth_dev_info_get	examples/l2fwd-ivshmem/host/host.c	717
rte_eth_dev_info_get	examples/l2fwd/main.c	603
rte_eth_dev_info_get	examples/l3fwd-acl/main.c	2022
rte_eth_dev_info_get	examples/l3fwd-power/main.c	1564
rte_eth_dev_info_get	examples/l3fwd-vf/main.c	1032
rte_eth_dev_info_get	examples/l3fwd/main.c	2494
rte_eth_dev_info_get	examples/link_status_interrupt/main.c	652
rte_eth_dev_info_get	examples/multi_process/l2fwd_fork/main.c	1064
rte_eth_dev_info_get	examples/multi_process/symmetric_mp/main.c	245
rte_eth_dev_info_get	examples/vhost/main.c	381
rte_eth_dev_info_get	examples/vhost_xen/main.c	287
rte_eth_dev_info_get	examples/vhost_xen/main.c	310
rte_eth_dev_info_get	examples/vmdq/main.c	210
rte_eth_dev_info_get	examples/vmdq/main.c	258
rte_eth_dev_info_get	lib/librte_ether/rte_ethdev.c	877
rte_eth_dev_info_get	lib/librte_ether/rte_ethdev.c	1083
rte_eth_dev_info_get	lib/librte_ether/rte_ethdev.c	1152
rte_eth_dev_info_get	lib/librte_ether/rte_ethdev.c	1483
rte_eth_dev_info_get	lib/librte_ether/rte_ethdev.c	2268
rte_eth_dev_info_get	lib/librte_ether/rte_ethdev.c	2381
rte_eth_dev_info_get	lib/librte_ether/rte_ethdev.c	2409
rte_eth_dev_info_get	lib/librte_ether/rte_ethdev.c	2507
rte_eth_dev_info_get	lib/librte_ether/rte_ethdev.c	2533
rte_eth_dev_info_get	lib/librte_ether/rte_ethdev.c	2590
rte_eth_dev_info_get	lib/librte_ether/rte_ethdev.c	2627
rte_eth_dev_info_get	lib/librte_ether/rte_ethdev.h	2165
rte_eth_dev_info_get	lib/librte_ether/rte_ethdev.h	2941
rte_eth_dev_info_get	lib/librte_ether/rte_ethdev.h	2960
rte_eth_dev_info_get	lib/librte_kni/rte_kni.c	320
rte_eth_dev_info_get	lib/librte_pmd_bond/rte_eth_bond_api.c	357
rte_eth_dev_get_mtu	lib/librte_ether/rte_ethdev.c	1519
rte_eth_dev_get_mtu	lib/librte_ether/rte_ethdev.h	2179
rte_eth_dev_set_mtu	app/test-pmd/config.c	565
rte_eth_dev_set_mtu	lib/librte_ether/rte_ethdev.c	1534
rte_eth_dev_set_mtu	lib/librte_ether/rte_ethdev.h	2194
rte_eth_dev_vlan_filter	app/test-pmd/config.c	1660
rte_eth_dev_vlan_filter	app/test-pmd/config.c	1663
rte_eth_dev_vlan_filter	lib/librte_ether/rte_ethdev.c	1555
rte_eth_dev_vlan_filter	lib/librte_ether/rte_ethdev.h	2214
rte_eth_dev_set_vlan_strip_on_queue	app/test-pmd/config.c	1623
rte_eth_dev_set_vlan_strip_on_queue	examples/vhost/main.c	953
rte_eth_dev_set_vlan_strip_on_queue	examples/vhost_xen/main.c	736
rte_eth_dev_set_vlan_strip_on_queue	lib/librte_ether/rte_ethdev.c	1581
rte_eth_dev_set_vlan_strip_on_queue	lib/librte_ether/rte_ethdev.h	2235
rte_eth_dev_set_vlan_ether_type	app/test-pmd/config.c	1686
rte_eth_dev_set_vlan_ether_type	lib/librte_ether/rte_ethdev.c	1604
rte_eth_dev_set_vlan_ether_type	lib/librte_ether/rte_ethdev.h	2252
rte_eth_dev_set_vlan_offload	app/test-pmd/config.c	1587
rte_eth_dev_set_vlan_offload	app/test-pmd/config.c	1609
rte_eth_dev_set_vlan_offload	app/test-pmd/config.c	1645
rte_eth_dev_set_vlan_offload	lib/librte_ether/rte_ethdev.c	1621
rte_eth_dev_set_vlan_offload	lib/librte_ether/rte_ethdev.h	2274
rte_eth_dev_get_vlan_offload	app/test-pmd/config.c	339
rte_eth_dev_get_vlan_offload	app/test-pmd/config.c	1580
rte_eth_dev_get_vlan_offload	app/test-pmd/config.c	1602
rte_eth_dev_get_vlan_offload	app/test-pmd/config.c	1638
rte_eth_dev_get_vlan_offload	lib/librte_ether/rte_ethdev.c	1668
rte_eth_dev_get_vlan_offload	lib/librte_ether/rte_ethdev.h	2288
rte_eth_dev_set_vlan_pvid	app/test-pmd/config.c	1720
rte_eth_dev_set_vlan_pvid	lib/librte_ether/rte_ethdev.c	1693
rte_eth_dev_set_vlan_pvid	lib/librte_ether/rte_ethdev.h	2304
rte_eth_rx_burst	app/test-pipeline/pipeline_hash.c	439
rte_eth_rx_burst	app/test-pipeline/runtime.c	88
rte_eth_rx_burst	app/test-pmd/csumonly.c	506
rte_eth_rx_burst	app/test-pmd/flowgen.c	158
rte_eth_rx_burst	app/test-pmd/icmpecho.c	313
rte_eth_rx_burst	app/test-pmd/ieee1588fwd.c	540
rte_eth_rx_burst	app/test-pmd/iofwd.c	97
rte_eth_rx_burst	app/test-pmd/macfwd-retry.c	111
rte_eth_rx_burst	app/test-pmd/macfwd.c	102
rte_eth_rx_burst	app/test-pmd/macswap.c	102
rte_eth_rx_burst	app/test-pmd/rxonly.c	110
rte_eth_rx_burst	app/test-pmd/testpmd.c	922
rte_eth_rx_burst	app/test/test_link_bonding.c	1587
rte_eth_rx_burst	app/test/test_link_bonding.c	1672
rte_eth_rx_burst	app/test/test_link_bonding.c	1961
rte_eth_rx_burst	app/test/test_link_bonding.c	1964
rte_eth_rx_burst	app/test/test_link_bonding.c	2193
rte_eth_rx_burst	app/test/test_link_bonding.c	2195
rte_eth_rx_burst	app/test/test_link_bonding.c	2522
rte_eth_rx_burst	app/test/test_link_bonding.c	2524
rte_eth_rx_burst	app/test/test_link_bonding.c	3109
rte_eth_rx_burst	app/test/test_link_bonding.c	3451
rte_eth_rx_burst	app/test/test_link_bonding.c	3704
rte_eth_rx_burst	app/test/test_link_bonding.c	3980
rte_eth_rx_burst	app/test/test_link_bonding.c	3982
rte_eth_rx_burst	app/test/test_link_bonding.c	4177
rte_eth_rx_burst	app/test/test_link_bonding.c	4180
rte_eth_rx_burst	app/test/test_link_bonding.c	4507
rte_eth_rx_burst	app/test/test_link_bonding.c	4509
rte_eth_rx_burst	app/test/test_pmd_perf.c	394
rte_eth_rx_burst	app/test/test_pmd_perf.c	433
rte_eth_rx_burst	app/test/test_pmd_perf.c	470
rte_eth_rx_burst	app/test/test_pmd_perf.c	548
rte_eth_rx_burst	app/test/test_pmd_perf.c	611
rte_eth_rx_burst	app/test/test_pmd_ring.c	142
rte_eth_rx_burst	app/test/test_pmd_ring.c	178
rte_eth_rx_burst	app/test/test_pmd_ring.c	218
rte_eth_rx_burst	app/test/test_pmd_ring.c	289
rte_eth_rx_burst	app/test/test_pmd_ring.c	319
rte_eth_rx_burst	app/test/test_pmd_ring.c	349
rte_eth_rx_burst	app/test/test_pmd_ring.c	379
rte_eth_rx_burst	examples/distributor/main.c	230
rte_eth_rx_burst	examples/dpdk_qat/main.c	193
rte_eth_rx_burst	examples/exception_path/main.c	245
rte_eth_rx_burst	examples/ip_fragmentation/main.c	466
rte_eth_rx_burst	examples/ip_pipeline/pipeline_rx.c	305
rte_eth_rx_burst	examples/ip_reassembly/main.c	511
rte_eth_rx_burst	examples/ipv4_multicast/main.c	465
rte_eth_rx_burst	examples/kni/main.c	257
rte_eth_rx_burst	examples/l2fwd-ivshmem/host/host.c	610
rte_eth_rx_burst	examples/l2fwd/main.c	334
rte_eth_rx_burst	examples/l3fwd-acl/main.c	1454
rte_eth_rx_burst	examples/l3fwd-power/main.c	849
rte_eth_rx_burst	examples/l3fwd-vf/main.c	562
rte_eth_rx_burst	examples/l3fwd/main.c	1470
rte_eth_rx_burst	examples/link_status_interrupt/main.c	353
rte_eth_rx_burst	examples/load_balancer/runtime.c	196
rte_eth_rx_burst	examples/multi_process/client_server_mp/mp_server/main.c	288
rte_eth_rx_burst	examples/multi_process/l2fwd_fork/main.c	718
rte_eth_rx_burst	examples/multi_process/symmetric_mp/main.c	345
rte_eth_rx_burst	examples/netmap_compat/lib/compat_netmap.c	471
rte_eth_rx_burst	examples/qos_meter/main.c	217
rte_eth_rx_burst	examples/qos_sched/app_thread.c	96
rte_eth_rx_burst	examples/quota_watermark/qw/main.c	188
rte_eth_rx_burst	examples/skeleton/basicfwd.c	127
rte_eth_rx_burst	examples/vhost/main.c	981
rte_eth_rx_burst	examples/vhost/main.c	988
rte_eth_rx_burst	examples/vhost/main.c	1274
rte_eth_rx_burst	examples/vhost/main.c	2114
rte_eth_rx_burst	examples/vhost_xen/main.c	765
rte_eth_rx_burst	examples/vhost_xen/main.c	772
rte_eth_rx_burst	examples/vhost_xen/main.c	1065
rte_eth_rx_burst	examples/vmdq/main.c	520
rte_eth_rx_burst	examples/vmdq_dcb/main.c	354
rte_eth_rx_burst	lib/librte_ether/rte_ethdev.c	2715
rte_eth_rx_burst	lib/librte_ether/rte_ethdev.h	2312
rte_eth_rx_burst	lib/librte_ether/rte_ethdev.h	2328
rte_eth_rx_burst	lib/librte_ether/rte_ethdev.h	2332
rte_eth_rx_burst	lib/librte_ether/rte_ethdev.h	2339
rte_eth_rx_burst	lib/librte_ether/rte_ethdev.h	2354
rte_eth_rx_burst	lib/librte_ether/rte_ethdev.h	2358
rte_eth_rx_burst	lib/librte_ether/rte_ethdev.h	2367
rte_eth_rx_burst	lib/librte_ether/rte_ethdev.h	2389
rte_eth_rx_burst	lib/librte_ether/rte_ethdev.h	2393
rte_eth_rx_burst	lib/librte_pmd_bond/rte_eth_bond_pmd.c	78
rte_eth_rx_burst	lib/librte_pmd_bond/rte_eth_bond_pmd.c	100
rte_eth_rx_burst	lib/librte_pmd_bond/rte_eth_bond_pmd.c	136
rte_eth_rx_burst	lib/librte_port/rte_port_ethdev.c	83
rte_eth_rx_queue_count	lib/librte_ether/rte_ethdev.c	2758
rte_eth_rx_queue_count	lib/librte_ether/rte_ethdev.h	2414
rte_eth_rx_queue_count	lib/librte_ether/rte_ethdev.h	2417
rte_eth_rx_descriptor_done	examples/l3fwd-power/main.c	746
rte_eth_rx_descriptor_done	examples/l3fwd-power/main.c	750
rte_eth_rx_descriptor_done	examples/l3fwd-power/main.c	753
rte_eth_rx_descriptor_done	lib/librte_ether/rte_ethdev.c	2773
rte_eth_rx_descriptor_done	lib/librte_ether/rte_ethdev.h	2441
rte_eth_rx_descriptor_done	lib/librte_ether/rte_ethdev.h	2446
rte_eth_tx_burst	app/test-pipeline/runtime.c	166
rte_eth_tx_burst	app/test-pmd/csumonly.c	676
rte_eth_tx_burst	app/test-pmd/flowgen.c	219
rte_eth_tx_burst	app/test-pmd/icmpecho.c	470
rte_eth_tx_burst	app/test-pmd/ieee1588fwd.c	614
rte_eth_tx_burst	app/test-pmd/iofwd.c	106
rte_eth_tx_burst	app/test-pmd/macfwd-retry.c	128
rte_eth_tx_burst	app/test-pmd/macfwd-retry.c	136
rte_eth_tx_burst	app/test-pmd/macfwd.c	126
rte_eth_tx_burst	app/test-pmd/macswap.c	128
rte_eth_tx_burst	app/test-pmd/txonly.c	275
rte_eth_tx_burst	app/test/test_link_bonding.c	1392
rte_eth_tx_burst	app/test/test_link_bonding.c	1420
rte_eth_tx_burst	app/test/test_link_bonding.c	1501
rte_eth_tx_burst	app/test/test_link_bonding.c	1914
rte_eth_tx_burst	app/test/test_link_bonding.c	1915
rte_eth_tx_burst	app/test/test_link_bonding.c	2115
rte_eth_tx_burst	app/test/test_link_bonding.c	2151
rte_eth_tx_burst	app/test/test_link_bonding.c	2487
rte_eth_tx_burst	app/test/test_link_bonding.c	2489
rte_eth_tx_burst	app/test/test_link_bonding.c	2665
rte_eth_tx_burst	app/test/test_link_bonding.c	2700
rte_eth_tx_burst	app/test/test_link_bonding.c	2743
rte_eth_tx_burst	app/test/test_link_bonding.c	2748
rte_eth_tx_burst	app/test/test_link_bonding.c	2780
rte_eth_tx_burst	app/test/test_link_bonding.c	2855
rte_eth_tx_burst	app/test/test_link_bonding.c	2860
rte_eth_tx_burst	app/test/test_link_bonding.c	2893
rte_eth_tx_burst	app/test/test_link_bonding.c	2996
rte_eth_tx_burst	app/test/test_link_bonding.c	3013
rte_eth_tx_burst	app/test/test_link_bonding.c	3373
rte_eth_tx_burst	app/test/test_link_bonding.c	3375
rte_eth_tx_burst	app/test/test_link_bonding.c	3377
rte_eth_tx_burst	app/test/test_link_bonding.c	3379
rte_eth_tx_burst	app/test/test_link_bonding.c	3414
rte_eth_tx_burst	app/test/test_link_bonding.c	3416
rte_eth_tx_burst	app/test/test_link_bonding.c	3510
rte_eth_tx_burst	app/test/test_link_bonding.c	3541
rte_eth_tx_burst	app/test/test_link_bonding.c	3606
rte_eth_tx_burst	app/test/test_link_bonding.c	3938
rte_eth_tx_burst	app/test/test_link_bonding.c	3940
rte_eth_tx_burst	app/test/test_link_bonding.c	4085
rte_eth_tx_burst	app/test/test_link_bonding.c	4132
rte_eth_tx_burst	app/test/test_link_bonding.c	4469
rte_eth_tx_burst	app/test/test_link_bonding.c	4471
rte_eth_tx_burst	app/test/test_pmd_perf.c	402
rte_eth_tx_burst	app/test/test_pmd_perf.c	442
rte_eth_tx_burst	app/test/test_pmd_perf.c	480
rte_eth_tx_burst	app/test/test_pmd_perf.c	525
rte_eth_tx_burst	app/test/test_pmd_perf.c	669
rte_eth_tx_burst	app/test/test_pmd_ring.c	137
rte_eth_tx_burst	app/test/test_pmd_ring.c	174
rte_eth_tx_burst	app/test/test_pmd_ring.c	213
rte_eth_tx_burst	app/test/test_pmd_ring.c	284
rte_eth_tx_burst	app/test/test_pmd_ring.c	314
rte_eth_tx_burst	app/test/test_pmd_ring.c	344
rte_eth_tx_burst	app/test/test_pmd_ring.c	374
rte_eth_tx_burst	examples/distributor/main.c	270
rte_eth_tx_burst	examples/dpdk_qat/main.c	235
rte_eth_tx_burst	examples/dpdk_qat/main.c	270
rte_eth_tx_burst	examples/exception_path/main.c	291
rte_eth_tx_burst	examples/ip_fragmentation/main.c	257
rte_eth_tx_burst	examples/ip_pipeline/pipeline_tx.c	264
rte_eth_tx_burst	examples/ip_reassembly/main.c	297
rte_eth_tx_burst	examples/ipv4_multicast/main.c	212
rte_eth_tx_burst	examples/kni/main.c	299
rte_eth_tx_burst	examples/l2fwd-ivshmem/host/host.c	409
rte_eth_tx_burst	examples/l2fwd/main.c	200
rte_eth_tx_burst	examples/l3fwd-acl/main.c	1310
rte_eth_tx_burst	examples/l3fwd-power/main.c	444
rte_eth_tx_burst	examples/l3fwd-vf/main.c	319
rte_eth_tx_burst	examples/l3fwd/main.c	508
rte_eth_tx_burst	examples/l3fwd/main.c	556
rte_eth_tx_burst	examples/link_status_interrupt/main.c	218
rte_eth_tx_burst	examples/load_balancer/runtime.c	384
rte_eth_tx_burst	examples/load_balancer/runtime.c	432
rte_eth_tx_burst	examples/multi_process/client_server_mp/mp_client/client.c	181
rte_eth_tx_burst	examples/multi_process/l2fwd_fork/main.c	596
rte_eth_tx_burst	examples/multi_process/symmetric_mp/main.c	350
rte_eth_tx_burst	examples/netmap_compat/lib/compat_netmap.c	561
rte_eth_tx_burst	examples/qos_meter/main.c	201
rte_eth_tx_burst	examples/qos_meter/main.c	234
rte_eth_tx_burst	examples/qos_sched/app_thread.c	141
rte_eth_tx_burst	examples/quota_watermark/qw/main.c	110
rte_eth_tx_burst	examples/quota_watermark/qw/main.c	299
rte_eth_tx_burst	examples/skeleton/basicfwd.c	131
rte_eth_tx_burst	examples/vhost/main.c	1170
rte_eth_tx_burst	examples/vhost/main.c	1232
rte_eth_tx_burst	examples/vhost/main.c	1844
rte_eth_tx_burst	examples/vhost/main.c	2050
rte_eth_tx_burst	examples/vhost_xen/main.c	889
rte_eth_tx_burst	examples/vhost_xen/main.c	1025
rte_eth_tx_burst	examples/vmdq/main.c	531
rte_eth_tx_burst	examples/vmdq_dcb/main.c	360
rte_eth_tx_burst	lib/librte_ether/rte_ethdev.c	2736
rte_eth_tx_burst	lib/librte_ether/rte_ethdev.h	2459
rte_eth_tx_burst	lib/librte_ether/rte_ethdev.h	2464
rte_eth_tx_burst	lib/librte_ether/rte_ethdev.h	2467
rte_eth_tx_burst	lib/librte_ether/rte_ethdev.h	2478
rte_eth_tx_burst	lib/librte_ether/rte_ethdev.h	2481
rte_eth_tx_burst	lib/librte_ether/rte_ethdev.h	2486
rte_eth_tx_burst	lib/librte_ether/rte_ethdev.h	2489
rte_eth_tx_burst	lib/librte_ether/rte_ethdev.h	2494
rte_eth_tx_burst	lib/librte_ether/rte_ethdev.h	2516
rte_eth_tx_burst	lib/librte_ether/rte_ethdev.h	2520
rte_eth_tx_burst	lib/librte_pmd_bond/rte_eth_bond_pmd.c	218
rte_eth_tx_burst	lib/librte_pmd_bond/rte_eth_bond_pmd.c	251
rte_eth_tx_burst	lib/librte_pmd_bond/rte_eth_bond_pmd.c	521
rte_eth_tx_burst	lib/librte_pmd_bond/rte_eth_bond_pmd.c	572
rte_eth_tx_burst	lib/librte_pmd_bond/rte_eth_bond_pmd.c	663
rte_eth_tx_burst	lib/librte_pmd_bond/rte_eth_bond_pmd.c	718
rte_eth_tx_burst	lib/librte_port/rte_port_ethdev.c	152
rte_eth_tx_burst	lib/librte_port/rte_port_ethdev.c	232
rte_eth_dev_fdir_add_signature_filter	app/test-pmd/config.c	1784
rte_eth_dev_fdir_add_signature_filter	app/test-pmd/config.c	1789
rte_eth_dev_fdir_add_signature_filter	app/test-pmd/config.c	1824
rte_eth_dev_fdir_add_signature_filter	lib/librte_ether/rte_ethdev.c	1710
rte_eth_dev_fdir_add_signature_filter	lib/librte_ether/rte_ethdev.h	2552
rte_eth_dev_fdir_update_signature_filter	app/test-pmd/config.c	1802
rte_eth_dev_fdir_update_signature_filter	app/test-pmd/config.c	1807
rte_eth_dev_fdir_update_signature_filter	lib/librte_ether/rte_ethdev.c	1744
rte_eth_dev_fdir_update_signature_filter	lib/librte_ether/rte_ethdev.h	2578
rte_eth_dev_fdir_remove_signature_filter	app/test-pmd/config.c	1820
rte_eth_dev_fdir_remove_signature_filter	lib/librte_ether/rte_ethdev.c	1779
rte_eth_dev_fdir_remove_signature_filter	lib/librte_ether/rte_ethdev.h	2600
rte_eth_dev_fdir_get_infos	app/test-pmd/config.c	1896
rte_eth_dev_fdir_get_infos	lib/librte_ether/rte_ethdev.c	1811
rte_eth_dev_fdir_get_infos	lib/librte_ether/rte_ethdev.h	2617
rte_eth_dev_fdir_add_perfect_filter	app/test-pmd/config.c	1966
rte_eth_dev_fdir_add_perfect_filter	app/test-pmd/config.c	1971
rte_eth_dev_fdir_add_perfect_filter	lib/librte_ether/rte_ethdev.c	1833
rte_eth_dev_fdir_add_perfect_filter	lib/librte_ether/rte_ethdev.h	2647
rte_eth_dev_fdir_update_perfect_filter	app/test-pmd/config.c	1984
rte_eth_dev_fdir_update_perfect_filter	app/test-pmd/config.c	1989
rte_eth_dev_fdir_update_perfect_filter	app/test-pmd/config.c	2007
rte_eth_dev_fdir_update_perfect_filter	lib/librte_ether/rte_ethdev.c	1873
rte_eth_dev_fdir_update_perfect_filter	lib/librte_ether/rte_ethdev.h	2681
rte_eth_dev_fdir_remove_perfect_filter	app/test-pmd/config.c	2002
rte_eth_dev_fdir_remove_perfect_filter	lib/librte_ether/rte_ethdev.c	1912
rte_eth_dev_fdir_remove_perfect_filter	lib/librte_ether/rte_ethdev.h	2707
rte_eth_dev_fdir_set_masks	app/test-pmd/config.c	2019
rte_eth_dev_fdir_set_masks	lib/librte_ether/rte_ethdev.c	1950
rte_eth_dev_fdir_set_masks	lib/librte_ether/rte_ethdev.h	2743
rte_eth_dev_callback_register	app/test/test_link_bonding.c	1236
rte_eth_dev_callback_register	app/test/test_link_bonding.c	2036
rte_eth_dev_callback_register	examples/link_status_interrupt/main.c	706
rte_eth_dev_callback_register	lib/librte_ether/rte_ethdev.c	2790
rte_eth_dev_callback_register	lib/librte_ether/rte_ethdev.h	2777
rte_eth_dev_callback_register	lib/librte_pmd_bond/rte_eth_bond_api.c	410
rte_eth_dev_callback_unregister	app/test/test_link_bonding.c	1303
rte_eth_dev_callback_unregister	app/test/test_link_bonding.c	2064
rte_eth_dev_callback_unregister	lib/librte_ether/rte_ethdev.c	2830
rte_eth_dev_callback_unregister	lib/librte_ether/rte_ethdev.h	2798
rte_eth_dev_callback_unregister	lib/librte_pmd_bond/rte_eth_bond_api.c	486
rte_eth_led_on	lib/librte_ether/rte_ethdev.c	2228
rte_eth_led_on	lib/librte_ether/rte_ethdev.h	2830
rte_eth_led_off	lib/librte_ether/rte_ethdev.c	2243
rte_eth_led_off	lib/librte_ether/rte_ethdev.h	2844
rte_eth_dev_flow_ctrl_get	app/test-pmd/cmdline.c	5220
rte_eth_dev_flow_ctrl_get	lib/librte_ether/rte_ethdev.c	1970
rte_eth_dev_flow_ctrl_get	lib/librte_ether/rte_ethdev.h	2858
rte_eth_dev_flow_ctrl_set	app/test-pmd/cmdline.c	5265
rte_eth_dev_flow_ctrl_set	examples/quota_watermark/qw/init.c	105
rte_eth_dev_flow_ctrl_set	lib/librte_ether/rte_ethdev.c	1986
rte_eth_dev_flow_ctrl_set	lib/librte_ether/rte_ethdev.h	2875
rte_eth_dev_priority_flow_ctrl_set	app/test-pmd/cmdline.c	5313
rte_eth_dev_priority_flow_ctrl_set	lib/librte_ether/rte_ethdev.c	2006
rte_eth_dev_priority_flow_ctrl_set	lib/librte_ether/rte_ethdev.h	2893
rte_eth_dev_mac_addr_add	app/test-pmd/cmdline.c	6076
rte_eth_dev_mac_addr_add	app/test-pmd/cmdline.c	6558
rte_eth_dev_mac_addr_add	examples/vhost/main.c	946
rte_eth_dev_mac_addr_add	examples/vhost_xen/main.c	728
rte_eth_dev_mac_addr_add	examples/vmdq/main.c	314
rte_eth_dev_mac_addr_add	lib/librte_ether/rte_ethdev.c	2280
rte_eth_dev_mac_addr_add	lib/librte_ether/rte_ethdev.h	2914
rte_eth_dev_mac_addr_remove	app/test-pmd/cmdline.c	6078
rte_eth_dev_mac_addr_remove	examples/vhost/main.c	974
rte_eth_dev_mac_addr_remove	examples/vhost_xen/main.c	758
rte_eth_dev_mac_addr_remove	lib/librte_ether/rte_ethdev.c	2334
rte_eth_dev_mac_addr_remove	lib/librte_ether/rte_ethdev.h	2930
rte_eth_dev_rss_reta_update	app/test-pmd/cmdline.c	1809
rte_eth_dev_rss_reta_update	lib/librte_ether/rte_ethdev.c	2083
rte_eth_dev_rss_reta_update	lib/librte_ether/rte_ethdev.h	2947
rte_eth_dev_rss_reta_query	app/test-pmd/config.c	807
rte_eth_dev_rss_reta_query	lib/librte_ether/rte_ethdev.c	2113
rte_eth_dev_rss_reta_query	lib/librte_ether/rte_ethdev.h	2966
rte_eth_dev_uc_hash_table_set	app/test-pmd/cmdline.c	6188
rte_eth_dev_uc_hash_table_set	lib/librte_ether/rte_ethdev.c	2422
rte_eth_dev_uc_hash_table_set	lib/librte_ether/rte_ethdev.h	2988
rte_eth_dev_uc_all_hash_table_set	app/test-pmd/cmdline.c	6250
rte_eth_dev_uc_all_hash_table_set	lib/librte_ether/rte_ethdev.c	2478
rte_eth_dev_uc_all_hash_table_set	lib/librte_ether/rte_ethdev.h	3008
rte_eth_dev_set_vf_rxmode	app/test-pmd/cmdline.c	6491
rte_eth_dev_set_vf_rxmode	lib/librte_ether/rte_ethdev.c	2367
rte_eth_dev_set_vf_rxmode	lib/librte_ether/rte_ethdev.h	3032
rte_eth_dev_set_vf_tx	app/test-pmd/config.c	2098
rte_eth_dev_set_vf_tx	app/test-pmd/config.c	2105
rte_eth_dev_set_vf_tx	lib/librte_ether/rte_ethdev.c	2521
rte_eth_dev_set_vf_tx	lib/librte_ether/rte_ethdev.h	3052
rte_eth_dev_set_vf_rx	app/test-pmd/cmdline.c	6491
rte_eth_dev_set_vf_rx	app/test-pmd/config.c	2096
rte_eth_dev_set_vf_rx	app/test-pmd/config.c	2102
rte_eth_dev_set_vf_rx	lib/librte_ether/rte_ethdev.c	2367
rte_eth_dev_set_vf_rx	lib/librte_ether/rte_ethdev.c	2495
rte_eth_dev_set_vf_rx	lib/librte_ether/rte_ethdev.h	3032
rte_eth_dev_set_vf_rx	lib/librte_ether/rte_ethdev.h	3071
rte_eth_dev_set_vf_vlan_filter	app/test-pmd/config.c	2119
rte_eth_dev_set_vf_vlan_filter	app/test-pmd/config.c	2122
rte_eth_dev_set_vf_vlan_filter	lib/librte_ether/rte_ethdev.c	2547
rte_eth_dev_set_vf_vlan_filter	lib/librte_ether/rte_ethdev.h	3093
rte_eth_mirror_rule_set	app/test-pmd/cmdline.c	7094
rte_eth_mirror_rule_set	app/test-pmd/cmdline.c	7097
rte_eth_mirror_rule_set	app/test-pmd/cmdline.c	7182
rte_eth_mirror_rule_set	app/test-pmd/cmdline.c	7185
rte_eth_mirror_rule_set	lib/librte_ether/rte_ethdev.c	2648
rte_eth_mirror_rule_set	lib/librte_ether/rte_ethdev.h	3118
rte_eth_mirror_rule_reset	app/test-pmd/cmdline.c	7246
rte_eth_mirror_rule_reset	lib/librte_ether/rte_ethdev.c	2691
rte_eth_mirror_rule_reset	lib/librte_ether/rte_ethdev.h	3136
rte_eth_set_queue_rate_limit	app/test-pmd/config.c	2140
rte_eth_set_queue_rate_limit	app/test-pmd/config.c	2143
rte_eth_set_queue_rate_limit	lib/librte_ether/rte_ethdev.c	2576
rte_eth_set_queue_rate_limit	lib/librte_ether/rte_ethdev.h	3154
rte_eth_set_vf_rate_limit	app/test-pmd/config.c	2165
rte_eth_set_vf_rate_limit	app/test-pmd/config.c	2168
rte_eth_set_vf_rate_limit	lib/librte_ether/rte_ethdev.c	2610
rte_eth_set_vf_rate_limit	lib/librte_ether/rte_ethdev.h	3174
rte_eth_dev_bypass_init	app/test-pmd/testpmd.c	1756
rte_eth_dev_bypass_init	lib/librte_ether/rte_ethdev.c	2897
rte_eth_dev_bypass_init	lib/librte_ether/rte_ethdev.h	3188
rte_eth_dev_bypass_state_show	app/test-pmd/cmdline.c	3533
rte_eth_dev_bypass_state_show	lib/librte_ether/rte_ethdev.c	2917
rte_eth_dev_bypass_state_show	lib/librte_ether/rte_ethdev.h	3205
rte_eth_dev_bypass_state_set	app/test-pmd/cmdline.c	3293
rte_eth_dev_bypass_state_set	lib/librte_ether/rte_ethdev.c	2936
rte_eth_dev_bypass_state_set	lib/librte_ether/rte_ethdev.h	3222
rte_eth_dev_wd_timeout_store	app/test-pmd/cmdline.c	3379
rte_eth_dev_wd_timeout_store	lib/librte_ether/rte_ethdev.c	2996
rte_eth_dev_wd_timeout_store	lib/librte_ether/rte_ethdev.h	3294
rte_eth_dev_bypass_ver_show	lib/librte_ether/rte_ethdev.c	3016
rte_eth_dev_bypass_ver_show	lib/librte_ether/rte_ethdev.h	3308
rte_eth_dev_bypass_wd_timeout_show	lib/librte_ether/rte_ethdev.c	3036
rte_eth_dev_bypass_wd_timeout_show	lib/librte_ether/rte_ethdev.h	3330
rte_eth_dev_bypass_wd_reset	lib/librte_ether/rte_ethdev.c	3056
rte_eth_dev_bypass_wd_reset	lib/librte_ether/rte_ethdev.h	3342
rte_eth_dev_rss_hash_update	app/test-pmd/cmdline.c	1494
rte_eth_dev_rss_hash_update	app/test-pmd/config.c	898
rte_eth_dev_rss_hash_update	lib/librte_ether/rte_ethdev.c	2136
rte_eth_dev_rss_hash_update	lib/librte_ether/rte_ethdev.h	3357
rte_eth_dev_rss_hash_conf_get	app/test-pmd/config.c	840
rte_eth_dev_rss_hash_conf_get	app/test-pmd/config.c	895
rte_eth_dev_rss_hash_conf_get	lib/librte_ether/rte_ethdev.c	2159
rte_eth_dev_rss_hash_conf_get	lib/librte_ether/rte_ethdev.h	3374
rte_eth_dev_udp_tunnel_add	app/test-pmd/cmdline.c	6977
rte_eth_dev_udp_tunnel_add	lib/librte_ether/rte_ethdev.c	2175
rte_eth_dev_udp_tunnel_add	lib/librte_ether/rte_ethdev.h	3392
rte_eth_dev_udp_tunnel_delete	app/test-pmd/cmdline.c	6979
rte_eth_dev_udp_tunnel_delete	lib/librte_ether/rte_ethdev.c	2201
rte_eth_dev_udp_tunnel_delete	lib/librte_ether/rte_ethdev.h	3409
rte_eth_dev_add_syn_filter	app/test-pmd/cmdline.c	7396
rte_eth_dev_add_syn_filter	lib/librte_ether/rte_ethdev.c	3077
rte_eth_dev_add_syn_filter	lib/librte_ether/rte_ethdev.h	3426
rte_eth_dev_remove_syn_filter	app/test-pmd/cmdline.c	7399
rte_eth_dev_remove_syn_filter	lib/librte_ether/rte_ethdev.c	3093
rte_eth_dev_remove_syn_filter	lib/librte_ether/rte_ethdev.h	3439
rte_eth_dev_get_syn_filter	app/test-pmd/config.c	2181
rte_eth_dev_get_syn_filter	lib/librte_ether/rte_ethdev.c	3108
rte_eth_dev_get_syn_filter	lib/librte_ether/rte_ethdev.h	3455
rte_eth_dev_add_2tuple_filter	app/test-pmd/cmdline.c	7506
rte_eth_dev_add_2tuple_filter	lib/librte_ether/rte_ethdev.c	3127
rte_eth_dev_add_2tuple_filter	lib/librte_ether/rte_ethdev.h	3479
rte_eth_dev_remove_2tuple_filter	app/test-pmd/cmdline.c	7509
rte_eth_dev_remove_2tuple_filter	lib/librte_ether/rte_ethdev.c	3151
rte_eth_dev_remove_2tuple_filter	lib/librte_ether/rte_ethdev.h	3495
rte_eth_dev_get_2tuple_filter	app/test-pmd/config.c	2202
rte_eth_dev_get_2tuple_filter	lib/librte_ether/rte_ethdev.c	3166
rte_eth_dev_get_2tuple_filter	lib/librte_ether/rte_ethdev.h	3516
rte_eth_dev_add_5tuple_filter	app/test-pmd/cmdline.c	7694
rte_eth_dev_add_5tuple_filter	lib/librte_ether/rte_ethdev.c	3185
rte_eth_dev_add_5tuple_filter	lib/librte_ether/rte_ethdev.h	3539
rte_eth_dev_remove_5tuple_filter	app/test-pmd/cmdline.c	7697
rte_eth_dev_remove_5tuple_filter	lib/librte_ether/rte_ethdev.c	3209
rte_eth_dev_remove_5tuple_filter	lib/librte_ether/rte_ethdev.h	3555
rte_eth_dev_get_5tuple_filter	app/test-pmd/config.c	2231
rte_eth_dev_get_5tuple_filter	lib/librte_ether/rte_ethdev.c	3224
rte_eth_dev_get_5tuple_filter	lib/librte_ether/rte_ethdev.h	3575
rte_eth_dev_add_flex_filter	app/test-pmd/cmdline.c	7959
rte_eth_dev_add_flex_filter	app/test-pmd/cmdline.c	7963
rte_eth_dev_add_flex_filter	lib/librte_ether/rte_ethdev.c	3244
rte_eth_dev_add_flex_filter	lib/librte_ether/rte_ethdev.h	3599
rte_eth_dev_remove_flex_filter	app/test-pmd/cmdline.c	7967
rte_eth_dev_remove_flex_filter	lib/librte_ether/rte_ethdev.c	3260
rte_eth_dev_remove_flex_filter	lib/librte_ether/rte_ethdev.h	3615
rte_eth_dev_get_flex_filter	app/test-pmd/config.c	2270
rte_eth_dev_get_flex_filter	lib/librte_ether/rte_ethdev.c	3275
rte_eth_dev_get_flex_filter	lib/librte_ether/rte_ethdev.h	3636
rte_eth_dev_filter_supported	app/test-pmd/cmdline.c	8129
rte_eth_dev_filter_supported	app/test-pmd/cmdline.c	8303
rte_eth_dev_filter_supported	app/test-pmd/cmdline.c	8555
rte_eth_dev_filter_supported	app/test-pmd/cmdline.c	8799
rte_eth_dev_filter_supported	app/test-pmd/cmdline.c	8855
rte_eth_dev_filter_supported	app/test-pmd/cmdline.c	8944
rte_eth_dev_filter_supported	app/test-pmd/cmdline.c	9027
rte_eth_dev_filter_supported	app/test-pmd/config.c	1891
rte_eth_dev_filter_supported	lib/librte_ether/rte_ethdev.c	3295
rte_eth_dev_filter_supported	lib/librte_ether/rte_ethdev.h	3652
rte_eth_dev_filter_ctrl	app/test-pmd/cmdline.c	6333
rte_eth_dev_filter_ctrl	app/test-pmd/cmdline.c	6338
rte_eth_dev_filter_ctrl	app/test-pmd/cmdline.c	6879
rte_eth_dev_filter_ctrl	app/test-pmd/cmdline.c	6884
rte_eth_dev_filter_ctrl	app/test-pmd/cmdline.c	8149
rte_eth_dev_filter_ctrl	app/test-pmd/cmdline.c	8154
rte_eth_dev_filter_ctrl	app/test-pmd/cmdline.c	8383
rte_eth_dev_filter_ctrl	app/test-pmd/cmdline.c	8386
rte_eth_dev_filter_ctrl	app/test-pmd/cmdline.c	8562
rte_eth_dev_filter_ctrl	app/test-pmd/cmdline.c	8808
rte_eth_dev_filter_ctrl	app/test-pmd/cmdline.c	8866
rte_eth_dev_filter_ctrl	app/test-pmd/cmdline.c	8953
rte_eth_dev_filter_ctrl	app/test-pmd/cmdline.c	9052
rte_eth_dev_filter_ctrl	app/test-pmd/config.c	1917
rte_eth_dev_filter_ctrl	app/test-pmd/config.c	1920
rte_eth_dev_filter_ctrl	lib/librte_ether/rte_ethdev.c	3311
rte_eth_dev_filter_ctrl	lib/librte_ether/rte_ethdev.h	3673

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v8 03/14] eal/pci, ethdev: Remove assumption that port will not be detached
  2015-02-18  0:31  3%     ` Thomas Monjalon
@ 2015-02-18  1:54  0%       ` Tetsuya Mukawa
  2015-02-18  6:10  0%         ` Tetsuya Mukawa
  0 siblings, 1 reply; 200+ results
From: Tetsuya Mukawa @ 2015-02-18  1:54 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev, Neil Horman

On 2015/02/18 9:31, Thomas Monjalon wrote:
> 2015-02-17 15:14, Tetsuya Mukawa:
>> On 2015/02/17 9:36, Thomas Monjalon wrote:
>>> 2015-02-16 13:14, Tetsuya Mukawa:
> >>> Is uint8_t still a good size for hotpluggable virtual device ids?
>> I am not sure it's enough, but uint8_t is widely used in "rte_ethdev.c"
>> as port id.
> >> If someone reports it isn't enough, I guess it will be the time to
> >> write a patch to change all uint8_t in one patch.
> It's a big ABI breakage. So if we feel it's going to be required,
> it's better to do it now in 2.0 release I think.
>
> Any opinion?
>

Hi Thomas,

I agree with it.
I will add one more patch to change uint8_t to uint16_t.

Thanks,
Tetsuya

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v8 03/14] eal/pci, ethdev: Remove assumption that port will not be detached
  @ 2015-02-18  0:31  3%     ` Thomas Monjalon
  2015-02-18  1:54  0%       ` Tetsuya Mukawa
  0 siblings, 1 reply; 200+ results
From: Thomas Monjalon @ 2015-02-18  0:31 UTC (permalink / raw)
  To: Tetsuya Mukawa; +Cc: dev, Neil Horman

2015-02-17 15:14, Tetsuya Mukawa:
> On 2015/02/17 9:36, Thomas Monjalon wrote:
> > 2015-02-16 13:14, Tetsuya Mukawa:
> > > Is uint8_t still a good size for hotpluggable virtual device ids?
> 
> I am not sure it's enough, but uint8_t is widely used in "rte_ethdev.c"
> as port id.
> If someone reports it isn't enough, I guess it will be the time to
> write a patch to change all uint8_t in one patch.

It's a big ABI breakage. So if we feel it's going to be required,
it's better to do it now in 2.0 release I think.

Any opinion?

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v6 00/16] lib/librte_pmd_fm10k : fm10k pmd driver
  2015-02-17 14:18  3% ` [dpdk-dev] [PATCH v6 00/16] lib/librte_pmd_fm10k : fm10k pmd driver Chen Jing D(Mark)
  2015-02-17 14:18  1%   ` [dpdk-dev] [PATCH v6 03/16] fm10k: register fm10k pmd PF driver Chen Jing D(Mark)
@ 2015-02-18  0:13  0%   ` Thomas Monjalon
  1 sibling, 0 replies; 200+ results
From: Thomas Monjalon @ 2015-02-18  0:13 UTC (permalink / raw)
  To: Chen Jing D(Mark); +Cc: dev

2015-02-17 22:18, Chen Jing D:
> From: "Chen Jing D(Mark)" <jing.d.chen@intel.com>
> 
> The patch set add poll mode driver for the host interface of Intel
> Ethernet Switch FM10000 Series of silicons, which integrate NIC and
> switch functionalities. The patch set include below features:
> 
> 1. Basic RX/TX functions for PF/VF.
> 2. Interrupt handling mechanism for PF/VF.
> 3. per queue start/stop functions for PF/VF.
> 4. Mailbox handling between PF/VF and PF/Switch Manager.
> 5. Receive Side Scaling (RSS) for PF/VF.
> 6. Scatter receive function for PF/VF.
> 7. reta update/query for PF/VF.
> 8. VLAN filter set for PF.
> 9. Link status query for PF/VF.
> 
> Change in v6:
> - Merge ABI patch with fm10k driver register patch.
> - Fix typo.
> - Rework comments.
> - Minor adjustment on commit log.
> - Increase error variable after mbuf allocation failed.
> 
> Change in v5:
> - Add sanity check for mbuf allocation.
> - Add a new patch to claim fm10k driver review
> - Change commit log.
> - Add unlikely in func rx_desc_to_ol_flags to gain performance
> - Add a new patch to add ABI version
> 
> Change in v4:
> - Change commit log to remove improper words.
> 
> Changes in v3:
> - Update base driver.
> - Define several macros to pass base driver compile.
> 
> Changes in v2:
> - Merge 3 patches into 1 to configure fm10k compile environment.
> - Rework on log code to follow style in ixgbe.
> - Rework log message, remove redundant '\n'
> - Update Copyright year from "2014" to "2015"
> - Change base driver directory name from SHARED to base
> - Add more description in log for patch "add PF and VF interrupt"
> - Merge 2 patches into 1 to register fm10k driver
> - Define macro to replace numeric for lower 32-bit mask.
> 
> Chen Jing D(Mark) (1):
>   maintainers: claim for fm10k review
> 
> Jeff Shaw (15):
>   fm10k: add base driver
>   eal: add fm10k device id
>   fm10k: register fm10k pmd PF driver
>   config: change config files to add fm10k into compile
>   fm10k: add reta update/requery functions
>   fm10k: add Rx queue setup/release function
>   fm10k: add Tx queue setup/release function
>   fm10k: add Rx/Tx single queue start/stop function
>   fm10k: add dev start/stop functions
>   fm10k: add receive and tranmit function
>   fm10k: add PF RSS support
>   fm10k: add scatter receive function
>   fm10k: add function to set vlan
>   fm10k: add SRIOV-VF support
>   fm10k: add PF and VF interrupt handling function
> 
>  MAINTAINERS                                     |    4 +
>  config/common_bsdapp                            |   11 +
>  config/common_linuxapp                          |   11 +
>  lib/Makefile                                    |    1 +
>  lib/librte_eal/common/include/rte_pci_dev_ids.h |   22 +
>  lib/librte_pmd_fm10k/Makefile                   |  100 +
>  lib/librte_pmd_fm10k/base/fm10k_api.c           |  341 ++++
>  lib/librte_pmd_fm10k/base/fm10k_api.h           |   61 +
>  lib/librte_pmd_fm10k/base/fm10k_common.c        |  572 ++++++
>  lib/librte_pmd_fm10k/base/fm10k_common.h        |   52 +
>  lib/librte_pmd_fm10k/base/fm10k_mbx.c           | 2185 +++++++++++++++++++++++
>  lib/librte_pmd_fm10k/base/fm10k_mbx.h           |  329 ++++
>  lib/librte_pmd_fm10k/base/fm10k_osdep.h         |  148 ++
>  lib/librte_pmd_fm10k/base/fm10k_pf.c            | 1992 +++++++++++++++++++++
>  lib/librte_pmd_fm10k/base/fm10k_pf.h            |  155 ++
>  lib/librte_pmd_fm10k/base/fm10k_tlv.c           |  914 ++++++++++
>  lib/librte_pmd_fm10k/base/fm10k_tlv.h           |  199 ++
>  lib/librte_pmd_fm10k/base/fm10k_type.h          |  937 ++++++++++
>  lib/librte_pmd_fm10k/base/fm10k_vf.c            |  641 +++++++
>  lib/librte_pmd_fm10k/base/fm10k_vf.h            |   91 +
>  lib/librte_pmd_fm10k/fm10k.h                    |  292 +++
>  lib/librte_pmd_fm10k/fm10k_ethdev.c             | 1867 +++++++++++++++++++
>  lib/librte_pmd_fm10k/fm10k_logs.h               |   78 +
>  lib/librte_pmd_fm10k/fm10k_rxtx.c               |  462 +++++
>  lib/librte_pmd_fm10k/rte_pmd_fm10k_version.map  |    4 +
>  mk/rte.app.mk                                   |    4 +

Pulled from next/dpdk-fm10k, thanks.

^ permalink raw reply	[relevance 0%]

* Re: [dpdk-dev] [PATCH v3 1/5] ethdev: add rx interrupt enable/disable functions
  @ 2015-02-17 15:54  3%   ` Neil Horman
  2015-02-19  7:58  3%     ` Zhou, Danny
  0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2015-02-17 15:54 UTC (permalink / raw)
  To: Zhou Danny; +Cc: dev

On Tue, Feb 17, 2015 at 09:47:15PM +0800, Zhou Danny wrote:
> v3 changes
> - Add return value for interrupt enable/disable functions
> 
> Add two dev_ops functions to enable and disable rx queue interrupts
> 
> Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> Tested-by: Yong Liu <yong.liu@intel.com>
> ---
>  lib/librte_ether/rte_ethdev.c | 43 ++++++++++++++++++++++++++++++++
>  lib/librte_ether/rte_ethdev.h | 57 +++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 100 insertions(+)
> 
> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> index ea3a1fb..d27469a 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -2825,6 +2825,49 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
>  	}
>  	rte_spinlock_unlock(&rte_eth_dev_cb_lock);
>  }
> +
> +int
> +rte_eth_dev_rx_queue_intr_enable(uint8_t port_id,
> +				uint16_t queue_id)
> +{
> +	struct rte_eth_dev *dev;
> +
> +	if (port_id >= nb_ports) {
> +		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
> +		return (-ENODEV);
> +	}
> +
> +	dev = &rte_eth_devices[port_id];
> +	if (dev == NULL) {
> +		PMD_DEBUG_TRACE("Invalid port device\n");
> +		return (-ENODEV);
> +	}
> +
> +	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP);
> +	return (*dev->dev_ops->rx_queue_intr_enable)(dev, queue_id);
> +}
> +
> +int
> +rte_eth_dev_rx_queue_intr_disable(uint8_t port_id,
> +				uint16_t queue_id)
> +{
> +	struct rte_eth_dev *dev;
> +
> +	if (port_id >= nb_ports) {
> +		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
> +		return (-ENODEV);
> +	}
> +
> +	dev = &rte_eth_devices[port_id];
> +	if (dev == NULL) {
> +		PMD_DEBUG_TRACE("Invalid port device\n");
> +		return (-ENODEV);
> +	}
> +
> +	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP);
> +	return (*dev->dev_ops->rx_queue_intr_disable)(dev, queue_id);
> +}
> +
>  #ifdef RTE_NIC_BYPASS
>  int rte_eth_dev_bypass_init(uint8_t port_id)
>  {
> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> index 84160c3..0f320a9 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -848,6 +848,8 @@ struct rte_eth_fdir {
>  struct rte_intr_conf {
>  	/** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */
>  	uint16_t lsc;
> +	/** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */
> +	uint16_t rxq;
>  };
>  
>  /**
> @@ -1109,6 +1111,14 @@ typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
>  				    const struct rte_eth_txconf *tx_conf);
>  /**< @internal Setup a transmit queue of an Ethernet device. */
>  
> +typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
> +				    uint16_t rx_queue_id);
> +/**< @internal Enable interrupt of a receive queue of an Ethernet device. */
> +
> +typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
> +				    uint16_t rx_queue_id);
> +/**< @internal Disable interrupt of a receive queue of an Ethernet device. */
> +
>  typedef void (*eth_queue_release_t)(void *queue);
>  /**< @internal Release memory resources allocated by given RX/TX queue. */
>  
> @@ -1445,6 +1455,8 @@ struct eth_dev_ops {
>  	eth_queue_start_t          tx_queue_start;/**< Start TX for a queue.*/
>  	eth_queue_stop_t           tx_queue_stop;/**< Stop TX for a queue.*/
>  	eth_rx_queue_setup_t       rx_queue_setup;/**< Set up device RX queue.*/
> +	eth_rx_enable_intr_t       rx_queue_intr_enable; /**< Enable Rx queue interrupt. */
> +	eth_rx_disable_intr_t      rx_queue_intr_disable; /**< Disable Rx queue interrupt.*/
Put these at the end of eth_dev_ops if you want to avoid breaking ABI

>  	eth_queue_release_t        rx_queue_release;/**< Release RX queue.*/
>  	eth_rx_queue_count_t       rx_queue_count; /**< Get Rx queue count. */
>  	eth_rx_descriptor_done_t   rx_descriptor_done;  /**< Check rxd DD bit */
> @@ -2811,6 +2823,51 @@ void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
>  				enum rte_eth_event_type event);
>  
>  /**
> + * When there is no rx packet coming in Rx Queue for a long time, we can
> + * sleep lcore related to RX Queue for power saving, and enable rx interrupt
> + * to be triggered when rx packet arrives.
> + *
> + * The rte_eth_dev_rx_queue_intr_enable() function enables rx queue
> + * interrupt on specific rx queue of a port.
> + *
> + * @param port_id
> + *   The port identifier of the Ethernet device.
> + * @param queue_id
> + *   The index of the receive queue from which to retrieve input packets.
> + *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
> + *   to rte_eth_dev_configure().
> + * @return
> + *   - (0) if successful.
> + *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
> + *     that operation.
> + *   - (-ENODEV) if *port_id* invalid.
> + */
> +int rte_eth_dev_rx_queue_intr_enable(uint8_t port_id,
> +				uint16_t queue_id);
> +
> +/**
> + * When lcore wakes up from rx interrupt indicating packet coming, disable rx
> + * interrupt and returns to polling mode.
> + *
> + * The rte_eth_dev_rx_queue_intr_disable() function disables rx queue
> + * interrupt on specific rx queue of a port.
> + *
> + * @param port_id
> + *   The port identifier of the Ethernet device.
> + * @param queue_id
> + *   The index of the receive queue from which to retrieve input packets.
> + *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
> + *   to rte_eth_dev_configure().
> + * @return
> + *   - (0) if successful.
> + *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
> + *     that operation.
> + *   - (-ENODEV) if *port_id* invalid.
> + */
> +int rte_eth_dev_rx_queue_intr_disable(uint8_t port_id,
> +				uint16_t queue_id);
> +
> +/**
>   * Turn on the LED on the Ethernet device.
>   * This function turns on the LED on the Ethernet device.
>   *
> -- 
> 1.8.1.4
> 
> 

^ permalink raw reply	[relevance 3%]

* [dpdk-dev] [PATCH v6 03/16] fm10k: register fm10k pmd PF driver
  2015-02-17 14:18  3% ` [dpdk-dev] [PATCH v6 00/16] lib/librte_pmd_fm10k : fm10k pmd driver Chen Jing D(Mark)
@ 2015-02-17 14:18  1%   ` Chen Jing D(Mark)
  2015-02-18  0:13  0%   ` [dpdk-dev] [PATCH v6 00/16] lib/librte_pmd_fm10k : fm10k pmd driver Thomas Monjalon
  1 sibling, 0 replies; 200+ results
From: Chen Jing D(Mark) @ 2015-02-17 14:18 UTC (permalink / raw)
  To: dev

From: Jeff Shaw <jeffrey.b.shaw@intel.com>

1. Add init function to scan and initialize fm10k PF device.
2. Add implementation to register fm10k pmd PF driver.
3. Add 3 functions fm10k_dev_configure, fm10k_stats_get and
   fm10k_stats_reset.
4. Add fm10k.h to define macros and basic data structure.
5. Add fm10k_logs.h to control log message output.
6. Add Makefile.
7. Add ABI version of librte_pmd_fm10k

Signed-off-by: Jeff Shaw <jeffrey.b.shaw@intel.com>
Signed-off-by: Chen Jing D(Mark) <jing.d.chen@intel.com>
Signed-off-by: Michael Qiu <michael.qiu@intel.com>
---
 lib/librte_pmd_fm10k/Makefile                  |   99 +++++++
 lib/librte_pmd_fm10k/fm10k.h                   |  224 +++++++++++++++
 lib/librte_pmd_fm10k/fm10k_ethdev.c            |  343 ++++++++++++++++++++++++
 lib/librte_pmd_fm10k/fm10k_logs.h              |   78 ++++++
 lib/librte_pmd_fm10k/rte_pmd_fm10k_version.map |    4 +
 5 files changed, 748 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_pmd_fm10k/Makefile
 create mode 100644 lib/librte_pmd_fm10k/fm10k.h
 create mode 100644 lib/librte_pmd_fm10k/fm10k_ethdev.c
 create mode 100644 lib/librte_pmd_fm10k/fm10k_logs.h
 create mode 100644 lib/librte_pmd_fm10k/rte_pmd_fm10k_version.map

diff --git a/lib/librte_pmd_fm10k/Makefile b/lib/librte_pmd_fm10k/Makefile
new file mode 100644
index 0000000..b24cc67
--- /dev/null
+++ b/lib/librte_pmd_fm10k/Makefile
@@ -0,0 +1,99 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_fm10k.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+EXPORT_MAP := rte_pmd_fm10k_version.map
+
+LIBABIVER := 1
+
+ifeq ($(CC), icc)
+#
+# CFLAGS for icc
+#
+CFLAGS_BASE_DRIVER = -wd174 -wd593 -wd869 -wd981 -wd2259
+
+else ifeq ($(CC), clang)
+#
+## CFLAGS for clang
+#
+CFLAGS_BASE_DRIVER = -Wno-unused-parameter -Wno-unused-value
+CFLAGS_BASE_DRIVER += -Wno-strict-aliasing -Wno-format-extra-args
+CFLAGS_BASE_DRIVER += -Wno-unused-variable -Wno-unused-but-set-variable
+CFLAGS_BASE_DRIVER += -Wno-missing-field-initializers
+
+else
+#
+# CFLAGS for gcc
+#
+ifneq ($(shell test $(GCC_MAJOR_VERSION) -le 4 -a $(GCC_MINOR_VERSION) -le 3 && echo 1), 1)
+CFLAGS     += -Wno-deprecated
+endif
+CFLAGS_BASE_DRIVER = -Wno-unused-parameter -Wno-unused-value
+CFLAGS_BASE_DRIVER += -Wno-strict-aliasing -Wno-format-extra-args
+CFLAGS_BASE_DRIVER += -Wno-unused-variable -Wno-unused-but-set-variable
+CFLAGS_BASE_DRIVER += -Wno-missing-field-initializers
+endif
+
+#
+# Add extra flags for base driver source files to disable warnings in them
+#
+BASE_DRIVER_OBJS=$(patsubst %.c,%.o,$(notdir $(wildcard $(RTE_SDK)/lib/librte_pmd_fm10k/base/*.c)))
+$(foreach obj, $(BASE_DRIVER_OBJS), $(eval CFLAGS_$(obj)+=$(CFLAGS_BASE_DRIVER)))
+
+VPATH += $(RTE_SDK)/lib/librte_pmd_fm10k/base
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k_ethdev.c
+
+SRCS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k_pf.c
+SRCS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k_tlv.c
+SRCS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k_common.c
+SRCS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k_mbx.c
+SRCS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k_vf.c
+SRCS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k_api.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += lib/librte_eal lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += lib/librte_mempool lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += lib/librte_net lib/librte_malloc
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_pmd_fm10k/fm10k.h b/lib/librte_pmd_fm10k/fm10k.h
new file mode 100644
index 0000000..1468040
--- /dev/null
+++ b/lib/librte_pmd_fm10k/fm10k.h
@@ -0,0 +1,224 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _FM10K_H_
+#define _FM10K_H_
+
+#include <stdint.h>
+#include <rte_mbuf.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_spinlock.h>
+#include "fm10k_logs.h"
+#include "base/fm10k_type.h"
+
+/* descriptor ring base addresses must be aligned to the following */
+#define FM10K_ALIGN_RX_DESC  128
+#define FM10K_ALIGN_TX_DESC  128
+
+/* The maximum packet size that FM10K supports */
+#define FM10K_MAX_PKT_SIZE  (15 * 1024)
+
+/* Minimum size of RX buffer FM10K supported */
+#define FM10K_MIN_RX_BUF_SIZE  256
+
+/* The maximum of SRIOV VFs per port supported */
+#define FM10K_MAX_VF_NUM    64
+
+/* number of descriptors must be a multiple of the following */
+#define FM10K_MULT_RX_DESC  FM10K_REQ_RX_DESCRIPTOR_MULTIPLE
+#define FM10K_MULT_TX_DESC  FM10K_REQ_TX_DESCRIPTOR_MULTIPLE
+
+/* maximum size of descriptor rings */
+#define FM10K_MAX_RX_RING_SZ  (512 * 1024)
+#define FM10K_MAX_TX_RING_SZ  (512 * 1024)
+
+/* minimum and maximum number of descriptors in a ring */
+#define FM10K_MIN_RX_DESC  32
+#define FM10K_MIN_TX_DESC  32
+#define FM10K_MAX_RX_DESC  (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc))
+#define FM10K_MAX_TX_DESC  (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc))
+
+/*
+ * byte alignment for HW RX data buffer
+ * Datasheet requires RX buffer addresses shall either be 512-byte aligned or
+ * be 8-byte aligned but without crossing host memory pages (4KB alignment
+ * boundaries). Satisfy first option.
+ */
+#define FM10K_RX_DATABUF_ALIGN 512
+
+/*
+ * threshold default, min, max, and divisor constraints
+ * the configured values must satisfy the following:
+ *   MIN <= value <= MAX
+ *   DIV % value == 0
+ */
+#define FM10K_RX_FREE_THRESH_DEFAULT(rxq)  32
+#define FM10K_RX_FREE_THRESH_MIN(rxq)      1
+#define FM10K_RX_FREE_THRESH_MAX(rxq)      ((rxq)->nb_desc - 1)
+#define FM10K_RX_FREE_THRESH_DIV(rxq)      ((rxq)->nb_desc)
+
+#define FM10K_TX_FREE_THRESH_DEFAULT(txq)  32
+#define FM10K_TX_FREE_THRESH_MIN(txq)      1
+#define FM10K_TX_FREE_THRESH_MAX(txq)      ((txq)->nb_desc - 3)
+#define FM10K_TX_FREE_THRESH_DIV(txq)      0
+
+#define FM10K_DEFAULT_RX_PTHRESH      8
+#define FM10K_DEFAULT_RX_HTHRESH      8
+#define FM10K_DEFAULT_RX_WTHRESH      0
+
+#define FM10K_DEFAULT_TX_PTHRESH      32
+#define FM10K_DEFAULT_TX_HTHRESH      0
+#define FM10K_DEFAULT_TX_WTHRESH      0
+
+#define FM10K_TX_RS_THRESH_DEFAULT(txq)    32
+#define FM10K_TX_RS_THRESH_MIN(txq)        1
+#define FM10K_TX_RS_THRESH_MAX(txq)        \
+	RTE_MIN(((txq)->nb_desc - 2), (txq)->free_thresh)
+#define FM10K_TX_RS_THRESH_DIV(txq)        ((txq)->nb_desc)
+
+#define FM10K_VLAN_TAG_SIZE 4
+
+struct fm10k_dev_info {
+	volatile uint32_t enable;
+	volatile uint32_t glort;
+	/* Protect the mailbox to avoid race condition */
+	rte_spinlock_t    mbx_lock;
+};
+
+/*
+ * Structure to store private data for each driver instance.
+ */
+struct fm10k_adapter {
+	struct fm10k_hw             hw;
+	struct fm10k_hw_stats       stats;
+	struct fm10k_dev_info       info;
+};
+
+#define FM10K_DEV_PRIVATE_TO_HW(adapter) \
+	(&((struct fm10k_adapter *)adapter)->hw)
+
+#define FM10K_DEV_PRIVATE_TO_STATS(adapter) \
+	(&((struct fm10k_adapter *)adapter)->stats)
+
+#define FM10K_DEV_PRIVATE_TO_INFO(adapter) \
+	(&((struct fm10k_adapter *)adapter)->info)
+
+#define FM10K_DEV_PRIVATE_TO_MBXLOCK(adapter) \
+	(&(((struct fm10k_adapter *)adapter)->info.mbx_lock))
+
+struct fm10k_rx_queue {
+	struct rte_mempool *mp;
+	struct rte_mbuf **sw_ring;
+	volatile union fm10k_rx_desc *hw_ring;
+	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
+	struct rte_mbuf *pkt_last_seg;  /**< Last segment of current packet. */
+	uint64_t hw_ring_phys_addr;
+	uint16_t next_dd;
+	uint16_t next_alloc;
+	uint16_t next_trigger;
+	uint16_t alloc_thresh;
+	volatile uint32_t *tail_ptr;
+	uint16_t nb_desc;
+	uint16_t queue_id;
+	uint8_t port_id;
+	uint8_t drop_en;
+	uint8_t rx_deferred_start; /**< don't start this queue in dev start. */
+};
+
+/*
+ * a FIFO is used to track which descriptors have their RS bit set for Tx
+ * queues which are configured to allow multiple descriptors per packet
+ */
+struct fifo {
+	uint16_t *list;
+	uint16_t *head;
+	uint16_t *tail;
+	uint16_t *endp;
+};
+
+struct fm10k_tx_queue {
+	struct rte_mbuf **sw_ring;
+	struct fm10k_tx_desc *hw_ring;
+	uint64_t hw_ring_phys_addr;
+	struct fifo rs_tracker;
+	uint16_t last_free;
+	uint16_t next_free;
+	uint16_t nb_free;
+	uint16_t nb_used;
+	uint16_t free_trigger;
+	uint16_t free_thresh;
+	uint16_t rs_thresh;
+	volatile uint32_t *tail_ptr;
+	uint16_t nb_desc;
+	uint8_t port_id;
+	uint8_t tx_deferred_start; /** < don't start this queue in dev start. */
+	uint16_t queue_id;
+};
+
+#define MBUF_DMA_ADDR(mb) \
+	((uint64_t) ((mb)->buf_physaddr + (mb)->data_off))
+
+/* enforce 512B alignment on default Rx DMA addresses */
+#define MBUF_DMA_ADDR_DEFAULT(mb) \
+	((uint64_t) RTE_ALIGN(((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM), 512))
+
+static inline void fifo_reset(struct fifo *fifo, uint32_t len)
+{
+	fifo->head = fifo->tail = fifo->list;
+	fifo->endp = fifo->list + len;
+}
+
+static inline void fifo_insert(struct fifo *fifo, uint16_t val)
+{
+	*fifo->head = val;
+	if (++fifo->head == fifo->endp)
+		fifo->head = fifo->list;
+}
+
+/* do not worry about list being empty since we only check it once we know
+ * we have used enough descriptors to set the RS bit at least once */
+static inline uint16_t fifo_peek(struct fifo *fifo)
+{
+	return *fifo->tail;
+}
+
+static inline uint16_t fifo_remove(struct fifo *fifo)
+{
+	uint16_t val;
+	val = *fifo->tail;
+	if (++fifo->tail == fifo->endp)
+		fifo->tail = fifo->list;
+	return val;
+}
+#endif
diff --git a/lib/librte_pmd_fm10k/fm10k_ethdev.c b/lib/librte_pmd_fm10k/fm10k_ethdev.c
new file mode 100644
index 0000000..0b75299
--- /dev/null
+++ b/lib/librte_pmd_fm10k/fm10k_ethdev.c
@@ -0,0 +1,343 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <rte_string_fns.h>
+#include <rte_dev.h>
+#include <rte_spinlock.h>
+
+#include "fm10k.h"
+#include "base/fm10k_api.h"
+
+/* Default delay to acquire mailbox lock */
+#define FM10K_MBXLOCK_DELAY_US 20
+
+static void
+fm10k_mbx_initlock(struct fm10k_hw *hw)
+{
+	rte_spinlock_init(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back));
+}
+
+static void
+fm10k_mbx_lock(struct fm10k_hw *hw)
+{
+	while (!rte_spinlock_trylock(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back)))
+		rte_delay_us(FM10K_MBXLOCK_DELAY_US);
+}
+
+static void
+fm10k_mbx_unlock(struct fm10k_hw *hw)
+{
+	rte_spinlock_unlock(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back));
+}
+
+static int
+fm10k_dev_configure(struct rte_eth_dev *dev)
+{
+	PMD_INIT_FUNC_TRACE();
+
+	if (dev->data->dev_conf.rxmode.hw_strip_crc == 0)
+		PMD_INIT_LOG(WARNING, "fm10k always strip CRC");
+
+	return 0;
+}
+
+static void
+fm10k_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+	uint64_t ipackets, opackets, ibytes, obytes;
+	struct fm10k_hw *hw =
+		FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct fm10k_hw_stats *hw_stats =
+		FM10K_DEV_PRIVATE_TO_STATS(dev->data->dev_private);
+	int i;
+
+	PMD_INIT_FUNC_TRACE();
+
+	fm10k_update_hw_stats(hw, hw_stats);
+
+	ipackets = opackets = ibytes = obytes = 0;
+	for (i = 0; (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) &&
+		(i < FM10K_MAX_QUEUES_PF); ++i) {
+		stats->q_ipackets[i] = hw_stats->q[i].rx_packets.count;
+		stats->q_opackets[i] = hw_stats->q[i].tx_packets.count;
+		stats->q_ibytes[i]   = hw_stats->q[i].rx_bytes.count;
+		stats->q_obytes[i]   = hw_stats->q[i].tx_bytes.count;
+		ipackets += stats->q_ipackets[i];
+		opackets += stats->q_opackets[i];
+		ibytes   += stats->q_ibytes[i];
+		obytes   += stats->q_obytes[i];
+	}
+	stats->ipackets = ipackets;
+	stats->opackets = opackets;
+	stats->ibytes = ibytes;
+	stats->obytes = obytes;
+}
+
+static void
+fm10k_stats_reset(struct rte_eth_dev *dev)
+{
+	struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct fm10k_hw_stats *hw_stats =
+		FM10K_DEV_PRIVATE_TO_STATS(dev->data->dev_private);
+
+	PMD_INIT_FUNC_TRACE();
+
+	memset(hw_stats, 0, sizeof(*hw_stats));
+	fm10k_rebind_hw_stats(hw, hw_stats);
+}
+
+/* Mailbox message handler in VF */
+static const struct fm10k_msg_data fm10k_msgdata_vf[] = {
+	FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test),
+	FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_msg_mac_vlan_vf),
+	FM10K_VF_MSG_LPORT_STATE_HANDLER(fm10k_msg_lport_state_vf),
+	FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
+};
+
+/* Mailbox message handler in PF */
+static const struct fm10k_msg_data fm10k_msgdata_pf[] = {
+	FM10K_PF_MSG_ERR_HANDLER(XCAST_MODES, fm10k_msg_err_pf),
+	FM10K_PF_MSG_ERR_HANDLER(UPDATE_MAC_FWD_RULE, fm10k_msg_err_pf),
+	FM10K_PF_MSG_LPORT_MAP_HANDLER(fm10k_msg_lport_map_pf),
+	FM10K_PF_MSG_ERR_HANDLER(LPORT_CREATE, fm10k_msg_err_pf),
+	FM10K_PF_MSG_ERR_HANDLER(LPORT_DELETE, fm10k_msg_err_pf),
+	FM10K_PF_MSG_UPDATE_PVID_HANDLER(fm10k_msg_update_pvid_pf),
+	FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
+};
+
+static int
+fm10k_setup_mbx_service(struct fm10k_hw *hw)
+{
+	int err;
+
+	/* Initialize mailbox lock */
+	fm10k_mbx_initlock(hw);
+
+	/* Replace default message handler with new ones */
+	if (hw->mac.type == fm10k_mac_pf)
+		err = hw->mbx.ops.register_handlers(&hw->mbx, fm10k_msgdata_pf);
+	else
+		err = hw->mbx.ops.register_handlers(&hw->mbx, fm10k_msgdata_vf);
+
+	if (err) {
+		PMD_INIT_LOG(ERR, "Failed to register mailbox handler.err:%d",
+				err);
+		return err;
+	}
+	/* Connect to SM for PF device or PF for VF device */
+	return hw->mbx.ops.connect(hw, &hw->mbx);
+}
+
+static struct eth_dev_ops fm10k_eth_dev_ops = {
+	.dev_configure		= fm10k_dev_configure,
+	.stats_get		= fm10k_stats_get,
+	.stats_reset		= fm10k_stats_reset,
+};
+
+static int
+eth_fm10k_dev_init(__rte_unused struct eth_driver *eth_drv,
+	struct rte_eth_dev *dev)
+{
+	struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	int diag;
+
+	PMD_INIT_FUNC_TRACE();
+
+	dev->dev_ops = &fm10k_eth_dev_ops;
+
+	/* only initialize in the primary process */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return 0;
+
+	/* Vendor and Device ID need to be set before init of shared code */
+	memset(hw, 0, sizeof(*hw));
+	hw->device_id = dev->pci_dev->id.device_id;
+	hw->vendor_id = dev->pci_dev->id.vendor_id;
+	hw->subsystem_device_id = dev->pci_dev->id.subsystem_device_id;
+	hw->subsystem_vendor_id = dev->pci_dev->id.subsystem_vendor_id;
+	hw->revision_id = 0;
+	hw->hw_addr = (void *)dev->pci_dev->mem_resource[0].addr;
+	if (hw->hw_addr == NULL) {
+		PMD_INIT_LOG(ERR, "Bad mem resource."
+			" Try to blacklist unused devices.");
+		return -EIO;
+	}
+
+	/* Store fm10k_adapter pointer */
+	hw->back = dev->data->dev_private;
+
+	/* Initialize the shared code */
+	diag = fm10k_init_shared_code(hw);
+	if (diag != FM10K_SUCCESS) {
+		PMD_INIT_LOG(ERR, "Shared code init failed: %d", diag);
+		return -EIO;
+	}
+
+	/*
+	 * Initialize bus info. Normally we would call fm10k_get_bus_info(), but
+	 * there is no way to get link status without reading BAR4.  Until this
+	 * works, assume we have maximum bandwidth.
+	 * @todo - fix bus info
+	 */
+	hw->bus_caps.speed = fm10k_bus_speed_8000;
+	hw->bus_caps.width = fm10k_bus_width_pcie_x8;
+	hw->bus_caps.payload = fm10k_bus_payload_512;
+	hw->bus.speed = fm10k_bus_speed_8000;
+	hw->bus.width = fm10k_bus_width_pcie_x8;
+	hw->bus.payload = fm10k_bus_payload_256;
+
+	/* Initialize the hw */
+	diag = fm10k_init_hw(hw);
+	if (diag != FM10K_SUCCESS) {
+		PMD_INIT_LOG(ERR, "Hardware init failed: %d", diag);
+		return -EIO;
+	}
+
+	/* Initialize MAC address(es) */
+	dev->data->mac_addrs = rte_zmalloc("fm10k", ETHER_ADDR_LEN, 0);
+	if (dev->data->mac_addrs == NULL) {
+		PMD_INIT_LOG(ERR, "Cannot allocate memory for MAC addresses");
+		return -ENOMEM;
+	}
+
+	diag = fm10k_read_mac_addr(hw);
+	if (diag != FM10K_SUCCESS) {
+		/*
+		 * TODO: remove special handling on VF. Need shared code to
+		 * fix first.
+		 */
+		if (hw->mac.type == fm10k_mac_pf) {
+			PMD_INIT_LOG(ERR, "Read MAC addr failed: %d", diag);
+			return -EIO;
+		} else {
+			/* Generate a random addr */
+			eth_random_addr(hw->mac.addr);
+			memcpy(hw->mac.perm_addr, hw->mac.addr, ETH_ALEN);
+		}
+	}
+
+	ether_addr_copy((const struct ether_addr *)hw->mac.addr,
+			&dev->data->mac_addrs[0]);
+
+	/* Reset the hw statistics */
+	fm10k_stats_reset(dev);
+
+	/* Reset the hw */
+	diag = fm10k_reset_hw(hw);
+	if (diag != FM10K_SUCCESS) {
+		PMD_INIT_LOG(ERR, "Hardware reset failed: %d", diag);
+		return -EIO;
+	}
+
+	/* Setup mailbox service */
+	diag = fm10k_setup_mbx_service(hw);
+	if (diag != FM10K_SUCCESS) {
+		PMD_INIT_LOG(ERR, "Failed to setup mailbox: %d", diag);
+		return -EIO;
+	}
+
+	/*
+	 * Below function will trigger operations on mailbox, acquire lock to
+	 * avoid race condition from interrupt handler. Operations on mailbox
+	 * FIFO will trigger interrupt to PF/SM, in which interrupt handler
+	 * will handle and generate an interrupt to our side. Then,  FIFO in
+	 * mailbox will be touched.
+	 */
+	fm10k_mbx_lock(hw);
+	/* Enable port first */
+	hw->mac.ops.update_lport_state(hw, 0, 0, 1);
+
+	/* Update default vlan */
+	hw->mac.ops.update_vlan(hw, hw->mac.default_vid, 0, true);
+
+	/*
+	 * Add default mac/vlan filter. glort is assigned by SM for PF, while is
+	 * unused for VF. PF will assign correct glort for VF.
+	 */
+	hw->mac.ops.update_uc_addr(hw, hw->mac.dglort_map, hw->mac.addr,
+			      hw->mac.default_vid, 1, 0);
+
+	/* Set unicast mode by default. App can change to other mode in other
+	 * API func.
+	 */
+	hw->mac.ops.update_xcast_mode(hw, hw->mac.dglort_map,
+					FM10K_XCAST_MODE_MULTI);
+
+	fm10k_mbx_unlock(hw);
+
+	return 0;
+}
+
+/*
+ * The set of PCI devices this driver supports. This driver will enable both PF
+ * and SRIOV-VF devices.
+ */
+static struct rte_pci_id pci_id_fm10k_map[] = {
+#define RTE_PCI_DEV_ID_DECL_FM10K(vend, dev) { RTE_PCI_DEVICE(vend, dev) },
+#include "rte_pci_dev_ids.h"
+	{ .vendor_id = 0, /* sentinel */ },
+};
+
+static struct eth_driver rte_pmd_fm10k = {
+	{
+		.name = "rte_pmd_fm10k",
+		.id_table = pci_id_fm10k_map,
+		.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+	},
+	.eth_dev_init = eth_fm10k_dev_init,
+	.dev_private_size = sizeof(struct fm10k_adapter),
+};
+
+/*
+ * Driver initialization routine.
+ * Invoked once at EAL init time.
+ * Register itself as the [Poll Mode] Driver of PCI FM10K devices.
+ */
+static int
+rte_pmd_fm10k_init(__rte_unused const char *name,
+	__rte_unused const char *params)
+{
+	PMD_INIT_FUNC_TRACE();
+	rte_eth_driver_register(&rte_pmd_fm10k);
+	return 0;
+}
+
+static struct rte_driver rte_fm10k_driver = {
+	.type = PMD_PDEV,
+	.init = rte_pmd_fm10k_init,
+};
+
+PMD_REGISTER_DRIVER(rte_fm10k_driver);
diff --git a/lib/librte_pmd_fm10k/fm10k_logs.h b/lib/librte_pmd_fm10k/fm10k_logs.h
new file mode 100644
index 0000000..febd796
--- /dev/null
+++ b/lib/librte_pmd_fm10k/fm10k_logs.h
@@ -0,0 +1,78 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _FM10K_LOGS_H_
+#define _FM10K_LOGS_H_
+
+#define PMD_INIT_LOG(level, fmt, args...) \
+	rte_log(RTE_LOG_ ## level, RTE_LOGTYPE_PMD, \
+		"PMD: %s(): " fmt "\n", __func__, ##args)
+
+#ifdef RTE_LIBRTE_FM10K_DEBUG_INIT
+#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
+#else
+#define PMD_INIT_FUNC_TRACE() do { } while (0)
+#endif
+
+#ifdef RTE_LIBRTE_FM10K_DEBUG_RX
+#define PMD_RX_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+#else
+#define PMD_RX_LOG(level, fmt, args...) do { } while (0)
+#endif
+
+#ifdef RTE_LIBRTE_FM10K_DEBUG_TX
+#define PMD_TX_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+#else
+#define PMD_TX_LOG(level, fmt, args...) do { } while (0)
+#endif
+
+#ifdef RTE_LIBRTE_FM10K_DEBUG_TX_FREE
+#define PMD_TX_FREE_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+#else
+#define PMD_TX_FREE_LOG(level, fmt, args...) do { } while (0)
+#endif
+
+#ifdef RTE_LIBRTE_FM10K_DEBUG_DRIVER
+#define PMD_DRV_LOG_RAW(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s(): " fmt, __func__, ## args)
+#else
+#define PMD_DRV_LOG_RAW(level, fmt, args...) do { } while (0)
+#endif
+
+#define PMD_DRV_LOG(level, fmt, args...) \
+	PMD_DRV_LOG_RAW(level, fmt "\n", ## args)
+
+#endif /* _FM10K_LOGS_H_ */
diff --git a/lib/librte_pmd_fm10k/rte_pmd_fm10k_version.map b/lib/librte_pmd_fm10k/rte_pmd_fm10k_version.map
new file mode 100644
index 0000000..ef35398
--- /dev/null
+++ b/lib/librte_pmd_fm10k/rte_pmd_fm10k_version.map
@@ -0,0 +1,4 @@
+DPDK_2.0 {
+
+	local: *;
+};
-- 
1.7.7.6

^ permalink raw reply	[relevance 1%]

* [dpdk-dev] [PATCH v6 00/16] lib/librte_pmd_fm10k : fm10k pmd driver
  @ 2015-02-17 14:18  3% ` Chen Jing D(Mark)
  2015-02-17 14:18  1%   ` [dpdk-dev] [PATCH v6 03/16] fm10k: register fm10k pmd PF driver Chen Jing D(Mark)
  2015-02-18  0:13  0%   ` [dpdk-dev] [PATCH v6 00/16] lib/librte_pmd_fm10k : fm10k pmd driver Thomas Monjalon
  0 siblings, 2 replies; 200+ results
From: Chen Jing D(Mark) @ 2015-02-17 14:18 UTC (permalink / raw)
  To: dev

From: "Chen Jing D(Mark)" <jing.d.chen@intel.com>

The patch set adds a poll mode driver for the host interface of the Intel
Ethernet Switch FM10000 Series of silicon, which integrates NIC and
switch functionality. The patch set includes the following features:

1. Basic RX/TX functions for PF/VF.
2. Interrupt handling mechanism for PF/VF.
3. per queue start/stop functions for PF/VF.
4. Mailbox handling between PF/VF and PF/Switch Manager.
5. Receive Side Scaling (RSS) for PF/VF.
6. Scatter receive function for PF/VF.
7. reta update/query for PF/VF.
8. VLAN filter set for PF.
9. Link status query for PF/VF.

Change in v6:
- Merge ABI patch with fm10k driver register patch.
- Fix typo.
- Rework comments.
- Minor adjustment on commit log.
- Increase error variable after mbuf allocation failed.

Change in v5:
- Add sanity check for mbuf allocation.
- Add a new patch to claim fm10k driver review
- Change commit log.
- Add unlikely in func rx_desc_to_ol_flags to gain performance
- Add a new patch to add ABI version

Change in v4:
- Change commit log to remove improper words.

Changes in v3:
- Update base driver.
- Define several macros to pass base driver compile.

Changes in v2:
- Merge 3 patches into 1 to configure fm10k compile environment.
- Rework on log code to follow style in ixgbe.
- Rework log message, remove redundant '\n'
- Update Copyright year from "2014" to "2015"
- Change base driver directory name from SHARED to base
- Add more description in log for patch "add PF and VF interrupt"
- Merge 2 patches into 1 to register fm10k driver
- Define macro to replace numeric for lower 32-bit mask.

Chen Jing D(Mark) (1):
  maintainers: claim for fm10k review

Jeff Shaw (15):
  fm10k: add base driver
  eal: add fm10k device id
  fm10k: register fm10k pmd PF driver
  config: change config files to add fm10k into compile
  fm10k: add reta update/requery functions
  fm10k: add Rx queue setup/release function
  fm10k: add Tx queue setup/release function
  fm10k: add Rx/Tx single queue start/stop function
  fm10k: add dev start/stop functions
  fm10k: add receive and tranmit function
  fm10k: add PF RSS support
  fm10k: add scatter receive function
  fm10k: add function to set vlan
  fm10k: add SRIOV-VF support
  fm10k: add PF and VF interrupt handling function

 MAINTAINERS                                     |    4 +
 config/common_bsdapp                            |   11 +
 config/common_linuxapp                          |   11 +
 lib/Makefile                                    |    1 +
 lib/librte_eal/common/include/rte_pci_dev_ids.h |   22 +
 lib/librte_pmd_fm10k/Makefile                   |  100 +
 lib/librte_pmd_fm10k/base/fm10k_api.c           |  341 ++++
 lib/librte_pmd_fm10k/base/fm10k_api.h           |   61 +
 lib/librte_pmd_fm10k/base/fm10k_common.c        |  572 ++++++
 lib/librte_pmd_fm10k/base/fm10k_common.h        |   52 +
 lib/librte_pmd_fm10k/base/fm10k_mbx.c           | 2185 +++++++++++++++++++++++
 lib/librte_pmd_fm10k/base/fm10k_mbx.h           |  329 ++++
 lib/librte_pmd_fm10k/base/fm10k_osdep.h         |  148 ++
 lib/librte_pmd_fm10k/base/fm10k_pf.c            | 1992 +++++++++++++++++++++
 lib/librte_pmd_fm10k/base/fm10k_pf.h            |  155 ++
 lib/librte_pmd_fm10k/base/fm10k_tlv.c           |  914 ++++++++++
 lib/librte_pmd_fm10k/base/fm10k_tlv.h           |  199 ++
 lib/librte_pmd_fm10k/base/fm10k_type.h          |  937 ++++++++++
 lib/librte_pmd_fm10k/base/fm10k_vf.c            |  641 +++++++
 lib/librte_pmd_fm10k/base/fm10k_vf.h            |   91 +
 lib/librte_pmd_fm10k/fm10k.h                    |  292 +++
 lib/librte_pmd_fm10k/fm10k_ethdev.c             | 1867 +++++++++++++++++++
 lib/librte_pmd_fm10k/fm10k_logs.h               |   78 +
 lib/librte_pmd_fm10k/fm10k_rxtx.c               |  462 +++++
 lib/librte_pmd_fm10k/rte_pmd_fm10k_version.map  |    4 +
 mk/rte.app.mk                                   |    4 +
 26 files changed, 11473 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_pmd_fm10k/Makefile
 create mode 100644 lib/librte_pmd_fm10k/base/fm10k_api.c
 create mode 100644 lib/librte_pmd_fm10k/base/fm10k_api.h
 create mode 100644 lib/librte_pmd_fm10k/base/fm10k_common.c
 create mode 100644 lib/librte_pmd_fm10k/base/fm10k_common.h
 create mode 100644 lib/librte_pmd_fm10k/base/fm10k_mbx.c
 create mode 100644 lib/librte_pmd_fm10k/base/fm10k_mbx.h
 create mode 100644 lib/librte_pmd_fm10k/base/fm10k_osdep.h
 create mode 100644 lib/librte_pmd_fm10k/base/fm10k_pf.c
 create mode 100644 lib/librte_pmd_fm10k/base/fm10k_pf.h
 create mode 100644 lib/librte_pmd_fm10k/base/fm10k_tlv.c
 create mode 100644 lib/librte_pmd_fm10k/base/fm10k_tlv.h
 create mode 100644 lib/librte_pmd_fm10k/base/fm10k_type.h
 create mode 100644 lib/librte_pmd_fm10k/base/fm10k_vf.c
 create mode 100644 lib/librte_pmd_fm10k/base/fm10k_vf.h
 create mode 100644 lib/librte_pmd_fm10k/fm10k.h
 create mode 100644 lib/librte_pmd_fm10k/fm10k_ethdev.c
 create mode 100644 lib/librte_pmd_fm10k/fm10k_logs.h
 create mode 100644 lib/librte_pmd_fm10k/fm10k_rxtx.c
 create mode 100644 lib/librte_pmd_fm10k/rte_pmd_fm10k_version.map

-- 
1.7.7.6

^ permalink raw reply	[relevance 3%]

* Re: [dpdk-dev] [PATCH v2] doc: Add requirements for x32 ABI
  2015-02-16 16:27 11% ` [dpdk-dev] [PATCH v2] " Daniel Mrzyglod
@ 2015-02-16 16:29  4%   ` De Lara Guarch, Pablo
  2015-02-18 19:33  4%     ` Thomas Monjalon
  0 siblings, 1 reply; 200+ results
From: De Lara Guarch, Pablo @ 2015-02-16 16:29 UTC (permalink / raw)
  To: Mrzyglod, DanielX T, dev



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Daniel Mrzyglod
> Sent: Monday, February 16, 2015 4:27 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v2] doc: Add requirements for x32 ABI
> 
> This patch add requirements about compiler and distribution support.
> 
> v2:
> spelling fixes
> 
> Signed-off-by: Daniel Mrzyglod <danielx.t.mrzyglod@intel.com>
> ---
>  doc/guides/linux_gsg/sys_reqs.rst | 11 +++++++++--
>  1 file changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/doc/guides/linux_gsg/sys_reqs.rst
> b/doc/guides/linux_gsg/sys_reqs.rst
> index 8e2307b..ef4196e 100644
> --- a/doc/guides/linux_gsg/sys_reqs.rst
> +++ b/doc/guides/linux_gsg/sys_reqs.rst
> @@ -62,7 +62,7 @@ Compilation of the DPDK
>  *   coreutils:  cmp, sed, grep, arch
> 
>  *   gcc: versions 4.5.x or later is recommended for i686/x86_64. versions 4.8.x
> or later is recommanded
> -    for ppc_64. On some distributions, some specific compiler flags and linker
> flags are enabled by
> +    for ppc_64 and x86_x32 ABI. On some distributions, some specific
> compiler flags and linker flags are enabled by
>      default and affect performance (- fstack-protector, for example). Please
> refer to the documentation
>      of your distribution and to gcc -dumpspecs.
> 
> @@ -78,7 +78,14 @@ Compilation of the DPDK
> 
>      glibc.ppc64, libgcc.ppc64, libstdc++.ppc64 and glibc-devel.ppc64 for IBM
> ppc_64;
> 
> -*   Python, version 2.6 or 2.7, to use various helper scripts included in the
> DPDK package
> +.. note::
> +
> +    x86_x32 ABI is currently supported with distribution packages only on
> Ubuntu
> +    higher than 13.10 or recent debian distribution. The only supported
> compiler is gcc 4.8+.
> +
> +.. note::
> +
> +    Python, version 2.6 or 2.7, to use various helper scripts included in the
> DPDK package
> 
> 
>  **Optional Tools:**
> --
> 2.1.0

Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>

Thanks Daniel!

^ permalink raw reply	[relevance 4%]

* [dpdk-dev] [PATCH v2] doc: Add requirements for x32 ABI
    2015-02-16 16:09  4% ` De Lara Guarch, Pablo
@ 2015-02-16 16:27 11% ` Daniel Mrzyglod
  2015-02-16 16:29  4%   ` De Lara Guarch, Pablo
  1 sibling, 1 reply; 200+ results
From: Daniel Mrzyglod @ 2015-02-16 16:27 UTC (permalink / raw)
  To: dev

This patch adds requirements about compiler and distribution support.

v2:
spelling fixes

Signed-off-by: Daniel Mrzyglod <danielx.t.mrzyglod@intel.com>
---
 doc/guides/linux_gsg/sys_reqs.rst | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/doc/guides/linux_gsg/sys_reqs.rst b/doc/guides/linux_gsg/sys_reqs.rst
index 8e2307b..ef4196e 100644
--- a/doc/guides/linux_gsg/sys_reqs.rst
+++ b/doc/guides/linux_gsg/sys_reqs.rst
@@ -62,7 +62,7 @@ Compilation of the DPDK
 *   coreutils:  cmp, sed, grep, arch
 
 *   gcc: versions 4.5.x or later is recommended for i686/x86_64. versions 4.8.x or later is recommanded
-    for ppc_64. On some distributions, some specific compiler flags and linker flags are enabled by
+    for ppc_64 and x86_x32 ABI. On some distributions, some specific compiler flags and linker flags are enabled by
     default and affect performance (- fstack-protector, for example). Please refer to the documentation
     of your distribution and to gcc -dumpspecs.
 
@@ -78,7 +78,14 @@ Compilation of the DPDK
 
     glibc.ppc64, libgcc.ppc64, libstdc++.ppc64 and glibc-devel.ppc64 for IBM ppc_64;
 
-*   Python, version 2.6 or 2.7, to use various helper scripts included in the DPDK package
+.. note::
+
+    x86_x32 ABI is currently supported with distribution packages only on Ubuntu
+    higher than 13.10 or recent debian distribution. The only supported  compiler is gcc 4.8+.
+
+.. note::
+
+    Python, version 2.6 or 2.7, to use various helper scripts included in the DPDK package
 
 
 **Optional Tools:**
-- 
2.1.0

^ permalink raw reply	[relevance 11%]

* Re: [dpdk-dev] [PATCH] doc: Add requirements for x32 ABI
  @ 2015-02-16 16:09  4% ` De Lara Guarch, Pablo
  2015-02-16 16:27 11% ` [dpdk-dev] [PATCH v2] " Daniel Mrzyglod
  1 sibling, 0 replies; 200+ results
From: De Lara Guarch, Pablo @ 2015-02-16 16:09 UTC (permalink / raw)
  To: Mrzyglod, DanielX T, dev



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Daniel Mrzyglod
> Sent: Friday, February 13, 2015 3:58 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] doc: Add requirements for x32 ABI
> 
> This patch add requirements about compiler and distribution support.
> 
> Signed-off-by: Daniel Mrzyglod <danielx.t.mrzyglod@intel.com>
> ---
>  doc/guides/linux_gsg/sys_reqs.rst | 11 +++++++++--
>  1 file changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/doc/guides/linux_gsg/sys_reqs.rst
> b/doc/guides/linux_gsg/sys_reqs.rst
> index 8e2307b..ef4196e 100644
> --- a/doc/guides/linux_gsg/sys_reqs.rst
> +++ b/doc/guides/linux_gsg/sys_reqs.rst
> @@ -62,7 +62,7 @@ Compilation of the DPDK
>  *   coreutils:  cmp, sed, grep, arch
> 
>  *   gcc: versions 4.5.x or later is recommended for i686/x86_64. versions 4.8.x
> or later is recommanded
> -    for ppc_64. On some distributions, some specific compiler flags and linker
> flags are enabled by
> +    for ppc_64 and x86_x32 ABI. On some distributions, some specific
> compiler flags and linker flags are enabled by
>      default and affect performance (- fstack-protector, for example). Please
> refer to the documentation
>      of your distribution and to gcc -dumpspecs.
> 
> @@ -78,7 +78,14 @@ Compilation of the DPDK
> 
>      glibc.ppc64, libgcc.ppc64, libstdc++.ppc64 and glibc-devel.ppc64 for IBM
> ppc_64;
> 
> -*   Python, version 2.6 or 2.7, to use various helper scripts included in the
> DPDK package
> +.. note::
> +
> +    x86_x32 ABI is currently supported with distribution packages only on
> Ubuntu
> +    higher then 13.10 or recent debian distribution. The only supported
> compiler is gcc 4.8+.

Typo here: "then" -> "than"

> +
> +.. note::
> +
> +    Python, version 2.6 or 2.7, to use various helper scripts included in the
> DPDK package
> 
> 
>  **Optional Tools:**
> --
> 2.1.0

^ permalink raw reply	[relevance 4%]

* Re: [dpdk-dev] [PULL REQUEST] fm10k: new polling mode driver for PF/VF.
  @ 2015-02-16 12:24  0% ` Thomas Monjalon
  0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2015-02-16 12:24 UTC (permalink / raw)
  To: Chen Jing D(Mark); +Cc: dev

Hi,

2015-02-16 18:18, Chen Jing D:
> These changes add poll mode driver for the host interface of Intel
> Ethernet Switch FM10000 Series of silicons, which integrate NIC and
> switch functionalities. The patch set include below features:
> 
> 1. Basic RX/TX functions for PF/VF.
> 2. Interrupt handling mechanism for PF/VF.
> 3. per queue start/stop functions for PF/VF.
> 4. Mailbox handling between PF/VF and PF/Switch Manager.
> 5. Receive Side Scaling (RSS) for PF/VF.
> 6. Scatter receive function for PF/VF.
> 7. reta update/query for PF/VF.
> 8. VLAN filter set for PF.
> 9. Link status query for PF/VF.
> 
> The following changes since commit f2c5125a686ab64034925dabafea0877d1e5857e:
> 
>   app/testpmd: use default Rx/Tx port configuration (2015-02-14 11:35:25 +0100)
> 
> are available in the git repository at:
> 
>   jing@dpdk.org:dpdk-fm10k-next.git master
> 
> for you to fetch changes up to 1b073a75d5e809f10c0a71cbc755b02045bf8783:
> 
>   fm10k: Add ABI version of librte_pmd_fm10k (2015-02-16 03:46:00 -0500)

It seems you are requesting to pull the v5, right?
I think there were some comments from David which are not addressed.

Thanks for checking them

^ permalink raw reply	[relevance 0%]

Results 13601-13800 of ~18000   |  | reverse | sort options + mbox downloads above
-- links below jump to the message on this page --
2014-12-22 16:47     [dpdk-dev] [PATCH RFC 0/3] DPDK ethdev callback support Bruce Richardson
2015-02-18 17:42  4% ` [dpdk-dev] [PATCH v3 " John McNamara
2015-02-19 17:56  4%   ` [dpdk-dev] [PATCH v4 " John McNamara
2015-02-20 17:03  4%   ` [dpdk-dev] [PATCH v5 " John McNamara
2015-02-23 18:30  4%   ` [dpdk-dev] [PATCH v6 " John McNamara
2015-02-23 23:39  0%     ` Thomas Monjalon
2014-12-27 23:13     [dpdk-dev] [RFC] resolve conflict between net/ethernet.h and rte_ethdev.h Stephen Hemminger
2015-01-06 10:44     ` Thomas Monjalon
2015-03-04 23:16       ` Thomas Monjalon
2015-03-10 13:29  3%     ` Thomas Monjalon
2015-03-10 15:46  0%       ` Stephen Hemminger
2015-01-30 17:38     [dpdk-dev] [PATCH 0/8] Improve build process Gonzalez Monroy, Sergio
2015-01-30 18:12     ` Neil Horman
2015-02-11 11:11       ` Gonzalez Monroy, Sergio
2015-02-12  9:22         ` Panu Matilainen
2015-02-12 10:03           ` Gonzalez Monroy, Sergio
2015-02-12 12:23             ` Neil Horman
2015-02-12 14:07               ` Panu Matilainen
2015-02-12 15:52                 ` Neil Horman
2015-02-13 10:14                   ` Panu Matilainen
2015-02-13 11:08                     ` Gonzalez Monroy, Sergio
2015-02-13 12:51                       ` Neil Horman
2015-02-20 14:31  0%                     ` Gonzalez Monroy, Sergio
2015-02-22 23:37  0%                       ` Neil Horman
2015-02-23 10:25  0%                         ` Gonzalez Monroy, Sergio
2015-02-23 13:52  0%                           ` Neil Horman
2015-02-23 14:58  0%                             ` Gonzalez Monroy, Sergio
2015-02-23 18:23  0%                               ` Neil Horman
2015-02-24 13:24  0%                                 ` Gonzalez Monroy, Sergio
2015-01-30 21:16     [dpdk-dev] [PATCH] ABI: Add abi checking utility Neil Horman
2015-02-02 18:18     ` [dpdk-dev] [PATCH v2] " Neil Horman
2015-02-27 13:48  5%   ` Neil Horman
2015-02-27 13:55  5%     ` Thomas Monjalon
2015-03-03 22:18 10%   ` Thomas Monjalon
2015-03-04 11:49  8%     ` Neil Horman
2015-03-04 12:54  5%       ` Thomas Monjalon
2015-03-04 14:39  5%         ` Neil Horman
2015-03-04 15:15  5%           ` Thomas Monjalon
2015-03-04 15:42  5%             ` Neil Horman
2015-03-04 16:15  5%               ` Thomas Monjalon
2015-03-04 16:26 17% ` [dpdk-dev] [PATCH v3] " Neil Horman
2015-03-04 16:49  9%   ` Thomas Monjalon
2015-03-05 16:57  5%     ` Neil Horman
2015-03-11 19:36  5%       ` Neil Horman
2015-03-13  8:51  5%         ` Thomas Monjalon
2015-03-13 11:56  5%   ` Kavanagh, Mark B
2015-03-13 14:10  5%     ` Neil Horman
2015-03-13 14:25  5%       ` Kavanagh, Mark B
2015-03-13 14:58  5%         ` Neil Horman
2015-03-13 15:49  5%           ` Kavanagh, Mark B
2015-03-13 14:09 17% ` [dpdk-dev] [PATCH v4] " Neil Horman
2015-03-17 15:42  5%   ` Thomas Monjalon
2015-03-17 16:47  9%     ` Thomas Monjalon
2015-03-17 18:08  9%     ` Neil Horman
2015-03-17 18:08 29% ` [dpdk-dev] [PATCH v5] " Neil Horman
2015-03-17 21:17  5%   ` Thomas Monjalon
2015-02-09  8:30     [dpdk-dev] [PATCH v7 01/14] eal_pci: Add flag to hold kernel driver type Tetsuya Mukawa
2015-02-17  0:36     ` [dpdk-dev] [PATCH v8 03/14] eal/pci, ethdev: Remove assumption that port will not be detached Thomas Monjalon
2015-02-17  6:14       ` Tetsuya Mukawa
2015-02-18  0:31  3%     ` Thomas Monjalon
2015-02-18  1:54  0%       ` Tetsuya Mukawa
2015-02-18  6:10  0%         ` Tetsuya Mukawa
2015-02-18  9:27  0%           ` Iremonger, Bernard
2015-02-18  9:57  0%           ` Thomas Monjalon
2015-02-18 10:03  3%             ` Bruce Richardson
2015-02-18 10:58  0%               ` Tetsuya Mukawa
2015-02-18 12:23  0%                 ` Bruce Richardson
2015-02-18 12:38  0%                   ` Tetsuya Mukawa
2015-02-18 12:33  0%                 ` Iremonger, Bernard
2015-02-18 12:41  0%                   ` Tetsuya Mukawa
2015-02-13  8:19     [dpdk-dev] [PATCH v5 01/17] fm10k: add base driver Chen Jing D(Mark)
2015-02-17 14:18  3% ` [dpdk-dev] [PATCH v6 00/16] lib/librte_pmd_fm10k : fm10k pmd driver Chen Jing D(Mark)
2015-02-17 14:18  1%   ` [dpdk-dev] [PATCH v6 03/16] fm10k: register fm10k pmd PF driver Chen Jing D(Mark)
2015-02-18  0:13  0%   ` [dpdk-dev] [PATCH v6 00/16] lib/librte_pmd_fm10k : fm10k pmd driver Thomas Monjalon
2015-02-13 15:58     [dpdk-dev] [PATCH] doc: Add requirements for x32 ABI Daniel Mrzyglod
2015-02-16 16:09  4% ` De Lara Guarch, Pablo
2015-02-16 16:27 11% ` [dpdk-dev] [PATCH v2] " Daniel Mrzyglod
2015-02-16 16:29  4%   ` De Lara Guarch, Pablo
2015-02-18 19:33  4%     ` Thomas Monjalon
2015-02-16 10:18     [dpdk-dev] [PULL REQUEST] fm10k: new polling mode driver for PF/VF Chen Jing D(Mark)
2015-02-16 12:24  0% ` Thomas Monjalon
2015-02-17 13:47     [dpdk-dev] [PATCH v3 0/5] Interrupt mode PMD Zhou Danny
2015-02-17 13:47     ` [dpdk-dev] [PATCH v3 1/5] ethdev: add rx interrupt enable/disable functions Zhou Danny
2015-02-17 15:54  3%   ` Neil Horman
2015-02-19  7:58  3%     ` Zhou, Danny
2015-02-19 13:02  3%       ` Neil Horman
2015-02-17 13:47     ` [dpdk-dev] [PATCH v3 4/5] eal: add per rx queue interrupt handling based on VFIO Zhou Danny
2015-02-17 15:58       ` Neil Horman
2015-02-19  8:10  3%     ` Zhou, Danny
2015-02-19 13:04  3%       ` Neil Horman
2015-02-18 11:02  1% [dpdk-dev] [RFC PATCH] lib/librte_ethdev: Expand port identifier Tetsuya Mukawa
2015-02-18 12:30  0% ` Bruce Richardson
2015-02-18 12:31  0%   ` Bruce Richardson
2015-02-18 13:05  0%     ` Wodkowski, PawelX
2015-02-18 14:10  0%       ` Bruce Richardson
2015-02-18 13:10  0%     ` Marc Sune
2015-02-18 13:49  0%       ` Bruce Richardson
     [not found]     <2601191342CEEE43887BDE71AB977258213E4175@irsmsx105.ger.corp.intel.com>
2015-02-09 10:22     ` [dpdk-dev] [PATCH] x32 ABI support, first iteration Ananyev, Konstantin
2015-02-12 13:18       ` De Lara Guarch, Pablo
2015-02-18 19:32  4%     ` Thomas Monjalon
2015-02-19 13:48  3% [dpdk-dev] [PATCH v4 0/5] Interrupt mode PMD Zhou Danny
2015-02-19 13:48  3% ` [dpdk-dev] [PATCH v4 1/5] ethdev: add rx interrupt enable/disable functions Zhou Danny
2015-02-20  8:50  0% ` [dpdk-dev] [PATCH v4 0/5] Interrupt mode PMD Gonzalez Monroy, Sergio
2015-02-23 16:55  3% [dpdk-dev] [PATCH v5 0/6] " Zhou Danny
2015-02-23 16:55  3% ` [dpdk-dev] [PATCH v5 1/6] ethdev: add rx interrupt enable/disable functions Zhou Danny
2015-02-27  4:56  3% ` [dpdk-dev] [PATCH v6 0/8] Interrupt mode PMD Cunming Liang
2015-02-27  4:56  2%   ` [dpdk-dev] [PATCH v6 5/8] ethdev: add rx interrupt enable/disable functions Cunming Liang
2015-02-27  8:00  0%   ` [dpdk-dev] [PATCH v6 0/8] Interrupt mode PMD Liu, Yong
2015-05-05  5:39  3%   ` [dpdk-dev] From: Cunming Liang <cunming.liang@intel.com> Cunming Liang
2015-05-05  5:39  2%     ` [dpdk-dev] [PATCH v7 07/10] ethdev: add rx intr enable, disable and ctl functions Cunming Liang
2015-05-21  8:55  2%     ` [dpdk-dev] [PATCH v8 00/11] Interrupt mode PMD Cunming Liang
2015-05-21  8:55           ` [dpdk-dev] [PATCH v8 01/11] eal/linux: add interrupt vectors support in intr_handle Cunming Liang
2015-05-21 10:32  3%         ` Neil Horman
     [not found]               ` <20150521104300.00757b4e@urahara>
2015-05-21 17:58  4%             ` Neil Horman
2015-05-21 18:21  3%               ` Stephen Hemminger
     [not found]                   ` <20150521111400.2a04a196@urahara>
2015-05-22  0:05  4%                 ` Neil Horman
     [not found]                     ` <40594e9e6e0543afa11e4dbd90e59b22@BRMWP-EXMB11.corp.brocade.com>
2015-05-22 16:52  5%                   ` Stephen Hemminger
2015-05-27 10:33  4%                     ` Neil Horman
2015-05-21  8:56  2%       ` [dpdk-dev] [PATCH v8 08/11] ethdev: add rx intr enable, disable and ctl functions Cunming Liang
2015-05-29  8:45  4%       ` [dpdk-dev] [PATCH v9 00/12] Interrupt mode PMD Cunming Liang
2015-05-29  8:45  2%         ` [dpdk-dev] [PATCH v9 08/12] ethdev: add rx intr enable, disable and ctl functions Cunming Liang
2015-05-29  8:45 11%         ` [dpdk-dev] [PATCH v9 12/12] abi: fix v2.1 abi broken issue Cunming Liang
2015-05-05  5:53  3%   ` [dpdk-dev] [PATCH v7 00/10] Interrupt mode PMD Cunming Liang
2015-02-24 21:50  4% [dpdk-dev] closing version 2.0.0-rc1 Thomas Monjalon
2015-02-25  0:56  0% ` Stephen Hemminger
2015-02-27  0:42  0% ` Zhang, Helin
2015-02-26  0:12  4% [dpdk-dev] [dpdk-announce] release candidate 2.0.0-rc1 Thomas Monjalon
2015-02-27 13:11     [dpdk-dev] [PATCH v4 00/18] unified packet type Helin Zhang
2015-05-22  8:44  2% ` [dpdk-dev] [PATCH v5 " Helin Zhang
2015-05-22  8:44       ` [dpdk-dev] [PATCH v5 01/18] mbuf: redefine packet_type in rte_mbuf Helin Zhang
2015-05-22 10:09  3%     ` Neil Horman
2015-05-22  8:44  3%   ` [dpdk-dev] [PATCH v5 18/18] mbuf: remove old packet type bit masks Helin Zhang
2015-02-28  7:36     [dpdk-dev] Error seen while compiling Pktgen-dpdk Shankari Vaidyalingam
2015-02-28 14:00  3% ` Neil Horman
2015-02-28 18:20  0%   ` Wiles, Keith
2015-03-12 16:54     [dpdk-dev] [PATCH] ethdev: additional parameter in RX callback John McNamara
2015-03-12 19:15  3% ` Neil Horman
2015-03-13  9:41  0%   ` Bruce Richardson
2015-03-13 13:45  0%     ` Neil Horman
2015-03-13 14:50  0%       ` Bruce Richardson
2015-03-13 15:09  4%         ` Neil Horman
2015-03-13 16:26  0%           ` Mcnamara, John
2015-03-13 17:31  4%             ` Neil Horman
2015-03-13 18:28  0%               ` Mcnamara, John
2015-03-13 23:15  5%                 ` Neil Horman
2015-03-23 15:16  0%                   ` Thomas Monjalon
2015-03-23 15:29  0%                     ` Bruce Richardson
2015-03-23 16:00  3%                     ` Neil Horman
2015-03-30 19:52  0%                       ` Thomas Monjalon
2015-03-17 22:43  3% [dpdk-dev] [dpdk-announce] release candidate 2.0.0-rc2 Thomas Monjalon
2015-03-20 14:51  4% [dpdk-dev] tools brainstorming Thomas Monjalon
2015-03-20 15:07  0% ` Butler, Siobhan A
2015-03-20 15:16  3% ` Neil Horman
2015-03-20 15:18  0% ` Simon Kågström
2015-03-23  8:41  0% ` Cao, Waterman
2015-03-23 16:18  0% ` Mcnamara, John
2015-04-08 10:43     ` Butler, Siobhan A
2015-04-13 15:02  2%   ` Neil Horman
2015-03-25 18:11     [dpdk-dev] [PATCH v2 0/7] Hyperv PMD patches Stephen Hemminger
2015-03-25 18:11  1% ` [dpdk-dev] [PATCH v2 3/7] hv: add basic vmbus support Stephen Hemminger
2015-04-03  9:51  6% [dpdk-dev] [PATCHv2] doc: remove duplicate in release nots new features Siobhan Butler
2015-04-03 21:12  4% [dpdk-dev] [dpdk-announce] DPDK 2.0.0 released Thomas Monjalon
2015-04-16 10:38     [dpdk-dev] Beyond DPDK 2.0 O'Driscoll, Tim
2015-04-24  7:47     ` Luke Gorrie
2015-04-24 17:39       ` Jay Rolette
2015-04-24 17:51         ` Matthew Hall
2015-04-25 13:30  3%       ` Marc Sune
2015-04-25 16:08  0%         ` Wiles, Keith
2015-04-26 21:56  0%           ` Neil Horman
     [not found]                 ` <D162FA4E.1DED8%keith.wiles@intel.com>
2015-04-27  9:52  0%               ` Marc Sune
2015-04-27 13:39  0%                 ` Wiles, Keith
2015-04-27 15:34  0%                   ` Marc Sune
2015-04-27 10:29  0%               ` Neil Horman
2015-04-27 13:50  0%                 ` Wiles, Keith
2015-04-20 14:11     [dpdk-dev] [RFC PATCH] ethdev: remove old flow director API Thomas Monjalon
2015-04-20 16:33  3% ` Neil Horman
2015-04-20 16:45  0%   ` Venky Venkatesan
2015-04-27 16:08  0%     ` Thomas Monjalon
2015-04-20 15:41     [dpdk-dev] [PATCH v4 00/12] mbuf: enhancements of mbuf clones Olivier Matz
2015-04-21  9:55     ` [dpdk-dev] [PATCH v5 " Olivier Matz
2015-04-21 11:50  3%   ` Neil Horman
2015-04-20 21:54     [dpdk-dev] [PATCH 0/7] Hyper-V Poll Mode Driver Stephen Hemminger
2015-04-20 21:54  2% ` [dpdk-dev] [PATCH 3/7] hv: add basic vmbus support Stephen Hemminger
2015-04-21 12:23  0%   ` Neil Horman
2015-04-21 17:32     [dpdk-dev] [PATCH v4 0/7] Hyper-V Poll Mode driver Stephen Hemminger
2015-04-21 17:32  1% ` [dpdk-dev] [PATCH v4 3/7] hv: add basic vmbus support Stephen Hemminger
2015-04-22 16:27     [dpdk-dev] [PATCH v3 0/4] bonding corrections and additions Eric Kinzie
2015-04-22 16:27     ` [dpdk-dev] [PATCH v3 3/4] bond mode 4: allow external state machine Eric Kinzie
2015-04-22 19:51  3%   ` Neil Horman
2015-04-23 21:35     [dpdk-dev] [PATCH v7 0/6] Move EAL common functions Ravi Kerur
2015-04-23 21:35     ` [dpdk-dev] [PATCH v7 1/6] Move common functions in eal_thread.c Ravi Kerur
2015-04-24 13:51       ` Neil Horman
2015-04-24 15:14         ` Ravi Kerur
2015-04-24 15:22  3%       ` Neil Horman
2015-04-24 16:45  0%         ` Ravi Kerur
2015-04-24 18:53  0%           ` Neil Horman
2015-04-24 19:21  0%             ` Ravi Kerur
2015-04-24 19:51  4%               ` Neil Horman
2015-04-24 21:24  3%                 ` Ravi Kerur
2015-04-25  1:45  4%                   ` Ravi Kerur
2015-04-25 12:32  0%                     ` Neil Horman
2015-04-25 13:02  0%                       ` Neil Horman
2015-04-26  0:09  0%                         ` Ravi Kerur
2015-04-27 13:44  0%                           ` Neil Horman
2015-04-27 22:39  3%                             ` Ravi Kerur
2015-04-28 19:35  0%                               ` Neil Horman
2015-04-28 23:52  4%                                 ` Ravi Kerur
2015-04-29 10:04  3%                                   ` Neil Horman
2015-04-29 17:47  5%                                     ` Ravi Kerur
2015-04-30 16:00  3%                                       ` Neil Horman
2015-05-01  0:15  4%                                         ` Ravi Kerur
2015-04-24 12:58  8% [dpdk-dev] [PATCH] doc: fixed spellings and typos John McNamara
2015-04-28 16:36     [dpdk-dev] [PATCH 0/3] eal: uio irq fixes and enhancements Stephen Hemminger
2015-05-12 20:02     ` Thomas Monjalon
2015-05-13  8:57  3%   ` Bruce Richardson
2015-05-13  9:32  0%     ` Thomas Monjalon
2015-04-28 23:46  4% [dpdk-dev] [PATCH v8 0/6] Move common functions in EAL Ravi Kerur
2015-04-28 23:46  2% ` [dpdk-dev] [PATCH v8 1/6] Move common functions in eal_thread.c Ravi Kerur
2015-04-28 23:46  1%   ` [dpdk-dev] [PATCH v8 2/6] Move common functions in eal.c Ravi Kerur
2015-04-29 10:14  0% ` [dpdk-dev] [PATCH v8 0/6] Move common functions in EAL Neil Horman
2015-04-29  0:30  2% [dpdk-dev] [PATCH 1/3] pcap: utilize underlying real interface properties Nicolás Pernas Maradei
     [not found]     <CAFb4SLBGcR1EHL5FkJ7r6-7mqWR9UJ7GLD2cm18SJ8AuoWu_Og@mail.gmail.com>
2015-04-29  8:29  0% ` [dpdk-dev] gmake test on freeBSD Bruce Richardson
2015-04-29 17:58  0%   ` Ravi Kerur
2015-04-29 17:04     [dpdk-dev] [PATCH 0/6] rte_sched: patches against 2.o Stephen Hemminger
2015-04-29 17:04     ` [dpdk-dev] [PATCH 4/6] rte_sched: allow reading without clearing Stephen Hemminger
2015-05-11 12:53  3%   ` Thomas Monjalon
2015-05-05  2:32     [dpdk-dev] [PATCH RFC 0/6] support of QinQ stripping and insertion of i40e Helin Zhang
2015-05-26  8:36     ` [dpdk-dev] [PATCH 0/5] support i40e QinQ stripping and insertion Helin Zhang
2015-05-26  8:36       ` [dpdk-dev] [PATCH 2/5] mbuf: use the reserved 16 bits for double vlan Helin Zhang
2015-05-26 14:55  3%     ` Stephen Hemminger
2015-05-26 15:00  0%       ` Zhang, Helin
2015-05-26 15:02  3%       ` Ananyev, Konstantin
2015-05-26 15:35  3%         ` Stephen Hemminger
2015-05-26 15:46  3%           ` Ananyev, Konstantin
2015-05-27  1:07  0%             ` Zhang, Helin
2015-05-05 14:43     [dpdk-dev] [PATCH v3 0/6] update jhash function Pablo de Lara
2015-05-12 11:02     ` [dpdk-dev] [PATCH v4 " Pablo de Lara
2015-05-12 15:33  4%   ` Neil Horman
2015-05-13 13:52  0%     ` De Lara Guarch, Pablo
2015-05-13 14:20  0%       ` Neil Horman
2015-05-07 15:35     [dpdk-dev] [RFC PATCH 0/2] Move PMDs out of lib directory Bruce Richardson
2015-05-12 17:04     ` [dpdk-dev] [PATCH 00/19] Move PMDs to drivers directory Bruce Richardson
2015-05-12 17:05  1%   ` [dpdk-dev] [PATCH 14/19] virtio: move virtio PMD " Bruce Richardson
2015-05-15 15:56       ` [dpdk-dev] [PATCH v2 00/19] Move PMDs " Bruce Richardson
2015-05-15 15:56  1%     ` [dpdk-dev] [PATCH v2 14/19] virtio: move virtio PMD to drivers/net Bruce Richardson
2015-05-08 16:37  4% [dpdk-dev] [RFC PATCH 0/2] dynamic memzones Sergio Gonzalez Monroy
2015-05-08 16:37  1% ` [dpdk-dev] [RFC PATCH 2/2] eal: memzone allocated by malloc Sergio Gonzalez Monroy
2015-05-12 16:30  0% ` [dpdk-dev] [RFC PATCH 0/2] dynamic memzones Olivier MATZ
2015-05-11 16:29     [dpdk-dev] [RFC PATCHv2 0/2] pktdev as wrapper type Bruce Richardson
2015-05-19 11:31     ` Bruce Richardson
2015-05-20  8:31       ` Thomas Monjalon
2015-05-20 10:05         ` Marc Sune
2015-05-20 10:28           ` Neil Horman
2015-05-20 17:01             ` Marc Sune
2015-05-20 18:47               ` Neil Horman
2015-05-21 12:12  3%             ` Richardson, Bruce
2015-05-11 17:07     [dpdk-dev] [PATCH v3 0/6] rte_sched: cleanups and API changes Stephen Hemminger
2015-05-11 17:07     ` [dpdk-dev] [PATCH 2/6] rte_sched: expand scheduler hierarchy for more VLAN's Stephen Hemminger
2015-05-11 17:20  3%   ` Neil Horman
     [not found]       ` <8edea4c81f624728bb5f0476b680c410@BRMWP-EXMB11.corp.brocade.com>
2015-05-11 17:32  4%     ` Stephen Hemminger
2015-05-11 17:43  4%       ` Neil Horman
2015-05-11 23:45     [dpdk-dev] [RFC PATCH 0/2] ethdev: add port speed capability bitmap Marc Sune
2015-05-11 23:45     ` [dpdk-dev] [RFC PATCH 1/2] Added ETH_SPEED_CAP bitmap in rte_eth_dev_info Marc Sune
2015-05-26 15:03  3%   ` Stephen Hemminger
2015-05-26 15:09  0%     ` Marc Sune
2015-05-14 20:55     [dpdk-dev] Technical Steering Committee (TSC) O'Driscoll, Tim
2015-05-19 14:43     ` Stephen Hemminger
2015-05-19 15:34       ` Neil Horman
2015-05-19 15:45         ` Thomas Monjalon
2015-05-19 17:34           ` Neil Horman
2015-05-19 20:21  3%         ` O'Driscoll, Tim
2015-05-26 12:39     [dpdk-dev] [PATCH v3 00/10] table: added table statistics Maciej Gajdzica
2015-05-26 12:39     ` [dpdk-dev] [PATCH v3 01/10] table: added structure for storing table stats Maciej Gajdzica
2015-05-26 14:57  3%   ` Stephen Hemminger
2015-05-26 21:40  0%     ` Dumitrescu, Cristian
2015-05-26 21:57  0%       ` Stephen Hemminger
2015-05-28 19:32  3%         ` Dumitrescu, Cristian
2015-05-28 21:41  3%           ` Stephen Hemminger
2015-05-27 13:47     [dpdk-dev] [PATCH 1/4] kni: add function to query the name of a kni object Bruce Richardson
2015-05-27 13:52     ` Marc Sune
2015-05-27 13:55       ` Bruce Richardson
     [not found]         ` <5565D195.9040701@bisdn.de>
2015-05-27 15:36  3%       ` Bruce Richardson
2015-05-27 18:10  3% [dpdk-dev] [PATCH v4 0/5] rte_sched: cleanup and API enhancements Stephen Hemminger
2015-05-27 18:10  3% ` [dpdk-dev] [PATCH 4/5] rte_sched: hide structure of port hierarchy Stephen Hemminger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).