* [dpdk-dev] [PATCH v3 2/8] lib/librte_ether: defind RX/TX lock mode
@ 2016-06-07 6:12 4% ` Zhe Tao
0 siblings, 0 replies; 200+ results
From: Zhe Tao @ 2016-06-07 6:12 UTC (permalink / raw)
To: dev
Cc: wenzhuo.lu, zhe.tao, konstantin.ananyev, bruce.richardson,
jing.d.chen, cunming.liang, jingjing.wu, helin.zhang
Define a lock mode for the RX/TX queues, because when resetting
the device we want the resetting thread to take the lock
of each RX/TX queue to make sure RX/TX is stopped.
Use the next ABI macro for this ABI change, as it has too
much impact: 7 APIs and 1 global variable are affected.
Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
Signed-off-by: Zhe Tao <zhe.tao@intel.com>
---
lib/librte_ether/rte_ethdev.h | 62 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 74e895f..4efb5e9 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -354,7 +354,12 @@ struct rte_eth_rxmode {
jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */
hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */
enable_scatter : 1, /**< Enable scatter packets rx handler */
+#ifndef RTE_NEXT_ABI
enable_lro : 1; /**< Enable LRO */
+#else
+ enable_lro : 1, /**< Enable LRO */
+ lock_mode : 1; /**< Using lock path */
+#endif
};
/**
@@ -634,11 +639,68 @@ struct rte_eth_txmode {
/**< If set, reject sending out tagged pkts */
hw_vlan_reject_untagged : 1,
/**< If set, reject sending out untagged pkts */
+#ifndef RTE_NEXT_ABI
hw_vlan_insert_pvid : 1;
/**< If set, enable port based VLAN insertion */
+#else
+ hw_vlan_insert_pvid : 1,
+ /**< If set, enable port based VLAN insertion */
+ lock_mode : 1;
+ /**< If set, using lock path */
+#endif
};
/**
+ * The macros for the RX/TX lock mode functions
+ */
+#ifdef RTE_NEXT_ABI
+#define RX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.rxmode.lock_mode ? \
+ func ## _lock : func)
+
+#define TX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.txmode.lock_mode ? \
+ func ## _lock : func)
+#else
+#define RX_LOCK_FUNCTION(dev, func) func
+
+#define TX_LOCK_FUNCTION(dev, func) func
+#endif
+
+/* Add the lock RX/TX function for VF reset */
+#define GENERATE_RX_LOCK(func, nic) \
+uint16_t func ## _lock(void *rx_queue, \
+ struct rte_mbuf **rx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _rx_queue *rxq = rx_queue; \
+ uint16_t nb_rx = 0; \
+ \
+ if (rte_spinlock_trylock(&rxq->rx_lock)) { \
+ nb_rx = func(rx_queue, rx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&rxq->rx_lock); \
+ } \
+ \
+ return nb_rx; \
+}
+
+#define GENERATE_TX_LOCK(func, nic) \
+uint16_t func ## _lock(void *tx_queue, \
+ struct rte_mbuf **tx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _tx_queue *txq = tx_queue; \
+ uint16_t nb_tx = 0; \
+ \
+ if (rte_spinlock_trylock(&txq->tx_lock)) { \
+ nb_tx = func(tx_queue, tx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&txq->tx_lock); \
+ } \
+ \
+ return nb_tx; \
+}
+
+/**
* A structure used to configure an RX ring of an Ethernet port.
*/
struct rte_eth_rxconf {
--
2.1.4
^ permalink raw reply [relevance 4%]
* [dpdk-dev] [PATCH v2 2/8] lib/librte_ether: defind RX/TX lock mode
@ 2016-06-07 5:45 4% ` Zhe Tao
0 siblings, 0 replies; 200+ results
From: Zhe Tao @ 2016-06-07 5:45 UTC (permalink / raw)
To: dev
Cc: wenzhuo.lu, zhe.tao, konstantin.ananyev, bruce.richardson,
jing.d.chen, cunming.liang, jingjing.wu, helin.zhang
From: Wenzhuo Lu <wenzhuo.lu@intel.com>
Define a lock mode for the RX/TX queues, because when resetting
the device we want the resetting thread to take the lock
of each RX/TX queue to make sure RX/TX is stopped.
Use the next ABI macro for this ABI change, as it has too
much impact: 7 APIs and 1 global variable are affected.
Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
Signed-off-by: Zhe Tao <zhe.tao@intel.com>
Signed-off-by: zhe.tao <zhe.tao@intel.com>
---
lib/librte_ether/rte_ethdev.h | 62 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 74e895f..4efb5e9 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -354,7 +354,12 @@ struct rte_eth_rxmode {
jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */
hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */
enable_scatter : 1, /**< Enable scatter packets rx handler */
+#ifndef RTE_NEXT_ABI
enable_lro : 1; /**< Enable LRO */
+#else
+ enable_lro : 1, /**< Enable LRO */
+ lock_mode : 1; /**< Using lock path */
+#endif
};
/**
@@ -634,11 +639,68 @@ struct rte_eth_txmode {
/**< If set, reject sending out tagged pkts */
hw_vlan_reject_untagged : 1,
/**< If set, reject sending out untagged pkts */
+#ifndef RTE_NEXT_ABI
hw_vlan_insert_pvid : 1;
/**< If set, enable port based VLAN insertion */
+#else
+ hw_vlan_insert_pvid : 1,
+ /**< If set, enable port based VLAN insertion */
+ lock_mode : 1;
+ /**< If set, using lock path */
+#endif
};
/**
+ * The macros for the RX/TX lock mode functions
+ */
+#ifdef RTE_NEXT_ABI
+#define RX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.rxmode.lock_mode ? \
+ func ## _lock : func)
+
+#define TX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.txmode.lock_mode ? \
+ func ## _lock : func)
+#else
+#define RX_LOCK_FUNCTION(dev, func) func
+
+#define TX_LOCK_FUNCTION(dev, func) func
+#endif
+
+/* Add the lock RX/TX function for VF reset */
+#define GENERATE_RX_LOCK(func, nic) \
+uint16_t func ## _lock(void *rx_queue, \
+ struct rte_mbuf **rx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _rx_queue *rxq = rx_queue; \
+ uint16_t nb_rx = 0; \
+ \
+ if (rte_spinlock_trylock(&rxq->rx_lock)) { \
+ nb_rx = func(rx_queue, rx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&rxq->rx_lock); \
+ } \
+ \
+ return nb_rx; \
+}
+
+#define GENERATE_TX_LOCK(func, nic) \
+uint16_t func ## _lock(void *tx_queue, \
+ struct rte_mbuf **tx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _tx_queue *txq = tx_queue; \
+ uint16_t nb_tx = 0; \
+ \
+ if (rte_spinlock_trylock(&txq->tx_lock)) { \
+ nb_tx = func(tx_queue, tx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&txq->tx_lock); \
+ } \
+ \
+ return nb_tx; \
+}
+
+/**
* A structure used to configure an RX ring of an Ethernet port.
*/
struct rte_eth_rxconf {
--
2.1.4
^ permalink raw reply [relevance 4%]
* [dpdk-dev] [PATCH v3 0/6] vhost: add vhost-user client mode and reconnect ability
2016-05-13 6:16 3% ` [dpdk-dev] [PATCH v2 " Yuanhan Liu
@ 2016-06-07 4:05 3% ` Yuanhan Liu
0 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-06-07 4:05 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Traynor Kevin, marcandre.lureau, Yuanhan Liu
v3: - make the "reconnect" feature be default for client mode, as it's
good to handle guest OS restart with less effort.
- fix the uninitialized variable error pointed out by Rich
NOTE: I created a branch at dpdk.org [0] for more convenient testing:
[0]: git://dpdk.org/next/dpdk-next-virtio for-testing
When the DPDK vhost-user application (such as OVS) restarts (due to
a crash or an update), the vhost-user connection between DPDK and QEMU
won't be re-established automatically. In other words, the virtio
net is broken.
The reason it doesn't work is that DPDK acts as the server only.
A restart of the server needs a reconnection from the client (QEMU).
However, reconnecting is not supported by QEMU.
Adding support for client mode and letting DPDK be the client
would resolve the above issue more easily: a restart of DPDK would naturally
try to connect to the server (QEMU) automatically.
Therefore, this patchset implements the DPDK vhost-user client mode, by
introducing a new arg (flags) for API rte_vhost_driver_register(). And the
client mode is enabled when RTE_VHOST_USER_CLIENT is given. Note that this
implies an API breakage. However, since this release deals with ABI/API
refactoring, it should not be an issue.
Another interesting thing to make it work is that you not only have
to consider the case where the DPDK vhost-user app might restart, but
also the case where QEMU might restart as well: the guest OS sometimes
just reboots. In such a case, when the server is down, the client has
to keep reconnecting to the server until the server is back and the
connection is established again. That's what the "reconnect" patch is for.
Note that current QEMU does not support a second connection
from the client, thus a restart of DPDK vhost-user will not work. This is
because current QEMU won't be able to detect the disconnect caused by the
restart, thus it will not listen for later connections. Patches [1] have
been sent; they are just not merged yet. But unlike the vhost-user multiple
queue case, where we critically depend on the QEMU implementation, here
we have no such dependency; therefore, I think it's okay to make DPDK
ready for the "reconnect" stuff first. (Note that I also mentioned
this fact in the release doc.)
[1]: http://lists.nongnu.org/archive/html/qemu-devel/2016-05/msg01507.html
v2: - added release doc
- do not remove socket file for the client mode
- create only one thread to handle all reconnects
Thanks.
--yliu
---
Yuanhan Liu (6):
vhost: rename structs for enabling client mode
vhost: add vhost-user client mode
vhost: add reconnect ability
vhost: workaround stale vring base
examples/vhost: add client option
vhost: add pmd client option
doc/guides/rel_notes/release_16_07.rst | 21 ++
drivers/net/vhost/rte_eth_vhost.c | 38 ++-
examples/vhost/main.c | 12 +-
lib/librte_vhost/rte_virtio_net.h | 12 +-
lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 8 +-
lib/librte_vhost/vhost_user/vhost-net-user.c | 403 ++++++++++++++++++---------
lib/librte_vhost/vhost_user/vhost-net-user.h | 6 -
lib/librte_vhost/virtio-net.c | 9 +
8 files changed, 361 insertions(+), 148 deletions(-)
--
1.9.0
^ permalink raw reply [relevance 3%]
* [dpdk-dev] [PATCH v3 18/20] examples/tep_term: adapt to new vhost ABI/API changes
2016-06-07 3:51 9% ` [dpdk-dev] [PATCH v3 00/20] " Yuanhan Liu
` (3 preceding siblings ...)
2016-06-07 3:52 4% ` [dpdk-dev] [PATCH v3 17/20] vhost: reserve few more space for future extension Yuanhan Liu
@ 2016-06-07 3:52 6% ` Yuanhan Liu
4 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-06-07 3:52 UTC (permalink / raw)
To: dev
Cc: huawei.xie, Thomas Monjalon, Panu Matilainen, Traynor Kevin,
Rich Lane, Tetsuya Mukawa, Yuanhan Liu
Adapt to the new vhost ABI/API refactoring changes, to not break the
build. It's a straightforward change: replace "struct virtio_net *dev"
with "int fd". Simple build test only so far.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
tep_term is built on top of the vhost switch example; they shared a lot of
code (before the vhost example cleanup). Ideally, we should move
the vxlan part to the vhost example, and introduce an option to enable it.
However, I found that would take more effort, including the effort
of making it work together with the VLAN and VMDq stuff as well as the effort
to not break anything. So I found it's better to start simple first:
just do a new ABI/API adaptation.
---
examples/tep_termination/main.c | 83 +++++++++++++++++-----------------
examples/tep_termination/main.h | 5 +-
examples/tep_termination/vxlan_setup.c | 20 ++++----
examples/tep_termination/vxlan_setup.h | 6 +--
4 files changed, 57 insertions(+), 57 deletions(-)
diff --git a/examples/tep_termination/main.c b/examples/tep_termination/main.c
index b8297dd..32eb925 100644
--- a/examples/tep_termination/main.c
+++ b/examples/tep_termination/main.c
@@ -566,10 +566,9 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m)
struct rte_mbuf **m_table;
unsigned len, ret = 0;
const uint16_t lcore_id = rte_lcore_id();
- struct virtio_net *dev = vdev->dev;
+ int vid = vdev->vid;
- RTE_LOG(DEBUG, VHOST_DATA, "(%" PRIu64 ") TX: MAC address is external\n",
- dev->device_fh);
+ RTE_LOG(DEBUG, VHOST_DATA, "(%d) TX: MAC address is external\n", vid);
/* Add packet to the port tx queue */
tx_q = &lcore_tx_queue[lcore_id];
@@ -578,8 +577,8 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m)
tx_q->m_table[len] = m;
len++;
if (enable_stats) {
- dev_statistics[dev->device_fh].tx_total++;
- dev_statistics[dev->device_fh].tx++;
+ dev_statistics[vid].tx_total++;
+ dev_statistics[vid].tx++;
}
if (unlikely(len == MAX_PKT_BURST)) {
@@ -614,7 +613,7 @@ static int
switch_worker(__rte_unused void *arg)
{
struct rte_mempool *mbuf_pool = arg;
- struct virtio_net *dev = NULL;
+ int vid;
struct vhost_dev *vdev = NULL;
struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
struct virtio_net_data_ll *dev_ll;
@@ -688,7 +687,7 @@ switch_worker(__rte_unused void *arg)
while (dev_ll != NULL) {
vdev = dev_ll->vdev;
- dev = vdev->dev;
+ vid = vdev->vid;
if (unlikely(vdev->remove)) {
dev_ll = dev_ll->next;
@@ -709,22 +708,22 @@ switch_worker(__rte_unused void *arg)
* must be less than virtio queue size
*/
if (enable_retry && unlikely(rx_count >
- rte_vring_available_entries(dev, VIRTIO_RXQ))) {
+ rte_vhost_avail_entries(vid, VIRTIO_RXQ))) {
for (retry = 0; retry < burst_rx_retry_num;
retry++) {
rte_delay_us(burst_rx_delay_time);
- if (rx_count <= rte_vring_available_entries(dev, VIRTIO_RXQ))
+ if (rx_count <= rte_vhost_avail_entries(vid, VIRTIO_RXQ))
break;
}
}
- ret_count = overlay_options.rx_handle(dev, pkts_burst, rx_count);
+ ret_count = overlay_options.rx_handle(vid, pkts_burst, rx_count);
if (enable_stats) {
rte_atomic64_add(
- &dev_statistics[dev->device_fh].rx_total_atomic,
+ &dev_statistics[vid].rx_total_atomic,
rx_count);
rte_atomic64_add(
- &dev_statistics[dev->device_fh].rx_atomic, ret_count);
+ &dev_statistics[vid].rx_atomic, ret_count);
}
while (likely(rx_count)) {
rx_count--;
@@ -736,7 +735,7 @@ switch_worker(__rte_unused void *arg)
if (likely(!vdev->remove)) {
/* Handle guest TX*/
- tx_count = rte_vhost_dequeue_burst(dev,
+ tx_count = rte_vhost_dequeue_burst(vid,
VIRTIO_TXQ, mbuf_pool,
pkts_burst, MAX_PKT_BURST);
/* If this is the first received packet we need to learn the MAC */
@@ -913,18 +912,24 @@ init_data_ll(void)
* loop in the rte_pause loop.
*/
static void
-destroy_device(volatile struct virtio_net *dev)
+destroy_device(int vid)
{
struct virtio_net_data_ll *ll_lcore_dev_cur;
struct virtio_net_data_ll *ll_main_dev_cur;
struct virtio_net_data_ll *ll_lcore_dev_last = NULL;
struct virtio_net_data_ll *ll_main_dev_last = NULL;
- struct vhost_dev *vdev;
+ struct vhost_dev *vdev = NULL;
int lcore;
- dev->flags &= ~VIRTIO_DEV_RUNNING;
-
- vdev = (struct vhost_dev *)dev->priv;
+ ll_main_dev_cur = ll_root_used;
+ while (ll_main_dev_cur != NULL) {
+ if (ll_main_dev_cur->vdev->vid == vid) {
+ vdev = ll_main_dev_cur->vdev;
+ break;
+ }
+ }
+ if (!vdev)
+ return;
/* set the remove flag. */
vdev->remove = 1;
@@ -944,8 +949,7 @@ destroy_device(volatile struct virtio_net *dev)
if (ll_lcore_dev_cur == NULL) {
RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Failed to find the dev to be destroy.\n",
- dev->device_fh);
+ "(%d) Failed to find the dev to be destroy.\n", vid);
return;
}
@@ -992,8 +996,8 @@ destroy_device(volatile struct virtio_net *dev)
/* Decrement number of device on the lcore. */
lcore_info[vdev->coreid].lcore_ll->device_num--;
- RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been removed "
- "from data core\n", dev->device_fh);
+ RTE_LOG(INFO, VHOST_DATA, "(%d) Device has been removed "
+ "from data core\n", vid);
rte_free(vdev);
@@ -1004,7 +1008,7 @@ destroy_device(volatile struct virtio_net *dev)
* to the main linked list and the allocated to a specific data core.
*/
static int
-new_device(struct virtio_net *dev)
+new_device(int vid)
{
struct virtio_net_data_ll *ll_dev;
int lcore, core_add = 0;
@@ -1014,18 +1018,16 @@ new_device(struct virtio_net *dev)
vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
if (vdev == NULL) {
RTE_LOG(INFO, VHOST_DATA,
- "(%"PRIu64") Couldn't allocate memory for vhost dev\n",
- dev->device_fh);
+ "(%d) Couldn't allocate memory for vhost dev\n", vid);
return -1;
}
- vdev->dev = dev;
- dev->priv = vdev;
+ vdev->vid = vid;
/* Add device to main ll */
ll_dev = get_data_ll_free_entry(&ll_root_free);
if (ll_dev == NULL) {
- RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") No free entry found in"
+ RTE_LOG(INFO, VHOST_DATA, "(%d) No free entry found in"
" linked list Device limit of %d devices per core"
- " has been reached\n", dev->device_fh, nb_devices);
+ " has been reached\n", vid, nb_devices);
if (vdev->regions_hpa)
rte_free(vdev->regions_hpa);
rte_free(vdev);
@@ -1033,7 +1035,7 @@ new_device(struct virtio_net *dev)
}
ll_dev->vdev = vdev;
add_data_ll_entry(&ll_root_used, ll_dev);
- vdev->rx_q = dev->device_fh;
+ vdev->rx_q = vid;
/* reset ready flag */
vdev->ready = DEVICE_MAC_LEARNING;
@@ -1050,10 +1052,9 @@ new_device(struct virtio_net *dev)
ll_dev = get_data_ll_free_entry(&lcore_info[core_add].lcore_ll->ll_root_free);
if (ll_dev == NULL) {
RTE_LOG(INFO, VHOST_DATA,
- "(%"PRIu64") Failed to add device to data core\n",
- dev->device_fh);
+ "(%d) Failed to add device to data core\n", vid);
vdev->ready = DEVICE_SAFE_REMOVE;
- destroy_device(dev);
+ destroy_device(vid);
rte_free(vdev->regions_hpa);
rte_free(vdev);
return -1;
@@ -1065,17 +1066,17 @@ new_device(struct virtio_net *dev)
ll_dev);
/* Initialize device stats */
- memset(&dev_statistics[dev->device_fh], 0,
+ memset(&dev_statistics[vid], 0,
sizeof(struct device_statistics));
/* Disable notifications. */
- rte_vhost_enable_guest_notification(dev, VIRTIO_RXQ, 0);
- rte_vhost_enable_guest_notification(dev, VIRTIO_TXQ, 0);
+ rte_vhost_enable_guest_notification(vid, VIRTIO_RXQ, 0);
+ rte_vhost_enable_guest_notification(vid, VIRTIO_TXQ, 0);
lcore_info[vdev->coreid].lcore_ll->device_num++;
- dev->flags |= VIRTIO_DEV_RUNNING;
- RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been added to data core %d\n",
- dev->device_fh, vdev->coreid);
+ RTE_LOG(INFO, VHOST_DATA,
+ "(%d) Device has been added to data core %d\n",
+ vid, vdev->coreid);
return 0;
}
@@ -1113,7 +1114,7 @@ print_stats(void)
dev_ll = ll_root_used;
while (dev_ll != NULL) {
- device_fh = (uint32_t)dev_ll->vdev->dev->device_fh;
+ device_fh = dev_ll->vdev->vid;
tx_total = dev_statistics[device_fh].tx_total;
tx = dev_statistics[device_fh].tx;
tx_dropped = tx_total - tx;
@@ -1257,7 +1258,7 @@ main(int argc, char *argv[])
rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF);
/* Register CUSE device to handle IOCTLs. */
- ret = rte_vhost_driver_register((char *)&dev_basename);
+ ret = rte_vhost_driver_register(dev_basename, 0);
if (ret != 0)
rte_exit(EXIT_FAILURE, "CUSE device setup failure.\n");
diff --git a/examples/tep_termination/main.h b/examples/tep_termination/main.h
index 4b123ab..c0ea766 100644
--- a/examples/tep_termination/main.h
+++ b/examples/tep_termination/main.h
@@ -71,8 +71,7 @@ struct device_statistics {
* Device linked list structure for data path.
*/
struct vhost_dev {
- /**< Pointer to device created by vhost lib. */
- struct virtio_net *dev;
+ int vid;
/**< Number of memory regions for gpa to hpa translation. */
uint32_t nregions_hpa;
/**< Memory region information for gpa to hpa translation. */
@@ -116,6 +115,6 @@ struct virtio_net_data_ll {
};
uint32_t
-virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count);
+virtio_dev_rx(int vid, struct rte_mbuf **pkts, uint32_t count);
#endif /* _MAIN_H_ */
diff --git a/examples/tep_termination/vxlan_setup.c b/examples/tep_termination/vxlan_setup.c
index 2a48e14..58bc334 100644
--- a/examples/tep_termination/vxlan_setup.c
+++ b/examples/tep_termination/vxlan_setup.c
@@ -244,17 +244,17 @@ vxlan_link(struct vhost_dev *vdev, struct rte_mbuf *m)
{
int i, ret;
struct ether_hdr *pkt_hdr;
- struct virtio_net *dev = vdev->dev;
- uint64_t portid = dev->device_fh;
+ int vid = vdev->vid;
+ uint64_t portid = vid;
struct ipv4_hdr *ip;
struct rte_eth_tunnel_filter_conf tunnel_filter_conf;
if (unlikely(portid > VXLAN_N_PORTS)) {
RTE_LOG(INFO, VHOST_DATA,
- "(%"PRIu64") WARNING: Not configuring device,"
+ "(%d) WARNING: Not configuring device,"
"as already have %d ports for VXLAN.",
- dev->device_fh, VXLAN_N_PORTS);
+ vid, VXLAN_N_PORTS);
return -1;
}
@@ -262,9 +262,9 @@ vxlan_link(struct vhost_dev *vdev, struct rte_mbuf *m)
pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
if (is_same_ether_addr(&(pkt_hdr->s_addr), &vdev->mac_address)) {
RTE_LOG(INFO, VHOST_DATA,
- "(%"PRIu64") WARNING: This device is using an existing"
+ "(%d) WARNING: This device is using an existing"
" MAC address and has not been registered.\n",
- dev->device_fh);
+ vid);
return -1;
}
@@ -425,7 +425,7 @@ vxlan_tx_pkts(uint8_t port_id, uint16_t queue_id,
/* Check for decapsulation and pass packets directly to VIRTIO device */
int
-vxlan_rx_pkts(struct virtio_net *dev, struct rte_mbuf **pkts_burst,
+vxlan_rx_pkts(int vid, struct rte_mbuf **pkts_burst,
uint32_t rx_count)
{
uint32_t i = 0;
@@ -436,11 +436,11 @@ vxlan_rx_pkts(struct virtio_net *dev, struct rte_mbuf **pkts_burst,
for (i = 0; i < rx_count; i++) {
if (enable_stats) {
rte_atomic64_add(
- &dev_statistics[dev->device_fh].rx_bad_ip_csum,
+ &dev_statistics[vid].rx_bad_ip_csum,
(pkts_burst[i]->ol_flags & PKT_RX_IP_CKSUM_BAD)
!= 0);
rte_atomic64_add(
- &dev_statistics[dev->device_fh].rx_bad_ip_csum,
+ &dev_statistics[vid].rx_bad_ip_csum,
(pkts_burst[i]->ol_flags & PKT_RX_L4_CKSUM_BAD)
!= 0);
}
@@ -452,6 +452,6 @@ vxlan_rx_pkts(struct virtio_net *dev, struct rte_mbuf **pkts_burst,
count++;
}
- ret = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ, pkts_valid, count);
+ ret = rte_vhost_enqueue_burst(vid, VIRTIO_RXQ, pkts_valid, count);
return ret;
}
diff --git a/examples/tep_termination/vxlan_setup.h b/examples/tep_termination/vxlan_setup.h
index 1846540..8d26461 100644
--- a/examples/tep_termination/vxlan_setup.h
+++ b/examples/tep_termination/vxlan_setup.h
@@ -55,10 +55,10 @@ typedef void (*ol_tunnel_destroy_t)(struct vhost_dev *vdev);
typedef int (*ol_tx_handle_t)(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
-typedef int (*ol_rx_handle_t)(struct virtio_net *dev, struct rte_mbuf **pkts,
+typedef int (*ol_rx_handle_t)(int vid, struct rte_mbuf **pkts,
uint32_t count);
-typedef int (*ol_param_handle)(struct virtio_net *dev);
+typedef int (*ol_param_handle)(int vid);
struct ol_switch_ops {
ol_port_configure_t port_configure;
@@ -82,6 +82,6 @@ int
vxlan_tx_pkts(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
int
-vxlan_rx_pkts(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count);
+vxlan_rx_pkts(int vid, struct rte_mbuf **pkts, uint32_t count);
#endif /* VXLAN_SETUP_H_ */
--
1.9.0
^ permalink raw reply [relevance 6%]
* [dpdk-dev] [PATCH v3 17/20] vhost: reserve few more space for future extension
2016-06-07 3:51 9% ` [dpdk-dev] [PATCH v3 00/20] " Yuanhan Liu
` (2 preceding siblings ...)
2016-06-07 3:52 13% ` [dpdk-dev] [PATCH v3 13/20] vhost: export vid as the only interface to applications Yuanhan Liu
@ 2016-06-07 3:52 4% ` Yuanhan Liu
2016-06-07 3:52 6% ` [dpdk-dev] [PATCH v3 18/20] examples/tep_term: adapt to new vhost ABI/API changes Yuanhan Liu
4 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-06-07 3:52 UTC (permalink / raw)
To: dev
Cc: huawei.xie, Thomas Monjalon, Panu Matilainen, Traynor Kevin,
Rich Lane, Tetsuya Mukawa, Yuanhan Liu
"virtio_net_device_ops" is the only open struct left that an application
can access; therefore, it's the only place where a future extension might
introduce an ABI break.
So, reserve some space in it. 5 entries should be enough, considering
that we have barely touched it for a long while. Another reason to
choose 5 is cache alignment: 5 makes the struct 64 bytes on a 64-bit
machine.
With this, we can say with confidence that we might be able to be free from
ABI violations forever.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Tested-by: Rich Lane <rich.lane@bigswitch.com>
Acked-by: Rich Lane <rich.lane@bigswitch.com>
---
lib/librte_vhost/rte_virtio_net.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index fc1d799..bc2b74b 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -66,6 +66,8 @@ struct virtio_net_device_ops {
void (*destroy_device)(int vid); /**< Remove device. */
int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */
+
+ void *reserved[5]; /**< Reserved for future extension */
};
/**
--
1.9.0
^ permalink raw reply [relevance 4%]
* [dpdk-dev] [PATCH v3 13/20] vhost: export vid as the only interface to applications
2016-06-07 3:51 9% ` [dpdk-dev] [PATCH v3 00/20] " Yuanhan Liu
2016-06-07 3:52 7% ` [dpdk-dev] [PATCH v3 11/20] vhost: introduce new API to export queue free entries Yuanhan Liu
2016-06-07 3:52 3% ` [dpdk-dev] [PATCH v3 12/20] vhost: remove dependency on priv field Yuanhan Liu
@ 2016-06-07 3:52 13% ` Yuanhan Liu
2016-06-07 3:52 4% ` [dpdk-dev] [PATCH v3 17/20] vhost: reserve few more space for future extension Yuanhan Liu
2016-06-07 3:52 6% ` [dpdk-dev] [PATCH v3 18/20] examples/tep_term: adapt to new vhost ABI/API changes Yuanhan Liu
4 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-06-07 3:52 UTC (permalink / raw)
To: dev
Cc: huawei.xie, Thomas Monjalon, Panu Matilainen, Traynor Kevin,
Rich Lane, Tetsuya Mukawa, Yuanhan Liu
With all the previous preparation work, we are just one step away from
the final ABI refactoring. That is, to change the current APIs so that they
use vid instead of the old virtio_net dev.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Tested-by: Rich Lane <rich.lane@bigswitch.com>
Acked-by: Rich Lane <rich.lane@bigswitch.com>
---
v2: update release note
v3: - bump the ABI version to 3.
- remove "struct virtio_net *dev" field in vhost example
---
doc/guides/rel_notes/release_16_07.rst | 9 ++++-
drivers/net/vhost/rte_eth_vhost.c | 47 +++++++++------------------
examples/vhost/main.c | 25 +++++++-------
examples/vhost/main.h | 2 --
lib/librte_vhost/Makefile | 2 +-
lib/librte_vhost/rte_virtio_net.h | 18 +++++-----
lib/librte_vhost/vhost_rxtx.c | 15 +++++++--
lib/librte_vhost/vhost_user/virtio-net-user.c | 14 ++++----
lib/librte_vhost/virtio-net.c | 17 ++++++----
9 files changed, 77 insertions(+), 72 deletions(-)
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 7b602b7..8dbcf8a 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -118,6 +118,10 @@ API Changes
* ``rte_vring_available_entries`` is renamed to ``rte_vhost_avail_entries``.
+* All existing vhost APIs and callbacks with ``virtio_net`` struct pointer
+ as the parameter have been changed due to the ABI refactoring mentioned
+ below: it's replaced by ``int vid``.
+
ABI Changes
-----------
@@ -129,6 +133,9 @@ ABI Changes
* The ``rte_port_source_params`` structure has new fields to support PCAP file.
It was already in release 16.04 with ``RTE_NEXT_ABI`` flag.
+* vhost ABI refactoring has been made: ``virtio_net`` structure is never
+ exported to application any more. Instead, a handle, ``vid``, has been
+ used to represent this structure internally.
Shared Library Versions
-----------------------
@@ -165,7 +172,7 @@ The libraries prepended with a plus sign were incremented in this version.
librte_sched.so.1
librte_table.so.2
librte_timer.so.1
- librte_vhost.so.2
+ + librte_vhost.so.3
Tested Platforms
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index de0f25e..56c1c36 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -71,9 +71,9 @@ static struct ether_addr base_eth_addr = {
};
struct vhost_queue {
+ int vid;
rte_atomic32_t allow_queuing;
rte_atomic32_t while_queuing;
- struct virtio_net *device;
struct pmd_internal *internal;
struct rte_mempool *mb_pool;
uint8_t port;
@@ -139,7 +139,7 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
goto out;
/* Dequeue packets from guest TX queue */
- nb_rx = rte_vhost_dequeue_burst(r->device,
+ nb_rx = rte_vhost_dequeue_burst(r->vid,
r->virtqueue_id, r->mb_pool, bufs, nb_bufs);
r->rx_pkts += nb_rx;
@@ -170,7 +170,7 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
goto out;
/* Enqueue packets to guest RX queue */
- nb_tx = rte_vhost_enqueue_burst(r->device,
+ nb_tx = rte_vhost_enqueue_burst(r->vid,
r->virtqueue_id, bufs, nb_bufs);
r->tx_pkts += nb_tx;
@@ -222,7 +222,7 @@ find_internal_resource(char *ifname)
}
static int
-new_device(struct virtio_net *dev)
+new_device(int vid)
{
struct rte_eth_dev *eth_dev;
struct internal_list *list;
@@ -234,12 +234,7 @@ new_device(struct virtio_net *dev)
int newnode;
#endif
- if (dev == NULL) {
- RTE_LOG(INFO, PMD, "Invalid argument\n");
- return -1;
- }
-
- rte_vhost_get_ifname(dev->vid, ifname, sizeof(ifname));
+ rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
list = find_internal_resource(ifname);
if (list == NULL) {
RTE_LOG(INFO, PMD, "Invalid device name: %s\n", ifname);
@@ -250,7 +245,7 @@ new_device(struct virtio_net *dev)
internal = eth_dev->data->dev_private;
#ifdef RTE_LIBRTE_VHOST_NUMA
- newnode = rte_vhost_get_numa_node(dev->vid);
+ newnode = rte_vhost_get_numa_node(vid);
if (newnode > 0)
eth_dev->data->numa_node = newnode;
#endif
@@ -259,7 +254,7 @@ new_device(struct virtio_net *dev)
vq = eth_dev->data->rx_queues[i];
if (vq == NULL)
continue;
- vq->device = dev;
+ vq->vid = vid;
vq->internal = internal;
vq->port = eth_dev->data->port_id;
}
@@ -267,13 +262,13 @@ new_device(struct virtio_net *dev)
vq = eth_dev->data->tx_queues[i];
if (vq == NULL)
continue;
- vq->device = dev;
+ vq->vid = vid;
vq->internal = internal;
vq->port = eth_dev->data->port_id;
}
- for (i = 0; i < rte_vhost_get_queue_num(dev->vid) * VIRTIO_QNUM; i++)
- rte_vhost_enable_guest_notification(dev, i, 0);
+ for (i = 0; i < rte_vhost_get_queue_num(vid) * VIRTIO_QNUM; i++)
+ rte_vhost_enable_guest_notification(vid, i, 0);
eth_dev->data->dev_link.link_status = ETH_LINK_UP;
@@ -298,7 +293,7 @@ new_device(struct virtio_net *dev)
}
static void
-destroy_device(volatile struct virtio_net *dev)
+destroy_device(int vid)
{
struct rte_eth_dev *eth_dev;
struct vhost_queue *vq;
@@ -306,12 +301,7 @@ destroy_device(volatile struct virtio_net *dev)
char ifname[PATH_MAX];
unsigned i;
- if (dev == NULL) {
- RTE_LOG(INFO, PMD, "Invalid argument\n");
- return;
- }
-
- rte_vhost_get_ifname(dev->vid, ifname, sizeof(ifname));
+ rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
list = find_internal_resource(ifname);
if (list == NULL) {
RTE_LOG(ERR, PMD, "Invalid interface name: %s\n", ifname);
@@ -343,13 +333,13 @@ destroy_device(volatile struct virtio_net *dev)
vq = eth_dev->data->rx_queues[i];
if (vq == NULL)
continue;
- vq->device = NULL;
+ vq->vid = -1;
}
for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
vq = eth_dev->data->tx_queues[i];
if (vq == NULL)
continue;
- vq->device = NULL;
+ vq->vid = -1;
}
RTE_LOG(INFO, PMD, "Connection closed\n");
@@ -358,19 +348,14 @@ destroy_device(volatile struct virtio_net *dev)
}
static int
-vring_state_changed(struct virtio_net *dev, uint16_t vring, int enable)
+vring_state_changed(int vid, uint16_t vring, int enable)
{
struct rte_vhost_vring_state *state;
struct rte_eth_dev *eth_dev;
struct internal_list *list;
char ifname[PATH_MAX];
- if (dev == NULL) {
- RTE_LOG(ERR, PMD, "Invalid argument\n");
- return -1;
- }
-
- rte_vhost_get_ifname(dev->vid, ifname, sizeof(ifname));
+ rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
list = find_internal_resource(ifname);
if (list == NULL) {
RTE_LOG(ERR, PMD, "Invalid interface name: %s\n", ifname);
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 9b74a16..c854660 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -795,7 +795,7 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
{
uint16_t ret;
- ret = rte_vhost_enqueue_burst(dst_vdev->dev, VIRTIO_RXQ, &m, 1);
+ ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
if (enable_stats) {
rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
@@ -1041,7 +1041,6 @@ static inline void __attribute__((always_inline))
drain_eth_rx(struct vhost_dev *vdev)
{
uint16_t rx_count, enqueue_count;
- struct virtio_net *dev = vdev->dev;
struct rte_mbuf *pkts[MAX_PKT_BURST];
rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
@@ -1055,19 +1054,19 @@ drain_eth_rx(struct vhost_dev *vdev)
* to diminish packet loss.
*/
if (enable_retry &&
- unlikely(rx_count > rte_vhost_avail_entries(dev->vid,
+ unlikely(rx_count > rte_vhost_avail_entries(vdev->vid,
VIRTIO_RXQ))) {
uint32_t retry;
for (retry = 0; retry < burst_rx_retry_num; retry++) {
rte_delay_us(burst_rx_delay_time);
- if (rx_count <= rte_vhost_avail_entries(dev->vid,
+ if (rx_count <= rte_vhost_avail_entries(vdev->vid,
VIRTIO_RXQ))
break;
}
}
- enqueue_count = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ,
+ enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
pkts, rx_count);
if (enable_stats) {
rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
@@ -1084,7 +1083,7 @@ drain_virtio_tx(struct vhost_dev *vdev)
uint16_t count;
uint16_t i;
- count = rte_vhost_dequeue_burst(vdev->dev, VIRTIO_TXQ, mbuf_pool,
+ count = rte_vhost_dequeue_burst(vdev->vid, VIRTIO_TXQ, mbuf_pool,
pkts, MAX_PKT_BURST);
/* setup VMDq for the first packet */
@@ -1171,13 +1170,13 @@ switch_worker(void *arg __rte_unused)
* of dev->remove=1 which can cause an infinite loop in the rte_pause loop.
*/
static void
-destroy_device (volatile struct virtio_net *dev)
+destroy_device(int vid)
{
struct vhost_dev *vdev = NULL;
int lcore;
TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
- if (vdev->vid == dev->vid)
+ if (vdev->vid == vid)
break;
}
if (!vdev)
@@ -1221,12 +1220,11 @@ destroy_device (volatile struct virtio_net *dev)
* and the allocated to a specific data core.
*/
static int
-new_device (struct virtio_net *dev)
+new_device(int vid)
{
int lcore, core_add = 0;
uint32_t device_num_min = num_devices;
struct vhost_dev *vdev;
- int vid = dev->vid;
vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
if (vdev == NULL) {
@@ -1235,7 +1233,6 @@ new_device (struct virtio_net *dev)
vid);
return -1;
}
- vdev->dev = dev;
vdev->vid = vid;
TAILQ_INSERT_TAIL(&vhost_dev_list, vdev, global_vdev_entry);
@@ -1259,8 +1256,8 @@ new_device (struct virtio_net *dev)
lcore_info[vdev->coreid].device_num++;
/* Disable notifications. */
- rte_vhost_enable_guest_notification(dev, VIRTIO_RXQ, 0);
- rte_vhost_enable_guest_notification(dev, VIRTIO_TXQ, 0);
+ rte_vhost_enable_guest_notification(vid, VIRTIO_RXQ, 0);
+ rte_vhost_enable_guest_notification(vid, VIRTIO_TXQ, 0);
RTE_LOG(INFO, VHOST_DATA,
"(%d) device has been added to data core %d\n",
@@ -1316,7 +1313,7 @@ print_stats(void)
"RX total: %" PRIu64 "\n"
"RX dropped: %" PRIu64 "\n"
"RX successful: %" PRIu64 "\n",
- vdev->dev->vid,
+ vdev->vid,
tx_total, tx_dropped, tx,
rx_total, rx_dropped, rx);
}
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index e99c436..6bb42e8 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -49,8 +49,6 @@ struct device_statistics {
};
struct vhost_dev {
- /**< Pointer to device created by vhost lib. */
- struct virtio_net *dev;
/**< Number of memory regions for gpa to hpa translation. */
uint32_t nregions_hpa;
/**< Device MAC address (Obtained on first TX packet). */
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index e33ff53..7ef8d34 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -36,7 +36,7 @@ LIB = librte_vhost.a
EXPORT_MAP := rte_vhost_version.map
-LIBABIVER := 2
+LIBABIVER := 3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64
ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y)
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 0427461..370345e 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -178,10 +178,10 @@ struct virtio_memory {
*
*/
struct virtio_net_device_ops {
- int (*new_device)(struct virtio_net *); /**< Add device. */
- void (*destroy_device)(volatile struct virtio_net *); /**< Remove device. */
+ int (*new_device)(int vid); /**< Add device. */
+ void (*destroy_device)(int vid); /**< Remove device. */
- int (*vring_state_changed)(struct virtio_net *dev, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */
+ int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */
};
/**
@@ -220,7 +220,7 @@ int rte_vhost_feature_enable(uint64_t feature_mask);
/* Returns currently supported vhost features */
uint64_t rte_vhost_feature_get(void);
-int rte_vhost_enable_guest_notification(struct virtio_net *dev, uint16_t queue_id, int enable);
+int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
/* Register vhost driver. dev_name could be different for multiple instance support. */
int rte_vhost_driver_register(const char *dev_name);
@@ -291,8 +291,8 @@ uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
* be received from the physical port or from another virtual device. A packet
* count is returned to indicate the number of packets that were succesfully
* added to the RX queue.
- * @param dev
- * virtio-net device
+ * @param vid
+ * virtio-net device ID
* @param queue_id
* virtio queue index in mq case
* @param pkts
@@ -302,14 +302,14 @@ uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
* @return
* num of packets enqueued
*/
-uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
+uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count);
/**
* This function gets guest buffers from the virtio device TX virtqueue,
* construct host mbufs, copies guest buffer content to host mbufs and
* store them in pkts to be processed.
- * @param dev
+ * @param vid
* virtio-net device
* @param queue_id
* virtio queue index in mq case
@@ -322,7 +322,7 @@ uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
* @return
* num of packets dequeued
*/
-uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
+uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
#endif /* _VIRTIO_NET_H_ */
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 8d87508..08cab08 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -46,6 +46,7 @@
#include <rte_arp.h>
#include "vhost-net.h"
+#include "virtio-net.h"
#define MAX_PKT_BURST 32
#define VHOST_LOG_PAGE 4096
@@ -587,9 +588,14 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
}
uint16_t
-rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
+rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count)
{
+ struct virtio_net *dev = get_device(vid);
+
+ if (!dev)
+ return 0;
+
if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
return virtio_dev_merge_rx(dev, queue_id, pkts, count);
else
@@ -815,9 +821,10 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
uint16_t
-rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
+rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
{
+ struct virtio_net *dev;
struct rte_mbuf *rarp_mbuf = NULL;
struct vhost_virtqueue *vq;
uint32_t desc_indexes[MAX_PKT_BURST];
@@ -826,6 +833,10 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
uint16_t free_entries;
uint16_t avail_idx;
+ dev = get_device(vid);
+ if (!dev)
+ return 0;
+
if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
dev->vid, __func__, queue_id);
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
index 9385af1..7fa69a7 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -117,7 +117,7 @@ user_set_mem_table(int vid, struct VhostUserMsg *pmsg)
/* Remove from the data plane. */
if (dev->flags & VIRTIO_DEV_RUNNING) {
dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(dev);
+ notify_ops->destroy_device(vid);
}
if (dev->mem) {
@@ -279,6 +279,9 @@ user_set_vring_kick(int vid, struct VhostUserMsg *pmsg)
struct vhost_vring_file file;
struct virtio_net *dev = get_device(vid);
+ if (!dev)
+ return;
+
file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
file.fd = VIRTIO_INVALID_EVENTFD;
@@ -289,7 +292,7 @@ user_set_vring_kick(int vid, struct VhostUserMsg *pmsg)
vhost_set_vring_kick(vid, &file);
if (virtio_is_ready(dev) && !(dev->flags & VIRTIO_DEV_RUNNING)) {
- if (notify_ops->new_device(dev) == 0)
+ if (notify_ops->new_device(vid) == 0)
dev->flags |= VIRTIO_DEV_RUNNING;
}
}
@@ -307,7 +310,7 @@ user_get_vring_base(int vid,
return -1;
/* We have to stop the queue (virtio) if it is running. */
if (dev->flags & VIRTIO_DEV_RUNNING)
- notify_ops->destroy_device(dev);
+ notify_ops->destroy_device(vid);
/* Here we are safe to get the last used index */
vhost_get_vring_base(vid, state->index, state);
@@ -342,9 +345,8 @@ user_set_vring_enable(int vid,
"set queue enable: %d to qp idx: %d\n",
enable, state->index);
- if (notify_ops->vring_state_changed) {
- notify_ops->vring_state_changed(dev, state->index, enable);
- }
+ if (notify_ops->vring_state_changed)
+ notify_ops->vring_state_changed(vid, state->index, enable);
dev->virtqueue[state->index]->enabled = enable;
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 115eba4..ea216c0 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -296,7 +296,7 @@ vhost_destroy_device(int vid)
if (dev->flags & VIRTIO_DEV_RUNNING) {
dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(dev);
+ notify_ops->destroy_device(vid);
}
cleanup_device(dev, 1);
@@ -354,7 +354,7 @@ vhost_reset_owner(int vid)
if (dev->flags & VIRTIO_DEV_RUNNING) {
dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(dev);
+ notify_ops->destroy_device(vid);
}
cleanup_device(dev, 0);
@@ -718,13 +718,13 @@ vhost_set_backend(int vid, struct vhost_vring_file *file)
if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
if (dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED &&
dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED) {
- if (notify_ops->new_device(dev) < 0)
+ if (notify_ops->new_device(vid) < 0)
return -1;
dev->flags |= VIRTIO_DEV_RUNNING;
}
} else if (file->fd == VIRTIO_DEV_STOPPED) {
dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(dev);
+ notify_ops->destroy_device(vid);
}
return 0;
@@ -800,9 +800,14 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id)
return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx_res;
}
-int rte_vhost_enable_guest_notification(struct virtio_net *dev,
- uint16_t queue_id, int enable)
+int
+rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
{
+ struct virtio_net *dev = get_device(vid);
+
+ if (dev == NULL)
+ return -1;
+
if (enable) {
RTE_LOG(ERR, VHOST_CONFIG,
"guest notification isn't supported.\n");
--
1.9.0
^ permalink raw reply [relevance 13%]
* [dpdk-dev] [PATCH v3 12/20] vhost: remove dependency on priv field
2016-06-07 3:51 9% ` [dpdk-dev] [PATCH v3 00/20] " Yuanhan Liu
2016-06-07 3:52 7% ` [dpdk-dev] [PATCH v3 11/20] vhost: introduce new API to export queue free entries Yuanhan Liu
@ 2016-06-07 3:52 3% ` Yuanhan Liu
2016-06-07 3:52 13% ` [dpdk-dev] [PATCH v3 13/20] vhost: export vid as the only interface to applications Yuanhan Liu
` (2 subsequent siblings)
4 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-06-07 3:52 UTC (permalink / raw)
To: dev
Cc: huawei.xie, Thomas Monjalon, Panu Matilainen, Traynor Kevin,
Rich Lane, Tetsuya Mukawa, Yuanhan Liu
This change lets us avoid the dependency on the "virtio_net"
struct, in preparation for the ABI refactoring.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Tested-by: Rich Lane <rich.lane@bigswitch.com>
Acked-by: Rich Lane <rich.lane@bigswitch.com>
---
drivers/net/vhost/rte_eth_vhost.c | 13 +++++++------
examples/vhost/main.c | 10 +++++++---
2 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 6fa9f6b..de0f25e 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -275,7 +275,6 @@ new_device(struct virtio_net *dev)
for (i = 0; i < rte_vhost_get_queue_num(dev->vid) * VIRTIO_QNUM; i++)
rte_vhost_enable_guest_notification(dev, i, 0);
- dev->priv = eth_dev;
eth_dev->data->dev_link.link_status = ETH_LINK_UP;
for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
@@ -303,6 +302,8 @@ destroy_device(volatile struct virtio_net *dev)
{
struct rte_eth_dev *eth_dev;
struct vhost_queue *vq;
+ struct internal_list *list;
+ char ifname[PATH_MAX];
unsigned i;
if (dev == NULL) {
@@ -310,11 +311,13 @@ destroy_device(volatile struct virtio_net *dev)
return;
}
- eth_dev = (struct rte_eth_dev *)dev->priv;
- if (eth_dev == NULL) {
- RTE_LOG(INFO, PMD, "Failed to find a ethdev\n");
+ rte_vhost_get_ifname(dev->vid, ifname, sizeof(ifname));
+ list = find_internal_resource(ifname);
+ if (list == NULL) {
+ RTE_LOG(ERR, PMD, "Invalid interface name: %s\n", ifname);
return;
}
+ eth_dev = list->eth_dev;
/* Wait until rx/tx_pkt_burst stops accessing vhost device */
for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
@@ -336,8 +339,6 @@ destroy_device(volatile struct virtio_net *dev)
eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
- dev->priv = NULL;
-
for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
vq = eth_dev->data->rx_queues[i];
if (vq == NULL)
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 3ae302f..9b74a16 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -1173,10 +1173,15 @@ switch_worker(void *arg __rte_unused)
static void
destroy_device (volatile struct virtio_net *dev)
{
- struct vhost_dev *vdev;
+ struct vhost_dev *vdev = NULL;
int lcore;
- vdev = (struct vhost_dev *)dev->priv;
+ TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
+ if (vdev->vid == dev->vid)
+ break;
+ }
+ if (!vdev)
+ return;
/*set the remove flag. */
vdev->remove = 1;
while(vdev->ready != DEVICE_SAFE_REMOVE) {
@@ -1231,7 +1236,6 @@ new_device (struct virtio_net *dev)
return -1;
}
vdev->dev = dev;
- dev->priv = vdev;
vdev->vid = vid;
TAILQ_INSERT_TAIL(&vhost_dev_list, vdev, global_vdev_entry);
--
1.9.0
^ permalink raw reply [relevance 3%]
* [dpdk-dev] [PATCH v3 11/20] vhost: introduce new API to export queue free entries
2016-06-07 3:51 9% ` [dpdk-dev] [PATCH v3 00/20] " Yuanhan Liu
@ 2016-06-07 3:52 7% ` Yuanhan Liu
2016-06-07 3:52 3% ` [dpdk-dev] [PATCH v3 12/20] vhost: remove dependency on priv field Yuanhan Liu
` (3 subsequent siblings)
4 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-06-07 3:52 UTC (permalink / raw)
To: dev
Cc: huawei.xie, Thomas Monjalon, Panu Matilainen, Traynor Kevin,
Rich Lane, Tetsuya Mukawa, Yuanhan Liu
The new API rte_vhost_avail_entries() is actually a rename of
rte_vring_available_entries(), with the "vring" to "vhost" name
change made to keep it consistent with the other exported vhost APIs.
This change lets us avoid the dependency on the "virtio_net"
struct, in preparation for the ABI refactoring.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Tested-by: Rich Lane <rich.lane@bigswitch.com>
Acked-by: Rich Lane <rich.lane@bigswitch.com>
---
doc/guides/rel_notes/release_16_07.rst | 2 ++
examples/vhost/main.c | 4 ++--
lib/librte_vhost/rte_vhost_version.map | 1 +
lib/librte_vhost/rte_virtio_net.h | 24 +++++++++++++-----------
lib/librte_vhost/virtio-net.c | 17 +++++++++++++++++
5 files changed, 35 insertions(+), 13 deletions(-)
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 30e78d4..7b602b7 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -116,6 +116,8 @@ API Changes
ibadcrc, ibadlen, imcasts, fdirmatch, fdirmiss,
tx_pause_xon, rx_pause_xon, tx_pause_xoff, rx_pause_xoff.
+* ``rte_vring_available_entries`` is renamed to ``rte_vhost_avail_entries``.
+
ABI Changes
-----------
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index d04f779..3ae302f 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -1055,13 +1055,13 @@ drain_eth_rx(struct vhost_dev *vdev)
* to diminish packet loss.
*/
if (enable_retry &&
- unlikely(rx_count > rte_vring_available_entries(dev,
+ unlikely(rx_count > rte_vhost_avail_entries(dev->vid,
VIRTIO_RXQ))) {
uint32_t retry;
for (retry = 0; retry < burst_rx_retry_num; retry++) {
rte_delay_us(burst_rx_delay_time);
- if (rx_count <= rte_vring_available_entries(dev,
+ if (rx_count <= rte_vhost_avail_entries(dev->vid,
VIRTIO_RXQ))
break;
}
diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
index 4608e3b..93f1188 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -24,6 +24,7 @@ DPDK_2.1 {
DPDK_16.07 {
global:
+ rte_vhost_avail_entries;
rte_vhost_get_ifname;
rte_vhost_get_numa_node;
rte_vhost_get_queue_num;
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 0898e8b..0427461 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -184,17 +184,6 @@ struct virtio_net_device_ops {
int (*vring_state_changed)(struct virtio_net *dev, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */
};
-static inline uint16_t __attribute__((always_inline))
-rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id)
-{
- struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
-
- if (!vq->enabled)
- return 0;
-
- return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx_res;
-}
-
/**
* Function to convert guest physical addresses to vhost virtual addresses.
* This is used to convert guest virtio buffer addresses.
@@ -285,6 +274,19 @@ uint32_t rte_vhost_get_queue_num(int vid);
int rte_vhost_get_ifname(int vid, char *buf, size_t len);
/**
+ * Get how many avail entries are left in the queue
+ *
+ * @param vid
+ * virtio-net device ID
+ * @param queue_id
+ * virtio queue index
+ *
+ * @return
+ * num of avail entries left
+ */
+uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
+
+/**
* This function adds buffers to the virtio devices RX virtqueue. Buffers can
* be received from the physical port or from another virtual device. A packet
* count is returned to indicate the number of packets that were succesfully
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 375c9d4..115eba4 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -783,6 +783,23 @@ rte_vhost_get_ifname(int vid, char *buf, size_t len)
return 0;
}
+uint16_t
+rte_vhost_avail_entries(int vid, uint16_t queue_id)
+{
+ struct virtio_net *dev;
+ struct vhost_virtqueue *vq;
+
+ dev = get_device(vid);
+ if (!dev)
+ return 0;
+
+ vq = dev->virtqueue[queue_id];
+ if (!vq->enabled)
+ return 0;
+
+ return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx_res;
+}
+
int rte_vhost_enable_guest_notification(struct virtio_net *dev,
uint16_t queue_id, int enable)
{
--
1.9.0
^ permalink raw reply [relevance 7%]
* [dpdk-dev] [PATCH v3 00/20] vhost ABI/API refactoring
2016-05-13 5:24 8% ` [dpdk-dev] [PATCH v2 00/19] vhost ABI/API refactoring Yuanhan Liu
` (4 preceding siblings ...)
2016-05-26 17:04 4% ` [dpdk-dev] [PATCH v2 00/19] vhost ABI/API refactoring Rich Lane
@ 2016-06-07 3:51 9% ` Yuanhan Liu
2016-06-07 3:52 7% ` [dpdk-dev] [PATCH v3 11/20] vhost: introduce new API to export queue free entries Yuanhan Liu
` (4 more replies)
5 siblings, 5 replies; 200+ results
From: Yuanhan Liu @ 2016-06-07 3:51 UTC (permalink / raw)
To: dev
Cc: huawei.xie, Thomas Monjalon, Panu Matilainen, Traynor Kevin,
Rich Lane, Tetsuya Mukawa, Yuanhan Liu
v3: - adapted the tep_term example to the new vhost ABI/API changes, to make
sure the build is not broken, at least.
- bumped the ABI version to 3
NOTE: I created a branch at dpdk.org [0] for more convenient testing:
[0]: git://dpdk.org/next/dpdk-next-virtio for-testing
Every time we introduce a new feature to vhost, we are likely to break
ABI. Moreover, some cleanups (such as the one from Ilya to remove vec_buf
from vhost_virtqueue struct) also break ABI.
This patch set is meant to ultimately resolve the above issue, by hiding the
virtio_net structure (as well as a few others) internally, and exposing the
virtio_net dev struct to applications by a number, vid, the same way the
kernel exposes an fd to user space.
Back to the patch set: the first part of this set makes some changes to the
vhost example, vhost-pmd and vhost, bit by bit, to remove the dependence
on the "virtio_net" struct. It then makes the final change of adapting the
current APIs to use "vid".
After that, "virtio_net_device_ops" is the only open struct left that an
application can access; therefore, it's the only place that might introduce
potential ABI breakage in the future when extended. Hence, I made a few more
(5) space reservations, to make sure we will not break ABI for a long time,
and hopefully, forever.
The last bit of this patch set is some cleanups, including the one from
Ilya.
v2: - exported ifname as well to fix a vhost-pmd issue reported by Rich
- separated the big patch that introduces several new APIs into some
small patches.
- updated release note
- updated version.map
Thanks.
--yliu
---
Ilya Maximets (1):
vhost: make buf vector for scatter Rx local
Yuanhan Liu (19):
vhost: declare backend with int type
vhost: set/reset dev flags internally
vhost: declare device fh as int
examples/vhost: make a copy of virtio device id
vhost: rename device fh to vid
vhost: get device by vid only
vhost: move vhost device ctx to cuse
vhost: introduce new API to export numa node
vhost: introduce new API to export number of queues
vhost: introduce new API to export ifname
vhost: introduce new API to export queue free entries
vhost: remove dependency on priv field
vhost: export vid as the only interface to applications
vhost: hide internal structs/macros/functions
vhost: remove unnecessary fields
vhost: remove virtio-net.h
vhost: reserve few more space for future extension
examples/tep_term: adapt to new vhost ABI/API changes
vhost: per device virtio net header len
doc/guides/rel_notes/release_16_07.rst | 11 +-
drivers/net/vhost/rte_eth_vhost.c | 79 ++++-----
examples/tep_termination/main.c | 83 +++++-----
examples/tep_termination/main.h | 5 +-
examples/tep_termination/vxlan_setup.c | 20 +--
examples/tep_termination/vxlan_setup.h | 6 +-
examples/vhost/main.c | 116 +++++++------
examples/vhost/main.h | 3 +-
lib/librte_vhost/Makefile | 2 +-
lib/librte_vhost/rte_vhost_version.map | 10 ++
lib/librte_vhost/rte_virtio_net.h | 223 +++++++------------------
lib/librte_vhost/vhost-net.h | 201 ++++++++++++++++++----
lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 83 +++++-----
lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 30 ++--
lib/librte_vhost/vhost_cuse/virtio-net-cdev.h | 12 +-
lib/librte_vhost/vhost_rxtx.c | 133 ++++++++-------
lib/librte_vhost/vhost_user/vhost-net-user.c | 53 +++---
lib/librte_vhost/vhost_user/vhost-net-user.h | 2 +
lib/librte_vhost/vhost_user/virtio-net-user.c | 64 +++----
lib/librte_vhost/vhost_user/virtio-net-user.h | 18 +-
lib/librte_vhost/virtio-net.c | 229 +++++++++++++++++---------
lib/librte_vhost/virtio-net.h | 43 -----
22 files changed, 752 insertions(+), 674 deletions(-)
delete mode 100644 lib/librte_vhost/virtio-net.h
--
1.9.0
^ permalink raw reply [relevance 9%]
* Re: [dpdk-dev] RFC: DPDK Long Term Support
2016-06-06 14:21 4% ` Thomas Monjalon
@ 2016-06-06 15:07 5% ` Neil Horman
0 siblings, 0 replies; 200+ results
From: Neil Horman @ 2016-06-06 15:07 UTC (permalink / raw)
To: Thomas Monjalon
Cc: dev, Mcnamara, John, Christian Ehrhardt, Markos Chandras,
Panu Matilainen
On Mon, Jun 06, 2016 at 04:21:11PM +0200, Thomas Monjalon wrote:
> 2016-06-06 09:47, Neil Horman:
> > On Mon, Jun 06, 2016 at 11:27:29AM +0200, Thomas Monjalon wrote:
> > > 2016-06-05 14:15, Neil Horman:
> > > > On Fri, Jun 03, 2016 at 03:07:49PM +0000, Mcnamara, John wrote:
> > > > > Introduction
> > > > > ------------
> > > > >
> > > > > This document sets out a proposal for a DPDK Long Term Support release (LTS).
> > > > >
> > > > > The purpose of the DPDK LTS will be to maintain a stable release of DPDK with
> > > > > backported bug fixes over an extended period of time. This will provide
> > > > > downstream consumers of DPDK with a stable target on which to base
> > > > > applications or packages.
> > > [...]
> > > > I'm not opposed to an LTS release, but it seems to be re-solving the issue of
> > > > ABI breakage. That is to say, there is alreay a process in place for managing
> > > > ABI changes to the DPDK, which is designed to help ensure that:
> > > >
> > > > 1) ABI changes are signaled at least 2 releases early
> > > > 2) ABI changes whenever possible are designed such that backward compatibility
> > > > versions can be encoded at the same time with versioning tags
> > >
> > > Sorry I don't understand your point.
> > > We are talking about two different things:
> > > 1/ ABI care for each new major release
> > > 2/ Minor release for bug fixes
> > >
> > > I think both may exist.
> > >
> > Sure, they can exist together (they being both an ABI backwards compatible HEAD
> > and a set of LTS releases). The point I'm trying to make is that if you do your
> > ABI compatible HEAD well enough, you don't really need an LTS release.
> >
> > Thats not to say that you can't do both, but an LTS release is a significant
> > workload item, especially given the rapid pace of change in HEAD. The longer
> > you maintain an LTS release, the more difficult "minor" bugfixes are to
> > integrate, especially if you wind up skipping any ABI breaking patches. I think
> > its worth calling attention to that as this approach gets considered.
> >
> > > > Those two mechanism are expressly intended to allow application upgrades of DPDK
> > > > libraries without worrying about ABI breakage. While LTS releases are a fine
> > > > approach for some things, they sacrifice upstream efficiency (by creating work
> > > > for backporting teams), while allowing upstream developers more leverage to just
> > > > create ABI breaking changes on a whim, ignoring the existing ABI compatibility
> > > > mechanism
> > >
> > > No it was not stated that upstream developers should ignore ABI compatibility.
> > > Do you mean having a stable branch means ABI preservation for the next major
> > > release is less important?
> > >
> > I never stated that developers should ignore ABI compatibility, I stated that
> > creating an LTS release will make it that much easier for developers to do so.
> >
> > And I think, pragmatically speaking, that is a concern. Given that the
> > existance of an LTS release will make it tempting for developers to simply
> > follow the deprecation process rather than try to create ABI backward compatible
> > paths.
> >
> > Looking at the git history, it seems clear to me that this is already happening.
> > I'm able to find a multitude of instances in which the deprecation process has
> > been followed reasonably well, but I can find no instances in which any efforts
> > have been made for backward compatibility.
>
> There were some examples of backward compatibility in hash and lpm libraries.
>
Ok, apologies, but you still see my point: there is a relatively small number of
instances of creating backward compatibility among a much larger set of easier
deprecate-and-replace instances. It's not really having the effect it was
intended to.
> > > > LTS is a fine process for projects in which API/ABI breakage is either uncommon
> > > > or fairly isolated, but that in my mind doesn't really describe DPDK.
> > >
> > > Yes API/ABI breakages are still common in DPDK.
> > > So it's even more important to have some stable branches.
> >
> > We seem to be comming to different conclusions based on the same evidence. We
> > agree that API/ABI changes continue to be frequent ocurances, but my position is
> > that we already have a process in place to mitigate that, which is simply not
> > being used (i.e. versioning symbols to provide backward compatible paths),
> > whereas you seem to be asserting that an LTS model will allow for ABI stabiilty
> > and bug fixes.
> >
> > While I don't disagree with that statement (LTS does provide both of those
> > things if the maintainer does it properly), I'm forced to ask the question,
> > before we solve this problem in a new way,
>
> The following questions are interesting but please don't assume the stable
> branch address the same issue as ABI compat.
Given your perspective on what LTS/stable branches should be, I absolutely agree,
but that's not what John M. was proposing. In his initial proposal, he
specifically called out which changes were acceptable:
What changes should not be backported
-------------------------------------
* API or ABI breaking changes.
* Features should not be backported. Unless:
* There is a justifiable use case (for example a new PMD).
* The change is non-invasive.
* The work of preparing the backport is done by the proposer.
* There is support within the community.
The above list in my mind amounts to "Any change that there is sufficient
consumer demand for and doesn't present too much validation difficulty, except
ABI or API breaking changes".
While there's nothing really wrong with that, if we want to go down that path,
that really says to me that this is a way around ABI compatibility problems,
because the inclusion of any other fix, given sufficient demand, can be
potentially justified. So, in John's proposal, a stable branch / LTS release is
going to effectively be a way to allow consumers to stay on one API/ABI level
for a longer period of time before having to make a major change to catch up to
the HEAD release.
> In each major release, we add some new bugs because of new features, even
> if the ABI is kept.
> In a minor stable release there are only some bug fixes. So the only way
> to have a "bug free" version in a stable environment, is to do some
> maintenance in a stable branch.
>
Again, I agree with your perspective on what a stable branch should be, but
that's not what John was proposing, and that's what I'm raising a concern about.
> > lets ask why the existing way isn't
> > being used. Do developers just not care about backwards compatibility? Is the
> > process to hard? Something else? I really don't like the idea of abandoning
> > what currently exists to replace it with something else, without first
> > addressing why what we have isn't working.
>
> We can address both. But I strongly think the ABI compat is another topic.
I agree it can be a separate topic, but given the proposal here, it seems like
an awfully tempting way to avoid having to address it. I'm not saying it's a bad
plan, mind you, just that ABI compatibility is something that does need to be
kept at the forefront, because it still changes often (more often than it has
to).
Neil
>
^ permalink raw reply [relevance 5%]
* Re: [dpdk-dev] RFC: DPDK Long Term Support
2016-06-06 13:47 5% ` Neil Horman
@ 2016-06-06 14:21 4% ` Thomas Monjalon
2016-06-06 15:07 5% ` Neil Horman
0 siblings, 1 reply; 200+ results
From: Thomas Monjalon @ 2016-06-06 14:21 UTC (permalink / raw)
To: Neil Horman
Cc: dev, Mcnamara, John, Christian Ehrhardt, Markos Chandras,
Panu Matilainen
2016-06-06 09:47, Neil Horman:
> On Mon, Jun 06, 2016 at 11:27:29AM +0200, Thomas Monjalon wrote:
> > 2016-06-05 14:15, Neil Horman:
> > > On Fri, Jun 03, 2016 at 03:07:49PM +0000, Mcnamara, John wrote:
> > > > Introduction
> > > > ------------
> > > >
> > > > This document sets out a proposal for a DPDK Long Term Support release (LTS).
> > > >
> > > > The purpose of the DPDK LTS will be to maintain a stable release of DPDK with
> > > > backported bug fixes over an extended period of time. This will provide
> > > > downstream consumers of DPDK with a stable target on which to base
> > > > applications or packages.
> > [...]
> > > I'm not opposed to an LTS release, but it seems to be re-solving the issue of
> > > ABI breakage. That is to say, there is alreay a process in place for managing
> > > ABI changes to the DPDK, which is designed to help ensure that:
> > >
> > > 1) ABI changes are signaled at least 2 releases early
> > > 2) ABI changes whenever possible are designed such that backward compatibility
> > > versions can be encoded at the same time with versioning tags
> >
> > Sorry I don't understand your point.
> > We are talking about two different things:
> > 1/ ABI care for each new major release
> > 2/ Minor release for bug fixes
> >
> > I think both may exist.
> >
> Sure, they can exist together (they being both an ABI backwards compatible HEAD
> and a set of LTS releases). The point I'm trying to make is that if you do your
> ABI compatible HEAD well enough, you don't really need an LTS release.
>
> Thats not to say that you can't do both, but an LTS release is a significant
> workload item, especially given the rapid pace of change in HEAD. The longer
> you maintain an LTS release, the more difficult "minor" bugfixes are to
> integrate, especially if you wind up skipping any ABI breaking patches. I think
> its worth calling attention to that as this approach gets considered.
>
> > > Those two mechanism are expressly intended to allow application upgrades of DPDK
> > > libraries without worrying about ABI breakage. While LTS releases are a fine
> > > approach for some things, they sacrifice upstream efficiency (by creating work
> > > for backporting teams), while allowing upstream developers more leverage to just
> > > create ABI breaking changes on a whim, ignoring the existing ABI compatibility
> > > mechanism
> >
> > No it was not stated that upstream developers should ignore ABI compatibility.
> > Do you mean having a stable branch means ABI preservation for the next major
> > release is less important?
> >
> I never stated that developers should ignore ABI compatibility, I stated that
> creating an LTS release will make it that much easier for developers to do so.
>
> And I think, pragmatically speaking, that is a concern. Given that the
> existance of an LTS release will make it tempting for developers to simply
> follow the deprecation process rather than try to create ABI backward compatible
> paths.
>
> Looking at the git history, it seems clear to me that this is already happening.
> I'm able to find a multitude of instances in which the deprecation process has
> been followed reasonably well, but I can find no instances in which any efforts
> have been made for backward compatibility.
There were some examples of backward compatibility in hash and lpm libraries.
> > > LTS is a fine process for projects in which API/ABI breakage is either uncommon
> > > or fairly isolated, but that in my mind doesn't really describe DPDK.
> >
> > Yes API/ABI breakages are still common in DPDK.
> > So it's even more important to have some stable branches.
>
> We seem to be comming to different conclusions based on the same evidence. We
> agree that API/ABI changes continue to be frequent ocurances, but my position is
> that we already have a process in place to mitigate that, which is simply not
> being used (i.e. versioning symbols to provide backward compatible paths),
> whereas you seem to be asserting that an LTS model will allow for ABI stabiilty
> and bug fixes.
>
> While I don't disagree with that statement (LTS does provide both of those
> things if the maintainer does it properly), I'm forced to ask the question,
> before we solve this problem in a new way,
The following questions are interesting, but please don't assume the stable
branch addresses the same issue as ABI compat.
In each major release, we add some new bugs because of new features, even
if the ABI is kept.
In a minor stable release there are only some bug fixes. So the only way
to have a "bug free" version in a stable environment, is to do some
maintenance in a stable branch.
> lets ask why the existing way isn't
> being used. Do developers just not care about backwards compatibility? Is the
> process to hard? Something else? I really don't like the idea of abandoning
> what currently exists to replace it with something else, without first
> addressing why what we have isn't working.
We can address both. But I strongly think the ABI compat is another topic.
^ permalink raw reply [relevance 4%]
* Re: [dpdk-dev] RFC: DPDK Long Term Support
2016-06-06 9:27 5% ` Thomas Monjalon
@ 2016-06-06 13:47 5% ` Neil Horman
2016-06-06 14:21 4% ` Thomas Monjalon
0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2016-06-06 13:47 UTC (permalink / raw)
To: Thomas Monjalon
Cc: dev, Mcnamara, John, Christian Ehrhardt, Markos Chandras,
Panu Matilainen
On Mon, Jun 06, 2016 at 11:27:29AM +0200, Thomas Monjalon wrote:
> 2016-06-05 14:15, Neil Horman:
> > On Fri, Jun 03, 2016 at 03:07:49PM +0000, Mcnamara, John wrote:
> > > Introduction
> > > ------------
> > >
> > > This document sets out a proposal for a DPDK Long Term Support release (LTS).
> > >
> > > The purpose of the DPDK LTS will be to maintain a stable release of DPDK with
> > > backported bug fixes over an extended period of time. This will provide
> > > downstream consumers of DPDK with a stable target on which to base
> > > applications or packages.
> [...]
> > I'm not opposed to an LTS release, but it seems to be re-solving the issue of
> > ABI breakage. That is to say, there is alreay a process in place for managing
> > ABI changes to the DPDK, which is designed to help ensure that:
> >
> > 1) ABI changes are signaled at least 2 releases early
> > 2) ABI changes whenever possible are designed such that backward compatibility
> > versions can be encoded at the same time with versioning tags
>
> Sorry I don't understand your point.
> We are talking about two different things:
> 1/ ABI care for each new major release
> 2/ Minor release for bug fixes
>
> I think both may exist.
>
Sure, they can exist together (they being both an ABI backwards compatible HEAD
and a set of LTS releases). The point I'm trying to make is that if you do your
ABI compatible HEAD well enough, you don't really need an LTS release.
That's not to say that you can't do both, but an LTS release is a significant
workload item, especially given the rapid pace of change in HEAD. The longer
you maintain an LTS release, the more difficult "minor" bugfixes are to
integrate, especially if you wind up skipping any ABI breaking patches. I think
it's worth calling attention to that as this approach gets considered.
> > Those two mechanism are expressly intended to allow application upgrades of DPDK
> > libraries without worrying about ABI breakage. While LTS releases are a fine
> > approach for some things, they sacrifice upstream efficiency (by creating work
> > for backporting teams), while allowing upstream developers more leverage to just
> > create ABI breaking changes on a whim, ignoring the existing ABI compatibility
> > mechanism
>
> No it was not stated that upstream developers should ignore ABI compatibility.
> Do you mean having a stable branch means ABI preservation for the next major
> release is less important?
>
I never stated that developers should ignore ABI compatibility, I stated that
creating an LTS release will make it that much easier for developers to do so.
And I think, pragmatically speaking, that is a concern, given that the
existence of an LTS release will make it tempting for developers to simply
follow the deprecation process rather than try to create ABI backward compatible
paths.
Looking at the git history, it seems clear to me that this is already happening.
I'm able to find a multitude of instances in which the deprecation process has
been followed reasonably well, but I can find no instances in which any efforts
have been made for backward compatibility.
> > LTS is a fine process for projects in which API/ABI breakage is either uncommon
> > or fairly isolated, but that in my mind doesn't really describe DPDK.
>
> Yes API/ABI breakages are still common in DPDK.
> So it's even more important to have some stable branches.
We seem to be coming to different conclusions based on the same evidence. We
agree that API/ABI changes continue to be frequent occurrences, but my position is
that we already have a process in place to mitigate that, which is simply not
being used (i.e. versioning symbols to provide backward compatible paths),
whereas you seem to be asserting that an LTS model will allow for ABI stability
and bug fixes.
While I don't disagree with that statement (LTS does provide both of those
things if the maintainer does it properly), I'm forced to ask the question:
before we solve this problem in a new way, let's ask why the existing way isn't
being used. Do developers just not care about backwards compatibility? Is the
process too hard? Something else? I really don't like the idea of abandoning
what currently exists to replace it with something else, without first
addressing why what we have isn't working.
Neil
>
^ permalink raw reply [relevance 5%]
* Re: [dpdk-dev] RFC: DPDK Long Term Support
2016-06-03 16:05 0% ` Thomas Monjalon
@ 2016-06-06 11:49 0% ` Yuanhan Liu
0 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-06-06 11:49 UTC (permalink / raw)
To: Thomas Monjalon
Cc: Mcnamara, John, dev, Christian Ehrhardt, Markos Chandras,
Panu Matilainen
On Fri, Jun 03, 2016 at 06:05:15PM +0200, Thomas Monjalon wrote:
> Hi,
>
> 2016-06-03 15:07, Mcnamara, John:
> > Introduction
> > ------------
> >
> > This document sets out a proposal for a DPDK Long Term Support release (LTS).
>
> In general, LTS refer to a longer maintenance than than regular one.
> Here we are talking to doing some maintenance as stable releases first.
> Currently we have no maintenance at all.
> So I suggest to differentiate "stable branches" and "LTS" for some stable branches.
>
> > The purpose of the DPDK LTS will be to maintain a stable release of DPDK with
> > backported bug fixes over an extended period of time. This will provide
> > downstream consumers of DPDK with a stable target on which to base
> > applications or packages.
> [...]
> > The proposed maintainer for the LTS is Yuanhan Liu
> > <yuanhan.liu@linux.intel.com>.
>
> I wonder if Yuanhan is OK to maintain every stable releases which could be
> requested/needed?
I'm okay with that, since I assume the maintenance effort would be small: mainly
picking acked and tested *bug fix* patches.
> Or should we have other committers for the stable releases
> that Yuanhan would not want to maintain himself?
> The Linux model is to let people declare themselves when they want to maintain
> a stable branch.
I have no objection, though, if somebody volunteers as a stable branch
maintainer.
>
> > The proposed duration of the LTS support is 2 years.
>
> I think we should discuss the support duration for each release separately.
>
> > There will only be one LTS branch being maintained at any time. At the end of
> > the 2 year cycle the maintenance on the previous LTS will be wound down.
>
> Seems a bit too restrictive.
> Currently, there is no maintenance at all because nobody was volunteer.
> If Yuanhan is volunteer for a stable branch every 2 years, fine.
> If someone else is volunteer for other branches, why not let him do it?
>
> > The proposed initial LTS version will be DPDK 16.07. The next versions, based
> > on a 2 year cycle, will be DPDK 18.08, 20.08, etc.
>
> Let's do a first run with 16.07 and see later what we want to do next.
> How long time a stable branch must be announced before its initial release?
>
> > What changes should be backported
> > ---------------------------------
> >
> > * Bug fixes that don't break the ABI.
>
> And API?
> And behaviour (if not clearly documented in the API)?
Agreed, we should not include those changes, either.
>
> [...]
> > Developers submitting fixes to the mainline should also CC the maintainer so
> > that they can evaluate the patch. A <stable@dpdk.org> email address could be
> > provided for this so that it can be included as a CC in the commit messages
> > and documented in the Code Contribution Guidelines.
>
> Why?
> We must avoid putting too much restrictions on the contributors.
This was actually requested by me, modeled on the practice the Linux
kernel community follows. Here is the thing: the developer normally
knows better than a generic maintainer (assume it's me) whether a patch
applies to a stable branch or not. This is especially true for DPDK,
since we ask the developer to note down the commit that introduced the
bug by adding a fix line.
It wouldn't be a burden for an active contributor, as CCing related
people (including the right mailing list) is a good habit they already
have. For one-time contributors, it's okay if they don't know about it
or don't follow it.
In such a case, I guess we need help from the related subsystem
maintainer: if it's a good bug fix that applies to a stable branch,
and the contributor forgot to explicitly CC the stable mailing
list, the subsystem maintainer should forward it, or ask him to forward
it, to the stable mailing list.
The reason I'm asking is that, as a generic maintainer, I simply do not
have the energy to keep an eye on all patches: bear in mind that we get
thousands of emails per month on the dpdk dev mailing list; the number
for last month was 1808.
Doing so would make it possible for one person to maintain several
stable trees.
For more info, you could check linux/Documentation/stable_kernel_rules.txt.
>
> > Intel will provide validation engineers to test the LTS branch/tree. Tested
> > releases can be marked using a Git tag with an incremented revision number. For
> > example: 16.07.00_LTS -> 16.07.01_LTS. The testing cadence should be quarterly
> > but will be best effort only and dependent on available resources.
>
> Thanks
> It must not be just a tag. There should be an announce and a tarball ready
> to download.
Agreed.
--yliu
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] RFC: DPDK Long Term Support
2016-06-05 18:15 5% ` Neil Horman
@ 2016-06-06 9:27 5% ` Thomas Monjalon
2016-06-06 13:47 5% ` Neil Horman
0 siblings, 1 reply; 200+ results
From: Thomas Monjalon @ 2016-06-06 9:27 UTC (permalink / raw)
To: Neil Horman
Cc: dev, Mcnamara, John, Christian Ehrhardt, Markos Chandras,
Panu Matilainen
2016-06-05 14:15, Neil Horman:
> On Fri, Jun 03, 2016 at 03:07:49PM +0000, Mcnamara, John wrote:
> > Introduction
> > ------------
> >
> > This document sets out a proposal for a DPDK Long Term Support release (LTS).
> >
> > The purpose of the DPDK LTS will be to maintain a stable release of DPDK with
> > backported bug fixes over an extended period of time. This will provide
> > downstream consumers of DPDK with a stable target on which to base
> > applications or packages.
[...]
> I'm not opposed to an LTS release, but it seems to be re-solving the issue of
> ABI breakage. That is to say, there is alreay a process in place for managing
> ABI changes to the DPDK, which is designed to help ensure that:
>
> 1) ABI changes are signaled at least 2 releases early
> 2) ABI changes whenever possible are designed such that backward compatibility
> versions can be encoded at the same time with versioning tags
Sorry I don't understand your point.
We are talking about two different things:
1/ ABI care for each new major release
2/ Minor release for bug fixes
I think both may exist.
> Those two mechanism are expressly intended to allow application upgrades of DPDK
> libraries without worrying about ABI breakage. While LTS releases are a fine
> approach for some things, they sacrifice upstream efficiency (by creating work
> for backporting teams), while allowing upstream developers more leverage to just
> create ABI breaking changes on a whim, ignoring the existing ABI compatibility
> mechanism
No it was not stated that upstream developers should ignore ABI compatibility.
Do you mean having a stable branch means ABI preservation for the next major
release is less important?
> LTS is a fine process for projects in which API/ABI breakage is either uncommon
> or fairly isolated, but that in my mind doesn't really describe DPDK.
Yes API/ABI breakages are still common in DPDK.
So it's even more important to have some stable branches.
^ permalink raw reply [relevance 5%]
* Re: [dpdk-dev] RFC: DPDK Long Term Support
2016-06-03 15:07 4% [dpdk-dev] RFC: DPDK Long Term Support Mcnamara, John
@ 2016-06-03 16:05 0% ` Thomas Monjalon
2016-06-06 11:49 0% ` Yuanhan Liu
2016-06-03 18:17 3% ` Matthew Hall
2016-06-05 18:15 5% ` Neil Horman
2 siblings, 1 reply; 200+ results
From: Thomas Monjalon @ 2016-06-03 16:05 UTC (permalink / raw)
To: Mcnamara, John; +Cc: dev, Christian Ehrhardt, Markos Chandras, Panu Matilainen
Hi,
2016-06-03 15:07, Mcnamara, John:
> Introduction
> ------------
>
> This document sets out a proposal for a DPDK Long Term Support release (LTS).
In general, LTS refers to a longer maintenance period than the regular one.
Here we are talking about doing some maintenance as stable releases first.
Currently we have no maintenance at all.
So I suggest differentiating "stable branches" from "LTS" for some stable branches.
> The purpose of the DPDK LTS will be to maintain a stable release of DPDK with
> backported bug fixes over an extended period of time. This will provide
> downstream consumers of DPDK with a stable target on which to base
> applications or packages.
[...]
> The proposed maintainer for the LTS is Yuanhan Liu
> <yuanhan.liu@linux.intel.com>.
I wonder if Yuanhan is OK to maintain every stable release which could be
requested/needed? Or should we have other committers for the stable releases
that Yuanhan would not want to maintain himself?
The Linux model is to let people declare themselves when they want to maintain
a stable branch.
> The proposed duration of the LTS support is 2 years.
I think we should discuss the support duration for each release separately.
> There will only be one LTS branch being maintained at any time. At the end of
> the 2 year cycle the maintenance on the previous LTS will be wound down.
Seems a bit too restrictive.
Currently, there is no maintenance at all because nobody has volunteered.
If Yuanhan volunteers for a stable branch every 2 years, fine.
If someone else volunteers for other branches, why not let him do it?
> The proposed initial LTS version will be DPDK 16.07. The next versions, based
> on a 2 year cycle, will be DPDK 18.08, 20.08, etc.
Let's do a first run with 16.07 and see later what we want to do next.
How long before its initial release must a stable branch be announced?
> What changes should be backported
> ---------------------------------
>
> * Bug fixes that don't break the ABI.
And API?
And behaviour (if not clearly documented in the API)?
[...]
> Developers submitting fixes to the mainline should also CC the maintainer so
> that they can evaluate the patch. A <stable@dpdk.org> email address could be
> provided for this so that it can be included as a CC in the commit messages
> and documented in the Code Contribution Guidelines.
Why?
We must avoid putting too much restrictions on the contributors.
> Intel will provide validation engineers to test the LTS branch/tree. Tested
> releases can be marked using a Git tag with an incremented revision number. For
> example: 16.07.00_LTS -> 16.07.01_LTS. The testing cadence should be quarterly
> but will be best effort only and dependent on available resources.
Thanks
It must not be just a tag. There should be an announcement and a tarball ready
to download.
[...]
> In order to reduce the testing effort the number of OSes which will be
> officially validated should be as small as possible. The proposal is that the
> following long term OSes are used for validation:
>
> (OSV reps please confirm.)
>
> * Ubuntu 16.04 LTS
> * RHEL 7.3
> * SuSE 11 SP4 or 12
> * FreeBSD 10.3
I'm sure there will be more validation in the field or from contributors.
[...]
> The LTS guidelines shall be reviewed after 1 year to adjust for any experiences
> from LTS maintainership.
Yes, that seems very reasonable.
Thanks
^ permalink raw reply [relevance 0%]
* [dpdk-dev] [PATCH 2/8] lib/librte_ether: defind RX/TX lock mode
@ 2016-06-06 5:40 4% ` Wenzhuo Lu
0 siblings, 0 replies; 200+ results
From: Wenzhuo Lu @ 2016-06-06 5:40 UTC (permalink / raw)
To: dev; +Cc: Wenzhuo Lu, Zhe Tao
Define lock mode for RX/TX queue. Because when resetting
the device we want the resetting thread to get the lock
of the RX/TX queue to make sure the RX/TX is stopped.
Using next ABI macro for this ABI change as it has too
much impact. 7 APIs and 1 global variable are impacted.
Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
Signed-off-by: Zhe Tao <zhe.tao@intel.com>
---
lib/librte_ether/rte_ethdev.h | 62 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 74e895f..4efb5e9 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -354,7 +354,12 @@ struct rte_eth_rxmode {
jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */
hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */
enable_scatter : 1, /**< Enable scatter packets rx handler */
+#ifndef RTE_NEXT_ABI
enable_lro : 1; /**< Enable LRO */
+#else
+ enable_lro : 1, /**< Enable LRO */
+ lock_mode : 1; /**< Using lock path */
+#endif
};
/**
@@ -634,11 +639,68 @@ struct rte_eth_txmode {
/**< If set, reject sending out tagged pkts */
hw_vlan_reject_untagged : 1,
/**< If set, reject sending out untagged pkts */
+#ifndef RTE_NEXT_ABI
hw_vlan_insert_pvid : 1;
/**< If set, enable port based VLAN insertion */
+#else
+ hw_vlan_insert_pvid : 1,
+ /**< If set, enable port based VLAN insertion */
+ lock_mode : 1;
+ /**< If set, using lock path */
+#endif
};
/**
+ * The macros for the RX/TX lock mode functions
+ */
+#ifdef RTE_NEXT_ABI
+#define RX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.rxmode.lock_mode ? \
+ func ## _lock : func)
+
+#define TX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.txmode.lock_mode ? \
+ func ## _lock : func)
+#else
+#define RX_LOCK_FUNCTION(dev, func) func
+
+#define TX_LOCK_FUNCTION(dev, func) func
+#endif
+
+/* Add the lock RX/TX function for VF reset */
+#define GENERATE_RX_LOCK(func, nic) \
+uint16_t func ## _lock(void *rx_queue, \
+ struct rte_mbuf **rx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _rx_queue *rxq = rx_queue; \
+ uint16_t nb_rx = 0; \
+ \
+ if (rte_spinlock_trylock(&rxq->rx_lock)) { \
+ nb_rx = func(rx_queue, rx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&rxq->rx_lock); \
+ } \
+ \
+ return nb_rx; \
+}
+
+#define GENERATE_TX_LOCK(func, nic) \
+uint16_t func ## _lock(void *tx_queue, \
+ struct rte_mbuf **tx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _tx_queue *txq = tx_queue; \
+ uint16_t nb_tx = 0; \
+ \
+ if (rte_spinlock_trylock(&txq->tx_lock)) { \
+ nb_tx = func(tx_queue, tx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&txq->tx_lock); \
+ } \
+ \
+ return nb_tx; \
+}
+
+/**
* A structure used to configure an RX ring of an Ethernet port.
*/
struct rte_eth_rxconf {
--
1.9.3
^ permalink raw reply [relevance 4%]
* Re: [dpdk-dev] RFC: DPDK Long Term Support
2016-06-03 15:07 4% [dpdk-dev] RFC: DPDK Long Term Support Mcnamara, John
2016-06-03 16:05 0% ` Thomas Monjalon
2016-06-03 18:17 3% ` Matthew Hall
@ 2016-06-05 18:15 5% ` Neil Horman
2016-06-06 9:27 5% ` Thomas Monjalon
2 siblings, 1 reply; 200+ results
From: Neil Horman @ 2016-06-05 18:15 UTC (permalink / raw)
To: Mcnamara, John; +Cc: dev, Christian Ehrhardt, Markos Chandras, Panu Matilainen
On Fri, Jun 03, 2016 at 03:07:49PM +0000, Mcnamara, John wrote:
> Introduction
> ------------
>
> This document sets out a proposal for a DPDK Long Term Support release (LTS).
>
> The purpose of the DPDK LTS will be to maintain a stable release of DPDK with
> backported bug fixes over an extended period of time. This will provide
> downstream consumers of DPDK with a stable target on which to base
> applications or packages.
>
> As with previous DPDK guidelines this proposal is open for discussion within
> the community. The consensus view will be included in the DPDK documentation
> as a guideline.
>
>
> LTS Maintainer
> --------------
>
> The proposed maintainer for the LTS is Yuanhan Liu
> <yuanhan.liu@linux.intel.com>.
>
>
> LTS Duration
> ------------
>
> The proposed duration of the LTS support is 2 years.
>
> There will only be one LTS branch being maintained at any time. At the end of
> the 2 year cycle the maintenance on the previous LTS will be wound down.
>
>
> LTS Version
> ------------
>
> The proposed initial LTS version will be DPDK 16.07. The next versions, based
> on a 2 year cycle, will be DPDK 18.08, 20.08, etc.
>
>
> What changes should be backported
> ---------------------------------
>
> * Bug fixes that don't break the ABI.
>
>
> What changes should not be backported
> -------------------------------------
>
> * API or ABI breaking changes.
>
> * Features should not be backported. Unless:
>
> * There is a justifiable use case (for example a new PMD).
> * The change is non-invasive.
> * The work of preparing the backport is done by the proposer.
> * There is support within the community.
>
>
> Role of the maintainer
> ----------------------
>
> * The maintainer will evaluate fixes to the DPDK master submitted by the
> fixing developer and apply them to the LTS branch/tree.
>
> * The maintainer will evaluate backported patches from downstream consumers
> and apply them to the LTS branch/tree.
>
> * The maintainer will not backport non-trivial fixes without assistance from
> the downstream consumers or requester.
>
>
> Role of the downstream consumers
> --------------------------------
>
> Developers submitting fixes to the mainline should also CC the maintainer so
> that they can evaluate the patch. A <stable@dpdk.org> email address could be
> provided for this so that it can be included as a CC in the commit messages
> and documented in the Code Contribution Guidelines.
>
> The downstream consumers (OSVs and DPDK dependent application and framework
> developers) should identify issues in the field that have been fixed in the
> mainline release and report them to the maintainer. They should, ideally,
> assist with backporting any required fixes.
>
>
> Testing
> -------
>
> Intel will provide validation engineers to test the LTS branch/tree. Tested
> releases can be marked using a Git tag with an incremented revision number. For
> example: 16.07.00_LTS -> 16.07.01_LTS. The testing cadence should be quarterly
> but will be best effort only and dependent on available resources.
>
>
> Validated OSes
> --------------
>
> In order to reduce the testing effort the number of OSes which will be
> officially validated should be as small as possible. The proposal is that the
> following long term OSes are used for validation:
>
> (OSV reps please confirm.)
>
> * Ubuntu 16.04 LTS
> * RHEL 7.3
> * SuSE 11 SP4 or 12
> * FreeBSD 10.3
>
> Fixes for newer OSes, kernels (and associated KNI fixes), and newer GCC/Clang
> versions can be backported but the validation effort will be limited to the
> above platforms.
>
>
> Release Notes
> -------------
>
> The LTS release notes should be updated to include a section with backported
> fixes. Patches for backporting should include additions to the release notes
> like patches to the mainline branch.
>
>
> LTS Review
> ----------
>
> The LTS guidelines shall be reviewed after 1 year to adjust for any experiences
> from LTS maintainership.
>
>
>
>
>
>
>
I'm not opposed to an LTS release, but it seems to be re-solving the issue of
ABI breakage. That is to say, there is already a process in place for managing
ABI changes to the DPDK, which is designed to help ensure that:
1) ABI changes are signaled at least 2 releases early
2) ABI changes whenever possible are designed such that backward compatibility
versions can be encoded at the same time with versioning tags
Those two mechanisms are expressly intended to allow application upgrades of DPDK
libraries without worrying about ABI breakage. While LTS releases are a fine
approach for some things, they sacrifice upstream efficiency (by creating work
for backporting teams), while allowing upstream developers more leverage to just
create ABI breaking changes on a whim, ignoring the existing ABI compatibility
mechanism.
LTS is a fine process for projects in which API/ABI breakage is either uncommon
or fairly isolated, but that in my mind doesn't really describe DPDK.
Neil
^ permalink raw reply [relevance 5%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
2016-06-03 19:23 0% ` Wiles, Keith
@ 2016-06-03 19:28 0% ` Arnon Warshavsky
0 siblings, 0 replies; 200+ results
From: Arnon Warshavsky @ 2016-06-03 19:28 UTC (permalink / raw)
To: Wiles, Keith
Cc: Neil Horman, Panu Matilainen, Richardson, Bruce, Thomas Monjalon,
Yuanhan Liu, dev, Tan, Jianfeng, Stephen Hemminger,
Christian Ehrhardt, Olivier Matz
I
On Fri, Jun 3, 2016 at 10:23 PM, Wiles, Keith <keith.wiles@intel.com> wrote:
>
> On 6/3/16, 2:18 PM, "Neil Horman" <nhorman@tuxdriver.com> wrote:
>
> >On Fri, Jun 03, 2016 at 07:07:50PM +0000, Wiles, Keith wrote:
> >> On 6/3/16, 2:00 PM, "dev on behalf of Wiles, Keith" <
> dev-bounces@dpdk.org on behalf of keith.wiles@intel.com> wrote:
> >>
> >> >On 6/3/16, 1:52 PM, "Arnon Warshavsky" <arnon@qwilt.com<mailto:
> arnon@qwilt.com>> wrote:
> >> >
> >> >
> >> >
> >> >On Fri, Jun 3, 2016 at 9:38 PM, Neil Horman <nhorman@tuxdriver.com
> <mailto:nhorman@tuxdriver.com>> wrote:
> >> >On Fri, Jun 03, 2016 at 06:29:13PM +0000, Wiles, Keith wrote:
> >> >>
> >> >> On 6/3/16, 12:44 PM, "Neil Horman" <nhorman@tuxdriver.com<mailto:
> nhorman@tuxdriver.com>> wrote:
> >> >>
> >> >> >On Fri, Jun 03, 2016 at 04:04:14PM +0000, Wiles, Keith wrote:
> >> >> >> Sorry, I deleted all of the text as it was getting a bit long.
> >> >> >>
> >> >> >> Here are my thoughts as of now, which is a combination of many
> suggestions I read from everyone’s emails. I hope this is not too hard to
> understand.
> >> >> >>
> >> >> >> - Break out the current command line options out of the DPDK
> common code and move into a new lib.
> >> >> >> - At this point I was thinking of keeping the
> rte_eal_init(args, argv) API and just have it pass the args/argv to the new
> lib to create the data storage.
> >> >> >> - Maybe move the rte_eal_init() API to the new lib or keep
> it in the common eal code. Do not want to go hog wild.
> >> >> >> - The rte_eal_init(args, argv) would then call to the new API
> rte_eal_initialize(void), which in turn queries the data storage. (still
> thinking here)
> >> >> >These three items seem to be the exact opposite of my suggestion.
> The point of
> >> >> >this change was to segregate the parsing of configuration away from
> the
> >> >> >initalization dpdk using that configurtion. By keeping
> rte_eal_init in such a
> >> >> >way that the command line is directly passed into it, you've not
> changed that
> >> >> >implicit binding to command line options.
> >> >>
> >> >> Neil,
> >> >>
> >> >> You maybe reading the above wrong or I wrote it wrong, which is a
> high possibility. I want to move the command line parsing out of DPDK an
> into a library, but I still believe I need to provide some backward
> compatibility for ABI and to reduce the learning curve. The current
> applications can still call the rte_eal_init(), which then calls the new
> lib parser for dpdk command line options and then calls
> rte_eal_initialize() or move to the new API rte_eal_initialize() preceded
> by a new library call to parse the old command line args. At some point we
> can deprecate the rte_eal_init() if we think it is reasonable.
> >> >>
> >> >> >
> >> >> >I can understand if you want to keep rte_eal_init as is for ABI
> purposes, but
> >> >> >then you should create an rte_eal_init2(foo), where foo is some
> handle to in
> >> >> >memory parsed configuration, so that applications can preform that
> separation.
> >> >>
> >> >> I think you describe what I had planned here. The
> rte_eal_initialize() routine is the new rte_eal_init2() API and the
> rte_eal_init() was only for backward compatibility was my thinking. I
> figured the argument to rte_eal_initialize() would be something to be
> decided, but it will mostly likely be some type of pointer to the storage.
> >> >>
> >> >> I hope that clears that up, but let me know.
> >> >>
> >> >yes, that clarifies your thinking, and I agree with it. Thank you!
> >> >Neil
> >> >
> >> >> ++Keith
> >> >>
> >> >> >
> >> >> >Neil
> >> >> >
> >> >> >> - The example apps args needs to be passed to the examples as
> is for now, then we can convert them one at a time if needed.
> >> >> >>
> >> >> >> - I would like to keep the storage of the data separate from the
> file parser as they can use the ‘set’ routines to build the data storage up.
> >> >> >> - Keeping them split allows for new parsers to be created,
> while keeping the data storage from changing.
> >> >> >> - The rte_cfg code could be modified to use the new configuration
> if someone wants to take on that task ☺
> >> >> >>
> >> >> >> - Next is the data storage and how we can access the data in a
> clean simple way.
> >> >> >> - I want to have some simple level of hierarchy in the data.
> >> >> >> - Having a string containing at least two levels
> “primary:secondary”.
> >> >> >> - Primary string is something like “EAL” or “Pktgen” or
> “testpmd” to divide the data storage into logical major groups.
> >> >> >> - The primary allows us to have groups and then we can
> have common secondary strings in different groups if needed.
> >> >> >> - Secondary string can be whatever the developer of that
> group would like e.g. simple “EAL:foobar”, two levels “testpmd:foo.bar”
> >> >> >>
> >> >> >> - The secondary string is treated as a single string if it has
> a hierarchy or not, but referencing a single value in the data storage.
> >> >> >> - Key value pairs (KVP) or a hashmap data store.
> >> >> >> - The key here is the whole string “EAL:foobar” not just
> “foobar” secondary string.
> >> >> >> - If we want to have the two split I am ok with that
> as well meaning the API would be:
> >> >> >> rte_map_get(mapObj, “EAL”, “foo.bar”);
> >> >> >> rte_map_set(mapObj, “EAL”, “foo.bar”, value);
> >> >> >> - Have the primary as a different section in the data
> store, would allow for dumping that section maybe easier, not sure.
> >> >> >> - I am leaning toward
> >> >> >> - Not going to try splitting up the string or parse it as it
> is up to the developer to make it unique in the data store.
> >> >> >> - Use a code design to make the strings simple to use without
> having typos be a problem.
> >> >> >> - Not sure what the design is yet, but I do not want to have
> to concat two string or split strings in the code.
> >> >> >>
> >> >> >> This is as far as I have gotten and got tired of typing ☺
> >> >> >>
> >> >> >> I hope this will satisfy most everyone’s needs for now.
> >> >> >>
> >> >> >>
> >> >> >> Regards,
> >> >> >> Keith
> >> >> >>
> >> >> >>
> >> >> >>
> >> >> >
> >> >>
> >> >>
> >> >>
> >> >
> >> >Keith
> >> >What about the data types of the values?
> >> >I would assume that as a library it can provide the service of typed
> get/set and not leave conversion and validation to the app.
> >> >
> >> >rte_map_get_int(map,section,key)
> >> >rte_map_get_double(...)
> >> >rte_map_get_string(...)
> >> >rte_map_get_bytes(...,destBuff , destBuffSize) //e.g byte array of RSS
> key
> >> >This may also allow some basic validity of the configuration file
> >> >Another point I forgot about is default values.
> >> >We sometimes use a notation where the app also specifies a default
> value in case the configuration did not specify it
> >> > rte_map_get_int(map,section,key , defaultValue )
> >> >and specify if this was a mandatory that has no default
> >> > rte_map_get_int_crash_if_missing (map,section,key)
> >> >
> >> >
> >> >
> >> >
> >> >/Arnon
> >> >
> >> >Arnon,
> >> >
> >> >Yes, I too was thinking about access type APIs, but had not come to a
> full conclusion yet. As long as the API for get/put can return any value,
> we can add a layer on top of these primary get/put APIs to do some basic
> type checking. This way the developer can add his/her own type checking
> APIs or we provide a couple basic types for simple values.
> >>
> >> One more thing. I had not thought about default values as the defaults
> are handle directly by the code when an option is not applied. I think it
> should be left up to the developer to add default values to the storage or
> handle it when an option is not found in the storage.
> >>
> >> If I understand your code above the API would pass in a default value
> if one did not exist in the storage, which I guess is reasonable. Anyone
> think this is a good idea or not?
> >>
> >
> >I'm not opposed to default values, but it seems to me that if we are
> splitting
> >out a configuration storage library from dpdk, part of the initzliation
> of that
> >library can be installing default values. That is to say, instead of
> having the
> >code specific areas assume a default value if none is present in the
> config, an
> >init function for the configuration storage library would just populate
> the
> >keystore. That way all the dpdk itself has to do is a key lookup.
>
> +1
>
> If someone needs or wants default values in the API call then a wrapper
> functions around the basic keystore APIs can be done by the developer or we
> can add a new set of APIs to provide that type of feature, just like the
> variable type APIs. Just as long as the basic APIs do not exclude we can
> add it later.
>
> >
> >Neil
> >
> >> >
> >> >Does that make sense?
> >> >
> >> >++Keith
> >> >
> >>
> >>
> >>
> >
>
>
>
>
Yes.
I like to use the getValue(myAlternativeDefault) concept when I have
different granularity defaults coming from different hierarchies,
but per dpdk as a single configuration separation to an init phase indeed
makes more sense, so +1 here too
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
2016-06-03 19:18 0% ` Neil Horman
@ 2016-06-03 19:23 0% ` Wiles, Keith
2016-06-03 19:28 0% ` Arnon Warshavsky
0 siblings, 1 reply; 200+ results
From: Wiles, Keith @ 2016-06-03 19:23 UTC (permalink / raw)
To: Neil Horman
Cc: Arnon Warshavsky, Panu Matilainen, Richardson, Bruce,
Thomas Monjalon, Yuanhan Liu, dev, Tan, Jianfeng,
Stephen Hemminger, Christian Ehrhardt, Olivier Matz
On 6/3/16, 2:18 PM, "Neil Horman" <nhorman@tuxdriver.com> wrote:
>On Fri, Jun 03, 2016 at 07:07:50PM +0000, Wiles, Keith wrote:
>> On 6/3/16, 2:00 PM, "dev on behalf of Wiles, Keith" <dev-bounces@dpdk.org on behalf of keith.wiles@intel.com> wrote:
>>
>> >On 6/3/16, 1:52 PM, "Arnon Warshavsky" <arnon@qwilt.com<mailto:arnon@qwilt.com>> wrote:
>> >
>> >
>> >
>> >On Fri, Jun 3, 2016 at 9:38 PM, Neil Horman <nhorman@tuxdriver.com<mailto:nhorman@tuxdriver.com>> wrote:
>> >On Fri, Jun 03, 2016 at 06:29:13PM +0000, Wiles, Keith wrote:
>> >>
>> >> On 6/3/16, 12:44 PM, "Neil Horman" <nhorman@tuxdriver.com<mailto:nhorman@tuxdriver.com>> wrote:
>> >>
>> >> >On Fri, Jun 03, 2016 at 04:04:14PM +0000, Wiles, Keith wrote:
>> >> >> Sorry, I deleted all of the text as it was getting a bit long.
>> >> >>
>> >> >> Here are my thoughts as of now, which is a combination of many suggestions I read from everyone’s emails. I hope this is not too hard to understand.
>> >> >>
>> >> >> - Break out the current command line options out of the DPDK common code and move into a new lib.
>> >> >> - At this point I was thinking of keeping the rte_eal_init(args, argv) API and just have it pass the args/argv to the new lib to create the data storage.
>> >> >> - Maybe move the rte_eal_init() API to the new lib or keep it in the common eal code. Do not want to go hog wild.
>> >> >> - The rte_eal_init(args, argv) would then call to the new API rte_eal_initialize(void), which in turn queries the data storage. (still thinking here)
>> >> >These three items seem to be the exact opposite of my suggestion. The point of
>> >> >this change was to segregate the parsing of configuration away from the
>> >> >initalization dpdk using that configurtion. By keeping rte_eal_init in such a
>> >> >way that the command line is directly passed into it, you've not changed that
>> >> >implicit binding to command line options.
>> >>
>> >> Neil,
>> >>
>> >> You maybe reading the above wrong or I wrote it wrong, which is a high possibility. I want to move the command line parsing out of DPDK an into a library, but I still believe I need to provide some backward compatibility for ABI and to reduce the learning curve. The current applications can still call the rte_eal_init(), which then calls the new lib parser for dpdk command line options and then calls rte_eal_initialize() or move to the new API rte_eal_initialize() preceded by a new library call to parse the old command line args. At some point we can deprecate the rte_eal_init() if we think it is reasonable.
>> >>
>> >> >
>> >> >I can understand if you want to keep rte_eal_init as is for ABI purposes, but
>> >> >then you should create an rte_eal_init2(foo), where foo is some handle to in
>> >> >memory parsed configuration, so that applications can preform that separation.
>> >>
>> >> I think you describe what I had planned here. The rte_eal_initialize() routine is the new rte_eal_init2() API and the rte_eal_init() was only for backward compatibility was my thinking. I figured the argument to rte_eal_initialize() would be something to be decided, but it will mostly likely be some type of pointer to the storage.
>> >>
>> >> I hope that clears that up, but let me know.
>> >>
>> >yes, that clarifies your thinking, and I agree with it. Thank you!
>> >Neil
>> >
>> >> ++Keith
>> >>
>> >> >
>> >> >Neil
>> >> >
>> >> >> - The example apps args needs to be passed to the examples as is for now, then we can convert them one at a time if needed.
>> >> >>
>> >> >> - I would like to keep the storage of the data separate from the file parser as they can use the ‘set’ routines to build the data storage up.
>> >> >> - Keeping them split allows for new parsers to be created, while keeping the data storage from changing.
>> >> >> - The rte_cfg code could be modified to use the new configuration if someone wants to take on that task ☺
>> >> >>
>> >> >> - Next is the data storage and how we can access the data in a clean simple way.
>> >> >> - I want to have some simple level of hierarchy in the data.
>> >> >> - Having a string containing at least two levels “primary:secondary”.
>> >> >> - Primary string is something like “EAL” or “Pktgen” or “testpmd” to divide the data storage into logical major groups.
>> >> >> - The primary allows us to have groups and then we can have common secondary strings in different groups if needed.
>> >> >> - Secondary string can be whatever the developer of that group would like e.g. simple “EAL:foobar”, two levels “testpmd:foo.bar”
>> >> >>
>> >> >> - The secondary string is treated as a single string if it has a hierarchy or not, but referencing a single value in the data storage.
>> >> >> - Key value pairs (KVP) or a hashmap data store.
>> >> >> - The key here is the whole string “EAL:foobar” not just “foobar” secondary string.
>> >> >> - If we want to have the two split I am ok with that as well meaning the API would be:
>> >> >> rte_map_get(mapObj, “EAL”, “foo.bar”);
>> >> >> rte_map_set(mapObj, “EAL”, “foo.bar”, value);
>> >> >> - Have the primary as a different section in the data store, would allow for dumping that section maybe easier, not sure.
>> >> >> - I am leaning toward
>> >> >> - Not going to try splitting up the string or parse it as it is up to the developer to make it unique in the data store.
>> >> >> - Use a code design to make the strings simple to use without having typos be a problem.
>> >> >> - Not sure what the design is yet, but I do not want to have to concat two string or split strings in the code.
>> >> >>
>> >> >> This is as far as I have gotten and got tired of typing ☺
>> >> >>
>> >> >> I hope this will satisfy most everyone’s needs for now.
>> >> >>
>> >> >>
>> >> >> Regards,
>> >> >> Keith
>> >> >>
>> >> >>
>> >> >>
>> >> >
>> >>
>> >>
>> >>
>> >
>> >Keith
>> >What about the data types of the values?
>> >I would assume that as a library it can provide the service of typed get/set and not leave conversion and validation to the app.
>> >
>> >rte_map_get_int(map,section,key)
>> >rte_map_get_double(...)
>> >rte_map_get_string(...)
>> >rte_map_get_bytes(...,destBuff , destBuffSize) //e.g byte array of RSS key
>> >This may also allow some basic validity of the configuration file
>> >Another point I forgot about is default values.
>> >We sometimes use a notation where the app also specifies a default value in case the configuration did not specify it
>> > rte_map_get_int(map,section,key , defaultValue )
>> >and specify if this was a mandatory that has no default
>> > rte_map_get_int_crash_if_missing (map,section,key)
>> >
>> >
>> >
>> >
>> >/Arnon
>> >
>> >Arnon,
>> >
>> >Yes, I too was thinking about access type APIs, but had not come to a full conclusion yet. As long as the API for get/put can return any value, we can add a layer on top of these primary get/put APIs to do some basic type checking. This way the developer can add his/her own type checking APIs or we provide a couple basic types for simple values.
>>
>> One more thing. I had not thought about default values as the defaults are handle directly by the code when an option is not applied. I think it should be left up to the developer to add default values to the storage or handle it when an option is not found in the storage.
>>
>> If I understand your code above the API would pass in a default value if one did not exist in the storage, which I guess is reasonable. Anyone think this is a good idea or not?
>>
>
>I'm not opposed to default values, but it seems to me that if we are splitting
>out a configuration storage library from dpdk, part of the initzliation of that
>library can be installing default values. That is to say, instead of having the
>code specific areas assume a default value if none is present in the config, an
>init function for the configuration storage library would just populate the
>keystore. That way all the dpdk itself has to do is a key lookup.
+1
If someone needs or wants default values in the API call, then wrapper functions around the basic keystore APIs can be written by the developer, or we can add a new set of APIs to provide that type of feature, just like the variable type APIs. As long as the basic APIs do not exclude it, we can add it later.
>
>Neil
>
>> >
>> >Does that make sense?
>> >
>> >++Keith
>> >
>>
>>
>>
>
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
2016-06-03 19:07 0% ` Wiles, Keith
@ 2016-06-03 19:18 0% ` Neil Horman
2016-06-03 19:23 0% ` Wiles, Keith
0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2016-06-03 19:18 UTC (permalink / raw)
To: Wiles, Keith
Cc: Arnon Warshavsky, Panu Matilainen, Richardson, Bruce,
Thomas Monjalon, Yuanhan Liu, dev, Tan, Jianfeng,
Stephen Hemminger, Christian Ehrhardt, Olivier Matz
On Fri, Jun 03, 2016 at 07:07:50PM +0000, Wiles, Keith wrote:
> On 6/3/16, 2:00 PM, "dev on behalf of Wiles, Keith" <dev-bounces@dpdk.org on behalf of keith.wiles@intel.com> wrote:
>
> >On 6/3/16, 1:52 PM, "Arnon Warshavsky" <arnon@qwilt.com<mailto:arnon@qwilt.com>> wrote:
> >
> >
> >
> >On Fri, Jun 3, 2016 at 9:38 PM, Neil Horman <nhorman@tuxdriver.com<mailto:nhorman@tuxdriver.com>> wrote:
> >On Fri, Jun 03, 2016 at 06:29:13PM +0000, Wiles, Keith wrote:
> >>
> >> On 6/3/16, 12:44 PM, "Neil Horman" <nhorman@tuxdriver.com<mailto:nhorman@tuxdriver.com>> wrote:
> >>
> >> >On Fri, Jun 03, 2016 at 04:04:14PM +0000, Wiles, Keith wrote:
> >> >> Sorry, I deleted all of the text as it was getting a bit long.
> >> >>
> >> >> Here are my thoughts as of now, which is a combination of many suggestions I read from everyone’s emails. I hope this is not too hard to understand.
> >> >>
> >> >> - Break out the current command line options out of the DPDK common code and move into a new lib.
> >> >> - At this point I was thinking of keeping the rte_eal_init(args, argv) API and just have it pass the args/argv to the new lib to create the data storage.
> >> >> - Maybe move the rte_eal_init() API to the new lib or keep it in the common eal code. Do not want to go hog wild.
> >> >> - The rte_eal_init(args, argv) would then call to the new API rte_eal_initialize(void), which in turn queries the data storage. (still thinking here)
> >> >These three items seem to be the exact opposite of my suggestion. The point of
> >> >this change was to segregate the parsing of configuration away from the
> >> >initalization dpdk using that configurtion. By keeping rte_eal_init in such a
> >> >way that the command line is directly passed into it, you've not changed that
> >> >implicit binding to command line options.
> >>
> >> Neil,
> >>
> >> You maybe reading the above wrong or I wrote it wrong, which is a high possibility. I want to move the command line parsing out of DPDK an into a library, but I still believe I need to provide some backward compatibility for ABI and to reduce the learning curve. The current applications can still call the rte_eal_init(), which then calls the new lib parser for dpdk command line options and then calls rte_eal_initialize() or move to the new API rte_eal_initialize() preceded by a new library call to parse the old command line args. At some point we can deprecate the rte_eal_init() if we think it is reasonable.
> >>
> >> >
> >> >I can understand if you want to keep rte_eal_init as is for ABI purposes, but
> >> >then you should create an rte_eal_init2(foo), where foo is some handle to in
> >> >memory parsed configuration, so that applications can preform that separation.
> >>
> >> I think you describe what I had planned here. The rte_eal_initialize() routine is the new rte_eal_init2() API and the rte_eal_init() was only for backward compatibility was my thinking. I figured the argument to rte_eal_initialize() would be something to be decided, but it will mostly likely be some type of pointer to the storage.
> >>
> >> I hope that clears that up, but let me know.
> >>
> >yes, that clarifies your thinking, and I agree with it. Thank you!
> >Neil
> >
> >> ++Keith
> >>
> >> >
> >> >Neil
> >> >
> >> >> - The example apps args needs to be passed to the examples as is for now, then we can convert them one at a time if needed.
> >> >>
> >> >> - I would like to keep the storage of the data separate from the file parser as they can use the ‘set’ routines to build the data storage up.
> >> >> - Keeping them split allows for new parsers to be created, while keeping the data storage from changing.
> >> >> - The rte_cfg code could be modified to use the new configuration if someone wants to take on that task ☺
> >> >>
> >> >> - Next is the data storage and how we can access the data in a clean simple way.
> >> >> - I want to have some simple level of hierarchy in the data.
> >> >> - Having a string containing at least two levels “primary:secondary”.
> >> >> - Primary string is something like “EAL” or “Pktgen” or “testpmd” to divide the data storage into logical major groups.
> >> >> - The primary allows us to have groups and then we can have common secondary strings in different groups if needed.
> >> >> - Secondary string can be whatever the developer of that group would like e.g. simple “EAL:foobar”, two levels “testpmd:foo.bar”
> >> >>
> >> >> - The secondary string is treated as a single string if it has a hierarchy or not, but referencing a single value in the data storage.
> >> >> - Key value pairs (KVP) or a hashmap data store.
> >> >> - The key here is the whole string “EAL:foobar” not just “foobar” secondary string.
> >> >> - If we want to have the two split I am ok with that as well meaning the API would be:
> >> >> rte_map_get(mapObj, “EAL”, “foo.bar”);
> >> >> rte_map_set(mapObj, “EAL”, “foo.bar”, value);
> >> >> - Have the primary as a different section in the data store, would allow for dumping that section maybe easier, not sure.
> >> >> - I am leaning toward
> >> >> - Not going to try splitting up the string or parse it as it is up to the developer to make it unique in the data store.
> >> >> - Use a code design to make the strings simple to use without having typos be a problem.
> >> >> - Not sure what the design is yet, but I do not want to have to concat two string or split strings in the code.
> >> >>
> >> >> This is as far as I have gotten and got tired of typing ☺
> >> >>
> >> >> I hope this will satisfy most everyone’s needs for now.
> >> >>
> >> >>
> >> >> Regards,
> >> >> Keith
> >> >>
> >> >>
> >> >>
> >> >
> >>
> >>
> >>
> >
> >Keith
> >What about the data types of the values?
> >I would assume that as a library it can provide the service of typed get/set and not leave conversion and validation to the app.
> >
> >rte_map_get_int(map,section,key)
> >rte_map_get_double(...)
> >rte_map_get_string(...)
> >rte_map_get_bytes(...,destBuff , destBuffSize) //e.g byte array of RSS key
> >This may also allow some basic validity of the configuration file
> >Another point I forgot about is default values.
> >We sometimes use a notation where the app also specifies a default value in case the configuration did not specify it
> > rte_map_get_int(map,section,key , defaultValue )
> >and specify if this was a mandatory that has no default
> > rte_map_get_int_crash_if_missing (map,section,key)
> >
> >
> >
> >
> >/Arnon
> >
> >Arnon,
> >
> >Yes, I too was thinking about access type APIs, but had not come to a full conclusion yet. As long as the API for get/put can return any value, we can add a layer on top of these primary get/put APIs to do some basic type checking. This way the developer can add his/her own type checking APIs or we provide a couple basic types for simple values.
>
> One more thing. I had not thought about default values as the defaults are handle directly by the code when an option is not applied. I think it should be left up to the developer to add default values to the storage or handle it when an option is not found in the storage.
>
> If I understand your code above the API would pass in a default value if one did not exist in the storage, which I guess is reasonable. Anyone think this is a good idea or not?
>
I'm not opposed to default values, but it seems to me that if we are splitting
out a configuration storage library from dpdk, part of the initialization of that
library can be installing default values. That is to say, instead of having the
code specific areas assume a default value if none is present in the config, an
init function for the configuration storage library would just populate the
keystore. That way all the dpdk itself has to do is a key lookup.
Neil
> >
> >Does that make sense?
> >
> >++Keith
> >
>
>
>
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
2016-06-03 19:00 0% ` Wiles, Keith
@ 2016-06-03 19:07 0% ` Wiles, Keith
2016-06-03 19:18 0% ` Neil Horman
0 siblings, 1 reply; 200+ results
From: Wiles, Keith @ 2016-06-03 19:07 UTC (permalink / raw)
To: Arnon Warshavsky, Neil Horman
Cc: Panu Matilainen, Richardson, Bruce, Thomas Monjalon, Yuanhan Liu,
dev, Tan, Jianfeng, Stephen Hemminger, Christian Ehrhardt,
Olivier Matz
On 6/3/16, 2:00 PM, "dev on behalf of Wiles, Keith" <dev-bounces@dpdk.org on behalf of keith.wiles@intel.com> wrote:
>On 6/3/16, 1:52 PM, "Arnon Warshavsky" <arnon@qwilt.com<mailto:arnon@qwilt.com>> wrote:
>
>
>
>On Fri, Jun 3, 2016 at 9:38 PM, Neil Horman <nhorman@tuxdriver.com<mailto:nhorman@tuxdriver.com>> wrote:
>On Fri, Jun 03, 2016 at 06:29:13PM +0000, Wiles, Keith wrote:
>>
>> On 6/3/16, 12:44 PM, "Neil Horman" <nhorman@tuxdriver.com<mailto:nhorman@tuxdriver.com>> wrote:
>>
>> >On Fri, Jun 03, 2016 at 04:04:14PM +0000, Wiles, Keith wrote:
>> >> Sorry, I deleted all of the text as it was getting a bit long.
>> >>
>> >> Here are my thoughts as of now, which is a combination of many suggestions I read from everyone’s emails. I hope this is not too hard to understand.
>> >>
>> >> - Break out the current command line options out of the DPDK common code and move into a new lib.
>> >> - At this point I was thinking of keeping the rte_eal_init(args, argv) API and just have it pass the args/argv to the new lib to create the data storage.
>> >> - Maybe move the rte_eal_init() API to the new lib or keep it in the common eal code. Do not want to go hog wild.
>> >> - The rte_eal_init(args, argv) would then call to the new API rte_eal_initialize(void), which in turn queries the data storage. (still thinking here)
>> >These three items seem to be the exact opposite of my suggestion. The point of
>> >this change was to segregate the parsing of configuration away from the
>> >initalization dpdk using that configurtion. By keeping rte_eal_init in such a
>> >way that the command line is directly passed into it, you've not changed that
>> >implicit binding to command line options.
>>
>> Neil,
>>
>> You maybe reading the above wrong or I wrote it wrong, which is a high possibility. I want to move the command line parsing out of DPDK an into a library, but I still believe I need to provide some backward compatibility for ABI and to reduce the learning curve. The current applications can still call the rte_eal_init(), which then calls the new lib parser for dpdk command line options and then calls rte_eal_initialize() or move to the new API rte_eal_initialize() preceded by a new library call to parse the old command line args. At some point we can deprecate the rte_eal_init() if we think it is reasonable.
>>
>> >
>> >I can understand if you want to keep rte_eal_init as is for ABI purposes, but
>> >then you should create an rte_eal_init2(foo), where foo is some handle to in
>> >memory parsed configuration, so that applications can preform that separation.
>>
>> I think you describe what I had planned here. The rte_eal_initialize() routine is the new rte_eal_init2() API and the rte_eal_init() was only for backward compatibility was my thinking. I figured the argument to rte_eal_initialize() would be something to be decided, but it will mostly likely be some type of pointer to the storage.
>>
>> I hope that clears that up, but let me know.
>>
>yes, that clarifies your thinking, and I agree with it. Thank you!
>Neil
>
>> ++Keith
>>
>> >
>> >Neil
>> >
>> >> - The example apps args needs to be passed to the examples as is for now, then we can convert them one at a time if needed.
>> >>
>> >> - I would like to keep the storage of the data separate from the file parser as they can use the ‘set’ routines to build the data storage up.
>> >> - Keeping them split allows for new parsers to be created, while keeping the data storage from changing.
>> >> - The rte_cfg code could be modified to use the new configuration if someone wants to take on that task ☺
>> >>
>> >> - Next is the data storage and how we can access the data in a clean simple way.
>> >> - I want to have some simple level of hierarchy in the data.
>> >> - Having a string containing at least two levels “primary:secondary”.
>> >> - Primary string is something like “EAL” or “Pktgen” or “testpmd” to divide the data storage into logical major groups.
>> >> - The primary allows us to have groups and then we can have common secondary strings in different groups if needed.
>> >> - Secondary string can be whatever the developer of that group would like e.g. simple “EAL:foobar”, two levels “testpmd:foo.bar”
>> >>
>> >> - The secondary string is treated as a single string if it has a hierarchy or not, but referencing a single value in the data storage.
>> >> - Key value pairs (KVP) or a hashmap data store.
>> >> - The key here is the whole string “EAL:foobar” not just “foobar” secondary string.
>> >> - If we want to have the two split I am ok with that as well meaning the API would be:
>> >> rte_map_get(mapObj, “EAL”, “foo.bar”);
>> >> rte_map_set(mapObj, “EAL”, “foo.bar”, value);
>> >> - Have the primary as a different section in the data store, would allow for dumping that section maybe easier, not sure.
>> >> - I am leaning toward
>> >> - Not going to try splitting up the string or parse it as it is up to the developer to make it unique in the data store.
>> >> - Use a code design to make the strings simple to use without having typos be a problem.
>> >> - Not sure what the design is yet, but I do not want to have to concat two string or split strings in the code.
>> >>
>> >> This is as far as I have gotten and got tired of typing ☺
>> >>
>> >> I hope this will satisfy most everyone’s needs for now.
>> >>
>> >>
>> >> Regards,
>> >> Keith
>> >>
>> >>
>> >>
>> >
>>
>>
>>
>
>Keith
>What about the data types of the values?
>I would assume that as a library it can provide the service of typed get/set and not leave conversion and validation to the app.
>
>rte_map_get_int(map,section,key)
>rte_map_get_double(...)
>rte_map_get_string(...)
>rte_map_get_bytes(...,destBuff , destBuffSize) //e.g byte array of RSS key
>This may also allow some basic validity of the configuration file
>Another point I forgot about is default values.
>We sometimes use a notation where the app also specifies a default value in case the configuration did not specify it
> rte_map_get_int(map,section,key , defaultValue )
>and specify if this was a mandatory that has no default
> rte_map_get_int_crash_if_missing (map,section,key)
>
>
>
>
>/Arnon
>
>Arnon,
>
>Yes, I too was thinking about access type APIs, but had not come to a full conclusion yet. As long as the API for get/put can return any value, we can add a layer on top of these primary get/put APIs to do some basic type checking. This way the developer can add his/her own type checking APIs or we provide a couple basic types for simple values.
One more thing. I had not thought about default values as the defaults are handled directly by the code when an option is not applied. I think it should be left up to the developer to add default values to the storage or handle it when an option is not found in the storage.
If I understand your code above the API would pass in a default value if one did not exist in the storage, which I guess is reasonable. Anyone think this is a good idea or not?
>
>Does that make sense?
>
>++Keith
>
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
2016-06-03 18:52 0% ` Arnon Warshavsky
@ 2016-06-03 19:00 0% ` Wiles, Keith
2016-06-03 19:07 0% ` Wiles, Keith
0 siblings, 1 reply; 200+ results
From: Wiles, Keith @ 2016-06-03 19:00 UTC (permalink / raw)
To: Arnon Warshavsky, Neil Horman
Cc: Panu Matilainen, Richardson, Bruce, Thomas Monjalon, Yuanhan Liu,
dev, Tan, Jianfeng, Stephen Hemminger, Christian Ehrhardt,
Olivier Matz
On 6/3/16, 1:52 PM, "Arnon Warshavsky" <arnon@qwilt.com<mailto:arnon@qwilt.com>> wrote:
On Fri, Jun 3, 2016 at 9:38 PM, Neil Horman <nhorman@tuxdriver.com<mailto:nhorman@tuxdriver.com>> wrote:
On Fri, Jun 03, 2016 at 06:29:13PM +0000, Wiles, Keith wrote:
>
> On 6/3/16, 12:44 PM, "Neil Horman" <nhorman@tuxdriver.com<mailto:nhorman@tuxdriver.com>> wrote:
>
> >On Fri, Jun 03, 2016 at 04:04:14PM +0000, Wiles, Keith wrote:
> >> Sorry, I deleted all of the text as it was getting a bit long.
> >>
> >> Here are my thoughts as of now, which is a combination of many suggestions I read from everyone’s emails. I hope this is not too hard to understand.
> >>
> >> - Break out the current command line options out of the DPDK common code and move into a new lib.
> >> - At this point I was thinking of keeping the rte_eal_init(args, argv) API and just have it pass the args/argv to the new lib to create the data storage.
> >> - Maybe move the rte_eal_init() API to the new lib or keep it in the common eal code. Do not want to go hog wild.
> >> - The rte_eal_init(args, argv) would then call to the new API rte_eal_initialize(void), which in turn queries the data storage. (still thinking here)
> >These three items seem to be the exact opposite of my suggestion. The point of
> >this change was to segregate the parsing of configuration away from the
> >initalization dpdk using that configurtion. By keeping rte_eal_init in such a
> >way that the command line is directly passed into it, you've not changed that
> >implicit binding to command line options.
>
> Neil,
>
> You maybe reading the above wrong or I wrote it wrong, which is a high possibility. I want to move the command line parsing out of DPDK an into a library, but I still believe I need to provide some backward compatibility for ABI and to reduce the learning curve. The current applications can still call the rte_eal_init(), which then calls the new lib parser for dpdk command line options and then calls rte_eal_initialize() or move to the new API rte_eal_initialize() preceded by a new library call to parse the old command line args. At some point we can deprecate the rte_eal_init() if we think it is reasonable.
>
> >
> >I can understand if you want to keep rte_eal_init as is for ABI purposes, but
> >then you should create an rte_eal_init2(foo), where foo is some handle to in
> >memory parsed configuration, so that applications can preform that separation.
>
> I think you describe what I had planned here. The rte_eal_initialize() routine is the new rte_eal_init2() API and the rte_eal_init() was only for backward compatibility was my thinking. I figured the argument to rte_eal_initialize() would be something to be decided, but it will mostly likely be some type of pointer to the storage.
>
> I hope that clears that up, but let me know.
>
yes, that clarifies your thinking, and I agree with it. Thank you!
Neil
> ++Keith
>
> >
> >Neil
> >
> >> - The example apps args needs to be passed to the examples as is for now, then we can convert them one at a time if needed.
> >>
> >> - I would like to keep the storage of the data separate from the file parser as they can use the ‘set’ routines to build the data storage up.
> >> - Keeping them split allows for new parsers to be created, while keeping the data storage from changing.
> >> - The rte_cfg code could be modified to use the new configuration if someone wants to take on that task ☺
> >>
> >> - Next is the data storage and how we can access the data in a clean simple way.
> >> - I want to have some simple level of hierarchy in the data.
> >> - Having a string containing at least two levels “primary:secondary”.
> >> - Primary string is something like “EAL” or “Pktgen” or “testpmd” to divide the data storage into logical major groups.
> >> - The primary allows us to have groups and then we can have common secondary strings in different groups if needed.
> >> - Secondary string can be whatever the developer of that group would like e.g. simple “EAL:foobar”, two levels “testpmd:foo.bar”
> >>
> >> - The secondary string is treated as a single string if it has a hierarchy or not, but referencing a single value in the data storage.
> >> - Key value pairs (KVP) or a hashmap data store.
> >> - The key here is the whole string “EAL:foobar” not just “foobar” secondary string.
> >> - If we want to have the two split I am ok with that as well meaning the API would be:
> >> rte_map_get(mapObj, “EAL”, “foo.bar”);
> >> rte_map_set(mapObj, “EAL”, “foo.bar”, value);
> >> - Have the primary as a different section in the data store, would allow for dumping that section maybe easier, not sure.
> >> - I am leaning toward
> >> - Not going to try splitting up the string or parse it as it is up to the developer to make it unique in the data store.
> >> - Use a code design to make the strings simple to use without having typos be a problem.
> >> - Not sure what the design is yet, but I do not want to have to concat two string or split strings in the code.
> >>
> >> This is as far as I have gotten and got tired of typing ☺
> >>
> >> I hope this will satisfy most everyone’s needs for now.
> >>
> >>
> >> Regards,
> >> Keith
> >>
> >>
> >>
> >
>
>
>
Keith
What about the data types of the values?
I would assume that as a library it can provide the service of typed get/set and not leave conversion and validation to the app.
rte_map_get_int(map,section,key)
rte_map_get_double(...)
rte_map_get_string(...)
rte_map_get_bytes(...,destBuff , destBuffSize) //e.g byte array of RSS key
This may also allow some basic validity of the configuration file
Another point I forgot about is default values.
We sometimes use a notation where the app also specifies a default value in case the configuration did not specify it
rte_map_get_int(map,section,key , defaultValue )
and specify if this was a mandatory that has no default
rte_map_get_int_crash_if_missing (map,section,key)
/Arnon
Arnon,
Yes, I too was thinking about access type APIs, but had not come to a full conclusion yet. As long as the API for get/put can return any value, we can add a layer on top of these primary get/put APIs to do some basic type checking. This way the developer can add his/her own type checking APIs or we provide a couple basic types for simple values.
Does that make sense?
++Keith
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
2016-06-03 18:38 0% ` Neil Horman
@ 2016-06-03 18:52 0% ` Arnon Warshavsky
2016-06-03 19:00 0% ` Wiles, Keith
0 siblings, 1 reply; 200+ results
From: Arnon Warshavsky @ 2016-06-03 18:52 UTC (permalink / raw)
To: Neil Horman
Cc: Wiles, Keith, Panu Matilainen, Richardson, Bruce,
Thomas Monjalon, Yuanhan Liu, dev, Tan, Jianfeng,
Stephen Hemminger, Christian Ehrhardt, Olivier Matz
On Fri, Jun 3, 2016 at 9:38 PM, Neil Horman <nhorman@tuxdriver.com> wrote:
> On Fri, Jun 03, 2016 at 06:29:13PM +0000, Wiles, Keith wrote:
> >
> > On 6/3/16, 12:44 PM, "Neil Horman" <nhorman@tuxdriver.com> wrote:
> >
> > >On Fri, Jun 03, 2016 at 04:04:14PM +0000, Wiles, Keith wrote:
> > >> Sorry, I deleted all of the text as it was getting a bit long.
> > >>
> > >> Here are my thoughts as of now, which is a combination of many
> suggestions I read from everyone’s emails. I hope this is not too hard to
> understand.
> > >>
> > >> - Break out the current command line options out of the DPDK common
> code and move into a new lib.
> > >> - At this point I was thinking of keeping the rte_eal_init(args,
> argv) API and just have it pass the args/argv to the new lib to create the
> data storage.
> > >> - Maybe move the rte_eal_init() API to the new lib or keep it in
> the common eal code. Do not want to go hog wild.
> > >> - The rte_eal_init(args, argv) would then call to the new API
> rte_eal_initialize(void), which in turn queries the data storage. (still
> thinking here)
> > >These three items seem to be the exact opposite of my suggestion. The
> point of
> > >this change was to segregate the parsing of configuration away from the
> > >initalization dpdk using that configurtion. By keeping rte_eal_init in
> such a
> > >way that the command line is directly passed into it, you've not
> changed that
> > >implicit binding to command line options.
> >
> > Neil,
> >
> > You maybe reading the above wrong or I wrote it wrong, which is a high
> possibility. I want to move the command line parsing out of DPDK an into a
> library, but I still believe I need to provide some backward compatibility
> for ABI and to reduce the learning curve. The current applications can
> still call the rte_eal_init(), which then calls the new lib parser for dpdk
> command line options and then calls rte_eal_initialize() or move to the new
> API rte_eal_initialize() preceded by a new library call to parse the old
> command line args. At some point we can deprecate the rte_eal_init() if we
> think it is reasonable.
> >
> > >
> > >I can understand if you want to keep rte_eal_init as is for ABI
> purposes, but
> > >then you should create an rte_eal_init2(foo), where foo is some handle
> to in
> > >memory parsed configuration, so that applications can preform that
> separation.
> >
> > I think you describe what I had planned here. The rte_eal_initialize()
> routine is the new rte_eal_init2() API and the rte_eal_init() was only for
> backward compatibility was my thinking. I figured the argument to
> rte_eal_initialize() would be something to be decided, but it will mostly
> likely be some type of pointer to the storage.
> >
> > I hope that clears that up, but let me know.
> >
> yes, that clarifies your thinking, and I agree with it. Thank you!
> Neil
>
> > ++Keith
> >
> > >
> > >Neil
> > >
> > >> - The example apps args needs to be passed to the examples as is
> for now, then we can convert them one at a time if needed.
> > >>
> > >> - I would like to keep the storage of the data separate from the file
> parser as they can use the ‘set’ routines to build the data storage up.
> > >> - Keeping them split allows for new parsers to be created, while
> keeping the data storage from changing.
> > >> - The rte_cfg code could be modified to use the new configuration if
> someone wants to take on that task ☺
> > >>
> > >> - Next is the data storage and how we can access the data in a clean
> simple way.
> > >> - I want to have some simple level of hierarchy in the data.
> > >> - Having a string containing at least two levels
> “primary:secondary”.
> > >> - Primary string is something like “EAL” or “Pktgen” or
> “testpmd” to divide the data storage into logical major groups.
> > >> - The primary allows us to have groups and then we can have
> common secondary strings in different groups if needed.
> > >> - Secondary string can be whatever the developer of that group
> would like e.g. simple “EAL:foobar”, two levels “testpmd:foo.bar”
> > >>
> > >> - The secondary string is treated as a single string if it has a
> hierarchy or not, but referencing a single value in the data storage.
> > >> - Key value pairs (KVP) or a hashmap data store.
> > >> - The key here is the whole string “EAL:foobar” not just
> “foobar” secondary string.
> > >> - If we want to have the two split I am ok with that as
> well meaning the API would be:
> > >> rte_map_get(mapObj, “EAL”, “foo.bar”);
> > >> rte_map_set(mapObj, “EAL”, “foo.bar”, value);
> > >> - Have the primary as a different section in the data
> store, would allow for dumping that section maybe easier, not sure.
> > >> - I am leaning toward
> > >> - Not going to try splitting up the string or parse it as it is
> up to the developer to make it unique in the data store.
> > >> - Use a code design to make the strings simple to use without having
> typos be a problem.
> > >> - Not sure what the design is yet, but I do not want to have to
> concat two string or split strings in the code.
> > >>
> > >> This is as far as I have gotten and got tired of typing ☺
> > >>
> > >> I hope this will satisfy most everyone’s needs for now.
> > >>
> > >>
> > >> Regards,
> > >> Keith
> > >>
> > >>
> > >>
> > >
> >
> >
> >
>
Keith
What about the data types of the values?
I would assume that as a library it can provide the service of typed
get/set and not leave conversion and validation to the app.
rte_map_get_int(map,section,key)
rte_map_get_double(...)
rte_map_get_string(...)
rte_map_get_bytes(...,destBuff , destBuffSize) //e.g byte array of RSS key
This may also allow some basic validity of the configuration file
Another point I forgot about is default values.
We sometimes use a notation where the app also specifies a default value in
case the configuration did not specify it
rte_map_get_int(map,section,key , defaultValue )
and specify if this was a mandatory that has no default
rte_map_get_int_crash_if_missing (map,section,key)
/Arnon
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
2016-06-03 18:29 3% ` Wiles, Keith
@ 2016-06-03 18:38 0% ` Neil Horman
2016-06-03 18:52 0% ` Arnon Warshavsky
0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2016-06-03 18:38 UTC (permalink / raw)
To: Wiles, Keith
Cc: Arnon Warshavsky, Panu Matilainen, Richardson, Bruce,
Thomas Monjalon, Yuanhan Liu, dev, Tan, Jianfeng,
Stephen Hemminger, Christian Ehrhardt, Olivier Matz
On Fri, Jun 03, 2016 at 06:29:13PM +0000, Wiles, Keith wrote:
>
> On 6/3/16, 12:44 PM, "Neil Horman" <nhorman@tuxdriver.com> wrote:
>
> >On Fri, Jun 03, 2016 at 04:04:14PM +0000, Wiles, Keith wrote:
> >> Sorry, I deleted all of the text as it was getting a bit long.
> >>
> >> Here are my thoughts as of now, which is a combination of many suggestions I read from everyone’s emails. I hope this is not too hard to understand.
> >>
> >> - Break out the current command line options out of the DPDK common code and move into a new lib.
> >> - At this point I was thinking of keeping the rte_eal_init(args, argv) API and just have it pass the args/argv to the new lib to create the data storage.
> >> - Maybe move the rte_eal_init() API to the new lib or keep it in the common eal code. Do not want to go hog wild.
> >> - The rte_eal_init(args, argv) would then call to the new API rte_eal_initialize(void), which in turn queries the data storage. (still thinking here)
> >These three items seem to be the exact opposite of my suggestion. The point of
> >this change was to segregate the parsing of configuration away from the
> >initalization dpdk using that configurtion. By keeping rte_eal_init in such a
> >way that the command line is directly passed into it, you've not changed that
> >implicit binding to command line options.
>
> Neil,
>
> You maybe reading the above wrong or I wrote it wrong, which is a high possibility. I want to move the command line parsing out of DPDK an into a library, but I still believe I need to provide some backward compatibility for ABI and to reduce the learning curve. The current applications can still call the rte_eal_init(), which then calls the new lib parser for dpdk command line options and then calls rte_eal_initialize() or move to the new API rte_eal_initialize() preceded by a new library call to parse the old command line args. At some point we can deprecate the rte_eal_init() if we think it is reasonable.
>
> >
> >I can understand if you want to keep rte_eal_init as is for ABI purposes, but
> >then you should create an rte_eal_init2(foo), where foo is some handle to in
> >memory parsed configuration, so that applications can preform that separation.
>
> I think you describe what I had planned here. The rte_eal_initialize() routine is the new rte_eal_init2() API and the rte_eal_init() was only for backward compatibility was my thinking. I figured the argument to rte_eal_initialize() would be something to be decided, but it will mostly likely be some type of pointer to the storage.
>
> I hope that clears that up, but let me know.
>
yes, that clarifies your thinking, and I agree with it. Thank you!
Neil
> ++Keith
>
> >
> >Neil
> >
> >> - The example apps args needs to be passed to the examples as is for now, then we can convert them one at a time if needed.
> >>
> >> - I would like to keep the storage of the data separate from the file parser as they can use the ‘set’ routines to build the data storage up.
> >> - Keeping them split allows for new parsers to be created, while keeping the data storage from changing.
> >> - The rte_cfg code could be modified to use the new configuration if someone wants to take on that task ☺
> >>
> >> - Next is the data storage and how we can access the data in a clean simple way.
> >> - I want to have some simple level of hierarchy in the data.
> >> - Having a string containing at least two levels “primary:secondary”.
> >> - Primary string is something like “EAL” or “Pktgen” or “testpmd” to divide the data storage into logical major groups.
> >> - The primary allows us to have groups and then we can have common secondary strings in different groups if needed.
> >> - Secondary string can be whatever the developer of that group would like e.g. simple “EAL:foobar”, two levels “testpmd:foo.bar”
> >>
> >> - The secondary string is treated as a single string if it has a hierarchy or not, but referencing a single value in the data storage.
> >> - Key value pairs (KVP) or a hashmap data store.
> >> - The key here is the whole string “EAL:foobar” not just “foobar” secondary string.
> >> - If we want to have the two split I am ok with that as well meaning the API would be:
> >> rte_map_get(mapObj, “EAL”, “foo.bar”);
> >> rte_map_set(mapObj, “EAL”, “foo.bar”, value);
> >> - Have the primary as a different section in the data store, would allow for dumping that section maybe easier, not sure.
> >> - I am leaning toward
> >> - Not going to try splitting up the string or parse it as it is up to the developer to make it unique in the data store.
> >> - Use a code design to make the strings simple to use without having typos be a problem.
> >> - Not sure what the design is yet, but I do not want to have to concat two string or split strings in the code.
> >>
> >> This is as far as I have gotten and got tired of typing ☺
> >>
> >> I hope this will satisfy most everyone’s needs for now.
> >>
> >>
> >> Regards,
> >> Keith
> >>
> >>
> >>
> >
>
>
>
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
2016-06-03 17:44 3% ` Neil Horman
@ 2016-06-03 18:29 3% ` Wiles, Keith
2016-06-03 18:38 0% ` Neil Horman
0 siblings, 1 reply; 200+ results
From: Wiles, Keith @ 2016-06-03 18:29 UTC (permalink / raw)
To: Neil Horman
Cc: Arnon Warshavsky, Panu Matilainen, Richardson, Bruce,
Thomas Monjalon, Yuanhan Liu, dev, Tan, Jianfeng,
Stephen Hemminger, Christian Ehrhardt, Olivier Matz
On 6/3/16, 12:44 PM, "Neil Horman" <nhorman@tuxdriver.com> wrote:
>On Fri, Jun 03, 2016 at 04:04:14PM +0000, Wiles, Keith wrote:
>> Sorry, I deleted all of the text as it was getting a bit long.
>>
>> Here are my thoughts as of now, which is a combination of many suggestions I read from everyone’s emails. I hope this is not too hard to understand.
>>
>> - Break out the current command line options out of the DPDK common code and move into a new lib.
>> - At this point I was thinking of keeping the rte_eal_init(args, argv) API and just have it pass the args/argv to the new lib to create the data storage.
>> - Maybe move the rte_eal_init() API to the new lib or keep it in the common eal code. Do not want to go hog wild.
>> - The rte_eal_init(args, argv) would then call to the new API rte_eal_initialize(void), which in turn queries the data storage. (still thinking here)
>These three items seem to be the exact opposite of my suggestion. The point of
>this change was to segregate the parsing of configuration away from the
>initalization dpdk using that configurtion. By keeping rte_eal_init in such a
>way that the command line is directly passed into it, you've not changed that
>implicit binding to command line options.
Neil,
You may be reading the above wrong or I wrote it wrong, which is a high possibility. I want to move the command line parsing out of DPDK and into a library, but I still believe I need to provide some backward compatibility for ABI and to reduce the learning curve. The current applications can still call the rte_eal_init(), which then calls the new lib parser for dpdk command line options and then calls rte_eal_initialize() or move to the new API rte_eal_initialize() preceded by a new library call to parse the old command line args. At some point we can deprecate the rte_eal_init() if we think it is reasonable.
>
>I can understand if you want to keep rte_eal_init as is for ABI purposes, but
>then you should create an rte_eal_init2(foo), where foo is some handle to in
>memory parsed configuration, so that applications can preform that separation.
I think you describe what I had planned here. The rte_eal_initialize() routine is the new rte_eal_init2() API, and my thinking was that rte_eal_init() would remain only for backward compatibility. I figured the argument to rte_eal_initialize() would be something to be decided, but it will most likely be some type of pointer to the storage.
I hope that clears that up, but let me know.
++Keith
>
>Neil
>
>> - The example apps args needs to be passed to the examples as is for now, then we can convert them one at a time if needed.
>>
>> - I would like to keep the storage of the data separate from the file parser as they can use the ‘set’ routines to build the data storage up.
>> - Keeping them split allows for new parsers to be created, while keeping the data storage from changing.
>> - The rte_cfg code could be modified to use the new configuration if someone wants to take on that task ☺
>>
>> - Next is the data storage and how we can access the data in a clean simple way.
>> - I want to have some simple level of hierarchy in the data.
>> - Having a string containing at least two levels “primary:secondary”.
>> - Primary string is something like “EAL” or “Pktgen” or “testpmd” to divide the data storage into logical major groups.
>> - The primary allows us to have groups and then we can have common secondary strings in different groups if needed.
>> - Secondary string can be whatever the developer of that group would like e.g. simple “EAL:foobar”, two levels “testpmd:foo.bar”
>>
>> - The secondary string is treated as a single string if it has a hierarchy or not, but referencing a single value in the data storage.
>> - Key value pairs (KVP) or a hashmap data store.
>> - The key here is the whole string “EAL:foobar” not just “foobar” secondary string.
>> - If we want to have the two split I am ok with that as well meaning the API would be:
>> rte_map_get(mapObj, “EAL”, “foo.bar”);
>> rte_map_set(mapObj, “EAL”, “foo.bar”, value);
>> - Have the primary as a different section in the data store, would allow for dumping that section maybe easier, not sure.
>> - I am leaning toward
>> - Not going to try splitting up the string or parse it as it is up to the developer to make it unique in the data store.
>> - Use a code design to make the strings simple to use without having typos be a problem.
>> - Not sure what the design is yet, but I do not want to have to concat two string or split strings in the code.
>>
>> This is as far as I have gotten and got tired of typing ☺
>>
>> I hope this will satisfy most everyone’s needs for now.
>>
>>
>> Regards,
>> Keith
>>
>>
>>
>
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] RFC: DPDK Long Term Support
2016-06-03 15:07 4% [dpdk-dev] RFC: DPDK Long Term Support Mcnamara, John
2016-06-03 16:05 0% ` Thomas Monjalon
@ 2016-06-03 18:17 3% ` Matthew Hall
2016-06-05 18:15 5% ` Neil Horman
2 siblings, 0 replies; 200+ results
From: Matthew Hall @ 2016-06-03 18:17 UTC (permalink / raw)
To: Mcnamara, John; +Cc: dev, Christian Ehrhardt, Markos Chandras, Panu Matilainen
On Fri, Jun 03, 2016 at 03:07:49PM +0000, Mcnamara, John wrote:
> What changes should be backported
> ---------------------------------
>
> * Bug fixes that don't break the ABI.
>
>
> What changes should not be backported
> -------------------------------------
>
> * API or ABI breaking changes.
I think this part needs some adjusting.
It seems like there should be allowance for bug fixes where the original does
break ABI but it is possible to make a version that doesn't.
A lot of DPDK bug fixes I see would fall into this category and it isn't
discussed.
Matthew.
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
@ 2016-06-03 17:44 3% ` Neil Horman
2016-06-03 18:29 3% ` Wiles, Keith
0 siblings, 1 reply; 200+ results
From: Neil Horman @ 2016-06-03 17:44 UTC (permalink / raw)
To: Wiles, Keith
Cc: Arnon Warshavsky, Panu Matilainen, Richardson, Bruce,
Thomas Monjalon, Yuanhan Liu, dev, Tan, Jianfeng,
Stephen Hemminger, Christian Ehrhardt, Olivier Matz
On Fri, Jun 03, 2016 at 04:04:14PM +0000, Wiles, Keith wrote:
> Sorry, I deleted all of the text as it was getting a bit long.
>
> Here are my thoughts as of now, which is a combination of many suggestions I read from everyone’s emails. I hope this is not too hard to understand.
>
> - Break out the current command line options out of the DPDK common code and move into a new lib.
> - At this point I was thinking of keeping the rte_eal_init(args, argv) API and just have it pass the args/argv to the new lib to create the data storage.
> - Maybe move the rte_eal_init() API to the new lib or keep it in the common eal code. Do not want to go hog wild.
> - The rte_eal_init(args, argv) would then call to the new API rte_eal_initialize(void), which in turn queries the data storage. (still thinking here)
These three items seem to be the exact opposite of my suggestion. The point of
this change was to segregate the parsing of configuration away from the
initialization of dpdk using that configuration. By keeping rte_eal_init in such a
way that the command line is directly passed into it, you've not changed that
implicit binding to command line options.
I can understand if you want to keep rte_eal_init as is for ABI purposes, but
then you should create an rte_eal_init2(foo), where foo is some handle to in
memory parsed configuration, so that applications can perform that separation.
Neil
> - The example apps args needs to be passed to the examples as is for now, then we can convert them one at a time if needed.
>
> - I would like to keep the storage of the data separate from the file parser as they can use the ‘set’ routines to build the data storage up.
> - Keeping them split allows for new parsers to be created, while keeping the data storage from changing.
> - The rte_cfg code could be modified to use the new configuration if someone wants to take on that task ☺
>
> - Next is the data storage and how we can access the data in a clean simple way.
> - I want to have some simple level of hierarchy in the data.
> - Having a string containing at least two levels “primary:secondary”.
> - Primary string is something like “EAL” or “Pktgen” or “testpmd” to divide the data storage into logical major groups.
> - The primary allows us to have groups and then we can have common secondary strings in different groups if needed.
> - Secondary string can be whatever the developer of that group would like e.g. simple “EAL:foobar”, two levels “testpmd:foo.bar”
>
> - The secondary string is treated as a single string if it has a hierarchy or not, but referencing a single value in the data storage.
> - Key value pairs (KVP) or a hashmap data store.
> - The key here is the whole string “EAL:foobar” not just “foobar” secondary string.
> - If we want to have the two split I am ok with that as well meaning the API would be:
> rte_map_get(mapObj, “EAL”, “foo.bar”);
> rte_map_set(mapObj, “EAL”, “foo.bar”, value);
> - Have the primary as a different section in the data store, would allow for dumping that section maybe easier, not sure.
> - I am leaning toward
> - Not going to try splitting up the string or parse it as it is up to the developer to make it unique in the data store.
> - Use a code design to make the strings simple to use without having typos be a problem.
> - Not sure what the design is yet, but I do not want to have to concat two string or split strings in the code.
>
> This is as far as I have gotten and got tired of typing ☺
>
> I hope this will satisfy most everyone’s needs for now.
>
>
> Regards,
> Keith
>
>
>
^ permalink raw reply [relevance 3%]
* [dpdk-dev] RFC: DPDK Long Term Support
@ 2016-06-03 15:07 4% Mcnamara, John
2016-06-03 16:05 0% ` Thomas Monjalon
` (2 more replies)
0 siblings, 3 replies; 200+ results
From: Mcnamara, John @ 2016-06-03 15:07 UTC (permalink / raw)
To: dev; +Cc: Christian Ehrhardt, Markos Chandras, Panu Matilainen
Introduction
------------
This document sets out a proposal for a DPDK Long Term Support release (LTS).
The purpose of the DPDK LTS will be to maintain a stable release of DPDK with
backported bug fixes over an extended period of time. This will provide
downstream consumers of DPDK with a stable target on which to base
applications or packages.
As with previous DPDK guidelines this proposal is open for discussion within
the community. The consensus view will be included in the DPDK documentation
as a guideline.
LTS Maintainer
--------------
The proposed maintainer for the LTS is Yuanhan Liu
<yuanhan.liu@linux.intel.com>.
LTS Duration
------------
The proposed duration of the LTS support is 2 years.
There will only be one LTS branch being maintained at any time. At the end of
the 2 year cycle the maintenance on the previous LTS will be wound down.
LTS Version
------------
The proposed initial LTS version will be DPDK 16.07. The next versions, based
on a 2 year cycle, will be DPDK 18.08, 20.08, etc.
What changes should be backported
---------------------------------
* Bug fixes that don't break the ABI.
What changes should not be backported
-------------------------------------
* API or ABI breaking changes.
* Features should not be backported. Unless:
* There is a justifiable use case (for example a new PMD).
* The change is non-invasive.
* The work of preparing the backport is done by the proposer.
* There is support within the community.
Role of the maintainer
----------------------
* The maintainer will evaluate fixes to the DPDK master submitted by the
fixing developer and apply them to the LTS branch/tree.
* The maintainer will evaluate backported patches from downstream consumers
and apply them to the LTS branch/tree.
* The maintainer will not backport non-trivial fixes without assistance from
the downstream consumers or requester.
Role of the downstream consumers
--------------------------------
Developers submitting fixes to the mainline should also CC the maintainer so
that they can evaluate the patch. A <stable@dpdk.org> email address could be
provided for this so that it can be included as a CC in the commit messages
and documented in the Code Contribution Guidelines.
The downstream consumers (OSVs and DPDK dependent application and framework
developers) should identify issues in the field that have been fixed in the
mainline release and report them to the maintainer. They should, ideally,
assist with backporting any required fixes.
Testing
-------
Intel will provide validation engineers to test the LTS branch/tree. Tested
releases can be marked using a Git tag with an incremented revision number. For
example: 16.07.00_LTS -> 16.07.01_LTS. The testing cadence should be quarterly
but will be best effort only and dependent on available resources.
Validated OSes
--------------
In order to reduce the testing effort the number of OSes which will be
officially validated should be as small as possible. The proposal is that the
following long term OSes are used for validation:
(OSV reps please confirm.)
* Ubuntu 16.04 LTS
* RHEL 7.3
* SuSE 11 SP4 or 12
* FreeBSD 10.3
Fixes for newer OSes, kernels (and associated KNI fixes), and newer GCC/Clang
versions can be backported but the validation effort will be limited to the
above platforms.
Release Notes
-------------
The LTS release notes should be updated to include a section with backported
fixes. Patches for backporting should include additions to the release notes
like patches to the mainline branch.
LTS Review
----------
The LTS guidelines shall be reviewed after 1 year to adjust for any experiences
from LTS maintainership.
^ permalink raw reply [relevance 4%]
* [dpdk-dev] [PATCH v8 0/5] mempool: add external mempool manager
2016-06-02 13:27 2% ` [dpdk-dev] [PATCH v7 " David Hunt
@ 2016-06-03 14:58 2% ` David Hunt
1 sibling, 0 replies; 200+ results
From: David Hunt @ 2016-06-03 14:58 UTC (permalink / raw)
To: dev; +Cc: olivier.matz, viktorin, jerin.jacob
Here's the latest version of the External Mempool Manager patchset.
It's re-based on top of the latest head as of 19/5/2016, including
Olivier's 35-part patch series on mempool re-org [1]
[1] http://dpdk.org/ml/archives/dev/2016-May/039229.html
v8 changes:
* merged first three patches in the series into one.
* changed parameters to ops callback to all be rte_mempool pointer
rather than pointer to opaque data or uint64.
* comment fixes.
* fixed parameter to _free function (was inconsistent).
* changed MEMPOOL_F_RING_CREATED to MEMPOOL_F_POOL_CREATED
v7 changes:
* Changed rte_mempool_handler_table to rte_mempool_ops_table
* Changed handler_idx to ops_index in rte_mempool struct
* Reworked comments in rte_mempool.h around ops functions
* Changed rte_mempool_handler.c to rte_mempool_ops.c
* Changed all functions containing _handler_ to _ops_
* Now there is no mention of 'handler' left
* Other small changes out of review of mailing list
v6 changes:
* Moved the flags handling from rte_mempool_create_empty to
rte_mempool_create, as it's only there for backward compatibility
* Various comment additions and cleanup
* Renamed rte_mempool_handler to rte_mempool_ops
* Added a union for *pool and u64 pool_id in struct rte_mempool
* split the original patch into a few parts for easier review.
* rename functions with _ext_ to _ops_.
* addressed review comments
* renamed put and get functions to enqueue and dequeue
* changed occurrences of rte_mempool_ops to const, as they
contain function pointers (security)
* split out the default external mempool handler into a separate
patch for easier review
v5 changes:
* rebasing, as it is dependent on another patch series [1]
v4 changes (Olivier Matz):
* remove the rte_mempool_create_ext() function. To change the handler, the
user has to do the following:
- mp = rte_mempool_create_empty()
- rte_mempool_set_handler(mp, "my_handler")
- rte_mempool_populate_default(mp)
This avoids adding another function with more than 10 arguments, duplicating
the doxygen comments
* change the api of rte_mempool_alloc_t: only the mempool pointer is required
as all information is available in it
* change the api of rte_mempool_free_t: remove return value
* move inline wrapper functions from the .c to the .h (else they won't be
inlined). This implies having one header file (rte_mempool.h), or it
would have generated cross-dependency issues.
* remove now unused MEMPOOL_F_INT_HANDLER (note: it was misused anyway due
to the use of && instead of &)
* fix build in debug mode (__MEMPOOL_STAT_ADD(mp, put_pool, n) remaining)
* fix build with shared libraries (global handler has to be declared in
the .map file)
* rationalize #include order
* remove unused function rte_mempool_get_handler_name()
* rename some structures, fields, functions
* remove the static in front of rte_tailq_elem rte_mempool_tailq (comment
from Yuanhan)
* test the ext mempool handler in the same file as the standard mempool tests,
to avoid duplicating the code
* rework the custom handler in mempool_test
* rework a bit the patch selecting default mbuf pool handler
* fix some doxygen comments
v3 changes:
* simplified the file layout, renamed to rte_mempool_handler.[hc]
* moved the default handlers into rte_mempool_default.c
* moved the example handler out into app/test/test_ext_mempool.c
* removed is_mc/is_mp change, slight perf degradation on sp cached operation
* removed stack handler, may re-introduce at a later date
* Changes out of code reviews
v2 changes:
* There was a lot of duplicate code between rte_mempool_xmem_create and
rte_mempool_create_ext. This has now been refactored and is now
hopefully cleaner.
* The RTE_NEXT_ABI define is now used to allow building of the library
in a format that is compatible with binaries built against previous
versions of DPDK.
* Changes out of code reviews. Hopefully I've got most of them included.
The External Mempool Manager is an extension to the mempool API that allows
users to add and use an external mempool manager, which allows external memory
subsystems such as external hardware memory management systems and software
based memory allocators to be used with DPDK.
The existing API to the internal DPDK mempool manager will remain unchanged
and will be backward compatible. However, there will be an ABI breakage, as
the mempool struct is changing. These changes are all contained within
RTE_NEXT_ABI defs, and the current or next code can be selected with
the CONFIG_RTE_NEXT_ABI config setting.
There are two aspects to external mempool manager.
1. Adding the code for your new mempool operations (ops). This is
achieved by adding a new mempool ops source file into the
librte_mempool library, and using the REGISTER_MEMPOOL_HANDLER macro.
2. Using the new API to call rte_mempool_create_empty and
rte_mempool_set_ops to create a new mempool
using the name parameter to identify which ops to use.
New API calls added
1. A new rte_mempool_create_empty() function
2. rte_mempool_set_ops_byname() which sets the mempool's ops (functions)
3. The rte_mempool_populate_default() and rte_mempool_populate_anon() functions,
which populate the mempool using the relevant ops
Several external mempool managers may be used in the same application. A new
mempool can then be created by using the new 'create' function, providing the
mempool ops struct name to point the mempool to the relevant mempool manager
callback structure.
The old 'create' function can still be called by legacy programs, and will
internally work out the mempool handle based on the flags provided (single
producer, single consumer, etc). By default handles are created internally to
implement the built-in DPDK mempool manager and mempool types.
The external mempool manager needs to provide the following functions.
1. alloc - allocates the mempool memory, and adds each object onto a ring
2. put - puts an object back into the mempool once an application has
finished with it
3. get - gets an object from the mempool for use by the application
4. get_count - gets the number of available objects in the mempool
5. free - frees the mempool memory
Every time a get/put/get_count is called from the application/PMD, the
callback for that mempool is called. These functions are in the fastpath,
and any unoptimised ops may limit performance.
The new APIs are as follows:
1. rte_mempool_create_empty
struct rte_mempool *
rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
unsigned cache_size, unsigned private_data_size,
int socket_id, unsigned flags);
2. rte_mempool_set_ops_byname()
int
rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name);
3. rte_mempool_populate_default()
int rte_mempool_populate_default(struct rte_mempool *mp);
4. rte_mempool_populate_anon()
int rte_mempool_populate_anon(struct rte_mempool *mp);
Please see rte_mempool.h for further information on the parameters.
The important thing to note is that the mempool ops struct is passed by name
to rte_mempool_set_ops_byname, which looks through the ops struct array to
get the ops_index, which is then stored in the rte_mempool structure. This
allows multiple processes to use the same mempool, as the function pointers
are accessed via ops index.
The mempool ops structure contains callbacks to the implementation of
the ops function, and is set up for registration as follows:
static const struct rte_mempool_ops ops_sp_mc = {
.name = "ring_sp_mc",
.alloc = rte_mempool_common_ring_alloc,
.put = common_ring_sp_put,
.get = common_ring_mc_get,
.get_count = common_ring_get_count,
.free = common_ring_free,
};
And then the following macro will register the ops in the array of ops
structures
REGISTER_MEMPOOL_OPS(ops_sp_mc);
For an example of API usage, please see app/test/test_mempool.c, which
implements a rudimentary "custom_handler" mempool manager using simple mallocs
for each mempool object. This file also contains the callbacks and self
registration for the new handler.
David Hunt (2):
mempool: support external mempool operations
mbuf: make default mempool ops configurable at build
Olivier Matz (1):
app/test: test external mempool manager
^ permalink raw reply [relevance 2%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
2016-06-03 12:53 0% ` Panu Matilainen
@ 2016-06-03 14:31 0% ` Arnon Warshavsky
0 siblings, 1 reply; 200+ results
From: Arnon Warshavsky @ 2016-06-03 14:31 UTC (permalink / raw)
To: Panu Matilainen
Cc: Neil Horman, Bruce Richardson, Wiles, Keith, Thomas Monjalon,
Yuanhan Liu, dev, Tan, Jianfeng, Stephen Hemminger,
Christian Ehrhardt, Olivier Matz
On Fri, Jun 3, 2016 at 3:53 PM, Panu Matilainen <pmatilai@redhat.com> wrote:
> On 06/03/2016 03:01 PM, Arnon Warshavsky wrote:
>
>>
>>
>> On Fri, Jun 3, 2016 at 2:50 PM, Neil Horman <nhorman@tuxdriver.com
>> <mailto:nhorman@tuxdriver.com>> wrote:
>>
>> On Fri, Jun 03, 2016 at 12:01:30PM +0100, Bruce Richardson wrote:
>> > On Fri, Jun 03, 2016 at 11:29:43AM +0100, Bruce Richardson wrote:
>> > > On Thu, Jun 02, 2016 at 04:08:37PM -0400, Neil Horman wrote:
>> > > > On Thu, Jun 02, 2016 at 07:41:10PM +0000, Wiles, Keith wrote:
>> > > > >
>> > > > > On 6/2/16, 12:11 PM, "Neil Horman" <nhorman@tuxdriver.com
>> <mailto:nhorman@tuxdriver.com>> wrote:
>> > > > >
>> > > > > >
>> > > > > >1) The definition of a config structure that can be passed
>> to rte_eal_init,
>> > > > > >defining the configuration for that running process
>> > > > >
>> > > > > Having a configuration structure means we have to have an
>> ABI change to that structure anytime we add or remove an option. I
>> was thinking a very simple DB of some kind would be better. Have the
>> code query the DB to obtain the needed information. The APIs used to
>> query and set the DB needs to be very easy to use as well.
>> > > >
>> > > > Thats a fair point. A decent starting point is likely a
>> simple struct that
>> > > > looks like this:
>> > > >
>> > > > struct key_vals {
>> > > > char *key;
>> > > > union {
>> > > > ulong longval;
>> > > > void *ptrval;
>> > > > } value;
>> > > > };
>> > > >
>> > > > struct config {
>> > > > size_t count;
>> > > > struct key_vals kvp[0];
>> > > > };
>> > > >
>> > > > >
>> > > > > Maybe each option can define its own structure if needed or
>> just a simple variable type can be used for the basic types (int,
>> string, bool, …)
>> > > > >
>> > > > Well, if you have config sections that require mulitiple
>> elements, I'd handle
>> > > > that with naming, i.e. if you have a config group that has an
>> int and char
>> > > > value, I'd name them "group.intval", and "group.charval", so
>> they are
>> > > > independently searchable, but linked from a nomenclature
>> standpoint.
>> > > >
>> > > > > Would this work better in the long run, does a fixed
>> structure still make sense?
>> > > > >
>> > > > No. I think you're ABI concerns are valid, but the above is
>> likely a good
>> > > > starting point to address them.
>> > > >
>> > > > Best
>> > > > Neil
>> > >
>> > > I'll throw out one implementation idea here that I looked at
>> previously, for
>> > > the reason that it was simple enough implement with existing code.
>> > >
>> > > We already have the cfgfile library which works with name/value
>> pairs read from
>> > > ini files on disk. However, it would be easy enough to add
>> couple of APIs to
>> > > that to allow the user to "set" values inside an ini structure
>> as well. With
>> > > that done we can then just add a new eal_init api which takes a
>> single
>> > > "struct rte_cfgfile *" as parameter. For those apps that want to
>> just use
>> > > inifiles for configuration straight, they can then do:
>> > >
>> > > cfg = rte_cfgfile_load("my_cfg_file");
>> > > rte_eal_newinit(cfg);
>> > >
>> > > Those who want a different config can instead do:
>> > >
>> > > cfg = rte_cfgfile_new();
>> > > rte_cfgfile_add_section(cfg, "dpdk");
>> > > foreach_eal_setting_wanted:
>> > > rte_cfgfile_set(cfg, "dpdk", mysetting, myvalue);
>> > > rte_eal_newinit(cfg);
>> > >
>> > From chatting to a couple of other DPDK dev's here I suspect I may
>> not have
>> > been entirely clear here with this example. What is being shown
>> above is building
>> > up a "config-file" in memory - or rather a config structure which
>> happens to
>> > have the idea of sections and values as an ini file has. There is
>> no actual
>> > file ever being written to disk, and for those using any non-ini
>> config file
>> > structure for their app, the code overhead of using the APIs above
>> should be
>> > pretty much the same as building up any other set of key-value
>> pairs in
>> > memory to pass to an init function.
>> >
>> > Hope this is a little clearer now.
>> >
>> I'm fine with the idea of reusing the config file library that
>> currently exists,
>> or more to the point, modifying it to be usable as a configuration
>> API, rather
>> than a configuration file parser. My primary interest is in
>> separating the user
>> configuration mechanism from the internal library configuration lookup
>> mechanism. What I would really like to be able to see is
>> application developers
>> have the flexibiilty to choose their own configuration method and
>> format, and
>> programatically build a configuration for the dpdk on a per-instance
>> basis prior
>> to calling rte_eal_init
>>
>> It seems like this approach satisfies that requirement
>> Neil
>>
>>
>> If the there is no configuration structure , rather an opaque
>> configuration key/value store ,
>> the user applications can set and get knobs that are not seen by anyone
>> (library) that does not know them by name
>>
>> e.g
>>
>> int num_nodes = getCfgInt ( cfgObject , "eal" , "num_numa_nodes");
>> int delay = getCfgInt ( cfgObject , "drivers.ixgbe" , "some_delay");
>> setCfgInt (cfgObject , "my_app" , "num_days" , 7);
>> setCfgString (cfgObject , "my_app" , "best_day" , "Wednesday");
>>
>
> I dont see why it would not be possible to have the libraries export their
> known config keys in one way or the other. Or more.
>
> One aspect is runtime queries which would need an API of some kind. Being
> able to query default values should work for that purpose and be handy for
> various other uses as well.
>
> Another one is build-time sanity checking which could be doen by
> auto-generating header(s) from the library known keys, eg
>
> #define CFG_NUM_NUMA_NODES "num_numa_nodes"
>
> so if you use the macro instead of the actual string, you'll get a
> compiler error in case of unknown key instead of runtime misbehavior in
> case of typoed values etc. Whether that's worth it is an entirely different
> question.
>
> - Panu -
Thanks Panu .
I was not clear here.
Naturally, libraries are better off being accessed using well-known macro keys.
The other way around, though, does not require the library to know the keys of
the applications.
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
2016-06-03 12:01 0% ` Arnon Warshavsky
@ 2016-06-03 12:53 0% ` Panu Matilainen
2016-06-03 14:31 0% ` Arnon Warshavsky
0 siblings, 1 reply; 200+ results
From: Panu Matilainen @ 2016-06-03 12:53 UTC (permalink / raw)
To: Arnon Warshavsky, Neil Horman
Cc: Bruce Richardson, Wiles, Keith, Thomas Monjalon, Yuanhan Liu,
dev, Tan, Jianfeng, Stephen Hemminger, Christian Ehrhardt,
Olivier Matz
On 06/03/2016 03:01 PM, Arnon Warshavsky wrote:
>
>
> On Fri, Jun 3, 2016 at 2:50 PM, Neil Horman <nhorman@tuxdriver.com
> <mailto:nhorman@tuxdriver.com>> wrote:
>
> On Fri, Jun 03, 2016 at 12:01:30PM +0100, Bruce Richardson wrote:
> > On Fri, Jun 03, 2016 at 11:29:43AM +0100, Bruce Richardson wrote:
> > > On Thu, Jun 02, 2016 at 04:08:37PM -0400, Neil Horman wrote:
> > > > On Thu, Jun 02, 2016 at 07:41:10PM +0000, Wiles, Keith wrote:
> > > > >
> > > > > On 6/2/16, 12:11 PM, "Neil Horman" <nhorman@tuxdriver.com
> <mailto:nhorman@tuxdriver.com>> wrote:
> > > > >
> > > > > >
> > > > > >1) The definition of a config structure that can be passed
> to rte_eal_init,
> > > > > >defining the configuration for that running process
> > > > >
> > > > > Having a configuration structure means we have to have an
> ABI change to that structure anytime we add or remove an option. I
> was thinking a very simple DB of some kind would be better. Have the
> code query the DB to obtain the needed information. The APIs used to
> query and set the DB needs to be very easy to use as well.
> > > >
> > > > Thats a fair point. A decent starting point is likely a
> simple struct that
> > > > looks like this:
> > > >
> > > > struct key_vals {
> > > > char *key;
> > > > union {
> > > > ulong longval;
> > > > void *ptrval;
> > > > } value;
> > > > };
> > > >
> > > > struct config {
> > > > size_t count;
> > > > struct key_vals kvp[0];
> > > > };
> > > >
> > > > >
> > > > > Maybe each option can define its own structure if needed or
> just a simple variable type can be used for the basic types (int,
> string, bool, …)
> > > > >
> > > > Well, if you have config sections that require mulitiple
> elements, I'd handle
> > > > that with naming, i.e. if you have a config group that has an
> int and char
> > > > value, I'd name them "group.intval", and "group.charval", so
> they are
> > > > independently searchable, but linked from a nomenclature
> standpoint.
> > > >
> > > > > Would this work better in the long run, does a fixed
> structure still make sense?
> > > > >
> > > > No. I think you're ABI concerns are valid, but the above is
> likely a good
> > > > starting point to address them.
> > > >
> > > > Best
> > > > Neil
> > >
> > > I'll throw out one implementation idea here that I looked at
> previously, for
> > > the reason that it was simple enough implement with existing code.
> > >
> > > We already have the cfgfile library which works with name/value
> pairs read from
> > > ini files on disk. However, it would be easy enough to add
> couple of APIs to
> > > that to allow the user to "set" values inside an ini structure
> as well. With
> > > that done we can then just add a new eal_init api which takes a
> single
> > > "struct rte_cfgfile *" as parameter. For those apps that want to
> just use
> > > inifiles for configuration straight, they can then do:
> > >
> > > cfg = rte_cfgfile_load("my_cfg_file");
> > > rte_eal_newinit(cfg);
> > >
> > > Those who want a different config can instead do:
> > >
> > > cfg = rte_cfgfile_new();
> > > rte_cfgfile_add_section(cfg, "dpdk");
> > > foreach_eal_setting_wanted:
> > > rte_cfgfile_set(cfg, "dpdk", mysetting, myvalue);
> > > rte_eal_newinit(cfg);
> > >
> > From chatting to a couple of other DPDK dev's here I suspect I may
> not have
> > been entirely clear here with this example. What is being shown
> above is building
> > up a "config-file" in memory - or rather a config structure which
> happens to
> > have the idea of sections and values as an ini file has. There is
> no actual
> > file ever being written to disk, and for those using any non-ini
> config file
> > structure for their app, the code overhead of using the APIs above
> should be
> > pretty much the same as building up any other set of key-value
> pairs in
> > memory to pass to an init function.
> >
> > Hope this is a little clearer now.
> >
> I'm fine with the idea of reusing the config file library that
> currently exists,
> or more to the point, modifying it to be usable as a configuration
> API, rather
> than a configuration file parser. My primary interest is in
> separating the user
> configuration mechanism from the internal library configuration lookup
> mechanism. What I would really like to be able to see is
> application developers
> have the flexibiilty to choose their own configuration method and
> format, and
> programatically build a configuration for the dpdk on a per-instance
> basis prior
> to calling rte_eal_init
>
> It seems like this approach satisfies that requirement
> Neil
>
>
> If the there is no configuration structure , rather an opaque
> configuration key/value store ,
> the user applications can set and get knobs that are not seen by anyone
> (library) that does not know them by name
>
> e.g
>
> int num_nodes = getCfgInt ( cfgObject , "eal" , "num_numa_nodes");
> int delay = getCfgInt ( cfgObject , "drivers.ixgbe" , "some_delay");
> setCfgInt (cfgObject , "my_app" , "num_days" , 7);
> setCfgString (cfgObject , "my_app" , "best_day" , "Wednesday");
I don't see why it would not be possible to have the libraries export
their known config keys in one way or the other. Or more.
One aspect is runtime queries which would need an API of some kind.
Being able to query default values should work for that purpose and be
handy for various other uses as well.
Another one is build-time sanity checking which could be done by
auto-generating header(s) from the library known keys, eg
#define CFG_NUM_NUMA_NODES "num_numa_nodes"
so if you use the macro instead of the actual string, you'll get a
compiler error in case of unknown key instead of runtime misbehavior in
case of typoed values etc. Whether that's worth it is an entirely
different question.
- Panu -
> /Arnon
>
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
2016-06-03 11:50 0% ` Neil Horman
2016-06-03 12:01 0% ` Arnon Warshavsky
@ 2016-06-03 12:14 0% ` Panu Matilainen
1 sibling, 0 replies; 200+ results
From: Panu Matilainen @ 2016-06-03 12:14 UTC (permalink / raw)
To: Neil Horman, Bruce Richardson
Cc: Wiles, Keith, Thomas Monjalon, Yuanhan Liu, dev, Tan, Jianfeng,
Stephen Hemminger, Christian Ehrhardt, Olivier Matz
On 06/03/2016 02:50 PM, Neil Horman wrote:
> On Fri, Jun 03, 2016 at 12:01:30PM +0100, Bruce Richardson wrote:
>> On Fri, Jun 03, 2016 at 11:29:43AM +0100, Bruce Richardson wrote:
>>> On Thu, Jun 02, 2016 at 04:08:37PM -0400, Neil Horman wrote:
>>>> On Thu, Jun 02, 2016 at 07:41:10PM +0000, Wiles, Keith wrote:
>>>>>
>>>>> On 6/2/16, 12:11 PM, "Neil Horman" <nhorman@tuxdriver.com> wrote:
>>>>>
>>>>>>
>>>>>> 1) The definition of a config structure that can be passed to rte_eal_init,
>>>>>> defining the configuration for that running process
>>>>>
>>>>> Having a configuration structure means we have to have an ABI change to that structure anytime we add or remove an option. I was thinking a very simple DB of some kind would be better. Have the code query the DB to obtain the needed information. The APIs used to query and set the DB needs to be very easy to use as well.
>>>>
>>>> Thats a fair point. A decent starting point is likely a simple struct that
>>>> looks like this:
>>>>
>>>> struct key_vals {
>>>> char *key;
>>>> union {
>>>> ulong longval;
>>>> void *ptrval;
>>>> } value;
>>>> };
>>>>
>>>> struct config {
>>>> size_t count;
>>>> struct key_vals kvp[0];
>>>> };
>>>>
>>>>>
>>>>> Maybe each option can define its own structure if needed or just a simple variable type can be used for the basic types (int, string, bool, …)
>>>>>
>>>> Well, if you have config sections that require mulitiple elements, I'd handle
>>>> that with naming, i.e. if you have a config group that has an int and char
>>>> value, I'd name them "group.intval", and "group.charval", so they are
>>>> independently searchable, but linked from a nomenclature standpoint.
>>>>
>>>>> Would this work better in the long run, does a fixed structure still make sense?
>>>>>
>>>> No. I think you're ABI concerns are valid, but the above is likely a good
>>>> starting point to address them.
>>>>
>>>> Best
>>>> Neil
>>>
>>> I'll throw out one implementation idea here that I looked at previously, for
>>> the reason that it was simple enough implement with existing code.
>>>
>>> We already have the cfgfile library which works with name/value pairs read from
>>> ini files on disk. However, it would be easy enough to add couple of APIs to
>>> that to allow the user to "set" values inside an ini structure as well. With
>>> that done we can then just add a new eal_init api which takes a single
>>> "struct rte_cfgfile *" as parameter. For those apps that want to just use
>>> inifiles for configuration straight, they can then do:
>>>
>>> cfg = rte_cfgfile_load("my_cfg_file");
>>> rte_eal_newinit(cfg);
>>>
>>> Those who want a different config can instead do:
>>>
>>> cfg = rte_cfgfile_new();
>>> rte_cfgfile_add_section(cfg, "dpdk");
>>> foreach_eal_setting_wanted:
>>> rte_cfgfile_set(cfg, "dpdk", mysetting, myvalue);
>>> rte_eal_newinit(cfg);
>>>
>> From chatting to a couple of other DPDK dev's here I suspect I may not have
>> been entirely clear here with this example. What is being shown above is building
>> up a "config-file" in memory - or rather a config structure which happens to
>> have the idea of sections and values as an ini file has. There is no actual
>> file ever being written to disk, and for those using any non-ini config file
>> structure for their app, the code overhead of using the APIs above should be
>> pretty much the same as building up any other set of key-value pairs in
>> memory to pass to an init function.
/me nods.
This is pretty much exactly what I suggested (only in much less detail)
last year :) http://dpdk.org/ml/archives/dev/2015-October/024803.html
>> Hope this is a little clearer now.
> I'm fine with the idea of reusing the config file library that currently exists,
> or more to the point, modifying it to be usable as a configuration API, rather
> than a configuration file parser. My primary interest is in separating the user
> configuration mechanism from the internal library configuration lookup
> mechanism. What I would really like to be able to see is application developers
> have the flexibiilty to choose their own configuration method and format, and
> programatically build a configuration for the dpdk on a per-instance basis prior
> to calling rte_eal_init
>
> It seems like this approach satisfies that requirement
/me nods some more.
What the key-value config also can buy us is a direct mapping to cli
options (which is something Keith has been looking into IIRC), at which
point I think all the bases are quite nicely covered.
- Panu -
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
2016-06-03 11:50 0% ` Neil Horman
@ 2016-06-03 12:01 0% ` Arnon Warshavsky
2016-06-03 12:53 0% ` Panu Matilainen
2016-06-03 12:14 0% ` Panu Matilainen
1 sibling, 1 reply; 200+ results
From: Arnon Warshavsky @ 2016-06-03 12:01 UTC (permalink / raw)
To: Neil Horman
Cc: Bruce Richardson, Wiles, Keith, Thomas Monjalon, Yuanhan Liu,
dev, Tan, Jianfeng, Stephen Hemminger, Christian Ehrhardt,
Panu Matilainen, Olivier Matz
On Fri, Jun 3, 2016 at 2:50 PM, Neil Horman <nhorman@tuxdriver.com> wrote:
> On Fri, Jun 03, 2016 at 12:01:30PM +0100, Bruce Richardson wrote:
> > On Fri, Jun 03, 2016 at 11:29:43AM +0100, Bruce Richardson wrote:
> > > On Thu, Jun 02, 2016 at 04:08:37PM -0400, Neil Horman wrote:
> > > > On Thu, Jun 02, 2016 at 07:41:10PM +0000, Wiles, Keith wrote:
> > > > >
> > > > > On 6/2/16, 12:11 PM, "Neil Horman" <nhorman@tuxdriver.com> wrote:
> > > > >
> > > > > >
> > > > > >1) The definition of a config structure that can be passed to
> rte_eal_init,
> > > > > >defining the configuration for that running process
> > > > >
> > > > > Having a configuration structure means we have to have an ABI
> change to that structure anytime we add or remove an option. I was thinking
> a very simple DB of some kind would be better. Have the code query the DB
> to obtain the needed information. The APIs used to query and set the DB
> needs to be very easy to use as well.
> > > >
> > > > Thats a fair point. A decent starting point is likely a simple
> struct that
> > > > looks like this:
> > > >
> > > > struct key_vals {
> > > > char *key;
> > > > union {
> > > > ulong longval;
> > > > void *ptrval;
> > > > } value;
> > > > };
> > > >
> > > > struct config {
> > > > size_t count;
> > > > struct key_vals kvp[0];
> > > > };
> > > >
> > > > >
> > > > > Maybe each option can define its own structure if needed or just a
> simple variable type can be used for the basic types (int, string, bool, …)
> > > > >
> > > > Well, if you have config sections that require mulitiple elements,
> I'd handle
> > > > that with naming, i.e. if you have a config group that has an int
> and char
> > > > value, I'd name them "group.intval", and "group.charval", so they are
> > > > independently searchable, but linked from a nomenclature standpoint.
> > > >
> > > > > Would this work better in the long run, does a fixed structure
> still make sense?
> > > > >
> > > > No. I think you're ABI concerns are valid, but the above is likely a
> good
> > > > starting point to address them.
> > > >
> > > > Best
> > > > Neil
> > >
> > > I'll throw out one implementation idea here that I looked at
> previously, for
> > > the reason that it was simple enough implement with existing code.
> > >
> > > We already have the cfgfile library which works with name/value pairs
> read from
> > > ini files on disk. However, it would be easy enough to add couple of
> APIs to
> > > that to allow the user to "set" values inside an ini structure as
> well. With
> > > that done we can then just add a new eal_init api which takes a single
> > > "struct rte_cfgfile *" as parameter. For those apps that want to just
> use
> > > inifiles for configuration straight, they can then do:
> > >
> > > cfg = rte_cfgfile_load("my_cfg_file");
> > > rte_eal_newinit(cfg);
> > >
> > > Those who want a different config can instead do:
> > >
> > > cfg = rte_cfgfile_new();
> > > rte_cfgfile_add_section(cfg, "dpdk");
> > > foreach_eal_setting_wanted:
> > > rte_cfgfile_set(cfg, "dpdk", mysetting, myvalue);
> > > rte_eal_newinit(cfg);
> > >
> > From chatting to a couple of other DPDK dev's here I suspect I may not
> have
> > been entirely clear here with this example. What is being shown above is
> building
> > up a "config-file" in memory - or rather a config structure which
> happens to
> > have the idea of sections and values as an ini file has. There is no
> actual
> > file ever being written to disk, and for those using any non-ini config
> file
> > structure for their app, the code overhead of using the APIs above
> should be
> > pretty much the same as building up any other set of key-value pairs in
> > memory to pass to an init function.
> >
> > Hope this is a little clearer now.
> >
> I'm fine with the idea of reusing the config file library that currently
> exists,
> or more to the point, modifying it to be usable as a configuration API,
> rather
> than a configuration file parser. My primary interest is in separating
> the user
> configuration mechanism from the internal library configuration lookup
> mechanism. What I would really like to be able to see is application
> developers
> have the flexibiilty to choose their own configuration method and format,
> and
> programatically build a configuration for the dpdk on a per-instance basis
> prior
> to calling rte_eal_init
>
> It seems like this approach satisfies that requirement
> Neil
>
>
If there is no configuration structure, but rather an opaque configuration
key/value store,
the user applications can set and get knobs that are not seen by anyone
(library) that does not know them by name.
e.g.
int num_nodes = getCfgInt ( cfgObject , "eal" , "num_numa_nodes");
int delay = getCfgInt ( cfgObject , "drivers.ixgbe" , "some_delay");
setCfgInt (cfgObject , "my_app" , "num_days" , 7);
setCfgString (cfgObject , "my_app" , "best_day" , "Wednesday");
/Arnon
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
2016-06-03 11:01 0% ` Bruce Richardson
@ 2016-06-03 11:50 0% ` Neil Horman
2016-06-03 12:01 0% ` Arnon Warshavsky
2016-06-03 12:14 0% ` Panu Matilainen
0 siblings, 2 replies; 200+ results
From: Neil Horman @ 2016-06-03 11:50 UTC (permalink / raw)
To: Bruce Richardson
Cc: Wiles, Keith, Thomas Monjalon, Yuanhan Liu, dev, Tan, Jianfeng,
Stephen Hemminger, Christian Ehrhardt, Panu Matilainen,
Olivier Matz
On Fri, Jun 03, 2016 at 12:01:30PM +0100, Bruce Richardson wrote:
> On Fri, Jun 03, 2016 at 11:29:43AM +0100, Bruce Richardson wrote:
> > On Thu, Jun 02, 2016 at 04:08:37PM -0400, Neil Horman wrote:
> > > On Thu, Jun 02, 2016 at 07:41:10PM +0000, Wiles, Keith wrote:
> > > >
> > > > On 6/2/16, 12:11 PM, "Neil Horman" <nhorman@tuxdriver.com> wrote:
> > > >
> > > > >
> > > > >1) The definition of a config structure that can be passed to rte_eal_init,
> > > > >defining the configuration for that running process
> > > >
> > > > Having a configuration structure means we have to have an ABI change to that structure anytime we add or remove an option. I was thinking a very simple DB of some kind would be better. Have the code query the DB to obtain the needed information. The APIs used to query and set the DB needs to be very easy to use as well.
> > >
> > > Thats a fair point. A decent starting point is likely a simple struct that
> > > looks like this:
> > >
> > > struct key_vals {
> > > char *key;
> > > union {
> > > ulong longval;
> > > void *ptrval;
> > > } value;
> > > };
> > >
> > > struct config {
> > > size_t count;
> > > struct key_vals kvp[0];
> > > };
> > >
> > > >
> > > > Maybe each option can define its own structure if needed or just a simple variable type can be used for the basic types (int, string, bool, …)
> > > >
> > > Well, if you have config sections that require mulitiple elements, I'd handle
> > > that with naming, i.e. if you have a config group that has an int and char
> > > value, I'd name them "group.intval", and "group.charval", so they are
> > > independently searchable, but linked from a nomenclature standpoint.
> > >
> > > > Would this work better in the long run, does a fixed structure still make sense?
> > > >
> > > No. I think you're ABI concerns are valid, but the above is likely a good
> > > starting point to address them.
> > >
> > > Best
> > > Neil
> >
> > I'll throw out one implementation idea here that I looked at previously, for
> > the reason that it was simple enough implement with existing code.
> >
> > We already have the cfgfile library which works with name/value pairs read from
> > ini files on disk. However, it would be easy enough to add couple of APIs to
> > that to allow the user to "set" values inside an ini structure as well. With
> > that done we can then just add a new eal_init api which takes a single
> > "struct rte_cfgfile *" as parameter. For those apps that want to just use
> > inifiles for configuration straight, they can then do:
> >
> > cfg = rte_cfgfile_load("my_cfg_file");
> > rte_eal_newinit(cfg);
> >
> > Those who want a different config can instead do:
> >
> > cfg = rte_cfgfile_new();
> > rte_cfgfile_add_section(cfg, "dpdk");
> > foreach_eal_setting_wanted:
> > rte_cfgfile_set(cfg, "dpdk", mysetting, myvalue);
> > rte_eal_newinit(cfg);
> >
> From chatting to a couple of other DPDK dev's here I suspect I may not have
> been entirely clear here with this example. What is being shown above is building
> up a "config-file" in memory - or rather a config structure which happens to
> have the idea of sections and values as an ini file has. There is no actual
> file ever being written to disk, and for those using any non-ini config file
> structure for their app, the code overhead of using the APIs above should be
> pretty much the same as building up any other set of key-value pairs in
> memory to pass to an init function.
>
> Hope this is a little clearer now.
>
I'm fine with the idea of reusing the config file library that currently exists,
or more to the point, modifying it to be usable as a configuration API, rather
than a configuration file parser. My primary interest is in separating the user
configuration mechanism from the internal library configuration lookup
mechanism. What I would really like to be able to see is application developers
have the flexibility to choose their own configuration method and format, and
programmatically build a configuration for the dpdk on a per-instance basis prior
to calling rte_eal_init.
It seems like this approach satisfies that requirement.
Neil
> /Bruce
>
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
2016-06-03 10:29 0% ` Bruce Richardson
@ 2016-06-03 11:01 0% ` Bruce Richardson
2016-06-03 11:50 0% ` Neil Horman
0 siblings, 1 reply; 200+ results
From: Bruce Richardson @ 2016-06-03 11:01 UTC (permalink / raw)
To: Neil Horman
Cc: Wiles, Keith, Thomas Monjalon, Yuanhan Liu, dev, Tan, Jianfeng,
Stephen Hemminger, Christian Ehrhardt, Panu Matilainen,
Olivier Matz
On Fri, Jun 03, 2016 at 11:29:43AM +0100, Bruce Richardson wrote:
> On Thu, Jun 02, 2016 at 04:08:37PM -0400, Neil Horman wrote:
> > On Thu, Jun 02, 2016 at 07:41:10PM +0000, Wiles, Keith wrote:
> > >
> > > On 6/2/16, 12:11 PM, "Neil Horman" <nhorman@tuxdriver.com> wrote:
> > >
> > > >
> > > >1) The definition of a config structure that can be passed to rte_eal_init,
> > > >defining the configuration for that running process
> > >
> > > Having a configuration structure means we have to have an ABI change to that structure anytime we add or remove an option. I was thinking a very simple DB of some kind would be better. Have the code query the DB to obtain the needed information. The APIs used to query and set the DB needs to be very easy to use as well.
> >
> > Thats a fair point. A decent starting point is likely a simple struct that
> > looks like this:
> >
> > struct key_vals {
> > char *key;
> > union {
> > ulong longval;
> > void *ptrval;
> > } value;
> > };
> >
> > struct config {
> > size_t count;
> > struct key_vals kvp[0];
> > };
> >
> > >
> > > Maybe each option can define its own structure if needed or just a simple variable type can be used for the basic types (int, string, bool, …)
> > >
> > Well, if you have config sections that require mulitiple elements, I'd handle
> > that with naming, i.e. if you have a config group that has an int and char
> > value, I'd name them "group.intval", and "group.charval", so they are
> > independently searchable, but linked from a nomenclature standpoint.
> >
> > > Would this work better in the long run, does a fixed structure still make sense?
> > >
> > No. I think you're ABI concerns are valid, but the above is likely a good
> > starting point to address them.
> >
> > Best
> > Neil
>
> I'll throw out one implementation idea here that I looked at previously, for
> the reason that it was simple enough implement with existing code.
>
> We already have the cfgfile library which works with name/value pairs read from
> ini files on disk. However, it would be easy enough to add couple of APIs to
> that to allow the user to "set" values inside an ini structure as well. With
> that done we can then just add a new eal_init api which takes a single
> "struct rte_cfgfile *" as parameter. For those apps that want to just use
> inifiles for configuration straight, they can then do:
>
> cfg = rte_cfgfile_load("my_cfg_file");
> rte_eal_newinit(cfg);
>
> Those who want a different config can instead do:
>
> cfg = rte_cfgfile_new();
> rte_cfgfile_add_section(cfg, "dpdk");
> foreach_eal_setting_wanted:
> rte_cfgfile_set(cfg, "dpdk", mysetting, myvalue);
> rte_eal_newinit(cfg);
>
From chatting to a couple of other DPDK devs here I suspect I may not have
been entirely clear here with this example. What is being shown above is building
up a "config-file" in memory - or rather a config structure which happens to
have the idea of sections and values as an ini file has. There is no actual
file ever being written to disk, and for those using any non-ini config file
structure for their app, the code overhead of using the APIs above should be
pretty much the same as building up any other set of key-value pairs in
memory to pass to an init function.
Hope this is a little clearer now.
/Bruce
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
2016-06-02 20:08 3% ` Neil Horman
@ 2016-06-03 10:29 0% ` Bruce Richardson
2016-06-03 11:01 0% ` Bruce Richardson
1 sibling, 1 reply; 200+ results
From: Bruce Richardson @ 2016-06-03 10:29 UTC (permalink / raw)
To: Neil Horman
Cc: Wiles, Keith, Thomas Monjalon, Yuanhan Liu, dev, Tan, Jianfeng,
Stephen Hemminger, Christian Ehrhardt, Panu Matilainen,
Olivier Matz
On Thu, Jun 02, 2016 at 04:08:37PM -0400, Neil Horman wrote:
> On Thu, Jun 02, 2016 at 07:41:10PM +0000, Wiles, Keith wrote:
> >
> > On 6/2/16, 12:11 PM, "Neil Horman" <nhorman@tuxdriver.com> wrote:
> >
> > >
> > >1) The definition of a config structure that can be passed to rte_eal_init,
> > >defining the configuration for that running process
> >
> > Having a configuration structure means we have to have an ABI change to that structure anytime we add or remove an option. I was thinking a very simple DB of some kind would be better. Have the code query the DB to obtain the needed information. The APIs used to query and set the DB needs to be very easy to use as well.
>
> Thats a fair point. A decent starting point is likely a simple struct that
> looks like this:
>
> struct key_vals {
> char *key;
> union {
> ulong longval;
> void *ptrval;
> } value;
> };
>
> struct config {
> size_t count;
> struct key_vals kvp[0];
> };
>
> >
> > Maybe each option can define its own structure if needed or just a simple variable type can be used for the basic types (int, string, bool, …)
> >
> Well, if you have config sections that require mulitiple elements, I'd handle
> that with naming, i.e. if you have a config group that has an int and char
> value, I'd name them "group.intval", and "group.charval", so they are
> independently searchable, but linked from a nomenclature standpoint.
>
> > Would this work better in the long run, does a fixed structure still make sense?
> >
> No. I think you're ABI concerns are valid, but the above is likely a good
> starting point to address them.
>
> Best
> Neil
I'll throw out one implementation idea here that I looked at previously, for
the reason that it was simple enough to implement with existing code.
We already have the cfgfile library which works with name/value pairs read from
ini files on disk. However, it would be easy enough to add a couple of APIs to
that to allow the user to "set" values inside an ini structure as well. With
that done we can then just add a new eal_init api which takes a single
"struct rte_cfgfile *" as parameter. For those apps that want to just use
inifiles for configuration straight, they can then do:
cfg = rte_cfgfile_load("my_cfg_file");
rte_eal_newinit(cfg);
Those who want a different config can instead do:
cfg = rte_cfgfile_new();
rte_cfgfile_add_section(cfg, "dpdk");
foreach_eal_setting_wanted:
rte_cfgfile_set(cfg, "dpdk", mysetting, myvalue);
rte_eal_newinit(cfg);
We can standardize on a sectionname, or a couple of standard section names that
are used by DPDK, so that the rest of the config file can contain other data
for the app itself.
What do people think? I mainly like it because it gives us good reuse of what
is already there, and enhances our existing library. As well as this it makes
it trivially easy for apps to use ini files - which seem to be very popular here
- while still giving flexibility for others to use whatever other config format
their app prefers.
/Bruce
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
@ 2016-06-02 22:34 3% ` Neil Horman
0 siblings, 0 replies; 200+ results
From: Neil Horman @ 2016-06-02 22:34 UTC (permalink / raw)
To: Matthew Hall
Cc: Wiles, Keith, Thomas Monjalon, Yuanhan Liu, dev, Richardson,
Bruce, Tan, Jianfeng, Stephen Hemminger, Christian Ehrhardt,
Panu Matilainen, Olivier Matz
On Thu, Jun 02, 2016 at 01:53:55PM -0700, Matthew Hall wrote:
> On Thu, Jun 02, 2016 at 04:08:37PM -0400, Neil Horman wrote:
> > struct key_vals {
> > char *key;
> > union {
> > ulong longval;
> > void *ptrval;
> > } value;
> > };
> >
> > struct config {
> > size_t count;
> > struct key_vals kvp[0];
> > };
>
> This sort of code is very 1970s / ioctl / messy binary. And doesn't buy any
> performance advantage because it's just for config.
>
What!? I can't even parse that sentence. Of course it's just for config, we're
talking about a configuration structure. If you want to make it more
complex/hierarchical/whatever, fine, propose a way to do that that isn't ABI
variant in response to config additions. It's just a starting point.
> Something that looks more like sysctl MIBs with hierarchical names or like
> JSON w/ a hierarchy of hash tables and arrays is much less user-hostile.
>
> https://www.freebsd.org/cgi/man.cgi?sysctl(3)
>
I can't even begin to understand what you're after here. sysctl provides a
hierarchy in _exactly_ the same way that I just proposed, by textual consistency
in naming.
> http://json-c.github.io/json-c/json-c-0.12/doc/html/json__object_8h.html
>
So, this is a fine interface to convert text config to a code format, but that's
a decision that the application should be making, not something dpdk should mandate.
Neil
> Matthew.
>
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
2016-06-02 19:41 3% ` Wiles, Keith
@ 2016-06-02 20:08 3% ` Neil Horman
2016-06-03 10:29 0% ` Bruce Richardson
0 siblings, 2 replies; 200+ results
From: Neil Horman @ 2016-06-02 20:08 UTC (permalink / raw)
To: Wiles, Keith
Cc: Thomas Monjalon, Yuanhan Liu, dev, Richardson, Bruce, Tan,
Jianfeng, Stephen Hemminger, Christian Ehrhardt, Panu Matilainen,
Olivier Matz
On Thu, Jun 02, 2016 at 07:41:10PM +0000, Wiles, Keith wrote:
>
> On 6/2/16, 12:11 PM, "Neil Horman" <nhorman@tuxdriver.com> wrote:
>
> >
> >1) The definition of a config structure that can be passed to rte_eal_init,
> >defining the configuration for that running process
>
> Having a configuration structure means we have to have an ABI change to that structure anytime we add or remove an option. I was thinking a very simple DB of some kind would be better. Have the code query the DB to obtain the needed information. The APIs used to query and set the DB needs to be very easy to use as well.
That's a fair point. A decent starting point is likely a simple struct that
looks like this:
struct key_vals {
char *key;
union {
ulong longval;
void *ptrval;
} value;
};
struct config {
size_t count;
struct key_vals kvp[0];
};
>
> Maybe each option can define its own structure if needed or just a simple variable type can be used for the basic types (int, string, bool, …)
>
Well, if you have config sections that require multiple elements, I'd handle
that with naming, i.e. if you have a config group that has an int and char
value, I'd name them "group.intval", and "group.charval", so they are
independently searchable, but linked from a nomenclature standpoint.
> Would this work better in the long run, does a fixed structure still make sense?
>
No. I think your ABI concerns are valid, but the above is likely a good
starting point to address them.
Best
Neil
> >
> >2) The creation and use of an API that various DPDK libraries can use to
> >retrieve that structure (or elements thereof), based on some explicit or imlicit
> >id, so that the configuration can be used (I'm thinking here specifically of
> >multiple dpdk applications using a dpdk shared library)
> >
> >3) The removal of the eal_parse_args code from the core dpdk library entirely,
> >packaging it instead as its own library that interprets command line arguments
> >as currently defined, and populates an instance of the structure defined in (1)
> >
> >4) Altering the Makefiles, so that the example apps link against the new library
> >in (3), altering the app source code to work with the config structure defined
> >in (1)
> >
> >With those steps, I think we will remove the command line bits from the dpdk
> >core, and do so without altering the user experience for any of the sample apps
> >(which will demonstrate to other developers that the same can be done with their
> >applications). From there we will be free to create alternate methods of
> >populating the config struct defined in (1) (via JSON file, YAML, XML, or
> >whatever).
> >
> >Neil
> >
> >> >>
> >> >> For the purposes of the example apps, it would seem that either JSON, YAML, or
> >> >> the above Lua format would work just fine.
> >> >
> >> >+1
> >> >
> >>
> >> Regards,
> >> ++Keith
> >>
> >>
> >
>
>
>
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [RFC] Yet another option for DPDK options
@ 2016-06-02 19:41 3% ` Wiles, Keith
2016-06-02 20:08 3% ` Neil Horman
0 siblings, 1 reply; 200+ results
From: Wiles, Keith @ 2016-06-02 19:41 UTC (permalink / raw)
To: Neil Horman
Cc: Thomas Monjalon, Yuanhan Liu, dev, Richardson, Bruce, Tan,
Jianfeng, Stephen Hemminger, Christian Ehrhardt, Panu Matilainen,
Olivier Matz
On 6/2/16, 12:11 PM, "Neil Horman" <nhorman@tuxdriver.com> wrote:
>
>1) The definition of a config structure that can be passed to rte_eal_init,
>defining the configuration for that running process
Having a configuration structure means we have to have an ABI change to that structure anytime we add or remove an option. I was thinking a very simple DB of some kind would be better. Have the code query the DB to obtain the needed information. The APIs used to query and set the DB needs to be very easy to use as well.
Maybe each option can define its own structure if needed or just a simple variable type can be used for the basic types (int, string, bool, …)
Would this work better in the long run, does a fixed structure still make sense?
>
>2) The creation and use of an API that various DPDK libraries can use to
>retrieve that structure (or elements thereof), based on some explicit or imlicit
>id, so that the configuration can be used (I'm thinking here specifically of
>multiple dpdk applications using a dpdk shared library)
>
>3) The removal of the eal_parse_args code from the core dpdk library entirely,
>packaging it instead as its own library that interprets command line arguments
>as currently defined, and populates an instance of the structure defined in (1)
>
>4) Altering the Makefiles, so that the example apps link against the new library
>in (3), altering the app source code to work with the config structure defined
>in (1)
>
>With those steps, I think we will remove the command line bits from the dpdk
>core, and do so without altering the user experience for any of the sample apps
>(which will demonstrate to other developers that the same can be done with their
>applications). From there we will be free to create alternate methods of
>populating the config struct defined in (1) (via JSON file, YAML, XML, or
>whatever).
>
>Neil
>
>> >>
>> >> For the purposes of the example apps, it would seem that either JSON, YAML, or
>> >> the above Lua format would work just fine.
>> >
>> >+1
>> >
>>
>> Regards,
>> ++Keith
>>
>>
>
^ permalink raw reply [relevance 3%]
* [dpdk-dev] [PATCH v7 0/5] mempool: add external mempool manager
@ 2016-06-02 13:38 2% ` Hunt, David
0 siblings, 0 replies; 200+ results
From: Hunt, David @ 2016-06-02 13:38 UTC (permalink / raw)
To: dev; +Cc: olivier.matz, viktorin, jerin.jacob
Since the cover letter seems to have gone missing, sending it again:
Here's the latest version of the External Mempool Manager patchset.
It's re-based on top of the latest head as of 19/5/2016, including
Olivier's 35-part patch series on mempool re-org [1]
[1] http://dpdk.org/ml/archives/dev/2016-May/039229.html
v7 changes:
* Changed rte_mempool_handler_table to rte_mempool_ops_table
* Changed handler_idx to ops_index in rte_mempool struct
* Reworked comments in rte_mempool.h around ops functions
* Changed rte_mempool_handler.c to rte_mempool_ops.c
* Changed all functions containing _handler_ to _ops_
* Now there is no mention of 'handler' left
* Other small changes out of review of mailing list
v6 changes:
* Moved the flags handling from rte_mempool_create_empty to
rte_mempool_create, as it's only there for backward compatibility
* Various comment additions and cleanup
* Renamed rte_mempool_handler to rte_mempool_ops
* Added a union for *pool and u64 pool_id in struct rte_mempool
* split the original patch into a few parts for easier review.
* rename functions with _ext_ to _ops_.
* addressed review comments
* renamed put and get functions to enqueue and dequeue
* changed occurrences of rte_mempool_ops to const, as they
contain function pointers (security)
* split out the default external mempool handler into a separate
patch for easier review
v5 changes:
* rebasing, as it is dependent on another patch series [1]
v4 changes (Olivier Matz):
* remove the rte_mempool_create_ext() function. To change the handler, the
user has to do the following:
- mp = rte_mempool_create_empty()
- rte_mempool_set_handler(mp, "my_handler")
- rte_mempool_populate_default(mp)
This avoids to add another function with more than 10 arguments,
duplicating
the doxygen comments
* change the api of rte_mempool_alloc_t: only the mempool pointer is
required
as all information is available in it
* change the api of rte_mempool_free_t: remove return value
* move inline wrapper functions from the .c to the .h (else they won't be
inlined). This implies to have one header file (rte_mempool.h), or it
would have generate cross dependencies issues.
* remove now unused MEMPOOL_F_INT_HANDLER (note: it was misused anyway due
to the use of && instead of &)
* fix build in debug mode (__MEMPOOL_STAT_ADD(mp, put_pool, n) remaining)
* fix build with shared libraries (global handler has to be declared in
the .map file)
* rationalize #include order
* remove unused function rte_mempool_get_handler_name()
* rename some structures, fields, functions
* remove the static in front of rte_tailq_elem rte_mempool_tailq (comment
from Yuanhan)
* test the ext mempool handler in the same file than standard mempool
tests,
avoiding to duplicate the code
* rework the custom handler in mempool_test
* rework a bit the patch selecting default mbuf pool handler
* fix some doxygen comments
v3 changes:
* simplified the file layout, renamed to rte_mempool_handler.[hc]
* moved the default handlers into rte_mempool_default.c
* moved the example handler out into app/test/test_ext_mempool.c
* removed is_mc/is_mp change, slight perf degradation on sp cached
operation
* removed stack handler, may re-introduce at a later date
* Changes out of code reviews
v2 changes:
* There was a lot of duplicate code between rte_mempool_xmem_create and
rte_mempool_create_ext. This has now been refactored and is now
hopefully cleaner.
* The RTE_NEXT_ABI define is now used to allow building of the library
in a format that is compatible with binaries built against previous
versions of DPDK.
* Changes out of code reviews. Hopefully I've got most of them included.
The External Mempool Manager is an extension to the mempool API that allows
users to add and use an external mempool manager, which allows external
memory
subsystems such as external hardware memory management systems and software
based memory allocators to be used with DPDK.
The existing API to the internal DPDK mempool manager will remain unchanged
and will be backward compatible. However, there will be an ABI breakage, as
the mempool struct is changing. These changes are all contained within
RTE_NEXT_ABI defs, and the current or next code can be changed with
the CONFIG_RTE_NEXT_ABI config setting
There are two aspects to external mempool manager.
1. Adding the code for your new mempool operations (ops). This is
achieved by adding a new mempool ops source file into the
librte_mempool library, and using the REGISTER_MEMPOOL_OPS macro.
2. Using the new API to call rte_mempool_create_empty and
rte_mempool_set_ops to create a new mempool
using the name parameter to identify which ops to use.
New API calls added
1. A new rte_mempool_create_empty() function
2. rte_mempool_set_ops_byname() which sets the mempool's ops (functions)
3. An rte_mempool_populate_default() and rte_mempool_populate_anon()
functions
which populates the mempool using the relevant ops
Several external mempool managers may be used in the same application. A new
mempool can then be created by using the new 'create' function,
providing the
mempool ops struct name to point the mempool to the relevant mempool manager
callback structure.
The old 'create' function can still be called by legacy programs, and will
internally work out the mempool handle based on the flags provided (single
producer, single consumer, etc). By default handles are created
internally to
implement the built-in DPDK mempool manager and mempool types.
The external mempool manager needs to provide the following functions.
1. alloc - allocates the mempool memory, and adds each object onto
a ring
2. put - puts an object back into the mempool once an
application has
finished with it
3. get - gets an object from the mempool for use by the application
4. get_count - gets the number of available objects in the mempool
5. free - frees the mempool memory
Every time a get/put/get_count is called from the application/PMD, the
callback for that mempool is called. These functions are in the fastpath,
and any unoptimised ops may limit performance.
The new APIs are as follows:
1. rte_mempool_create_empty
struct rte_mempool *
rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
unsigned cache_size, unsigned private_data_size,
int socket_id, unsigned flags);
2. rte_mempool_set_ops_byname()
int
rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name);
3. rte_mempool_populate_default()
int rte_mempool_populate_default(struct rte_mempool *mp);
4. rte_mempool_populate_anon()
int rte_mempool_populate_anon(struct rte_mempool *mp);
Please see rte_mempool.h for further information on the parameters.
The important thing to note is that the mempool ops struct is passed by name
to rte_mempool_set_ops_byname, which looks through the ops struct array to
get the ops_index, which is then stored in the rte_mempool structure. This
allows multiple processes to use the same mempool, as the function pointers
are accessed via ops index.
The mempool ops structure contains callbacks to the implementation of
the ops function, and is set up for registration as follows:
static const struct rte_mempool_ops ops_sp_mc = {
.name = "ring_sp_mc",
.alloc = rte_mempool_common_ring_alloc,
.put = common_ring_sp_put,
.get = common_ring_mc_get,
.get_count = common_ring_get_count,
.free = common_ring_free,
};
And then the following macro will register the ops in the array of ops
structures
REGISTER_MEMPOOL_OPS(ops_sp_mc);
For an example of API usage, please see app/test/test_mempool.c, which
implements a rudimentary "custom_handler" mempool manager using simple
mallocs
for each mempool object. This file also contains the callbacks and self
registration for the new handler.
David Hunt (4):
mempool: support external mempool operations
mempool: remove rte_ring from rte_mempool struct
mempool: add default external mempool ops
mbuf: allow apps to change default mempool ops
Olivier Matz (1):
app/test: test external mempool manager
^ permalink raw reply [relevance 2%]
* [dpdk-dev] [PATCH v7 0/5] mempool: add external mempool manager
2016-06-01 16:19 2% ` [dpdk-dev] [PATCH v6 0/5] mempool: add external mempool manager David Hunt
@ 2016-06-02 13:27 2% ` David Hunt
2016-06-03 14:58 2% ` [dpdk-dev] [PATCH v8 " David Hunt
0 siblings, 2 replies; 200+ results
From: David Hunt @ 2016-06-02 13:27 UTC (permalink / raw)
To: dev; +Cc: olivier.matz, viktorin, jerin.jacob
Here's the latest version of the External Mempool Manager patchset.
It's re-based on top of the latest head as of 19/5/2016, including
Olivier's 35-part patch series on mempool re-org [1]
[1] http://dpdk.org/ml/archives/dev/2016-May/039229.html
v7 changes:
* Changed rte_mempool_handler_table to rte_mempool_ops_table
* Changed handler_idx to ops_index in rte_mempool struct
* Reworked comments in rte_mempool.h around ops functions
* Changed rte_mempool_handler.c to rte_mempool_ops.c
* Changed all functions containing _handler_ to _ops_
* Now there is no mention of 'handler' left
* Other small changes out of review of mailing list
v6 changes:
* Moved the flags handling from rte_mempool_create_empty to
rte_mempool_create, as it's only there for backward compatibility
* Various comment additions and cleanup
* Renamed rte_mempool_handler to rte_mempool_ops
* Added a union for *pool and u64 pool_id in struct rte_mempool
* split the original patch into a few parts for easier review.
* rename functions with _ext_ to _ops_.
* addressed review comments
* renamed put and get functions to enqueue and dequeue
* changed occurrences of rte_mempool_ops to const, as they
contain function pointers (security)
* split out the default external mempool handler into a separate
patch for easier review
v5 changes:
* rebasing, as it is dependent on another patch series [1]
v4 changes (Olivier Matz):
* remove the rte_mempool_create_ext() function. To change the handler, the
user has to do the following:
- mp = rte_mempool_create_empty()
- rte_mempool_set_handler(mp, "my_handler")
- rte_mempool_populate_default(mp)
This avoids to add another function with more than 10 arguments, duplicating
the doxygen comments
* change the api of rte_mempool_alloc_t: only the mempool pointer is required
as all information is available in it
* change the api of rte_mempool_free_t: remove return value
* move inline wrapper functions from the .c to the .h (else they won't be
inlined). This implies to have one header file (rte_mempool.h), or it
would have generate cross dependencies issues.
* remove now unused MEMPOOL_F_INT_HANDLER (note: it was misused anyway due
to the use of && instead of &)
* fix build in debug mode (__MEMPOOL_STAT_ADD(mp, put_pool, n) remaining)
* fix build with shared libraries (global handler has to be declared in
the .map file)
* rationalize #include order
* remove unused function rte_mempool_get_handler_name()
* rename some structures, fields, functions
* remove the static in front of rte_tailq_elem rte_mempool_tailq (comment
from Yuanhan)
* test the ext mempool handler in the same file than standard mempool tests,
avoiding to duplicate the code
* rework the custom handler in mempool_test
* rework a bit the patch selecting default mbuf pool handler
* fix some doxygen comments
v3 changes:
* simplified the file layout, renamed to rte_mempool_handler.[hc]
* moved the default handlers into rte_mempool_default.c
* moved the example handler out into app/test/test_ext_mempool.c
* removed is_mc/is_mp change, slight perf degradation on sp cached operation
* removed stack handler, may re-introduce at a later date
* Changes out of code reviews
v2 changes:
* There was a lot of duplicate code between rte_mempool_xmem_create and
rte_mempool_create_ext. This has now been refactored and is now
hopefully cleaner.
* The RTE_NEXT_ABI define is now used to allow building of the library
in a format that is compatible with binaries built against previous
versions of DPDK.
* Changes out of code reviews. Hopefully I've got most of them included.
The External Mempool Manager is an extension to the mempool API that allows
users to add and use an external mempool manager, which allows external memory
subsystems such as external hardware memory management systems and software
based memory allocators to be used with DPDK.
The existing API to the internal DPDK mempool manager will remain unchanged
and will be backward compatible. However, there will be an ABI breakage, as
the mempool struct is changing. These changes are all contained within
RTE_NEXT_ABI defs, and the current or next code can be changed with
the CONFIG_RTE_NEXT_ABI config setting
There are two aspects to external mempool manager.
1. Adding the code for your new mempool operations (ops). This is
achieved by adding a new mempool ops source file into the
librte_mempool library, and using the REGISTER_MEMPOOL_OPS macro.
2. Using the new API to call rte_mempool_create_empty and
rte_mempool_set_ops to create a new mempool
using the name parameter to identify which ops to use.
New API calls added
1. A new rte_mempool_create_empty() function
2. rte_mempool_set_ops_byname() which sets the mempool's ops (functions)
3. An rte_mempool_populate_default() and rte_mempool_populate_anon() functions
which populates the mempool using the relevant ops
Several external mempool managers may be used in the same application. A new
mempool can then be created by using the new 'create' function, providing the
mempool ops struct name to point the mempool to the relevant mempool manager
callback structure.
The old 'create' function can still be called by legacy programs, and will
internally work out the mempool handle based on the flags provided (single
producer, single consumer, etc). By default handles are created internally to
implement the built-in DPDK mempool manager and mempool types.
The external mempool manager needs to provide the following functions.
1. alloc - allocates the mempool memory, and adds each object onto a ring
2. put - puts an object back into the mempool once an application has
finished with it
3. get - gets an object from the mempool for use by the application
4. get_count - gets the number of available objects in the mempool
5. free - frees the mempool memory
Every time a get/put/get_count is called from the application/PMD, the
callback for that mempool is called. These functions are in the fastpath,
and any unoptimised ops may limit performance.
The new APIs are as follows:
1. rte_mempool_create_empty
struct rte_mempool *
rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
unsigned cache_size, unsigned private_data_size,
int socket_id, unsigned flags);
2. rte_mempool_set_ops_byname()
int
rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name);
3. rte_mempool_populate_default()
int rte_mempool_populate_default(struct rte_mempool *mp);
4. rte_mempool_populate_anon()
int rte_mempool_populate_anon(struct rte_mempool *mp);
Please see rte_mempool.h for further information on the parameters.
The important thing to note is that the mempool ops struct is passed by name
to rte_mempool_set_ops_byname, which looks through the ops struct array to
get the ops_index, which is then stored in the rte_mempool structure. This
allows multiple processes to use the same mempool, as the function pointers
are accessed via ops index.
The mempool ops structure contains callbacks to the implementation of
the ops function, and is set up for registration as follows:
static const struct rte_mempool_ops ops_sp_mc = {
.name = "ring_sp_mc",
.alloc = rte_mempool_common_ring_alloc,
.put = common_ring_sp_put,
.get = common_ring_mc_get,
.get_count = common_ring_get_count,
.free = common_ring_free,
};
And then the following macro will register the ops in the array of ops
structures
REGISTER_MEMPOOL_OPS(ops_sp_mc);
For an example of API usage, please see app/test/test_mempool.c, which
implements a rudimentary "custom_handler" mempool manager using simple mallocs
for each mempool object. This file also contains the callbacks and self
registration for the new handler.
David Hunt (4):
mempool: support external mempool operations
mempool: remove rte_ring from rte_mempool struct
mempool: add default external mempool ops
mbuf: allow apps to change default mempool ops
Olivier Matz (1):
app/test: test external mempool manager
^ permalink raw reply [relevance 2%]
* Re: [dpdk-dev] about rx checksum flags
2016-06-01 9:06 0% ` Ananyev, Konstantin
@ 2016-06-02 7:42 0% ` Chandran, Sugesh
0 siblings, 0 replies; 200+ results
From: Chandran, Sugesh @ 2016-06-02 7:42 UTC (permalink / raw)
To: Ananyev, Konstantin, Stephen Hemminger, Olivier MATZ
Cc: Yuanhan Liu, dev, Richardson, Bruce, Adrien Mazarguil, Tan, Jianfeng
Hi Olivier,
Thank you for working on this..
A comment on the proposal is given below,
Regards
_Sugesh
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ananyev,
> Konstantin
> Sent: Wednesday, June 1, 2016 10:07 AM
> To: Stephen Hemminger <stephen@networkplumber.org>; Olivier MATZ
> <olivier.matz@6wind.com>
> Cc: Yuanhan Liu <yuanhan.liu@linux.intel.com>; dev@dpdk.org; Richardson,
> Bruce <bruce.richardson@intel.com>; Adrien Mazarguil
> <adrien.mazarguil@6wind.com>; Tan, Jianfeng <jianfeng.tan@intel.com>
> Subject: Re: [dpdk-dev] about rx checksum flags
>
>
>
> > -----Original Message-----
> > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > Sent: Tuesday, May 31, 2016 11:03 PM
> > To: Olivier MATZ
> > Cc: Yuanhan Liu; dev@dpdk.org; Ananyev, Konstantin; Richardson, Bruce;
> > Adrien Mazarguil; Tan, Jianfeng
> > Subject: Re: [dpdk-dev] about rx checksum flags
> >
> > On Tue, 31 May 2016 22:58:57 +0200
> > Olivier MATZ <olivier.matz@6wind.com> wrote:
> >
> > > Hi Stephen,
> > >
> > > On 05/31/2016 10:28 PM, Stephen Hemminger wrote:
> > > > On Tue, 31 May 2016 21:11:59 +0200 Olivier MATZ
> > > > <olivier.matz@6wind.com> wrote:
> > > >
> > > >>
> > > >>
> > > >> On 05/31/2016 10:09 AM, Yuanhan Liu wrote:
> > > >>> On Mon, May 30, 2016 at 05:26:21PM +0200, Olivier Matz wrote:
> > > >>>> PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the
> > > >>>> packet data, but the integrity of the L4 header is verified.
> > > >>>> -> the application can process the packet but must not verify the
> > > >>>> checksum by sw. It has to take care to recalculate the cksum
> > > >>>> if the packet is transmitted (either by sw or using tx
> > > >>>> offload)
> > > >>>
> > > >>> I like the explanation you made at [1] better :)
> > > >>>
> > > >>> So in general, I think this proposal is good to have.
> > > >>
> > > >> Thanks everyone for your feedback.
> > > >>
> > > >> I'll try to send a first patch proposition soon.
> > > >>
> > > >> Regards,
> > > >> Olivier
> > > >
> > > > I think it is time to ditch the old definitions of Rx checksum and
> > > > instead use something more compatiable with virtio (and Linux). I.e
> have three values
> > > > 1) checksum is know good for packet contents
> > > > 2) checksum value one's complement for packet contents
> > > > 3) checksum is undetermined
> > > > The original definition seems to be Intel HW centric and applies
> > > > to a limited range of devices making it unusable by general application.
> > > >
> > > > Break the ABI, and ditch the old values (ok mark
> > > > PKT_RX_L4_CKSUM_BAD as __deprecated and remove all usage).
> > > >
> > >
> > > Don't you think knowing that a checksum is bad could be useful?
> >
> > Not really. They should be mark as undetermined, then software can
> > recheck for the possibly buggy hardware.
>
> Hmm, I don't see the point here.
> If the HW clearly reports that checksum is invalid (not unknown), why SW has
> to assume it is ' undetermined' and recheck it?
> To me that means just wasted cycles.
> In general, it sounds like really strange approach to me:
> write your SW with assumption that all HW you are going to use will not work
> correctly.
>
> >
> > > In that case the application can drop/log the packet without any
> > > additional cpu cost.
> > >
> > > What do you mean by beeing unusable by general application?
> >
> > Right now application can only see "known bad" or "indeterminate"
> > there is no way to no which packets are good. Since good is the
> > desired/expected case, software has to checksum every packet.
> >
> > >
> > > I think the "2)" also requires a csum_start + csum_offset in mbuf
> > > structure, right?
> >
> > Not really, it would mean having a way to get the raw one's complement
> > sum out of the hardware. This is a good way to support the tunnel
> > protocol du jour without having to have firmware support.
> > Unfortunately, most hardware vendors don't believe in doing it that way.
>
> It might be a good feature to have, but if most HW vendors don't support it
> why to bother?
>
> >
> >
> > > Do you also suggest to drop IP checksum flags?
> >
> > IP checksum offload is mostly useless. If application needs to look at
> > IP, it can do whole checksum in very few instructions, the whole
> > header is in the same cache line as src/dst so the HW offload is really no
> help.
> >
[Sugesh] The checksum offload can boost the tunneling performance in OVS.
I guess the IP checksum is also as important as L4. In some cases, UDP checksum is
zero and no need to validate it. But IP checksum is present on all the packets and that must be
validated all the time. At higher packet rate, the ip checksum offload can offer slight
performance improvement. What do you think??
> > >
> > > Will it be possible to manage tunnel checksums?
> > >
> > > I think this would be a pretty big change. If there is no additional
> > > argument than beeing more compatible with virtio/linux, I'm
> > > wondering if it's worth breaking the API. Let's wait for other opinions.
>
> I think that what Olivier proposed is good enough and definitely a step
> forward from what we have right now.
>
> Konstantin
>
> > >
> > > Thanks for your feedback.
> > > Olivier
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] Suggestions for the dpdk stable tree
2016-05-23 2:21 3% ` Yuanhan Liu
@ 2016-06-01 19:01 0% ` Mcnamara, John
0 siblings, 0 replies; 200+ results
From: Mcnamara, John @ 2016-06-01 19:01 UTC (permalink / raw)
To: Yuanhan Liu; +Cc: Christian Ehrhardt, dev, Stephen Hemminger, Thomas Monjalon
> -----Original Message-----
> From: Yuanhan Liu [mailto:yuanhan.liu@linux.intel.com]
> Sent: Monday, May 23, 2016 3:22 AM
> To: Mcnamara, John <john.mcnamara@intel.com>
> Cc: Christian Ehrhardt <christian.ehrhardt@canonical.com>; dev
> <dev@dpdk.org>; Stephen Hemminger <stephen@networkplumber.org>; Thomas
> Monjalon <thomas.monjalon@6wind.com>
> Subject: Re: [dpdk-dev] Suggestions for the dpdk stable tree
>
> > We have been looking at identifying a maintainer and validation engineer
> internally to support the effort but haven't be able to finalize that.
> Once we do we will come back to the mailing list with a proposal and a
> request for comments.
>
> I would nominate myself as the LTS tree maintainer, if it makes sense to
> have one.
Hi Yuanhan,
Thanks for putting your name forward. I think your experience as the dpdk-next-virtio
maintainer will help with this.
> > We would probably be looking at 16.04 or even 16.07 as the basis for the
> LTS at this stage.
>
> Just one opinion from the view of vhost: since 16.07 is a vhost ABI/API
> refactoring release, I'd suggest to base on 16.07, and then we could have
> less conflicts to apply later bug fix patches.
Agreed. At this stage 16.07 makes more sense.
I'll start a separate discussion thread about how the LTS process would work
to see if we can get some consensus from interested parties.
John.
--
^ permalink raw reply [relevance 0%]
* [dpdk-dev] [PATCH v6 0/5] mempool: add external mempool manager
2016-05-19 13:44 2% ` [dpdk-dev] mempool: " David Hunt
@ 2016-06-01 16:19 2% ` David Hunt
2016-06-02 13:27 2% ` [dpdk-dev] [PATCH v7 " David Hunt
1 sibling, 1 reply; 200+ results
From: David Hunt @ 2016-06-01 16:19 UTC (permalink / raw)
To: dev; +Cc: olivier.matz, viktorin, jerin.jacob
Here's the latest version of the External Mempool Manager patchset.
It's re-based on top of the latest head as of 1st June 2016, including
Olivier's 35-part patch series on mempool re-org [1]
[1] http://dpdk.org/ml/archives/dev/2016-May/039229.html
Note: After applying the last patch, run "make config ..." before
compiling. It introduces a config file change.
Note: Hopefully I've addressed all the extensive comments over the
last week. If I've missed any, please let me know, as it would
not have been intentional. I hope I've responded to all comments
via email on the mailing list.
v6 changes:
* Moved the flags handling from rte_mempool_create_empty to
rte_mempool_create, as it's only there for backward compatibility
* Various comment additions and cleanup
* Renamed rte_mempool_handler to rte_mempool_ops
* Added a union for *pool and u64 pool_id in struct rte_mempool
* split the original patch into a few parts for easier review.
* rename functions with _ext_ to _ops_.
* addressed review comments
* renamed put and get functions to enqueue and dequeue
* changed occurrences of rte_mempool_ops to const, as they
contain function pointers (security)
* split out the default external mempool handler into a separate
patch for easier review
v5 changes:
* rebasing, as it is dependent on another patch series [1]
v4 changes (Olivier Matz):
* remove the rte_mempool_create_ext() function. To change the handler, the
user has to do the following:
- mp = rte_mempool_create_empty()
- rte_mempool_set_handler(mp, "my_handler")
- rte_mempool_populate_default(mp)
This avoids to add another function with more than 10 arguments, duplicating
the doxygen comments
* change the api of rte_mempool_alloc_t: only the mempool pointer is required
as all information is available in it
* change the api of rte_mempool_free_t: remove return value
* move inline wrapper functions from the .c to the .h (else they won't be
inlined). This implies to have one header file (rte_mempool.h), or it
would have generate cross dependencies issues.
* remove now unused MEMPOOL_F_INT_HANDLER (note: it was misused anyway due
to the use of && instead of &)
* fix build in debug mode (__MEMPOOL_STAT_ADD(mp, put_pool, n) remaining)
* fix build with shared libraries (global handler has to be declared in
the .map file)
* rationalize #include order
* remove unused function rte_mempool_get_handler_name()
* rename some structures, fields, functions
* remove the static in front of rte_tailq_elem rte_mempool_tailq (comment
from Yuanhan)
* test the ext mempool handler in the same file than standard mempool tests,
avoiding to duplicate the code
* rework the custom handler in mempool_test
* rework a bit the patch selecting default mbuf pool handler
* fix some doxygen comments
v3 changes:
* simplified the file layout, renamed to rte_mempool_handler.[hc]
* moved the default handlers into rte_mempool_default.c
* moved the example handler out into app/test/test_ext_mempool.c
* removed is_mc/is_mp change, slight perf degradation on sp cached operation
* removed stack handler, may re-introduce at a later date
* Changes out of code reviews
v2 changes:
* There was a lot of duplicate code between rte_mempool_xmem_create and
rte_mempool_create_ext. This has now been refactored and is now
hopefully cleaner.
* The RTE_NEXT_ABI define is now used to allow building of the library
in a format that is compatible with binaries built against previous
versions of DPDK.
* Changes out of code reviews. Hopefully I've got most of them included.
The External Mempool Manager is an extension to the mempool API that allows
users to add and use an external mempool manager, which allows external memory
subsystems such as external hardware memory management systems and software
based memory allocators to be used with DPDK.
The existing API to the internal DPDK mempool manager will remain unchanged
and will be backward compatible. However, there will be an ABI breakage, as
the mempool struct is changing. These changes are all contained within
RTE_NEXT_ABI defs, and the current or next code can be changed with
the CONFIG_RTE_NEXT_ABI config setting
There are two aspects to external mempool manager.
1. Adding the code for your new mempool handler. This is achieved by adding a
new mempool handler source file into the librte_mempool library, and
using the REGISTER_MEMPOOL_HANDLER macro.
2. Using the new API to call rte_mempool_create_empty and
rte_mempool_set_handler to create a new mempool
using the name parameter to identify which handler to use.
New API calls added
1. A new rte_mempool_create_empty() function
2. rte_mempool_set_handler() which sets the mempool's handler
3. An rte_mempool_populate_default() and rte_mempool_populate_anon() functions
which populates the mempool using the relevant handler
Several external mempool managers may be used in the same application. A new
mempool can then be created by using the new 'create' function, providing the
mempool handler name to point the mempool to the relevant mempool manager
callback structure.
The old 'create' function can still be called by legacy programs, and will
internally work out the mempool handle based on the flags provided (single
producer, single consumer, etc). By default handles are created internally to
implement the built-in DPDK mempool manager and mempool types.
The external mempool manager needs to provide the following functions.
1. alloc - allocates the mempool memory, and adds each object onto a ring
2. put - puts an object back into the mempool once an application has
finished with it
3. get - gets an object from the mempool for use by the application
4. get_count - gets the number of available objects in the mempool
5. free - frees the mempool memory
Every time a get/put/get_count is called from the application/PMD, the
callback for that mempool is called. These functions are in the fastpath,
and any unoptimised handlers may limit performance.
The new APIs are as follows:
1. rte_mempool_create_empty
struct rte_mempool *
rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
unsigned cache_size, unsigned private_data_size,
int socket_id, unsigned flags);
2. rte_mempool_set_handler()
int
rte_mempool_set_handler(struct rte_mempool *mp, const char *name);
3. rte_mempool_populate_default()
int rte_mempool_populate_default(struct rte_mempool *mp);
4. rte_mempool_populate_anon()
int rte_mempool_populate_anon(struct rte_mempool *mp);
Please see rte_mempool.h for further information on the parameters.
The important thing to note is that the mempool handler is passed by name
to rte_mempool_set_handler, which looks through the handler array to
get the handler index, which is then stored in the rte_mempool structure. This
allows multiple processes to use the same mempool, as the function pointers
are accessed via handler index.
The mempool handler structure contains callbacks to the implementation of
the handler, and is set up for registration as follows:
static const struct rte_mempool_handler handler_sp_mc = {
.name = "ring_sp_mc",
.alloc = rte_mempool_common_ring_alloc,
.put = common_ring_sp_put,
.get = common_ring_mc_get,
.get_count = common_ring_get_count,
.free = common_ring_free,
};
And then the following macro will register the handler in the array of handlers
REGISTER_MEMPOOL_HANDLER(handler_sp_mc);
For an example of a simple malloc based mempool manager, see
lib/librte_mempool/custom_mempool.c
For an example of API usage, please see app/test/test_mempool.c, which
implements a rudimentary "custom_handler" mempool manager using simple mallocs
for each mempool object. This file also contains the callbacks and self
registration for the new handler.
David Hunt (4):
mempool: support external handler
mempool: remove rte_ring from rte_mempool struct
mempool: add default external mempool handler
mbuf: get default mempool handler from configuration
Olivier Matz (1):
app/test: test external mempool handler
^ permalink raw reply [relevance 2%]
* Re: [dpdk-dev] about rx checksum flags
2016-05-31 22:02 0% ` Stephen Hemminger
@ 2016-06-01 9:06 0% ` Ananyev, Konstantin
2016-06-02 7:42 0% ` Chandran, Sugesh
0 siblings, 1 reply; 200+ results
From: Ananyev, Konstantin @ 2016-06-01 9:06 UTC (permalink / raw)
To: Stephen Hemminger, Olivier MATZ
Cc: Yuanhan Liu, dev, Richardson, Bruce, Adrien Mazarguil, Tan, Jianfeng
> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Tuesday, May 31, 2016 11:03 PM
> To: Olivier MATZ
> Cc: Yuanhan Liu; dev@dpdk.org; Ananyev, Konstantin; Richardson, Bruce; Adrien Mazarguil; Tan, Jianfeng
> Subject: Re: [dpdk-dev] about rx checksum flags
>
> On Tue, 31 May 2016 22:58:57 +0200
> Olivier MATZ <olivier.matz@6wind.com> wrote:
>
> > Hi Stephen,
> >
> > On 05/31/2016 10:28 PM, Stephen Hemminger wrote:
> > > On Tue, 31 May 2016 21:11:59 +0200
> > > Olivier MATZ <olivier.matz@6wind.com> wrote:
> > >
> > >>
> > >>
> > >> On 05/31/2016 10:09 AM, Yuanhan Liu wrote:
> > >>> On Mon, May 30, 2016 at 05:26:21PM +0200, Olivier Matz wrote:
> > >>>> PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
> > >>>> data, but the integrity of the L4 header is verified.
> > >>>> -> the application can process the packet but must not verify the
> > >>>> checksum by sw. It has to take care to recalculate the cksum
> > >>>> if the packet is transmitted (either by sw or using tx offload)
> > >>>
> > >>> I like the explanation you made at [1] better :)
> > >>>
> > >>> So in general, I think this proposal is good to have.
> > >>
> > >> Thanks everyone for your feedback.
> > >>
> > >> I'll try to send a first patch proposition soon.
> > >>
> > >> Regards,
> > >> Olivier
> > >
> > > I think it is time to ditch the old definitions of Rx checksum and instead
> > > use something more compatiable with virtio (and Linux). I.e have three values
> > > 1) checksum is know good for packet contents
> > > 2) checksum value one's complement for packet contents
> > > 3) checksum is undetermined
> > > The original definition seems to be Intel HW centric and applies to a limited
> > > range of devices making it unusable by general application.
> > >
> > > Break the ABI, and ditch the old values (ok mark PKT_RX_L4_CKSUM_BAD as __deprecated
> > > and remove all usage).
> > >
> >
> > Don't you think knowing that a checksum is bad could be useful?
>
> Not really. They should be mark as undetermined, then software can recheck
> for the possibly buggy hardware.
Hmm, I don't see the point here.
If the HW clearly reports that checksum is invalid (not unknown),
why SW has to assume it is ' undetermined' and recheck it?
To me that means just wasted cycles.
In general, it sounds like a really strange approach to me:
write your SW with assumption that all HW you are going to use
will not work correctly.
>
> > In that case the application can drop/log the packet without any
> > additional cpu cost.
> >
> > What do you mean by beeing unusable by general application?
>
> Right now application can only see "known bad" or "indeterminate"
> there is no way to no which packets are good. Since good is the desired/expected
> case, software has to checksum every packet.
>
> >
> > I think the "2)" also requires a csum_start + csum_offset in
> > mbuf structure, right?
>
> Not really, it would mean having a way to get the raw one's complement sum
> out of the hardware. This is a good way to support the tunnel protocol du jour
> without having to have firmware support. Unfortunately, most hardware vendors
> don't believe in doing it that way.
It might be a good feature to have, but if most HW vendors don't support it
why to bother?
>
>
> > Do you also suggest to drop IP checksum flags?
>
> IP checksum offload is mostly useless. If application needs to look
> at IP, it can do whole checksum in very few instructions, the whole header
> is in the same cache line as src/dst so the HW offload is really no help.
>
> >
> > Will it be possible to manage tunnel checksums?
> >
> > I think this would be a pretty big change. If there is no additional
> > argument than beeing more compatible with virtio/linux, I'm wondering
> > if it's worth breaking the API. Let's wait for other opinions.
I think that what Olivier proposed is good enough and
definitely a step forward from what we have right now.
Konstantin
> >
> > Thanks for your feedback.
> > Olivier
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] about rx checksum flags
2016-05-31 20:58 0% ` Olivier MATZ
@ 2016-05-31 22:02 0% ` Stephen Hemminger
2016-06-01 9:06 0% ` Ananyev, Konstantin
0 siblings, 1 reply; 200+ results
From: Stephen Hemminger @ 2016-05-31 22:02 UTC (permalink / raw)
To: Olivier MATZ
Cc: Yuanhan Liu, dev, Ananyev, Konstantin, Richardson, Bruce,
Adrien Mazarguil, Tan, Jianfeng
On Tue, 31 May 2016 22:58:57 +0200
Olivier MATZ <olivier.matz@6wind.com> wrote:
> Hi Stephen,
>
> On 05/31/2016 10:28 PM, Stephen Hemminger wrote:
> > On Tue, 31 May 2016 21:11:59 +0200
> > Olivier MATZ <olivier.matz@6wind.com> wrote:
> >
> >>
> >>
> >> On 05/31/2016 10:09 AM, Yuanhan Liu wrote:
> >>> On Mon, May 30, 2016 at 05:26:21PM +0200, Olivier Matz wrote:
> >>>> PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
> >>>> data, but the integrity of the L4 header is verified.
> >>>> -> the application can process the packet but must not verify the
> >>>> checksum by sw. It has to take care to recalculate the cksum
> >>>> if the packet is transmitted (either by sw or using tx offload)
> >>>
> >>> I like the explanation you made at [1] better :)
> >>>
> >>> So in general, I think this proposal is good to have.
> >>
> >> Thanks everyone for your feedback.
> >>
> >> I'll try to send a first patch proposition soon.
> >>
> >> Regards,
> >> Olivier
> >
> > I think it is time to ditch the old definitions of Rx checksum and instead
> > use something more compatiable with virtio (and Linux). I.e have three values
> > 1) checksum is know good for packet contents
> > 2) checksum value one's complement for packet contents
> > 3) checksum is undetermined
> > The original definition seems to be Intel HW centric and applies to a limited
> > range of devices making it unusable by general application.
> >
> > Break the ABI, and ditch the old values (ok mark PKT_RX_L4_CKSUM_BAD as __deprecated
> > and remove all usage).
> >
>
> Don't you think knowing that a checksum is bad could be useful?
Not really. They should be marked as undetermined, then software can recheck
for the possibly buggy hardware.
> In that case the application can drop/log the packet without any
> additional cpu cost.
>
> What do you mean by beeing unusable by general application?
Right now application can only see "known bad" or "indeterminate"
there is no way to know which packets are good. Since good is the desired/expected
case, software has to checksum every packet.
>
> I think the "2)" also requires a csum_start + csum_offset in
> mbuf structure, right?
Not really, it would mean having a way to get the raw one's complement sum
out of the hardware. This is a good way to support the tunnel protocol du jour
without having to have firmware support. Unfortunately, most hardware vendors
don't believe in doing it that way.
> Do you also suggest to drop IP checksum flags?
IP checksum offload is mostly useless. If application needs to look
at IP, it can do whole checksum in very few instructions, the whole header
is in the same cache line as src/dst so the HW offload is really no help.
>
> Will it be possible to manage tunnel checksums?
>
> I think this would be a pretty big change. If there is no additional
> argument than beeing more compatible with virtio/linux, I'm wondering
> if it's worth breaking the API. Let's wait for other opinions.
>
> Thanks for your feedback.
> Olivier
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] about rx checksum flags
2016-05-31 20:28 3% ` Stephen Hemminger
@ 2016-05-31 20:58 0% ` Olivier MATZ
2016-05-31 22:02 0% ` Stephen Hemminger
0 siblings, 1 reply; 200+ results
From: Olivier MATZ @ 2016-05-31 20:58 UTC (permalink / raw)
To: Stephen Hemminger
Cc: Yuanhan Liu, dev, Ananyev, Konstantin, Richardson, Bruce,
Adrien Mazarguil, Tan, Jianfeng
Hi Stephen,
On 05/31/2016 10:28 PM, Stephen Hemminger wrote:
> On Tue, 31 May 2016 21:11:59 +0200
> Olivier MATZ <olivier.matz@6wind.com> wrote:
>
>>
>>
>> On 05/31/2016 10:09 AM, Yuanhan Liu wrote:
>>> On Mon, May 30, 2016 at 05:26:21PM +0200, Olivier Matz wrote:
>>>> PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
>>>> data, but the integrity of the L4 header is verified.
>>>> -> the application can process the packet but must not verify the
>>>> checksum by sw. It has to take care to recalculate the cksum
>>>> if the packet is transmitted (either by sw or using tx offload)
>>>
>>> I like the explanation you made at [1] better :)
>>>
>>> So in general, I think this proposal is good to have.
>>
>> Thanks everyone for your feedback.
>>
>> I'll try to send a first patch proposition soon.
>>
>> Regards,
>> Olivier
>
> I think it is time to ditch the old definitions of Rx checksum and instead
> use something more compatiable with virtio (and Linux). I.e have three values
> 1) checksum is know good for packet contents
> 2) checksum value one's complement for packet contents
> 3) checksum is undetermined
> The original definition seems to be Intel HW centric and applies to a limited
> range of devices making it unusable by general application.
>
> Break the ABI, and ditch the old values (ok mark PKT_RX_L4_CKSUM_BAD as __deprecated
> and remove all usage).
>
Don't you think knowing that a checksum is bad could be useful?
In that case the application can drop/log the packet without any
additional cpu cost.
What do you mean by being unusable by general application?
I think the "2)" also requires a csum_start + csum_offset in
mbuf structure, right?
Do you also suggest to drop IP checksum flags?
Will it be possible to manage tunnel checksums?
I think this would be a pretty big change. If there is no additional
argument than being more compatible with virtio/linux, I'm wondering
if it's worth breaking the API. Let's wait for other opinions.
Thanks for your feedback.
Olivier
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] about rx checksum flags
@ 2016-05-31 20:28 3% ` Stephen Hemminger
2016-05-31 20:58 0% ` Olivier MATZ
0 siblings, 1 reply; 200+ results
From: Stephen Hemminger @ 2016-05-31 20:28 UTC (permalink / raw)
To: Olivier MATZ
Cc: Yuanhan Liu, dev, Ananyev, Konstantin, Richardson, Bruce,
Adrien Mazarguil, Tan, Jianfeng
On Tue, 31 May 2016 21:11:59 +0200
Olivier MATZ <olivier.matz@6wind.com> wrote:
>
>
> On 05/31/2016 10:09 AM, Yuanhan Liu wrote:
> > On Mon, May 30, 2016 at 05:26:21PM +0200, Olivier Matz wrote:
> >> PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
> >> data, but the integrity of the L4 header is verified.
> >> -> the application can process the packet but must not verify the
> >> checksum by sw. It has to take care to recalculate the cksum
> >> if the packet is transmitted (either by sw or using tx offload)
> >
> > I like the explanation you made at [1] better :)
> >
> > So in general, I think this proposal is good to have.
>
> Thanks everyone for your feedback.
>
> I'll try to send a first patch proposition soon.
>
> Regards,
> Olivier
I think it is time to ditch the old definitions of Rx checksum and instead
use something more compatible with virtio (and Linux). I.e have three values
1) checksum is known good for packet contents
2) checksum value one's complement for packet contents
3) checksum is undetermined
The original definition seems to be Intel HW centric and applies to a limited
range of devices making it unusable by general application.
Break the ABI, and ditch the old values (ok mark PKT_RX_L4_CKSUM_BAD as __deprecated
and remove all usage).
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH v5 1/3] mempool: support external handler
2016-05-30 9:41 3% ` Jerin Jacob
@ 2016-05-30 11:27 0% ` Hunt, David
0 siblings, 0 replies; 200+ results
From: Hunt, David @ 2016-05-30 11:27 UTC (permalink / raw)
To: Jerin Jacob; +Cc: dev, olivier.matz, yuanhan.liu, pmatilai
On 5/30/2016 10:41 AM, Jerin Jacob wrote:
--snip--
>> Of course, that won't help if we need to pass in more data, in which case
>> we'd probably need an
>> opaque data pointer somewhere. It would probably be most useful to pass it
>> in with the
>> alloc, which may need the data. Any suggestions?
> But the top level rte_mempool_create() function needs to pass the data. Right?
> That would be an ABI change. IMO, we need to start thinking about
> passing a struct of config data to rte_mempool_create to create
> backward compatibility on new argument addition to rte_mempool_create()
New mempool handlers will use rte_mempool_create_empty(),
rte_mempool_set_handler(),
then rte_mempool_populate_*(). These three functions are new to this
release, so no problem
to add a parameter to one of them for the config data. Also since we're
adding some new
elements to the mempool structure, how about we add a new pointer for a
void pointer to a
config data structure, as defined by the handler.
So, new element in rte_mempool struct alongside the *pool
void *pool;
void *pool_config;
Then add a param to the rte_mempool_set_handler function:
int
rte_mempool_set_handler(struct rte_mempool *mp, const char *name, void
*pool_config)
The function would simply set the pointer in the mempool struct, and the
custom handler
alloc/create function would use as appropriate as needed. Handlers that
do not need this
data can be passed NULL.
> Other points in HW assisted pool manager perspective,
>
> 1) May be RING can be replaced with some other higher abstraction name
> for the internal MEMPOOL_F_RING_CREATED flag
Agreed. I'll change to MEMPOOL_F_POOL_CREATED, since we're already
changing the *ring
element of the mempool struct to *pool
> 2) IMO, It is better to change void *pool in struct rte_mempool to
> anonymous union type, something like below, so that mempool
> implementation can choose the best type.
> union {
> void *pool;
> uint64_t val;
> }
Could we do this by using the union for the *pool_config suggested
above, would that give
you what you need?
> 3) int32_t handler_idx creates 4 byte hole in struct rte_mempool in
> 64 bit system. IMO it is better to rearrange (as const struct rte_memzone
> *mz comes next)
OK, Will look at this.
> 4) IMO, It is better to change ext_alloc/ext_free to ext_create/ext_destroy
> as there is no allocation in HW assisted pool manager case,
> it will be mostly creating some HW initialization.
OK, I'll change. I think that makes more sense.
Regards,
Dave.
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH v5 1/3] mempool: support external handler
@ 2016-05-30 9:41 3% ` Jerin Jacob
2016-05-30 11:27 0% ` Hunt, David
0 siblings, 1 reply; 200+ results
From: Jerin Jacob @ 2016-05-30 9:41 UTC (permalink / raw)
To: Hunt, David; +Cc: dev, olivier.matz, yuanhan.liu, pmatilai
On Fri, May 27, 2016 at 03:44:31PM +0100, Hunt, David wrote:
>
>
Hi David,
[snip]
> That chunk of code above would be better moved all right. I'd suggest
> moving it to the
> rte_mempool_create function, as that's the one that needs the backward
> compatibility.
OK
>
> On the flags issue, each mempool handler can re-interpret the flags as
> needed. Maybe we
> could use the upper half of the bits for different handlers, changing the
> meaning of the
> bits depending on which handler is being set up. We can then keep the lower
> half for bits that are common across all handlers? That way the user can
Common lower half bit in flags looks good.
> just set the bits they
> are interested in for that handler. Also, the alloc function has access to
> the flags, so maybe the
> handler specific setup could be handled in the alloc function rather than
> adding a new function pointer?
Yes. I agree.
>
> Of course, that won't help if we need to pass in more data, in which case
> we'd probably need an
> opaque data pointer somewhere. It would probably be most useful to pass it
> in with the
> alloc, which may need the data. Any suggestions?
But the top level rte_mempool_create() function needs to pass the data. Right?
That would be an ABI change. IMO, we need to start thinking about
passing a struct of config data to rte_mempool_create to create
backward compatibility on new argument addition to rte_mempool_create()
Other points in HW assisted pool manager perspective,
1) May be RING can be replaced with some other higher abstraction name
for the internal MEMPOOL_F_RING_CREATED flag
2) IMO, It is better to change void *pool in struct rte_mempool to
anonymous union type, something like below, so that mempool
implementation can choose the best type.
union {
void *pool;
uint64_t val;
}
3) int32_t handler_idx creates 4 byte hole in struct rte_mempool in
64 bit system. IMO it is better to rearrange (as const struct rte_memzone
*mz comes next)
4) IMO, It is better to change ext_alloc/ext_free to ext_create/ext_destroy
as there is no allocation in HW assisted pool manager case,
it will be mostly creating some HW initialization.
>
> Regards,
> Dave.
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH 1/2] ethdev: add callback to get register size in bytes
2016-05-27 10:28 4% ` Panu Matilainen
2016-05-27 14:43 3% ` Thomas Monjalon
@ 2016-05-30 9:32 0% ` Zyta Szpak
1 sibling, 0 replies; 200+ results
From: Zyta Szpak @ 2016-05-30 9:32 UTC (permalink / raw)
To: Panu Matilainen, remy.horton, thomas.monjalon; +Cc: dev
On 27.05.2016 12:28, Panu Matilainen wrote:
> On 05/25/2016 09:36 AM, zr@semihalf.com wrote:
>> From: Zyta Szpak <zr@semihalf.com>
>>
>> Version 2 of fixing the fixed register width assumption.
>> rte_eth_dev_get_reg_length and rte_eth_dev_get_reg callbacks
>> do not provide register size to the app in any way. It is
>> needed to allocate proper number of bytes before retrieving
>> registers content with rte_eth_dev_get_reg.
>>
>> Signed-off-by: Zyta Szpak <zr@semihalf.com>
>> ---
>> lib/librte_ether/rte_ethdev.c | 12 ++++++++++++
>> lib/librte_ether/rte_ethdev.h | 18 ++++++++++++++++++
>> 2 files changed, 30 insertions(+)
>>
>> diff --git a/lib/librte_ether/rte_ethdev.c
>> b/lib/librte_ether/rte_ethdev.c
>> index a31018e..e0765f8 100644
>> --- a/lib/librte_ether/rte_ethdev.c
>> +++ b/lib/librte_ether/rte_ethdev.c
>> @@ -3231,6 +3231,18 @@ rte_eth_dev_get_reg_length(uint8_t port_id)
>> }
>>
>> int
>> +rte_eth_dev_get_reg_width(uint8_t port_id)
>> +{
>> + struct rte_eth_dev *dev;
>> +
>> + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
>> +
>> + dev = &rte_eth_devices[port_id];
>> + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_reg_width, -ENOTSUP);
>> + return (*dev->dev_ops->get_reg_width)(dev);
>> +}
>> +
>> +int
>> rte_eth_dev_get_reg_info(uint8_t port_id, struct rte_dev_reg_info
>> *info)
>> {
>> struct rte_eth_dev *dev;
>> diff --git a/lib/librte_ether/rte_ethdev.h
>> b/lib/librte_ether/rte_ethdev.h
>> index 2757510..552eaed 100644
>> --- a/lib/librte_ether/rte_ethdev.h
>> +++ b/lib/librte_ether/rte_ethdev.h
>> @@ -1292,6 +1292,9 @@ typedef int (*eth_timesync_write_time)(struct
>> rte_eth_dev *dev,
>> typedef int (*eth_get_reg_length_t)(struct rte_eth_dev *dev);
>> /**< @internal Retrieve device register count */
>>
>> +typedef int (*eth_get_reg_width_t)(struct rte_eth_dev *dev);
>> +/**< @internal Retrieve device register byte number */
>> +
>> typedef int (*eth_get_reg_t)(struct rte_eth_dev *dev,
>> struct rte_dev_reg_info *info);
>> /**< @internal Retrieve registers */
>> @@ -1455,6 +1458,8 @@ struct eth_dev_ops {
>>
>> eth_get_reg_length_t get_reg_length;
>> /**< Get # of registers */
>> + eth_get_reg_width_t get_reg_width;
>> + /**< Get # of bytes in register */
>> eth_get_reg_t get_reg;
>> /**< Get registers */
>> eth_get_eeprom_length_t get_eeprom_length;
>
> This is an ABI break, but maybe it is part of that "driver
> implementation API" which is exempt from the ABI/API policies. Thomas?
>
>> @@ -3971,6 +3976,19 @@ int rte_eth_tx_queue_info_get(uint8_t port_id,
>> uint16_t queue_id,
>> */
>> int rte_eth_dev_get_reg_length(uint8_t port_id);
>>
>> +/*
>> + * Retrieve the number of bytes in register for a specific device
>> + *
>> + * @param port_id
>> + * The port identifier of the Ethernet device.
>> + * @return
>> + * - (>=0) number of registers if successful.
>> + * - (-ENOTSUP) if hardware doesn't support.
>> + * - (-ENODEV) if *port_id* invalid.
>> + * - others depends on the specific operations implementation.
>> + */
>> +int rte_eth_dev_get_reg_width(uint8_t port_id);
>> +
>> /**
>> * Retrieve device registers and register attributes
>> *
>
> The function needs to be exported via rte_ether_version.map as well.
OK, right!
>
> - Panu -
>>
>
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH v2 5/7] eal/linux: mmap ioports on ppc64
2016-05-24 5:15 3% ` Yuanhan Liu
@ 2016-05-30 8:45 0% ` Olivier Matz
0 siblings, 0 replies; 200+ results
From: Olivier Matz @ 2016-05-30 8:45 UTC (permalink / raw)
To: Yuanhan Liu
Cc: David Marchand, dev, Chao Zhu, Xie, Huawei, Panu Matilainen,
Thomas Monjalon
On 05/24/2016 07:15 AM, Yuanhan Liu wrote:
> On Mon, May 23, 2016 at 03:40:58PM +0200, Olivier Matz wrote:
>> For reference, here is the report of the ABI checker for EAL:
>>
>> [−] struct rte_pci_ioport (2)
>>
>> 1 Field len has been added to this type.
>> 1) This field will not be initialized by old clients.
>> 2) Size of the inclusive type has been changed.
>> NOTE: this field should be accessed only from the new library
>> functions, otherwise it may result in crash or incorrect behavior
>> of applications.
>> 2 Size of this type has been changed from 16 bytes to 24 bytes.
>> The fields or parameters of such data type may be incorrectly
>> initialized or accessed by old client applications.
>>
>> [−] affected symbols (4)
>> rte_eal_pci_ioport_map ( struct rte_pci_device* dev, int bar,
>> struct rte_pci_ioport* p ) @@ DPDK_16.04
>> 3rd parameter 'p' (pointer) has base type 'struct rte_pci_ioport'.
>> rte_eal_pci_ioport_read ( struct rte_pci_ioport* p, void* data,
>> size_t len, off_t offset ) @@ DPDK_16.04
>> 1st parameter 'p' (pointer) has base type 'struct rte_pci_ioport'.
>> rte_eal_pci_ioport_unmap ( struct rte_pci_ioport* p ) @@ DPDK_16.04
>> 1st parameter 'p' (pointer) has base type 'struct rte_pci_ioport'.
>> rte_eal_pci_ioport_write ( struct rte_pci_ioport* p, void const* data,
>> size_t len, off_t offset ) @@ DPDK_16.04
>> 1st parameter 'p' (pointer) has base type 'struct rte_pci_ioport'.
>>
>>
>> My understanding of the comment for this structure is that it's
>> internal to EAL:
>
> I'm not quite sure that is enough. Cc'ed Panu, the guru on ABI stuff,
> hopefully he could shed some light on it.
>
>> /**
>> * A structure used to access io resources for a pci device.
>> * rte_pci_ioport is arch, os, driver specific, and should not be used
>> outside
>> * of pci ioport api.
>> */
>> struct rte_pci_ioport {
>> ...
>> }
>>
>> So I'd say it's ok to have it integrated for 16.07.
>
> I'll let Thomas decide it :)
Panu or Thomas, do you have any comment on this?
Thanks,
Olivier
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH 1/2] ethdev: add callback to get register size in bytes
2016-05-27 10:28 4% ` Panu Matilainen
@ 2016-05-27 14:43 3% ` Thomas Monjalon
2016-05-30 9:32 0% ` Zyta Szpak
1 sibling, 0 replies; 200+ results
From: Thomas Monjalon @ 2016-05-27 14:43 UTC (permalink / raw)
To: Panu Matilainen; +Cc: zr, remy.horton, dev
2016-05-27 13:28, Panu Matilainen:
> On 05/25/2016 09:36 AM, zr@semihalf.com wrote:
> > @@ -1455,6 +1458,8 @@ struct eth_dev_ops {
> >
> > eth_get_reg_length_t get_reg_length;
> > /**< Get # of registers */
> > + eth_get_reg_width_t get_reg_width;
> > + /**< Get # of bytes in register */
> > eth_get_reg_t get_reg;
> > /**< Get registers */
> > eth_get_eeprom_length_t get_eeprom_length;
>
> This is an ABI break, but maybe it is part of that "driver
> implementation API" which is exempt from the ABI/API policies. Thomas?
Yes dev_ops are for drivers, not for applications.
Thus it should not be impacted by the ABI policy.
^ permalink raw reply [relevance 3%]
* [dpdk-dev] [PATCH v2] mbuf: new flag when Vlan is stripped
2016-05-23 8:46 2% ` [dpdk-dev] [PATCH] mbuf: new flag when Vlan " Olivier Matz
2016-05-23 8:59 0% ` Ananyev, Konstantin
2016-05-23 9:20 0% ` Ananyev, Konstantin
@ 2016-05-27 14:33 2% ` Olivier Matz
2 siblings, 0 replies; 200+ results
From: Olivier Matz @ 2016-05-27 14:33 UTC (permalink / raw)
To: dev
Cc: johndale, konstantin.ananyev, helin.zhang, adrien.mazarguil,
rahul.lakkireddy, alejandro.lucero, sony.chacko
The behavior of PKT_RX_VLAN_PKT was not very well defined, resulting in
PMDs not advertising the same flags in similar conditions.
Following discussion in [1], introduce 2 new flags PKT_RX_VLAN_STRIPPED
and PKT_RX_QINQ_STRIPPED that are better defined:
PKT_RX_VLAN_STRIPPED: a vlan has been stripped by the hardware and its
tci is saved in mbuf->vlan_tci. This can only happen if vlan stripping
is enabled in the RX configuration of the PMD.
For now, the old flag PKT_RX_VLAN_PKT is kept but marked as deprecated.
It should be removed from applications and PMDs in a future revision.
This patch also updates the drivers. For PKT_RX_VLAN_PKT:
- e1000, enic, i40e, mlx5, nfp, vmxnet3: done, PKT_RX_VLAN_PKT already
had the same meaning as PKT_RX_VLAN_STRIPPED, minor update is
required.
- fm10k: done, PKT_RX_VLAN_PKT already had the same meaning as
PKT_RX_VLAN_STRIPPED, and vlan stripping is always enabled on fm10k.
- ixgbe: modification done (vector and normal), the old flag was set
when a vlan was recognized, even if vlan stripping was disabled.
- the other drivers do not support vlan stripping.
For PKT_RX_QINQ_PKT, it was only supported on i40e, and the behavior was
already correct, so we can reuse the same bit value for
PKT_RX_QINQ_STRIPPED.
[1] http://dpdk.org/ml/archives/dev/2016-April/037837.html
Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
---
v1 -> v2:
- fix ixgbe (vector mode) and i40e (normal and vector mode)
- store vlan flags instead of a boolean value in ixgbe rxq, as
suggested by Konstantin
- replay tests on ixgbe (normal + vector) and i40e (normal +
vector). See below.
RFC -> v1:
- fix checkpatch and check-git-log.sh issues
- add a deprecation notice for the old vlan flags
- rebase on head
This patch is tested on ixgbe (normal + vector), i40e (normal +
vector) and igb (hardware is a 82575):
# we use scapy to send packets like this:
# Ether(src="00:01:02:03:04:05", dst="00:1B:21:AB:8F:10")/Dot1Q(vlan=0x666)/IP()/UDP()/Raw("x"*32)
cd dpdk.org/
make config T=x86_64-native-linuxapp-gcc
make -j32
mkdir -p /mnt/huge
mount -t hugetlbfs nodev /mnt/huge
echo 256 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
modprobe uio_pci_generic
# test-pmd is started with vlan stripping, using the rx-vector
# function if available (i40e and ixgbe)
./build/app/testpmd -l 2,4 -- --total-num-mbufs=65536 -i --port-topology=chained \
--disable-hw-vlan-filter
# to disable vlan stripping, add:
--disable-hw-vlan-strip
# to disable the vector mode (it can be checked in debug logs), add:
--enable-rx-cksum
# we run test-pmd in rxonly mode, displaying the packet information.
set fwd rxonly
set verbose 1
start
==== IXGBE normal rx function
# ixgbe: the behavior of the flag PKT_RX_VLAN_PKT is kept as before,
# and the new flag PKT_RX_VLAN_STRIPPED is introduced when vlan stripping
# is enabled and a vlan is stripped.
--- vlan stripping enabled
# packet without vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=74 - nb_segs=1 - (outer) L2 type: ETHER - (outer) L3 type: IPV4 - (outer) L4 type: UDP - Tunnel type: Unknown - Inner L2 type: Unknown - Inner L3 type: Unknown - Inner L4 type: Unknown
- Receive queue=0x0
# packet with vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=74 - nb_segs=1 - VLAN tci=0x666 - (outer) L2 type: ETHER - (outer) L3 type: IPV4 - (outer) L4 type: UDP - Tunnel type: Unknown - Inner L2 type: Unknown - Inner L3 type: Unknown - Inner L4 type: Unknown
- Receive queue=0x0
PKT_RX_VLAN_PKT
PKT_RX_VLAN_STRIPPED
--- vlan stripping disabled
# packet without vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=74 - nb_segs=1 - (outer) L2 type: ETHER - (outer) L3 type: IPV4 - (outer) L4 type: UDP - Tunnel type: Unknown - Inner L2 type: Unknown - Inner L3 type: Unknown - Inner L4 type: Unknown
- Receive queue=0x0
# packet with vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x8100 - length=78 - nb_segs=1 - (outer) L2 type: ETHER - (outer) L3 type: IPV4 - (outer) L4 type: UDP - Tunnel type: Unknown - Inner L2 type: Unknown - Inner L3 type: Unknown - Inner L4 type: Unknown
- Receive queue=0x0
PKT_RX_VLAN_PKT
==== IXGBE vector rx function
--- vlan stripping enabled
# packet without vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=74 - nb_segs=1Unknown packet type
- Receive queue=0x0
# packet with vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=74 - nb_segs=1 - VLAN tci=0x666Unknown packet type
- Receive queue=0x0
PKT_RX_VLAN_PKT
PKT_RX_VLAN_STRIPPED
--- vlan stripping disabled
# packet without vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=74 - nb_segs=1Unknown packet type
- Receive queue=0x0
# packet with vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x8100 - length=78 - nb_segs=1Unknown packet type
- Receive queue=0x0
PKT_RX_VLAN_PKT
==== I40E normal rx function
--- vlan stripping enabled
# packet without vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=74 - nb_segs=1 - (outer) L2 type: ETHER - (outer) L3 type: IPV4_EXT_UNKNOWN - (outer) L4 type: UDP - Tunnel type: Unknown - Inner L2 type: Unknown - Inner L3 type: Unknown - Inner L4 type: Unknown
- Receive queue=0x0
# packet with vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=74 - nb_segs=1 - VLAN tci=0x666 - (outer) L2 type: ETHER - (outer) L3 type: IPV4_EXT_UNKNOWN - (outer) L4 type: UDP - Tunnel type: Unknown - Inner L2 type: Unknown - Inner L3 type: Unknown - Inner L4 type: Unknown
- Receive queue=0x0
PKT_RX_VLAN_PKT
PKT_RX_VLAN_STRIPPED
--- vlan stripping disabled
# packet without vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=74 - nb_segs=1 - (outer) L2 type: ETHER - (outer) L3 type: IPV4_EXT_UNKNOWN - (outer) L4 type: UDP - Tunnel type: Unknown - Inner L2 type: Unknown - Inner L3 type: Unknown - Inner L4 type: Unknown
- Receive queue=0x0
# packet with vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x8100 - length=78 - nb_segs=1 - (outer) L2 type: ETHER - (outer) L3 type: IPV4_EXT_UNKNOWN - (outer) L4 type: UDP - Tunnel type: Unknown - Inner L2 type: Unknown - Inner L3 type: Unknown - Inner L4
==== I40E vector rx function
--- vlan stripping enabled
# packet without vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=74 - nb_segs=1Unknown packet type
- Receive queue=0x0
# packet with vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=74 - nb_segs=1 - VLAN tci=0x666Unknown packet type
- Receive queue=0x0
PKT_RX_VLAN_PKT
PKT_RX_VLAN_STRIPPED
--- vlan stripping disabled
# packet without vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=74 - nb_segs=1Unknown packet type
- Receive queue=0x0
port 0/queue 0: received 1 packets
# packet with vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x8100 - length=78 - nb_segs=1Unknown packet type
- Receive queue=0x0
==== IGB
(not retested since RFC patch, but there was no code modification)
--- vlan stripping enabled
# packet with vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x8100 - length=78 - nb_segs=1 - (outer) L2 type: ETHER - (outer) L3 type: IPV4 - (outer) L4 type: UDP - Tunnel type: Unknown - Inner L2 type: Unknown - Inner L3 type: Unknown - Inner L4 type: Unknown
- Receive queue=0x0
--- vlan stripping disabled
# packet with vlan
src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=74 - nb_segs=1 - VLAN tci=0x666 - (outer) L2 type: ETHER - (outer) L3 type: IPV4 - (outer) L4 type: UDP - Tunnel type: Unknown - Inner L2 type: Unknown - Inner L3 type: Unknown - Inner L4 type: Unknown
- Receive queue=0x0
PKT_RX_VLAN_PKT
PKT_RX_VLAN_STRIPPED
app/test-pmd/rxonly.c | 4 +--
doc/guides/rel_notes/deprecation.rst | 5 ++++
drivers/net/e1000/em_rxtx.c | 3 ++-
drivers/net/e1000/igb_rxtx.c | 3 ++-
drivers/net/enic/enic_rx.c | 2 +-
drivers/net/i40e/i40e_rxtx.c | 4 +--
drivers/net/i40e/i40e_rxtx_vec.c | 2 +-
drivers/net/ixgbe/ixgbe_ethdev.c | 11 ++++++++
drivers/net/ixgbe/ixgbe_rxtx.c | 14 ++++++----
drivers/net/ixgbe/ixgbe_rxtx.h | 2 ++
drivers/net/ixgbe/ixgbe_rxtx_vec.c | 36 +++++++++++++++++---------
drivers/net/mlx5/mlx5_rxtx.c | 6 +++--
drivers/net/nfp/nfp_net.c | 2 +-
drivers/net/vmxnet3/vmxnet3_rxtx.c | 2 +-
lib/librte_mbuf/rte_mbuf.c | 2 ++
lib/librte_mbuf/rte_mbuf.h | 50 ++++++++++++++++++++++++++++++++----
16 files changed, 114 insertions(+), 34 deletions(-)
diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c
index 14555ab..c69b344 100644
--- a/app/test-pmd/rxonly.c
+++ b/app/test-pmd/rxonly.c
@@ -156,9 +156,9 @@ pkt_burst_receive(struct fwd_stream *fs)
printf("hash=0x%x ID=0x%x ",
mb->hash.fdir.hash, mb->hash.fdir.id);
}
- if (ol_flags & PKT_RX_VLAN_PKT)
+ if (ol_flags & PKT_RX_VLAN_STRIPPED)
printf(" - VLAN tci=0x%x", mb->vlan_tci);
- if (ol_flags & PKT_RX_QINQ_PKT)
+ if (ol_flags & PKT_RX_QINQ_STRIPPED)
printf(" - QinQ VLAN tci=0x%x, VLAN tci outer=0x%x",
mb->vlan_tci, mb->vlan_tci_outer);
if (mb->packet_type) {
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index ad05eba..2233a90 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -57,3 +57,8 @@ Deprecation Notices
a handle, like the way kernel exposes an fd to user for locating a
specific file, and to keep all major structures internally, so that
we are likely to be free from ABI violations in future.
+
+* The mbuf flags PKT_RX_VLAN_PKT and PKT_RX_QINQ_PKT are deprecated and
+ are respectively replaced by PKT_RX_VLAN_STRIPPED and
+ PKT_RX_QINQ_STRIPPED, that are better described. The old flags and
+ their behavior will be kept in 16.07 and will be removed in 16.11.
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 3d36f21..6d8750a 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -629,7 +629,8 @@ rx_desc_status_to_pkt_flags(uint32_t rx_status)
uint64_t pkt_flags;
/* Check if VLAN present */
- pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0);
+ pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
+ PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED : 0);
return pkt_flags;
}
diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
index 18aeead..9d80a0b 100644
--- a/drivers/net/e1000/igb_rxtx.c
+++ b/drivers/net/e1000/igb_rxtx.c
@@ -729,7 +729,8 @@ rx_desc_status_to_pkt_flags(uint32_t rx_status)
uint64_t pkt_flags;
/* Check if VLAN present */
- pkt_flags = (rx_status & E1000_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0;
+ pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
+ PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED : 0);
#if defined(RTE_LIBRTE_IEEE1588)
if (rx_status & E1000_RXD_STAT_TMST)
diff --git a/drivers/net/enic/enic_rx.c b/drivers/net/enic/enic_rx.c
index f92f6bc..6459e97 100644
--- a/drivers/net/enic/enic_rx.c
+++ b/drivers/net/enic/enic_rx.c
@@ -197,7 +197,7 @@ enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf)
/* VLAN stripping */
if (bwflags & CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) {
- pkt_flags |= PKT_RX_VLAN_PKT;
+ pkt_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
mbuf->vlan_tci = enic_cq_rx_desc_vlan(cqrd);
} else {
mbuf->vlan_tci = 0;
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index c833aa3..eea246b 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -88,7 +88,7 @@ i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp)
{
if (rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
(1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
- mb->ol_flags |= PKT_RX_VLAN_PKT;
+ mb->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
mb->vlan_tci =
rte_le_to_cpu_16(rxdp->wb.qword0.lo_dword.l2tag1);
PMD_RX_LOG(DEBUG, "Descriptor l2tag1: %u",
@@ -99,7 +99,7 @@ i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp)
#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
if (rte_le_to_cpu_16(rxdp->wb.qword2.ext_status) &
(1 << I40E_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) {
- mb->ol_flags |= PKT_RX_QINQ_PKT;
+ mb->ol_flags |= PKT_RX_QINQ_STRIPPED;
mb->vlan_tci_outer = mb->vlan_tci;
mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_2);
PMD_RX_LOG(DEBUG, "Descriptor l2tag2_1: %u, l2tag2_2: %u",
diff --git a/drivers/net/i40e/i40e_rxtx_vec.c b/drivers/net/i40e/i40e_rxtx_vec.c
index eef80d9..634bd39 100644
--- a/drivers/net/i40e/i40e_rxtx_vec.c
+++ b/drivers/net/i40e/i40e_rxtx_vec.c
@@ -154,7 +154,7 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
/* map rss and vlan type to rss hash and vlan flag */
const __m128i vlan_flags = _mm_set_epi8(0, 0, 0, 0,
0, 0, 0, 0,
- 0, 0, 0, PKT_RX_VLAN_PKT,
+ 0, 0, 0, PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED,
0, 0, 0, 0);
const __m128i rss_flags = _mm_set_epi8(0, 0, 0, 0,
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index a2b170b..5f3e047 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -1636,6 +1636,7 @@ ixgbe_vlan_hw_strip_bitmap_set(struct rte_eth_dev *dev, uint16_t queue, bool on)
{
struct ixgbe_hwstrip *hwstrip =
IXGBE_DEV_PRIVATE_TO_HWSTRIP_BITMAP(dev->data->dev_private);
+ struct ixgbe_rx_queue *rxq;
if (queue >= IXGBE_MAX_RX_QUEUE_NUM)
return;
@@ -1644,6 +1645,16 @@ ixgbe_vlan_hw_strip_bitmap_set(struct rte_eth_dev *dev, uint16_t queue, bool on)
IXGBE_SET_HWSTRIP(hwstrip, queue);
else
IXGBE_CLEAR_HWSTRIP(hwstrip, queue);
+
+ if (queue >= dev->data->nb_rx_queues)
+ return;
+
+ rxq = dev->data->rx_queues[queue];
+
+ if (on)
+ rxq->vlan_flags = PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
+ else
+ rxq->vlan_flags = PKT_RX_VLAN_PKT;
}
static void
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 9c6eaf2..5a7064c 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -1221,7 +1221,7 @@ ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
}
static inline uint64_t
-rx_desc_status_to_pkt_flags(uint32_t rx_status)
+rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
{
uint64_t pkt_flags;
@@ -1230,7 +1230,7 @@ rx_desc_status_to_pkt_flags(uint32_t rx_status)
* Do not check whether L3/L4 rx checksum done by NIC or not,
* That can be found from rte_eth_rxmode.hw_ip_checksum flag
*/
- pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0;
+ pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ? vlan_flags : 0;
#ifdef RTE_LIBRTE_IEEE1588
if (rx_status & IXGBE_RXD_STAT_TMST)
@@ -1287,6 +1287,7 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
uint32_t pkt_info[LOOK_AHEAD];
int i, j, nb_rx = 0;
uint32_t status;
+ uint64_t vlan_flags = rxq->vlan_flags;
/* get references to current descriptor and S/W ring entry */
rxdp = &rxq->rx_ring[rxq->rx_tail];
@@ -1328,7 +1329,8 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
/* convert descriptor fields to rte mbuf flags */
- pkt_flags = rx_desc_status_to_pkt_flags(s[j]);
+ pkt_flags = rx_desc_status_to_pkt_flags(s[j],
+ vlan_flags);
pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
((uint16_t)pkt_info[j]);
@@ -1544,6 +1546,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_rx;
uint16_t nb_hold;
uint64_t pkt_flags;
+ uint64_t vlan_flags;
nb_rx = 0;
nb_hold = 0;
@@ -1551,6 +1554,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
rx_id = rxq->rx_tail;
rx_ring = rxq->rx_ring;
sw_ring = rxq->sw_ring;
+ vlan_flags = rxq->vlan_flags;
while (nb_rx < nb_pkts) {
/*
* The order of operations here is important as the DD status
@@ -1660,7 +1664,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
/* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
- pkt_flags = rx_desc_status_to_pkt_flags(staterr);
+ pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
pkt_flags = pkt_flags |
ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
@@ -1753,7 +1757,7 @@ ixgbe_fill_cluster_head_buf(
*/
head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
- pkt_flags = rx_desc_status_to_pkt_flags(staterr);
+ pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
head->ol_flags = pkt_flags;
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
index 3691a19..2608b36 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx.h
@@ -146,6 +146,8 @@ struct ixgbe_rx_queue {
uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
uint8_t rx_deferred_start; /**< not in global dev start. */
+ /** flags to set in mbuf when a vlan is detected. */
+ uint64_t vlan_flags;
/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
struct rte_mbuf fake_mbuf;
/** hold packets to return to application */
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
index e97ea82..d895bf1 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
@@ -140,10 +140,9 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
*/
#ifdef RTE_IXGBE_RX_OLFLAGS_ENABLE
-#define VTAG_SHIFT (3)
-
static inline void
-desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
+desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags,
+ struct rte_mbuf **rx_pkts)
{
__m128i ptype0, ptype1, vtag0, vtag1;
union {
@@ -151,12 +150,6 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
uint64_t dword;
} vol;
- /* pkt type + vlan olflags mask */
- const __m128i pkttype_msk = _mm_set_epi16(
- 0x0000, 0x0000, 0x0000, 0x0000,
- PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT,
- PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT);
-
/* mask everything except rss type */
const __m128i rsstype_msk = _mm_set_epi16(
0x0000, 0x0000, 0x0000, 0x0000,
@@ -168,6 +161,19 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, 0,
PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, 0);
+ /* mask everything except vlan present bit */
+ const __m128i vlan_msk = _mm_set_epi16(
+ 0x0000, 0x0000,
+ 0x0000, 0x0000,
+ IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP,
+ IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP);
+ /* map vlan present (0x8) to ol_flags */
+ const __m128i vlan_map = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, vlan_flags,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0);
+
ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]);
ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]);
vtag0 = _mm_unpackhi_epi16(descs[0], descs[1]);
@@ -178,8 +184,8 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
ptype0 = _mm_shuffle_epi8(rss_flags, ptype0);
vtag1 = _mm_unpacklo_epi32(vtag0, vtag1);
- vtag1 = _mm_srli_epi16(vtag1, VTAG_SHIFT);
- vtag1 = _mm_and_si128(vtag1, pkttype_msk);
+ vtag1 = _mm_and_si128(vtag1, vlan_msk);
+ vtag1 = _mm_shuffle_epi8(vlan_map, vtag1);
vtag1 = _mm_or_si128(ptype0, vtag1);
vol.dword = _mm_cvtsi128_si64(vtag1);
@@ -221,6 +227,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
0, 0 /* ignore pkt_type field */
);
__m128i dd_check, eop_check;
+ uint8_t vlan_flags;
/* nb_pkts shall be less equal than RTE_IXGBE_MAX_RX_BURST */
nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_MAX_RX_BURST);
@@ -270,6 +277,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
*/
sw_ring = &rxq->sw_ring[rxq->rx_tail];
+ /* ensure these 2 flags are in the lower 8 bits */
+ RTE_BUILD_BUG_ON(((PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED) &
+ 0xffffffffffffff00ULL) != 0ULL);
+ vlan_flags = rxq->vlan_flags & 0xff;
+
/* A. load 4 packet in one loop
* [A*. mask out 4 unused dirty field in desc]
* B. copy 4 mbuf point from swring to rx_pkts
@@ -330,7 +342,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);
/* set ol_flags with vlan packet type */
- desc_to_olflags_v(descs, &rx_pkts[pos]);
+ desc_to_olflags_v(descs, vlan_flags, &rx_pkts[pos]);
/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 29bfcec..d5b2286 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1051,7 +1051,8 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
pkt_buf->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
#ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS
if (flags & IBV_EXP_CQ_RX_CVLAN_STRIPPED_V1) {
- pkt_buf->ol_flags |= PKT_RX_VLAN_PKT;
+ pkt_buf->ol_flags |= PKT_RX_VLAN_PKT |
+ PKT_RX_VLAN_STRIPPED;
pkt_buf->vlan_tci = vlan_tci;
}
#endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */
@@ -1207,7 +1208,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
seg->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
#ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS
if (flags & IBV_EXP_CQ_RX_CVLAN_STRIPPED_V1) {
- seg->ol_flags |= PKT_RX_VLAN_PKT;
+ seg->ol_flags |= PKT_RX_VLAN_PKT |
+ PKT_RX_VLAN_STRIPPED;
seg->vlan_tci = vlan_tci;
}
#endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */
diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
index ea5a2a3..5c9f350 100644
--- a/drivers/net/nfp/nfp_net.c
+++ b/drivers/net/nfp/nfp_net.c
@@ -1800,7 +1800,7 @@ nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
if ((rxds->rxd.flags & PCIE_DESC_RX_VLAN) &&
(hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) {
mb->vlan_tci = rte_cpu_to_le_32(rxds->rxd.vlan);
- mb->ol_flags |= PKT_RX_VLAN_PKT;
+ mb->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
}
/* Adding the mbuff to the mbuff array passed by the app */
diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c
index 9fe8752..ccafc0c 100644
--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
@@ -579,7 +579,7 @@ vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
/* Check for hardware stripped VLAN tag */
if (rcd->ts) {
- rxm->ol_flags |= PKT_RX_VLAN_PKT;
+ rxm->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
}
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index eec1456..2ece742 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -258,8 +258,10 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask)
/* case PKT_RX_HBUF_OVERFLOW: return "PKT_RX_HBUF_OVERFLOW"; */
/* case PKT_RX_RECIP_ERR: return "PKT_RX_RECIP_ERR"; */
/* case PKT_RX_MAC_ERR: return "PKT_RX_MAC_ERR"; */
+ case PKT_RX_VLAN_STRIPPED: return "PKT_RX_VLAN_STRIPPED";
case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP";
case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST";
+ case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED";
default: return NULL;
}
}
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 11fa06d..76b4f55 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -79,7 +79,16 @@ extern "C" {
* Keep these flags synchronized with rte_get_rx_ol_flag_name() and
* rte_get_tx_ol_flag_name().
*/
-#define PKT_RX_VLAN_PKT (1ULL << 0) /**< RX packet is a 802.1q VLAN packet. */
+
+/**
+ * Deprecated.
+ * RX packet is a 802.1q VLAN packet. This flag was set by PMDs when
+ * the packet is recognized as a VLAN, but the behavior between PMDs
+ * was not the same. This flag is kept for some time to avoid breaking
+ * applications and should be replaced by PKT_RX_VLAN_STRIPPED.
+ */
+#define PKT_RX_VLAN_PKT (1ULL << 0)
+
#define PKT_RX_RSS_HASH (1ULL << 1) /**< RX packet with RSS hash result. */
#define PKT_RX_FDIR (1ULL << 2) /**< RX packet with FDIR match indicate. */
#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) /**< L4 cksum of RX pkt. is not OK. */
@@ -89,11 +98,37 @@ extern "C" {
#define PKT_RX_HBUF_OVERFLOW (0ULL << 0) /**< Header buffer overflow. */
#define PKT_RX_RECIP_ERR (0ULL << 0) /**< Hardware processing error. */
#define PKT_RX_MAC_ERR (0ULL << 0) /**< MAC error. */
+
+/**
+ * A vlan has been stripped by the hardware and its tci is saved in
+ * mbuf->vlan_tci. This can only happen if vlan stripping is enabled
+ * in the RX configuration of the PMD.
+ */
+#define PKT_RX_VLAN_STRIPPED (1ULL << 6)
+
+/* hole, some bits can be reused here */
+
#define PKT_RX_IEEE1588_PTP (1ULL << 9) /**< RX IEEE1588 L2 Ethernet PT Packet. */
#define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/
#define PKT_RX_FDIR_ID (1ULL << 13) /**< FD id reported if FDIR match. */
#define PKT_RX_FDIR_FLX (1ULL << 14) /**< Flexible bytes reported if FDIR match. */
-#define PKT_RX_QINQ_PKT (1ULL << 15) /**< RX packet with double VLAN stripped. */
+
+/**
+ * The 2 vlans have been stripped by the hardware and their tci are
+ * saved in mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer).
+ * This can only happen if vlan stripping is enabled in the RX
+ * configuration of the PMD. If this flag is set, PKT_RX_VLAN_STRIPPED
+ * must also be set.
+ */
+#define PKT_RX_QINQ_STRIPPED (1ULL << 15)
+
+/**
+ * Deprecated.
+ * RX packet with double VLAN stripped.
+ * This flag is replaced by PKT_RX_QINQ_STRIPPED.
+ */
+#define PKT_RX_QINQ_PKT PKT_RX_QINQ_STRIPPED
+
/* add new RX flags here */
/* add new TX flags here */
@@ -761,7 +796,10 @@ struct rte_mbuf {
/*
* The packet type, which is the combination of outer/inner L2, L3, L4
- * and tunnel types.
+ * and tunnel types. The packet_type is about data really present in the
+ * mbuf. Example: if vlan stripping is enabled, a received vlan packet
+ * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
+ * vlan is stripped from the data.
*/
union {
uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
@@ -778,7 +816,8 @@ struct rte_mbuf {
uint32_t pkt_len; /**< Total pkt len: sum of all segments. */
uint16_t data_len; /**< Amount of data in segment buffer. */
- uint16_t vlan_tci; /**< VLAN Tag Control Identifier (CPU order) */
+ /** VLAN TCI (CPU order), valid if PKT_RX_VLAN_STRIPPED is set. */
+ uint16_t vlan_tci;
union {
uint32_t rss; /**< RSS hash result if RSS enabled */
@@ -804,7 +843,8 @@ struct rte_mbuf {
uint32_t seqn; /**< Sequence number. See also rte_reorder_insert() */
- uint16_t vlan_tci_outer; /**< Outer VLAN Tag Control Identifier (CPU order) */
+ /** Outer VLAN TCI (CPU order), valid if PKT_RX_QINQ_STRIPPED is set. */
+ uint16_t vlan_tci_outer;
/* second cache line - fields only used in slow path or on TX */
MARKER cacheline1 __rte_cache_min_aligned;
--
2.8.0.rc3
^ permalink raw reply [relevance 2%]
* Re: [dpdk-dev] [PATCH 1/2] ethdev: add callback to get register size in bytes
@ 2016-05-27 10:28 4% ` Panu Matilainen
2016-05-27 14:43 3% ` Thomas Monjalon
2016-05-30 9:32 0% ` Zyta Szpak
0 siblings, 2 replies; 200+ results
From: Panu Matilainen @ 2016-05-27 10:28 UTC (permalink / raw)
To: zr, remy.horton, thomas.monjalon; +Cc: dev
On 05/25/2016 09:36 AM, zr@semihalf.com wrote:
> From: Zyta Szpak <zr@semihalf.com>
>
> Version 2 of fixing the fixed register width assumption.
> rte_eth_dev_get_reg_length and rte_eth_dev_get_reg callbacks
> do not provide register size to the app in any way. It is
> needed to allocate proper number of bytes before retrieving
> registers content with rte_eth_dev_get_reg.
>
> Signed-off-by: Zyta Szpak <zr@semihalf.com>
> ---
> lib/librte_ether/rte_ethdev.c | 12 ++++++++++++
> lib/librte_ether/rte_ethdev.h | 18 ++++++++++++++++++
> 2 files changed, 30 insertions(+)
>
> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> index a31018e..e0765f8 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -3231,6 +3231,18 @@ rte_eth_dev_get_reg_length(uint8_t port_id)
> }
>
> int
> +rte_eth_dev_get_reg_width(uint8_t port_id)
> +{
> + struct rte_eth_dev *dev;
> +
> + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> +
> + dev = &rte_eth_devices[port_id];
> + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_reg_width, -ENOTSUP);
> + return (*dev->dev_ops->get_reg_width)(dev);
> +}
> +
> +int
> rte_eth_dev_get_reg_info(uint8_t port_id, struct rte_dev_reg_info *info)
> {
> struct rte_eth_dev *dev;
> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> index 2757510..552eaed 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -1292,6 +1292,9 @@ typedef int (*eth_timesync_write_time)(struct rte_eth_dev *dev,
> typedef int (*eth_get_reg_length_t)(struct rte_eth_dev *dev);
> /**< @internal Retrieve device register count */
>
> +typedef int (*eth_get_reg_width_t)(struct rte_eth_dev *dev);
> +/**< @internal Retrieve device register byte number */
> +
> typedef int (*eth_get_reg_t)(struct rte_eth_dev *dev,
> struct rte_dev_reg_info *info);
> /**< @internal Retrieve registers */
> @@ -1455,6 +1458,8 @@ struct eth_dev_ops {
>
> eth_get_reg_length_t get_reg_length;
> /**< Get # of registers */
> + eth_get_reg_width_t get_reg_width;
> + /**< Get # of bytes in register */
> eth_get_reg_t get_reg;
> /**< Get registers */
> eth_get_eeprom_length_t get_eeprom_length;
This is an ABI break, but maybe it is part of that "driver
implementation API" which is exempt from the ABI/API policies. Thomas?
> @@ -3971,6 +3976,19 @@ int rte_eth_tx_queue_info_get(uint8_t port_id, uint16_t queue_id,
> */
> int rte_eth_dev_get_reg_length(uint8_t port_id);
>
> +/*
> + * Retrieve the number of bytes in register for a specific device
> + *
> + * @param port_id
> + * The port identifier of the Ethernet device.
> + * @return
> + * - (>=0) number of registers if successful.
> + * - (-ENOTSUP) if hardware doesn't support.
> + * - (-ENODEV) if *port_id* invalid.
> + * - others depends on the specific operations implementation.
> + */
> +int rte_eth_dev_get_reg_width(uint8_t port_id);
> +
> /**
> * Retrieve device registers and register attributes
> *
The function needs to be exported via rte_ether_version.map as well.
- Panu -
>
^ permalink raw reply [relevance 4%]
* Re: [dpdk-dev] [PATCH v2 00/19] vhost ABI/API refactoring
2016-05-26 17:04 4% ` [dpdk-dev] [PATCH v2 00/19] vhost ABI/API refactoring Rich Lane
@ 2016-05-27 1:36 4% ` Yuanhan Liu
0 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-05-27 1:36 UTC (permalink / raw)
To: Rich Lane
Cc: dev, Thomas Monjalon, huawei.xie, Panu Matilainen,
Tetsuya Mukawa, Traynor Kevin
On Thu, May 26, 2016 at 10:04:23AM -0700, Rich Lane wrote:
> On Thu, May 12, 2016 at 10:24 PM, Yuanhan Liu <yuanhan.liu@linux.intel.com>
> wrote:
>
> v2: - exported ifname as well to fix a vhost-pmd issue reported
> by Rich
> - separated the big patch that introduces several new APIs
> into some small patches.
> - updated release note
> - updated version.map
>
>
> Tested-by: Rich Lane <rich.lane@bigswitch.com>
> Acked-by: Rich Lane <rich.lane@bigswitch.com>
Rich, appreciate your time for reviewing and testing!
--yliu
^ permalink raw reply [relevance 4%]
* Re: [dpdk-dev] [PATCH v2 00/19] vhost ABI/API refactoring
2016-05-13 5:24 8% ` [dpdk-dev] [PATCH v2 00/19] vhost ABI/API refactoring Yuanhan Liu
` (3 preceding siblings ...)
2016-05-13 5:25 4% ` [dpdk-dev] [PATCH v2 17/19] vhost: reserve few more space for future extension Yuanhan Liu
@ 2016-05-26 17:04 4% ` Rich Lane
2016-05-27 1:36 4% ` Yuanhan Liu
2016-06-07 3:51 9% ` [dpdk-dev] [PATCH v3 00/20] " Yuanhan Liu
5 siblings, 1 reply; 200+ results
From: Rich Lane @ 2016-05-26 17:04 UTC (permalink / raw)
To: Yuanhan Liu
Cc: dev, Thomas Monjalon, huawei.xie, Panu Matilainen,
Tetsuya Mukawa, Traynor Kevin
On Thu, May 12, 2016 at 10:24 PM, Yuanhan Liu <yuanhan.liu@linux.intel.com>
wrote:
> v2: - exported ifname as well to fix a vhost-pmd issue reported
> by Rich
> - separated the big patch that introduces several new APIs
> into some small patches.
> - updated release note
> - updated version.map
>
Tested-by: Rich Lane <rich.lane@bigswitch.com>
Acked-by: Rich Lane <rich.lane@bigswitch.com>
^ permalink raw reply [relevance 4%]
* [dpdk-dev] [PATCH] ethdev: change comments of VLAN type
@ 2016-05-26 7:28 4% Beilei Xing
0 siblings, 0 replies; 200+ results
From: Beilei Xing @ 2016-05-26 7:28 UTC (permalink / raw)
To: jingjing.wu; +Cc: dev, Beilei Xing
If the packet carries a single VLAN header, it is treated as the
outer header.
So change the comments of inner VLAN and outer VLAN.
Signed-off-by: Beilei Xing <beilei.xing@intel.com>
---
doc/guides/rel_notes/release_16_07.rst | 3 +++
lib/librte_ether/rte_ethdev.h | 4 ++--
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 30e78d4..29db86c 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -116,6 +116,9 @@ API Changes
ibadcrc, ibadlen, imcasts, fdirmatch, fdirmiss,
tx_pause_xon, rx_pause_xon, tx_pause_xoff, rx_pause_xoff.
+* The comments of ``ETH_VLAN_TYPE_INNER`` and ``ETH_VLAN_TYPE_OUTER`` in
+ ``rte_vlan_type`` are changed.
+
ABI Changes
-----------
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 2757510..c5c29fb 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -363,8 +363,8 @@ struct rte_eth_rxmode {
*/
enum rte_vlan_type {
ETH_VLAN_TYPE_UNKNOWN = 0,
- ETH_VLAN_TYPE_INNER, /**< Single VLAN, or inner VLAN. */
- ETH_VLAN_TYPE_OUTER, /**< Outer VLAN. */
+ ETH_VLAN_TYPE_INNER, /**< Inner VLAN. */
+ ETH_VLAN_TYPE_OUTER, /**< Single VLAN, or outer VLAN. */
ETH_VLAN_TYPE_MAX,
};
--
2.5.0
^ permalink raw reply [relevance 4%]
* [dpdk-dev] [PATCH v4] Pci: Add the class_id support
2016-05-19 13:17 7% ` [dpdk-dev] [PATCH v3] " Ziye Yang
@ 2016-05-24 12:50 7% ` Ziye Yang
0 siblings, 0 replies; 200+ results
From: Ziye Yang @ 2016-05-24 12:50 UTC (permalink / raw)
To: dev
This patch is used to add the class_id (class_code,
subclass_code, programming_interface) support for
pci_device probe. With this patch, it will be
flexible for users to probe a class of devices
by class_id.
Signed-off-by: Ziye Yang <ziye.yang@intel.com>
---
Changes in v4: adjust title name and change RTE_PCI_DEVICE macro
doc/guides/rel_notes/deprecation.rst | 6 ------
lib/librte_eal/bsdapp/eal/eal_pci.c | 5 +++++
lib/librte_eal/common/eal_common_pci.c | 3 +++
lib/librte_eal/common/include/rte_pci.h | 4 ++++
lib/librte_eal/linuxapp/eal/eal_pci.c | 10 ++++++++++
5 files changed, 22 insertions(+), 6 deletions(-)
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index ad05eba..a300508 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -20,12 +20,6 @@ Deprecation Notices
do not need to care about the kind of devices that are being used, making it
easier to add new buses later.
-* ABI changes are planned for struct rte_pci_id, i.e., add new field ``class``.
- This new added ``class`` field can be used to probe pci device by class
- related info. This change should impact size of struct rte_pci_id and struct
- rte_pci_device. The release 16.04 does not contain these ABI changes, but
- release 16.07 will.
-
* The xstats API and rte_eth_xstats struct will be changed to allow retrieval
of values without any string copies or parsing.
No backwards compatibility is planned, as it would require code duplication
diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c
index 2d16d78..7fdd6f1 100644
--- a/lib/librte_eal/bsdapp/eal/eal_pci.c
+++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
@@ -278,6 +278,11 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
/* get subsystem_device id */
dev->id.subsystem_device_id = conf->pc_subdevice;
+ /* get class id */
+ dev->id.class_id = (conf->pc_class << 16) |
+ (conf->pc_subclass << 8) |
+ (conf->pc_progif);
+
/* TODO: get max_vfs */
dev->max_vfs = 0;
diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
index 3cae4cb..6c3117d 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -162,6 +162,9 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *d
if (id_table->subsystem_device_id != dev->id.subsystem_device_id &&
id_table->subsystem_device_id != PCI_ANY_ID)
continue;
+ if (id_table->class_id != dev->id.class_id &&
+ id_table->class_id != RTE_CLASS_ANY_ID)
+ continue;
struct rte_pci_addr *loc = &dev->addr;
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index 8fa2712..debc9ca 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -125,6 +125,7 @@ struct rte_pci_resource {
* table of these IDs for each device that it supports.
*/
struct rte_pci_id {
+ uint32_t class_id; /**< Class ID (class, subclass, pi) or RTE_CLASS_ANY_ID. */
uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */
uint16_t device_id; /**< Device ID or PCI_ANY_ID. */
uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */
@@ -170,10 +171,12 @@ struct rte_pci_device {
/** Any PCI device identifier (vendor, device, ...) */
#define PCI_ANY_ID (0xffff)
+#define RTE_CLASS_ANY_ID (0xffffff)
#ifdef __cplusplus
/** C++ macro used to help building up tables of device IDs */
#define RTE_PCI_DEVICE(vend, dev) \
+ RTE_CLASS_ANY_ID, \
(vend), \
(dev), \
PCI_ANY_ID, \
@@ -181,6 +184,7 @@ struct rte_pci_device {
#else
/** Macro used to help building up tables of device IDs */
#define RTE_PCI_DEVICE(vend, dev) \
+ .class_id = RTE_CLASS_ANY_ID, \
.vendor_id = (vend), \
.device_id = (dev), \
.subsystem_vendor_id = PCI_ANY_ID, \
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index bdc08a0..e6f0f13 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -306,6 +306,16 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
}
dev->id.subsystem_device_id = (uint16_t)tmp;
+ /* get class_id */
+ snprintf(filename, sizeof(filename), "%s/class",
+ dirname);
+ if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+ free(dev);
+ return -1;
+ }
+ /* the least 24 bits are valid: class, subclass, program interface */
+ dev->id.class_id = (uint32_t)tmp & RTE_CLASS_ANY_ID;
+
/* get max_vfs */
dev->max_vfs = 0;
snprintf(filename, sizeof(filename), "%s/max_vfs", dirname);
--
1.9.3
^ permalink raw reply [relevance 7%]
* Re: [dpdk-dev] [PATCH v4 3/9] librte_ether: add new fields to rte_eth_dev_info struct
2016-05-23 22:24 3% ` Stephen Hemminger
@ 2016-05-24 8:09 3% ` Pattan, Reshma
0 siblings, 0 replies; 200+ results
From: Pattan, Reshma @ 2016-05-24 8:09 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: dev
> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Monday, May 23, 2016 11:25 PM
> To: Pattan, Reshma <reshma.pattan@intel.com>
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 3/9] librte_ether: add new fields to
> rte_eth_dev_info struct
>
> On Mon, 23 May 2016 22:38:26 +0100
> Reshma Pattan <reshma.pattan@intel.com> wrote:
>
> > Add new fields to rte_eth_dev_info struct New fields nb_rx_queues and
> > nb_tx_queues are added to rte_eth_dev_info structure.
> > Changes to API rte_eth_dev_info_get() are done to update these new
> > fields to rte_eth_dev_info object.
> >
> > Signed-off-by: Reshma Pattan <reshma.pattan@intel.com>
>
> This is an ABI break because rte_dev_info_get will clobber the the stack of the
> caller if the caller thinks dev_info is old size.
Yes and the ABI breakage was announced as RFC earlier, please check the below mails, now this is formal patch for the same.
http://dpdk.org/ml/archives/dev/2016-April/037458.html
http://dpdk.org/ml/archives/dev/2016-April/037459.html
http://dpdk.org/ml/archives/dev/2016-April/037460.html
Thanks,
Reshma
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH v2 5/7] eal/linux: mmap ioports on ppc64
2016-05-23 13:40 3% ` Olivier Matz
@ 2016-05-24 5:15 3% ` Yuanhan Liu
2016-05-30 8:45 0% ` Olivier Matz
0 siblings, 1 reply; 200+ results
From: Yuanhan Liu @ 2016-05-24 5:15 UTC (permalink / raw)
To: Olivier Matz
Cc: David Marchand, dev, Chao Zhu, Xie, Huawei, Panu Matilainen,
Thomas Monjalon
On Mon, May 23, 2016 at 03:40:58PM +0200, Olivier Matz wrote:
> For reference, here is the report of the ABI checker for EAL:
>
> [−] struct rte_pci_ioport (2)
>
> 1 Field len has been added to this type.
> 1) This field will not be initialized by old clients.
> 2) Size of the inclusive type has been changed.
> NOTE: this field should be accessed only from the new library
> functions, otherwise it may result in crash or incorrect behavior
> of applications.
> 2 Size of this type has been changed from 16 bytes to 24 bytes.
> The fields or parameters of such data type may be incorrectly
> initialized or accessed by old client applications.
>
> [−] affected symbols (4)
> rte_eal_pci_ioport_map ( struct rte_pci_device* dev, int bar,
> struct rte_pci_ioport* p ) @@ DPDK_16.04
> 3rd parameter 'p' (pointer) has base type 'struct rte_pci_ioport'.
> rte_eal_pci_ioport_read ( struct rte_pci_ioport* p, void* data,
> size_t len, off_t offset ) @@ DPDK_16.04
> 1st parameter 'p' (pointer) has base type 'struct rte_pci_ioport'.
> rte_eal_pci_ioport_unmap ( struct rte_pci_ioport* p ) @@ DPDK_16.04
> 1st parameter 'p' (pointer) has base type 'struct rte_pci_ioport'.
> rte_eal_pci_ioport_write ( struct rte_pci_ioport* p, void const* data,
> size_t len, off_t offset ) @@ DPDK_16.04
> 1st parameter 'p' (pointer) has base type 'struct rte_pci_ioport'.
>
>
> My understanding of the comment for this structure is that it's
> internal to EAL:
I'm not quite sure that is enough. Cc'ed Panu, the guru on ABI stuff,
hopefully he could shed some light on it.
> /**
> * A structure used to access io resources for a pci device.
> * rte_pci_ioport is arch, os, driver specific, and should not be used
> outside
> * of pci ioport api.
> */
> struct rte_pci_ioport {
> ...
> }
>
> So I'd say it's ok to have it integrated for 16.07.
I'll let Thomas to decide it :)
--yliu
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH v4 3/9] librte_ether: add new fields to rte_eth_dev_info struct
@ 2016-05-23 22:24 3% ` Stephen Hemminger
2016-05-24 8:09 3% ` Pattan, Reshma
0 siblings, 1 reply; 200+ results
From: Stephen Hemminger @ 2016-05-23 22:24 UTC (permalink / raw)
To: Reshma Pattan; +Cc: dev
On Mon, 23 May 2016 22:38:26 +0100
Reshma Pattan <reshma.pattan@intel.com> wrote:
> Add new fields to rte_eth_dev_info struct
> New fields nb_rx_queues and nb_tx_queues are added to
> rte_eth_dev_info structure.
> Changes to API rte_eth_dev_info_get() are done to update
> these new fields to rte_eth_dev_info object.
>
> Signed-off-by: Reshma Pattan <reshma.pattan@intel.com>
This is an ABI break because rte_dev_info_get will clobber the
stack of the caller if the caller thinks dev_info is old size.
^ permalink raw reply [relevance 3%]
* [dpdk-dev] [PATCH v4 8/9] doc: update doc for packet capture framework
2016-05-23 21:38 3% ` [dpdk-dev] [PATCH v4 0/9] add " Reshma Pattan
@ 2016-05-23 21:38 6% ` Reshma Pattan
2016-05-23 21:38 9% ` [dpdk-dev] [PATCH v4 9/9] doc: announce ABI change for rte_eth_dev_info structure Reshma Pattan
2 siblings, 0 replies; 200+ results
From: Reshma Pattan @ 2016-05-23 21:38 UTC (permalink / raw)
To: dev; +Cc: Reshma Pattan
Added programmers guide for librte_pdump.
Added sample application guide for app/pdump application.
Updated release note for packet capture framework changes.
Signed-off-by: Reshma Pattan <reshma.pattan@intel.com>
---
MAINTAINERS | 3 +
doc/guides/prog_guide/index.rst | 1 +
doc/guides/prog_guide/pdump_library.rst | 106 +++++++++++++++++++++++++++++
doc/guides/rel_notes/release_16_07.rst | 11 +++
doc/guides/sample_app_ug/index.rst | 1 +
doc/guides/sample_app_ug/pdump.rst | 115 ++++++++++++++++++++++++++++++++
6 files changed, 237 insertions(+)
create mode 100644 doc/guides/prog_guide/pdump_library.rst
create mode 100644 doc/guides/sample_app_ug/pdump.rst
diff --git a/MAINTAINERS b/MAINTAINERS
index ae706b9..8b00f41 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -437,6 +437,9 @@ Pdump
M: Reshma Pattan <reshma.pattan@intel.com>
F: lib/librte_pdump/
F: app/pdump/
+F: doc/guides/prog_guide/pdump_library.rst
+F: doc/guides/sample_app_ug/pdump.rst
+
Hierarchical scheduler
M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
diff --git a/doc/guides/prog_guide/index.rst b/doc/guides/prog_guide/index.rst
index b862d0c..4caf969 100644
--- a/doc/guides/prog_guide/index.rst
+++ b/doc/guides/prog_guide/index.rst
@@ -71,6 +71,7 @@ Programmer's Guide
writing_efficient_code
profile_app
glossary
+ pdump_library
**Figures**
diff --git a/doc/guides/prog_guide/pdump_library.rst b/doc/guides/prog_guide/pdump_library.rst
new file mode 100644
index 0000000..8d9ef29
--- /dev/null
+++ b/doc/guides/prog_guide/pdump_library.rst
@@ -0,0 +1,106 @@
+.. BSD LICENSE
+ Copyright(c) 2016 Intel Corporation. All rights reserved.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+.. _Pdump_Library:
+
+pdump Library
+=============
+
+The ``pdump`` library provides a framework for packet capturing in DPDK.
+The library provides the following APIs to initialize the packet capture framework, to enable
+or disable the packet capture, and to uninitialize the packet capture framework.
+
+``rte_pdump_init()``:
+This API initializes the packet capture framework.
+
+``rte_pdump_enable()``:
+This API enables the packet capture on a given port and the queue.
+Note: the filter option in the API is a placeholder for future enhancements.
+
+``rte_pdump_enable_by_deviceid()``:
+This API enables the packet capture on a given device id (``vdev name or pci address``) and the queue.
+Note: the filter option in the API is a placeholder for future enhancements.
+
+``rte_pdump_disable()``:
+This API disables the packet capture on a given port and the queue.
+
+``rte_pdump_disable_by_deviceid()``:
+This API disables the packet capture on a given device id (``vdev name or pci address``) and the queue.
+
+``rte_pdump_uninit()``:
+This API uninitializes the packet capture framework.
+
+
+Operation
+---------
+
+The ``pdump`` library works on a client/server model. The server is responsible for enabling or
+disabling the packet capture and the clients are responsible for requesting that the packet capture be enabled or disabled.
+
+The packet capture framework, as part of its initialization, creates a pthread and creates the server socket in
+the pthread. The application that calls the framework initialization first will have the server socket created;
+further calls to the framework initialization by the same application or other applications are not allowed, i.e. only
+one server socket is allowed on the system. So the other applications can only request enabling or disabling of
+the packet capture, and a client socket is created to send the request to the server. The server socket will be
+listening for client requests to enable or disable the packet capture.
+
+
+Implementation Details
+----------------------
+
+The library API ``rte_pdump_init()`` initializes the packet capture framework by creating the pthread and the server
+socket. The server socket in the pthread context will be listening for client requests to enable or disable the
+packet capture. Whoever calls this API first will have the server socket created; subsequent calls to this API
+will not create any further server socket, i.e. only one server socket is allowed.
+
+The library APIs ``rte_pdump_enable()/rte_pdump_enable_by_deviceid()`` enable the packet capture. On each call to
+these APIs, the library creates a separate client socket, creates the pdump enable request and sends the request to the
+server. The server, which is listening on the socket, will take the request and enable the packet capture by registering the
+Ethernet rx/tx callbacks for the given port or device_id and queue combinations. Then the server will mirror the packets
+to the new mempool and enqueue them to the ring that the client has passed in to these APIs. The server also sends a response
+back to the client about the status of the request that was processed. After the response is received from the server,
+the client socket is closed.
+
+The library APIs ``rte_pdump_disable()/rte_pdump_disable_by_deviceid()`` disable the packet capture. On each call to
+these APIs, the library creates a separate client socket, creates the pdump disable request and sends the request to the
+server. The server, which is listening on the socket, will take the request and disable the packet capture by removing the
+Ethernet rx/tx callbacks for the given port or device_id and queue combinations. The server sends a response back to the
+client about the status of the request that was processed. After the response is received from the server, the client
+socket is closed.
+
+The library API ``rte_pdump_uninit()`` uninitializes the packet capture framework by closing the pthread and the
+server socket.
+
+
+Use Case: Packet Capturing
+--------------------------
+
+The DPDK ``app/pdump`` tool is developed based on this library to capture packets in DPDK.
+Users can use this library to develop their own packet capturing application.
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 30e78d4..e3cd64a 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -47,6 +47,10 @@ New Features
* Dropped specific Xen Dom0 code.
* Dropped specific anonymous mempool code in testpmd.
+* **Added packet capture framework.**
+
+ * The new library ``librte_pdump`` is added to provide packet capture APIs.
+ * The new ``app/pdump`` tool is added to capture packets on DPDK.
Resolved Issues
---------------
@@ -116,6 +120,11 @@ API Changes
ibadcrc, ibadlen, imcasts, fdirmatch, fdirmiss,
tx_pause_xon, rx_pause_xon, tx_pause_xoff, rx_pause_xoff.
+* Function ``rte_eth_dev_get_port_by_name`` changed to public API.
+
+* Function ``rte_eth_dev_info_get`` updated to return new fields ``nb_rx_queues`` and ``nb_tx_queues``
+ in ``rte_eth_dev_info`` object.
+
ABI Changes
-----------
@@ -127,6 +136,8 @@ ABI Changes
* The ``rte_port_source_params`` structure has new fields to support PCAP file.
It was already in release 16.04 with ``RTE_NEXT_ABI`` flag.
+* The ``rte_eth_dev_info`` structure has new fields ``nb_rx_queues`` and ``nb_tx_queues``
+ to support number of queues configured by software.
Shared Library Versions
-----------------------
diff --git a/doc/guides/sample_app_ug/index.rst b/doc/guides/sample_app_ug/index.rst
index 930f68c..96bb317 100644
--- a/doc/guides/sample_app_ug/index.rst
+++ b/doc/guides/sample_app_ug/index.rst
@@ -76,6 +76,7 @@ Sample Applications User Guide
ptpclient
performance_thread
ipsec_secgw
+ pdump
**Figures**
diff --git a/doc/guides/sample_app_ug/pdump.rst b/doc/guides/sample_app_ug/pdump.rst
new file mode 100644
index 0000000..89b14ec
--- /dev/null
+++ b/doc/guides/sample_app_ug/pdump.rst
@@ -0,0 +1,115 @@
+
+.. BSD LICENSE
+ Copyright(c) 2016 Intel Corporation. All rights reserved.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+dpdk_pdump Application
+======================
+
+The ``dpdk_pdump`` application is a Data Plane Development Kit (DPDK) application that runs as a DPDK secondary process and
+is capable of enabling packet capture on dpdk ports.
+
+
+Running the Application
+-----------------------
+
+The application has a ``--pdump`` command line option with various sub arguments:
+
+.. code-block:: console
+
+ ./build/app/dpdk_pdump --
+ --pdump '(port=<port id> | device_id=<pci id or vdev name>),
+ (queue=<queue_id>),
+ (rx-dev=<iface or pcap file> |
+ tx-dev=<iface or pcap file>),
+ [ring-size=<ring size>],
+ [mbuf-size=<mbuf data size>],
+ [total-num-mbufs=<number of mbufs>]'
+
+Note:
+
+* Parameters inside the parentheses represent mandatory parameters.
+
+* Parameters inside the square brackets represent optional parameters.
+
+Multiple instances of ``--pdump`` can be passed to capture packets on different port and queue combinations.
+
+
+Parameters
+~~~~~~~~~~
+
+``port``:
+Port id of the eth device on which packets should be captured.
+
+``device_id``:
+PCI address (or) name of the eth device on which packets should be captured.
+
+``queue``:
+Queue id of the eth device on which packets should be captured. The user can pass a queue value of ``*`` to enable
+packet capture on all queues of the eth device.
+
+``rx-dev``:
+Can be either a pcap file name or any Linux iface.
+
+``tx-dev``:
+Can be either a pcap file name or any Linux iface.
+
+ .. Note::
+
+ * To receive ingress packets only, ``rx-dev`` should be passed.
+
+ * To receive egress packets only, ``tx-dev`` should be passed.
+
+ * To receive ingress and egress packets separately ``rx-dev`` and ``tx-dev``
+ should both be passed with the different file names or the Linux iface names.
+
+ * To receive ingress and egress packets commonly ``rx-dev`` and ``tx-dev``
+ should both be passed with the same file names or the Linux iface names.
+
+``ring-size``:
+Size of the ring. This value is used internally for ring creation. The ring will be used to enqueue the packets from
+the primary application to the secondary. This is an optional parameter with default size 16384.
+
+``mbuf-size``:
+Size of the mbuf data. This is used internally for mempool creation. Ideally this value must be same as
+the primary application's mempool's mbuf data size which is used for packet RX. This is an optional parameter with
+default size 2176.
+
+``total-num-mbufs``:
+Total number of mbufs in mempool. This is used internally for mempool creation. This is an optional parameter with default
+value 65535.
+
+
+Example
+-------
+
+.. code-block:: console
+
+ $ sudo ./build/app/dpdk_pdump -- --pdump 'port=0,queue=*,rx-dev=/tmp/rx.pcap'
--
2.5.0
^ permalink raw reply [relevance 6%]
* [dpdk-dev] [PATCH v4 9/9] doc: announce ABI change for rte_eth_dev_info structure
2016-05-23 21:38 3% ` [dpdk-dev] [PATCH v4 0/9] add " Reshma Pattan
2016-05-23 21:38 6% ` [dpdk-dev] [PATCH v4 8/9] doc: update doc for packet capture framework Reshma Pattan
@ 2016-05-23 21:38 9% ` Reshma Pattan
2 siblings, 0 replies; 200+ results
From: Reshma Pattan @ 2016-05-23 21:38 UTC (permalink / raw)
To: dev; +Cc: Reshma Pattan
New fields nb_rx_queues and nb_tx_queues will be added to
rte_eth_dev_info structure.
Changes to API rte_eth_dev_info_get() will be done to update
these new fields to rte_eth_dev_info object.
Signed-off-by: Reshma Pattan <reshma.pattan@intel.com>
---
doc/guides/rel_notes/deprecation.rst | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index ad05eba..04316fb 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -57,3 +57,9 @@ Deprecation Notices
a handle, like the way kernel exposes an fd to user for locating a
specific file, and to keep all major structures internally, so that
we are likely to be free from ABI violations in future.
+
+* A librte_ether public structure ``rte_eth_dev_info`` will be changed in 16.07.
+ The proposed change will add new parameters ``nb_rx_queues``, ``nb_tx_queues``
+ to the structure. These are the number of queues configured by software.
+ Modification to definition of ``rte_eth_dev_info_get()`` will be done
+ to update new parameters to ``rte_eth_dev_info`` object.
--
2.5.0
^ permalink raw reply [relevance 9%]
* [dpdk-dev] [PATCH v4 0/9] add packet capture framework
2016-05-17 16:37 6% ` [dpdk-dev] [PATCH v3 8/8] doc: update doc for " Reshma Pattan
@ 2016-05-23 21:38 3% ` Reshma Pattan
` (2 more replies)
1 sibling, 3 replies; 200+ results
From: Reshma Pattan @ 2016-05-23 21:38 UTC (permalink / raw)
To: dev
This patchset includes the changes below
1)Changes to librte_ether.
2)New library librte_pdump added for packet capture framework.
3)New app/pdump tool added for packet capturing.
4)Test pmd changes done to initialize packet capture framework.
5)Documentation update.
1)librte_pdump
==============
To support packet capturing on dpdk ethernet devices, a new library librte_pdump
is added. Users can develop their own packet capturing application using new library APIs.
Operation:
----------
Pdump library provides APIs to support packet capturing on dpdk ethernet devices.
Library provides APIs to initialize the packet capture framework, enable/disable
the packet capture and uninitialize the packet capture framework.
Pdump library works on server and client based model.
Server is responsible for enabling/disabling the packet captures.
Clients are responsible for requesting enable/disable of the
packet captures.
As part of packet capture framework initialization, pthread and
the server socket is created. Only one server socket is allowed on the system.
As part of enabling/disabling the packet capture, client sockets are created
and multiple client sockets are allowed.
Whoever calls initialization first will succeed with the initialization;
subsequent calls of initialization are not allowed. So later users can only
request enabling/disabling the packet capture.
Applications using below APIs need to pass port/device_id, queue, mempool and
ring parameters. Library uses user provided ring and mempool to mirror the rx/tx
packets of the port for users. Users need to dequeue the rings and write the packets
to vdev(pcap/tuntap) to view the packets using any standard tools.
Note:
Mempool and Ring should be mc/mp supportable.
Mempool mbuf size should be big enough to handle the rx/tx packets of a port.
APIs:
-----
rte_pdump_init()
rte_pdump_enable()
rte_pdump_enable_by_deviceid()
rte_pdump_disable()
rte_pdump_disable_by_deviceid()
rte_pdump_uninit()
2)app/pdump tool
================
Tool app/pdump is based on librte_pdump for packet capturing.
This tool by default runs as secondary process, and provides the support for
the command line options for packet capture.
./build/app/dpdk_pdump --
--pdump '(port=<port id> | device_id=<pci id or vdev name>),
(queue=<queue id>),
(rx-dev=<iface or pcap file> |
tx-dev=<iface or pcap file>),
[ring-size=<ring size>],
[mbuf-size=<mbuf data size>],
[total-num-mbufs=<number of mbufs>]'
Parameters inside the parentheses represent the mandatory parameters.
Parameters inside the square brackets represent optional parameters.
User has to pass on packet capture parameters under --pdump parameters, multiples of
--pdump can be passed to capture packets on different port and queue combinations
Operation:
----------
*Tool parses the user command line arguments,
creates the mempool, ring and the PCAP PMD vdev with 'tx_stream' as either
of the device passed in rx-dev|tx-dev parameters.
*Then calls the APIs of librte_pdump i.e. rte_pdump_enable()/rte_pdump_enable_by_deviceid()
to enable packet capturing on a specific port/device_id and queue by passing on
port|device_id, queue, mempool and ring info.
*Tool runs in while loop to dequeue the packets from the ring and write them to pcap device.
*Tool can be stopped using SIGINT, upon which tool calls
rte_pdump_disable()/rte_pdump_disable_by_deviceid() and frees the allocated resources.
Note:
CONFIG_RTE_LIBRTE_PMD_PCAP flag should be set to yes to compile and run the pdump tool.
3)Test-pmd changes
==================
Changes are done to test-pmd application to initialize/uninitialize the packet capture framework.
So app/pdump tool can be run to see packets of dpdk ports that are used by test-pmd.
Similarly any application which needs packet capture should call initialize/uninitialize apis of
librte_pdump and use pdump tool to start the capture.
4)Packet capture flow between pdump tool and librte_pdump
=========================================================
* Pdump tool (Secondary process) requests packet capture
for specific port|device_id and queue combinations.
*Library in secondary process context creates client socket and communicates
the port|device_id, queue, ring and mempool to server.
*Library initializes server in primary process 'test-pmd' context and serves client
request to enable ethernet rxtx call-backs for given port|device_id and queue.
*Copy the rx/tx packets to passed mempool and enqueue the packets to ring for secondary process.
*Pdump tool will dequeue the packets from ring and write them to PCAP PMD vdev,
so ultimately packets will be seen on device passed in rx-dev|tx-dev.
*Once the pdump tool is terminated with SIGINT it will disable packet capturing.
*Library receives the disable packet capture request, communicate the info to server,
server will remove the ethernet rxtx call-backs.
*Packet capture can be seen using tcpdump command
"tcpdump -ni <iface>" (or) "tcpdump -nr <pcapfile>"
5)Example command line
======================
./build/app/dpdk_pdump -- --pdump 'device_id=0000:02:0.0,queue=*,tx-dev=/tmp/dt-file.pcap,rx-dev=/tmp/dr-file.pcap,ring-size=8192,mbuf-size=2176,total-num-mbufs=32768' --pdump 'device_id=0000:01:00.0,queue=*,rx-dev=/tmp/d-file.pcap,tx-dev=/tmp/d-file.pcap,ring-size=16384,mbuf-size=2176,total-num-mbufs=32768'
v4:
added missing deprecation notice for ABI changes of rte_eth_dev_info structure.
made doc changes as per doc guidelines.
replaced rte_eal_vdev_init with rte_eth_dev_attach in pdump tool.
removed rxtx-dev parameter from pdump tool command line.
v3:
app/pdump: Moved cleanup code from signal handler to main.
divided librte_ether changes into multiple patches.
example command changed in app/pdump application guide
v2:
fix compilation issues for 4.8.3
fix unnecessary #includes
Reshma Pattan (9):
librte_ether: protect add/remove of rxtx callbacks with spinlocks
librte_ether: add new api rte_eth_add_first_rx_callback
librte_ether: add new fields to rte_eth_dev_info struct
librte_ether: make rte_eth_dev_get_port_by_name api public
lib/librte_pdump: add new library for packet capturing support
app/pdump: add pdump tool for packet capturing
app/test-pmd: add pdump initialization uninitialization
doc: update doc for packet capture framework
doc: announce ABI change for rte_eth_dev_info structure
MAINTAINERS | 8 +
app/Makefile | 1 +
app/pdump/Makefile | 45 ++
app/pdump/main.c | 888 ++++++++++++++++++++++++++++++++
app/test-pmd/testpmd.c | 6 +
config/common_base | 5 +
doc/guides/prog_guide/index.rst | 1 +
doc/guides/prog_guide/pdump_library.rst | 106 ++++
doc/guides/rel_notes/deprecation.rst | 6 +
doc/guides/rel_notes/release_16_07.rst | 11 +
doc/guides/sample_app_ug/index.rst | 1 +
doc/guides/sample_app_ug/pdump.rst | 115 +++++
lib/Makefile | 1 +
lib/librte_ether/rte_ethdev.c | 121 +++--
lib/librte_ether/rte_ethdev.h | 45 ++
lib/librte_ether/rte_ether_version.map | 8 +
lib/librte_pdump/Makefile | 55 ++
lib/librte_pdump/rte_pdump.c | 816 +++++++++++++++++++++++++++++
lib/librte_pdump/rte_pdump.h | 186 +++++++
lib/librte_pdump/rte_pdump_version.map | 12 +
mk/rte.app.mk | 1 +
21 files changed, 2395 insertions(+), 43 deletions(-)
create mode 100644 app/pdump/Makefile
create mode 100644 app/pdump/main.c
create mode 100644 doc/guides/prog_guide/pdump_library.rst
create mode 100644 doc/guides/sample_app_ug/pdump.rst
create mode 100644 lib/librte_pdump/Makefile
create mode 100644 lib/librte_pdump/rte_pdump.c
create mode 100644 lib/librte_pdump/rte_pdump.h
create mode 100644 lib/librte_pdump/rte_pdump_version.map
--
2.5.0
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH v2 5/7] eal/linux: mmap ioports on ppc64
2016-05-23 13:07 3% ` Yuanhan Liu
@ 2016-05-23 13:40 3% ` Olivier Matz
2016-05-24 5:15 3% ` Yuanhan Liu
0 siblings, 1 reply; 200+ results
From: Olivier Matz @ 2016-05-23 13:40 UTC (permalink / raw)
To: Yuanhan Liu, David Marchand; +Cc: dev, Chao Zhu, Xie, Huawei
Hi Yuanhan,
On 05/23/2016 03:07 PM, Yuanhan Liu wrote:
> On Tue, May 17, 2016 at 05:54:01PM +0200, David Marchand wrote:
>>> +pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
>>> + struct rte_pci_ioport *p)
>>> +{
>>> + FILE *f;
>>> + char buf[BUFSIZ];
>>> + char filename[PATH_MAX];
>>> + uint64_t phys_addr, end_addr, flags;
>>> + int fd, i;
>>> + void *addr;
>>> +
>>> + /* open and read addresses of the corresponding resource in sysfs */
>>> + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
>>> + SYSFS_PCI_DEVICES, dev->addr.domain, dev->addr.bus,
>>> + dev->addr.devid, dev->addr.function);
>>> + f = fopen(filename, "r");
>>> + if (f == NULL) {
>>> + RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n",
>>> + strerror(errno));
>>> + return -1;
>>> + }
>>> + for (i = 0; i < bar + 1; i++) {
>>> + if (fgets(buf, sizeof(buf), f) == NULL) {
>>> + RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n");
>>> + goto error;
>>> + }
>>> + }
>>> + if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
>>> + &end_addr, &flags) < 0)
>>> + goto error;
>>> + if ((flags & IORESOURCE_IO) == 0) {
>>> + RTE_LOG(ERR, EAL, "BAR %d is not an IO resource\n", bar);
>>> + goto error;
>>> + }
>>> + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource%d",
>>> + SYSFS_PCI_DEVICES, dev->addr.domain, dev->addr.bus,
>>> + dev->addr.devid, dev->addr.function, bar);
>>> +
>>> + /* mmap the pci resource */
>>> + fd = open(filename, O_RDWR);
>>> + if (fd < 0) {
>>> + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
>>> + strerror(errno));
>>> + goto error;
>>> + }
>>> + addr = mmap(NULL, end_addr + 1, PROT_READ | PROT_WRITE,
>>> + MAP_SHARED, fd, 0);
>>
>> Sorry, did not catch it in v1, but a close(fd) is missing here.
>> With this, I think the patchset looks good.
>>
>> Just missing some opinion from the virtio maintainers ?
>
> Apologize for being late for review. Assuming you have done proper
> test, this patch set looks good to me. (well, I don't quite like
> the tons of "#ifdef ... #else ..#end" block though)
>
> A side note is that I noticed an ABI breakage introduced in this
> patch, so, this release is not a good fit?
Thank you for the review.
For reference, here is the report of the ABI checker for EAL:
[−] struct rte_pci_ioport (2)
1 Field len has been added to this type.
1) This field will not be initialized by old clients.
2) Size of the inclusive type has been changed.
NOTE: this field should be accessed only from the new library
functions, otherwise it may result in crash or incorrect behavior
of applications.
2 Size of this type has been changed from 16 bytes to 24 bytes.
The fields or parameters of such data type may be incorrectly
initialized or accessed by old client applications.
[−] affected symbols (4)
rte_eal_pci_ioport_map ( struct rte_pci_device* dev, int bar,
struct rte_pci_ioport* p ) @@ DPDK_16.04
3rd parameter 'p' (pointer) has base type 'struct rte_pci_ioport'.
rte_eal_pci_ioport_read ( struct rte_pci_ioport* p, void* data,
size_t len, off_t offset ) @@ DPDK_16.04
1st parameter 'p' (pointer) has base type 'struct rte_pci_ioport'.
rte_eal_pci_ioport_unmap ( struct rte_pci_ioport* p ) @@ DPDK_16.04
1st parameter 'p' (pointer) has base type 'struct rte_pci_ioport'.
rte_eal_pci_ioport_write ( struct rte_pci_ioport* p, void const* data,
size_t len, off_t offset ) @@ DPDK_16.04
1st parameter 'p' (pointer) has base type 'struct rte_pci_ioport'.
My understanding of the comment for this structure is that it's
internal to EAL:
/**
* A structure used to access io resources for a pci device.
* rte_pci_ioport is arch, os, driver specific, and should not be used
outside
* of pci ioport api.
*/
struct rte_pci_ioport {
...
}
So I'd say it's ok to have it integrated for 16.07.
Regards,
Olivier
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH v2 5/7] eal/linux: mmap ioports on ppc64
@ 2016-05-23 13:07 3% ` Yuanhan Liu
2016-05-23 13:40 3% ` Olivier Matz
0 siblings, 1 reply; 200+ results
From: Yuanhan Liu @ 2016-05-23 13:07 UTC (permalink / raw)
To: David Marchand; +Cc: Olivier Matz, dev, Chao Zhu, Xie, Huawei
On Tue, May 17, 2016 at 05:54:01PM +0200, David Marchand wrote:
> > +pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
> > + struct rte_pci_ioport *p)
> > +{
> > + FILE *f;
> > + char buf[BUFSIZ];
> > + char filename[PATH_MAX];
> > + uint64_t phys_addr, end_addr, flags;
> > + int fd, i;
> > + void *addr;
> > +
> > + /* open and read addresses of the corresponding resource in sysfs */
> > + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
> > + SYSFS_PCI_DEVICES, dev->addr.domain, dev->addr.bus,
> > + dev->addr.devid, dev->addr.function);
> > + f = fopen(filename, "r");
> > + if (f == NULL) {
> > + RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n",
> > + strerror(errno));
> > + return -1;
> > + }
> > + for (i = 0; i < bar + 1; i++) {
> > + if (fgets(buf, sizeof(buf), f) == NULL) {
> > + RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n");
> > + goto error;
> > + }
> > + }
> > + if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
> > + &end_addr, &flags) < 0)
> > + goto error;
> > + if ((flags & IORESOURCE_IO) == 0) {
> > + RTE_LOG(ERR, EAL, "BAR %d is not an IO resource\n", bar);
> > + goto error;
> > + }
> > + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource%d",
> > + SYSFS_PCI_DEVICES, dev->addr.domain, dev->addr.bus,
> > + dev->addr.devid, dev->addr.function, bar);
> > +
> > + /* mmap the pci resource */
> > + fd = open(filename, O_RDWR);
> > + if (fd < 0) {
> > + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
> > + strerror(errno));
> > + goto error;
> > + }
> > + addr = mmap(NULL, end_addr + 1, PROT_READ | PROT_WRITE,
> > + MAP_SHARED, fd, 0);
>
> Sorry, did not catch it in v1, but a close(fd) is missing here.
> With this, I think the patchset looks good.
>
> Just missing some opinion from the virtio maintainers ?
Apologize for being late for review. Assuming you have done proper
test, this patch set looks good to me. (well, I don't quite like
the tons of "#ifdef ... #else ..#end" block though)
A side note is that I noticed an ABI breakage introduced in this
patch, so, this release is not a good fit?
--yliu
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH] mbuf: new flag when Vlan is stripped
2016-05-23 8:46 2% ` [dpdk-dev] [PATCH] mbuf: new flag when Vlan " Olivier Matz
2016-05-23 8:59 0% ` Ananyev, Konstantin
@ 2016-05-23 9:20 0% ` Ananyev, Konstantin
2016-05-27 14:33 2% ` [dpdk-dev] [PATCH v2] " Olivier Matz
2 siblings, 0 replies; 200+ results
From: Ananyev, Konstantin @ 2016-05-23 9:20 UTC (permalink / raw)
To: Olivier Matz, dev
Cc: johndale, Zhang, Helin, adrien.mazarguil, rahul.lakkireddy,
alejandro.lucero, sony.chacko
> -----Original Message-----
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Monday, May 23, 2016 9:47 AM
> To: dev@dpdk.org
> Cc: johndale@cisco.com; Ananyev, Konstantin; Zhang, Helin; adrien.mazarguil@6wind.com; rahul.lakkireddy@chelsio.com;
> alejandro.lucero@netronome.com; sony.chacko@qlogic.com
> Subject: [PATCH] mbuf: new flag when Vlan is stripped
>
> The behavior of PKT_RX_VLAN_PKT was not very well defined, resulting in
> PMDs not advertising the same flags in similar conditions.
>
> Following discussion in [1], introduce 2 new flags PKT_RX_VLAN_STRIPPED
> and PKT_RX_QINQ_STRIPPED that are better defined:
>
> PKT_RX_VLAN_STRIPPED: a vlan has been stripped by the hardware and its
> tci is saved in mbuf->vlan_tci. This can only happen if vlan stripping
> is enabled in the RX configuration of the PMD.
>
> For now, the old flag PKT_RX_VLAN_PKT is kept but marked as deprecated.
> It should be removed from applications and PMDs in a future revision.
>
> This patch also updates the drivers. For PKT_RX_VLAN_PKT:
>
> - e1000, enic, i40e, mlx5, nfp, vmxnet3: done, PKT_RX_VLAN_PKT already
> had the same meaning than PKT_RX_VLAN_STRIPPED, minor update is
> required.
> - fm10k: done, PKT_RX_VLAN_PKT already had the same meaning than
> PKT_RX_VLAN_STRIPPED, and vlan stripping is always enabled on fm10k.
> - ixgbe: modification done for standard mode (vector does not support
> vlan stripping)
> - the other drivers do not support vlan stripping.
>
> For PKT_RX_QINQ_PKT, it was only supported on i40e, and the meaning was
> already correct, so we can reuse the same value for PKT_RX_QINQ_STRIPPED.
>
> [1] http://dpdk.org/ml/archives/dev/2016-April/037837.html,
>
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> ---
>
> RFC -> v1:
> - fix checkpatch and check-git-log.sh issues
> - add a deprecation notice for the old vlan flags
> - rebase on head
>
>
> app/test-pmd/rxonly.c | 4 +--
> doc/guides/rel_notes/deprecation.rst | 5 ++++
> drivers/net/e1000/em_rxtx.c | 3 ++-
> drivers/net/e1000/igb_rxtx.c | 3 ++-
> drivers/net/enic/enic_rx.c | 2 +-
> drivers/net/i40e/i40e_rxtx.c | 2 +-
> drivers/net/ixgbe/ixgbe_ethdev.c | 7 +++++
> drivers/net/ixgbe/ixgbe_rxtx.c | 21 +++++++++++----
> drivers/net/ixgbe/ixgbe_rxtx.h | 1 +
> drivers/net/mlx5/mlx5_rxtx.c | 6 +++--
> drivers/net/nfp/nfp_net.c | 2 +-
> drivers/net/vmxnet3/vmxnet3_rxtx.c | 2 +-
> lib/librte_mbuf/rte_mbuf.c | 2 ++
> lib/librte_mbuf/rte_mbuf.h | 50 ++++++++++++++++++++++++++++++++----
> 14 files changed, 90 insertions(+), 20 deletions(-)
>
> diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c
> index 14555ab..c69b344 100644
> --- a/app/test-pmd/rxonly.c
> +++ b/app/test-pmd/rxonly.c
> @@ -156,9 +156,9 @@ pkt_burst_receive(struct fwd_stream *fs)
> printf("hash=0x%x ID=0x%x ",
> mb->hash.fdir.hash, mb->hash.fdir.id);
> }
> - if (ol_flags & PKT_RX_VLAN_PKT)
> + if (ol_flags & PKT_RX_VLAN_STRIPPED)
> printf(" - VLAN tci=0x%x", mb->vlan_tci);
> - if (ol_flags & PKT_RX_QINQ_PKT)
> + if (ol_flags & PKT_RX_QINQ_STRIPPED)
> printf(" - QinQ VLAN tci=0x%x, VLAN tci outer=0x%x",
> mb->vlan_tci, mb->vlan_tci_outer);
> if (mb->packet_type) {
> diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
> index ad05eba..2233a90 100644
> --- a/doc/guides/rel_notes/deprecation.rst
> +++ b/doc/guides/rel_notes/deprecation.rst
> @@ -57,3 +57,8 @@ Deprecation Notices
> a handle, like the way kernel exposes an fd to user for locating a
> specific file, and to keep all major structures internally, so that
> we are likely to be free from ABI violations in future.
> +
> +* The mbuf flags PKT_RX_VLAN_PKT and PKT_RX_QINQ_PKT are deprecated and
> + are respectively replaced by PKT_RX_VLAN_STRIPPED and
> + PKT_RX_QINQ_STRIPPED, that are better described. The old flags and
> + their behavior will be kept in 16.07 and will be removed in 16.11.
> diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
> index 3d36f21..6d8750a 100644
> --- a/drivers/net/e1000/em_rxtx.c
> +++ b/drivers/net/e1000/em_rxtx.c
> @@ -629,7 +629,8 @@ rx_desc_status_to_pkt_flags(uint32_t rx_status)
> uint64_t pkt_flags;
>
> /* Check if VLAN present */
> - pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0);
> + pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
> + PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED : 0);
>
> return pkt_flags;
> }
> diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
> index 18aeead..9d80a0b 100644
> --- a/drivers/net/e1000/igb_rxtx.c
> +++ b/drivers/net/e1000/igb_rxtx.c
> @@ -729,7 +729,8 @@ rx_desc_status_to_pkt_flags(uint32_t rx_status)
> uint64_t pkt_flags;
>
> /* Check if VLAN present */
> - pkt_flags = (rx_status & E1000_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0;
> + pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
> + PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED : 0);
>
> #if defined(RTE_LIBRTE_IEEE1588)
> if (rx_status & E1000_RXD_STAT_TMST)
> diff --git a/drivers/net/enic/enic_rx.c b/drivers/net/enic/enic_rx.c
> index f92f6bc..6459e97 100644
> --- a/drivers/net/enic/enic_rx.c
> +++ b/drivers/net/enic/enic_rx.c
> @@ -197,7 +197,7 @@ enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf)
>
> /* VLAN stripping */
> if (bwflags & CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) {
> - pkt_flags |= PKT_RX_VLAN_PKT;
> + pkt_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
> mbuf->vlan_tci = enic_cq_rx_desc_vlan(cqrd);
> } else {
> mbuf->vlan_tci = 0;
> diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
> index c833aa3..aa161a9 100644
> --- a/drivers/net/i40e/i40e_rxtx.c
> +++ b/drivers/net/i40e/i40e_rxtx.c
> @@ -99,7 +99,7 @@ i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp)
> #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
> if (rte_le_to_cpu_16(rxdp->wb.qword2.ext_status) &
> (1 << I40E_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) {
> - mb->ol_flags |= PKT_RX_QINQ_PKT;
> + mb->ol_flags |= PKT_RX_QINQ_STRIPPED;
> mb->vlan_tci_outer = mb->vlan_tci;
> mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_2);
> PMD_RX_LOG(DEBUG, "Descriptor l2tag2_1: %u, l2tag2_2: %u",
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
> index a2b170b..e7717e3 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> @@ -1636,6 +1636,7 @@ ixgbe_vlan_hw_strip_bitmap_set(struct rte_eth_dev *dev, uint16_t queue, bool on)
> {
> struct ixgbe_hwstrip *hwstrip =
> IXGBE_DEV_PRIVATE_TO_HWSTRIP_BITMAP(dev->data->dev_private);
> + struct ixgbe_rx_queue *rxq;
>
> if (queue >= IXGBE_MAX_RX_QUEUE_NUM)
> return;
> @@ -1644,6 +1645,12 @@ ixgbe_vlan_hw_strip_bitmap_set(struct rte_eth_dev *dev, uint16_t queue, bool on)
> IXGBE_SET_HWSTRIP(hwstrip, queue);
> else
> IXGBE_CLEAR_HWSTRIP(hwstrip, queue);
> +
> + if (queue >= dev->data->nb_rx_queues)
> + return;
> +
> + rxq = dev->data->rx_queues[queue];
> + rxq->vlan_strip = on;
> }
>
> static void
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> index 9c6eaf2..3d740df 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -1221,16 +1221,23 @@ ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
> }
>
> static inline uint64_t
> -rx_desc_status_to_pkt_flags(uint32_t rx_status)
> +rx_desc_status_to_pkt_flags(uint32_t rx_status, uint8_t vlan_strip)
> {
> uint64_t pkt_flags;
> + uint64_t vlan_flags;
> +
> + /* if vlan is stripped, set the proper flag */
> + if (vlan_strip)
> + vlan_flags = PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
> + else
> + vlan_flags = PKT_RX_VLAN_PKT;
>
> /*
> * Check if VLAN present only.
> * Do not check whether L3/L4 rx checksum done by NIC or not,
> * That can be found from rte_eth_rxmode.hw_ip_checksum flag
> */
> - pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0;
> + pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ? vlan_flags : 0;
Instead of storing in rxq (and passing as a parameter) a bool value for vlan_strip (=on/off),
you probably can store in rxq and pass as a parameter here uint64_t vlan_flags;
Then it will be:
rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
{
...
pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ? vlan_flags : 0;
...
}
...
pkt_flags = rx_desc_status_to_pkt_flags(s[j], rxq->vlan_flags);
Might help to save few cycles here.
Konstantin
>
> #ifdef RTE_LIBRTE_IEEE1588
> if (rx_status & IXGBE_RXD_STAT_TMST)
> @@ -1287,6 +1294,7 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
> uint32_t pkt_info[LOOK_AHEAD];
> int i, j, nb_rx = 0;
> uint32_t status;
> + uint8_t vlan_strip = rxq->vlan_strip;
>
> /* get references to current descriptor and S/W ring entry */
> rxdp = &rxq->rx_ring[rxq->rx_tail];
> @@ -1328,7 +1336,8 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
> mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
>
> /* convert descriptor fields to rte mbuf flags */
> - pkt_flags = rx_desc_status_to_pkt_flags(s[j]);
> + pkt_flags = rx_desc_status_to_pkt_flags(s[j],
> + vlan_strip);
> pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
> pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
> ((uint16_t)pkt_info[j]);
> @@ -1544,6 +1553,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
> uint16_t nb_rx;
> uint16_t nb_hold;
> uint64_t pkt_flags;
> + uint8_t vlan_strip;
>
> nb_rx = 0;
> nb_hold = 0;
> @@ -1551,6 +1561,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
> rx_id = rxq->rx_tail;
> rx_ring = rxq->rx_ring;
> sw_ring = rxq->sw_ring;
> + vlan_strip = rxq->vlan_strip;
> while (nb_rx < nb_pkts) {
> /*
> * The order of operations here is important as the DD status
> @@ -1660,7 +1671,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
> /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
> rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
>
> - pkt_flags = rx_desc_status_to_pkt_flags(staterr);
> + pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_strip);
> pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
> pkt_flags = pkt_flags |
> ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
> @@ -1753,7 +1764,7 @@ ixgbe_fill_cluster_head_buf(
> */
> head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
> pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
> - pkt_flags = rx_desc_status_to_pkt_flags(staterr);
> + pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_strip);
> pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
> pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
> head->ol_flags = pkt_flags;
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
> index 3691a19..9ca0e8b 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> @@ -146,6 +146,7 @@ struct ixgbe_rx_queue {
> uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
> uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
> uint8_t rx_deferred_start; /**< not in global dev start. */
> + uint8_t vlan_strip; /**< 1 if vlan stripping enabled. */
> /** need to alloc dummy mbuf, for wraparound when scanning hw ring */
> struct rte_mbuf fake_mbuf;
> /** hold packets to return to application */
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH] mbuf: new flag when Vlan is stripped
2016-05-23 8:46 2% ` [dpdk-dev] [PATCH] mbuf: new flag when Vlan " Olivier Matz
@ 2016-05-23 8:59 0% ` Ananyev, Konstantin
2016-05-23 9:20 0% ` Ananyev, Konstantin
2016-05-27 14:33 2% ` [dpdk-dev] [PATCH v2] " Olivier Matz
2 siblings, 0 replies; 200+ results
From: Ananyev, Konstantin @ 2016-05-23 8:59 UTC (permalink / raw)
To: Olivier Matz, dev
Cc: johndale, Zhang, Helin, adrien.mazarguil, rahul.lakkireddy,
alejandro.lucero, sony.chacko
Hi Olivier,
> -----Original Message-----
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Monday, May 23, 2016 9:47 AM
> To: dev@dpdk.org
> Cc: johndale@cisco.com; Ananyev, Konstantin; Zhang, Helin; adrien.mazarguil@6wind.com; rahul.lakkireddy@chelsio.com;
> alejandro.lucero@netronome.com; sony.chacko@qlogic.com
> Subject: [PATCH] mbuf: new flag when Vlan is stripped
>
> The behavior of PKT_RX_VLAN_PKT was not very well defined, resulting in
> PMDs not advertising the same flags in similar conditions.
>
> Following discussion in [1], introduce 2 new flags PKT_RX_VLAN_STRIPPED
> and PKT_RX_QINQ_STRIPPED that are better defined:
>
> PKT_RX_VLAN_STRIPPED: a vlan has been stripped by the hardware and its
> tci is saved in mbuf->vlan_tci. This can only happen if vlan stripping
> is enabled in the RX configuration of the PMD.
>
> For now, the old flag PKT_RX_VLAN_PKT is kept but marked as deprecated.
> It should be removed from applications and PMDs in a future revision.
>
> This patch also updates the drivers. For PKT_RX_VLAN_PKT:
>
> - e1000, enic, i40e, mlx5, nfp, vmxnet3: done, PKT_RX_VLAN_PKT already
> had the same meaning as PKT_RX_VLAN_STRIPPED, minor update is
> required.
> - fm10k: done, PKT_RX_VLAN_PKT already had the same meaning as
> PKT_RX_VLAN_STRIPPED, and vlan stripping is always enabled on fm10k.
> - ixgbe: modification done for standard mode (vector does not support
> vlan stripping)
> - the other drivers do not support vlan stripping.
>
> For PKT_RX_QINQ_PKT, it was only supported on i40e, and the meaning was
> already correct, so we can reuse the same value for PKT_RX_QINQ_STRIPPED.
>
> [1] http://dpdk.org/ml/archives/dev/2016-April/037837.html
>
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> ---
>
> RFC -> v1:
> - fix checkpatch and check-git-log.sh issues
> - add a deprecation notice for the old vlan flags
> - rebase on head
>
>
> app/test-pmd/rxonly.c | 4 +--
> doc/guides/rel_notes/deprecation.rst | 5 ++++
> drivers/net/e1000/em_rxtx.c | 3 ++-
> drivers/net/e1000/igb_rxtx.c | 3 ++-
> drivers/net/enic/enic_rx.c | 2 +-
> drivers/net/i40e/i40e_rxtx.c | 2 +-
> drivers/net/ixgbe/ixgbe_ethdev.c | 7 +++++
> drivers/net/ixgbe/ixgbe_rxtx.c | 21 +++++++++++----
> drivers/net/ixgbe/ixgbe_rxtx.h | 1 +
> drivers/net/mlx5/mlx5_rxtx.c | 6 +++--
> drivers/net/nfp/nfp_net.c | 2 +-
> drivers/net/vmxnet3/vmxnet3_rxtx.c | 2 +-
> lib/librte_mbuf/rte_mbuf.c | 2 ++
> lib/librte_mbuf/rte_mbuf.h | 50 ++++++++++++++++++++++++++++++++----
> 14 files changed, 90 insertions(+), 20 deletions(-)
I don't see ixgbe/i40e_rxtx_vec.c updated.
Will there be a separate patch for them?
Thanks
Konstantin
>
> diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c
> index 14555ab..c69b344 100644
> --- a/app/test-pmd/rxonly.c
> +++ b/app/test-pmd/rxonly.c
> @@ -156,9 +156,9 @@ pkt_burst_receive(struct fwd_stream *fs)
> printf("hash=0x%x ID=0x%x ",
> mb->hash.fdir.hash, mb->hash.fdir.id);
> }
> - if (ol_flags & PKT_RX_VLAN_PKT)
> + if (ol_flags & PKT_RX_VLAN_STRIPPED)
> printf(" - VLAN tci=0x%x", mb->vlan_tci);
> - if (ol_flags & PKT_RX_QINQ_PKT)
> + if (ol_flags & PKT_RX_QINQ_STRIPPED)
> printf(" - QinQ VLAN tci=0x%x, VLAN tci outer=0x%x",
> mb->vlan_tci, mb->vlan_tci_outer);
> if (mb->packet_type) {
> diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
> index ad05eba..2233a90 100644
> --- a/doc/guides/rel_notes/deprecation.rst
> +++ b/doc/guides/rel_notes/deprecation.rst
> @@ -57,3 +57,8 @@ Deprecation Notices
> a handle, like the way kernel exposes an fd to user for locating a
> specific file, and to keep all major structures internally, so that
> we are likely to be free from ABI violations in future.
> +
> +* The mbuf flags PKT_RX_VLAN_PKT and PKT_RX_QINQ_PKT are deprecated and
> + are respectively replaced by PKT_RX_VLAN_STRIPPED and
> + PKT_RX_QINQ_STRIPPED, that are better described. The old flags and
> + their behavior will be kept in 16.07 and will be removed in 16.11.
> diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
> index 3d36f21..6d8750a 100644
> --- a/drivers/net/e1000/em_rxtx.c
> +++ b/drivers/net/e1000/em_rxtx.c
> @@ -629,7 +629,8 @@ rx_desc_status_to_pkt_flags(uint32_t rx_status)
> uint64_t pkt_flags;
>
> /* Check if VLAN present */
> - pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0);
> + pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
> + PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED : 0);
>
> return pkt_flags;
> }
> diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
> index 18aeead..9d80a0b 100644
> --- a/drivers/net/e1000/igb_rxtx.c
> +++ b/drivers/net/e1000/igb_rxtx.c
> @@ -729,7 +729,8 @@ rx_desc_status_to_pkt_flags(uint32_t rx_status)
> uint64_t pkt_flags;
>
> /* Check if VLAN present */
> - pkt_flags = (rx_status & E1000_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0;
> + pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
> + PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED : 0);
>
> #if defined(RTE_LIBRTE_IEEE1588)
> if (rx_status & E1000_RXD_STAT_TMST)
> diff --git a/drivers/net/enic/enic_rx.c b/drivers/net/enic/enic_rx.c
> index f92f6bc..6459e97 100644
> --- a/drivers/net/enic/enic_rx.c
> +++ b/drivers/net/enic/enic_rx.c
> @@ -197,7 +197,7 @@ enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf)
>
> /* VLAN stripping */
> if (bwflags & CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) {
> - pkt_flags |= PKT_RX_VLAN_PKT;
> + pkt_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
> mbuf->vlan_tci = enic_cq_rx_desc_vlan(cqrd);
> } else {
> mbuf->vlan_tci = 0;
> diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
> index c833aa3..aa161a9 100644
> --- a/drivers/net/i40e/i40e_rxtx.c
> +++ b/drivers/net/i40e/i40e_rxtx.c
> @@ -99,7 +99,7 @@ i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp)
> #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
> if (rte_le_to_cpu_16(rxdp->wb.qword2.ext_status) &
> (1 << I40E_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) {
> - mb->ol_flags |= PKT_RX_QINQ_PKT;
> + mb->ol_flags |= PKT_RX_QINQ_STRIPPED;
> mb->vlan_tci_outer = mb->vlan_tci;
> mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_2);
> PMD_RX_LOG(DEBUG, "Descriptor l2tag2_1: %u, l2tag2_2: %u",
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
> index a2b170b..e7717e3 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> @@ -1636,6 +1636,7 @@ ixgbe_vlan_hw_strip_bitmap_set(struct rte_eth_dev *dev, uint16_t queue, bool on)
> {
> struct ixgbe_hwstrip *hwstrip =
> IXGBE_DEV_PRIVATE_TO_HWSTRIP_BITMAP(dev->data->dev_private);
> + struct ixgbe_rx_queue *rxq;
>
> if (queue >= IXGBE_MAX_RX_QUEUE_NUM)
> return;
> @@ -1644,6 +1645,12 @@ ixgbe_vlan_hw_strip_bitmap_set(struct rte_eth_dev *dev, uint16_t queue, bool on)
> IXGBE_SET_HWSTRIP(hwstrip, queue);
> else
> IXGBE_CLEAR_HWSTRIP(hwstrip, queue);
> +
> + if (queue >= dev->data->nb_rx_queues)
> + return;
> +
> + rxq = dev->data->rx_queues[queue];
> + rxq->vlan_strip = on;
> }
>
> static void
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> index 9c6eaf2..3d740df 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -1221,16 +1221,23 @@ ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
> }
>
> static inline uint64_t
> -rx_desc_status_to_pkt_flags(uint32_t rx_status)
> +rx_desc_status_to_pkt_flags(uint32_t rx_status, uint8_t vlan_strip)
> {
> uint64_t pkt_flags;
> + uint64_t vlan_flags;
> +
> + /* if vlan is stripped, set the proper flag */
> + if (vlan_strip)
> + vlan_flags = PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
> + else
> + vlan_flags = PKT_RX_VLAN_PKT;
>
> /*
> * Check if VLAN present only.
> * Do not check whether L3/L4 rx checksum done by NIC or not,
> * That can be found from rte_eth_rxmode.hw_ip_checksum flag
> */
> - pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0;
> + pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ? vlan_flags : 0;
>
> #ifdef RTE_LIBRTE_IEEE1588
> if (rx_status & IXGBE_RXD_STAT_TMST)
> @@ -1287,6 +1294,7 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
> uint32_t pkt_info[LOOK_AHEAD];
> int i, j, nb_rx = 0;
> uint32_t status;
> + uint8_t vlan_strip = rxq->vlan_strip;
>
> /* get references to current descriptor and S/W ring entry */
> rxdp = &rxq->rx_ring[rxq->rx_tail];
> @@ -1328,7 +1336,8 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
> mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
>
> /* convert descriptor fields to rte mbuf flags */
> - pkt_flags = rx_desc_status_to_pkt_flags(s[j]);
> + pkt_flags = rx_desc_status_to_pkt_flags(s[j],
> + vlan_strip);
> pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
> pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
> ((uint16_t)pkt_info[j]);
> @@ -1544,6 +1553,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
> uint16_t nb_rx;
> uint16_t nb_hold;
> uint64_t pkt_flags;
> + uint8_t vlan_strip;
>
> nb_rx = 0;
> nb_hold = 0;
> @@ -1551,6 +1561,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
> rx_id = rxq->rx_tail;
> rx_ring = rxq->rx_ring;
> sw_ring = rxq->sw_ring;
> + vlan_strip = rxq->vlan_strip;
> while (nb_rx < nb_pkts) {
> /*
> * The order of operations here is important as the DD status
> @@ -1660,7 +1671,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
> /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
> rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
>
> - pkt_flags = rx_desc_status_to_pkt_flags(staterr);
> + pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_strip);
> pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
> pkt_flags = pkt_flags |
> ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
> @@ -1753,7 +1764,7 @@ ixgbe_fill_cluster_head_buf(
> */
> head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
> pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
> - pkt_flags = rx_desc_status_to_pkt_flags(staterr);
> + pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_strip);
> pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
> pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
> head->ol_flags = pkt_flags;
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
> index 3691a19..9ca0e8b 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> @@ -146,6 +146,7 @@ struct ixgbe_rx_queue {
> uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
> uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
> uint8_t rx_deferred_start; /**< not in global dev start. */
> + uint8_t vlan_strip; /**< 1 if vlan stripping enabled. */
> /** need to alloc dummy mbuf, for wraparound when scanning hw ring */
> struct rte_mbuf fake_mbuf;
> /** hold packets to return to application */
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index 13c8d71..ac96fc9 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -1051,7 +1051,8 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
> pkt_buf->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
> #ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS
> if (flags & IBV_EXP_CQ_RX_CVLAN_STRIPPED_V1) {
> - pkt_buf->ol_flags |= PKT_RX_VLAN_PKT;
> + pkt_buf->ol_flags |= PKT_RX_VLAN_PKT |
> + PKT_RX_VLAN_STRIPPED;
> pkt_buf->vlan_tci = vlan_tci;
> }
> #endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */
> @@ -1207,7 +1208,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
> seg->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
> #ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS
> if (flags & IBV_EXP_CQ_RX_CVLAN_STRIPPED_V1) {
> - seg->ol_flags |= PKT_RX_VLAN_PKT;
> + seg->ol_flags |= PKT_RX_VLAN_PKT |
> + PKT_RX_VLAN_STRIPPED;
> seg->vlan_tci = vlan_tci;
> }
> #endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */
> diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
> index ea5a2a3..5c9f350 100644
> --- a/drivers/net/nfp/nfp_net.c
> +++ b/drivers/net/nfp/nfp_net.c
> @@ -1800,7 +1800,7 @@ nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
> if ((rxds->rxd.flags & PCIE_DESC_RX_VLAN) &&
> (hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) {
> mb->vlan_tci = rte_cpu_to_le_32(rxds->rxd.vlan);
> - mb->ol_flags |= PKT_RX_VLAN_PKT;
> + mb->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
> }
>
> /* Adding the mbuff to the mbuff array passed by the app */
> diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c
> index 9fe8752..ccafc0c 100644
> --- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
> +++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
> @@ -579,7 +579,7 @@ vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
> {
> /* Check for hardware stripped VLAN tag */
> if (rcd->ts) {
> - rxm->ol_flags |= PKT_RX_VLAN_PKT;
> + rxm->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
> rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
> }
>
> diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
> index eec1456..2ece742 100644
> --- a/lib/librte_mbuf/rte_mbuf.c
> +++ b/lib/librte_mbuf/rte_mbuf.c
> @@ -258,8 +258,10 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask)
> /* case PKT_RX_HBUF_OVERFLOW: return "PKT_RX_HBUF_OVERFLOW"; */
> /* case PKT_RX_RECIP_ERR: return "PKT_RX_RECIP_ERR"; */
> /* case PKT_RX_MAC_ERR: return "PKT_RX_MAC_ERR"; */
> + case PKT_RX_VLAN_STRIPPED: return "PKT_RX_VLAN_STRIPPED";
> case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP";
> case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST";
> + case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED";
> default: return NULL;
> }
> }
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index 48911a6..5b8a11a 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -79,7 +79,16 @@ extern "C" {
> * Keep these flags synchronized with rte_get_rx_ol_flag_name() and
> * rte_get_tx_ol_flag_name().
> */
> -#define PKT_RX_VLAN_PKT (1ULL << 0) /**< RX packet is a 802.1q VLAN packet. */
> +
> +/**
> + * Deprecated.
> + * RX packet is a 802.1q VLAN packet. This flag was set by PMDs when
> + * the packet is recognized as a VLAN, but the behavior between PMDs
> + * was not the same. This flag is kept for some time to avoid breaking
> + * applications and should be replaced by PKT_RX_VLAN_STRIPPED.
> + */
> +#define PKT_RX_VLAN_PKT (1ULL << 0)
> +
> #define PKT_RX_RSS_HASH (1ULL << 1) /**< RX packet with RSS hash result. */
> #define PKT_RX_FDIR (1ULL << 2) /**< RX packet with FDIR match indicate. */
> #define PKT_RX_L4_CKSUM_BAD (1ULL << 3) /**< L4 cksum of RX pkt. is not OK. */
> @@ -89,11 +98,37 @@ extern "C" {
> #define PKT_RX_HBUF_OVERFLOW (0ULL << 0) /**< Header buffer overflow. */
> #define PKT_RX_RECIP_ERR (0ULL << 0) /**< Hardware processing error. */
> #define PKT_RX_MAC_ERR (0ULL << 0) /**< MAC error. */
> +
> +/**
> + * A vlan has been stripped by the hardware and its tci is saved in
> + * mbuf->vlan_tci. This can only happen if vlan stripping is enabled
> + * in the RX configuration of the PMD.
> + */
> +#define PKT_RX_VLAN_STRIPPED (1ULL << 6)
> +
> +/* hole, some bits can be reused here */
> +
> #define PKT_RX_IEEE1588_PTP (1ULL << 9) /**< RX IEEE1588 L2 Ethernet PT Packet. */
> #define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/
> #define PKT_RX_FDIR_ID (1ULL << 13) /**< FD id reported if FDIR match. */
> #define PKT_RX_FDIR_FLX (1ULL << 14) /**< Flexible bytes reported if FDIR match. */
> -#define PKT_RX_QINQ_PKT (1ULL << 15) /**< RX packet with double VLAN stripped. */
> +
> +/**
> + * The 2 vlans have been stripped by the hardware and their tci are
> + * saved in mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer).
> + * This can only happen if vlan stripping is enabled in the RX
> + * configuration of the PMD. If this flag is set, PKT_RX_VLAN_STRIPPED
> + * must also be set.
> + */
> +#define PKT_RX_QINQ_STRIPPED (1ULL << 15)
> +
> +/**
> + * Deprecated.
> + * RX packet with double VLAN stripped.
> + * This flag is replaced by PKT_RX_QINQ_STRIPPED.
> + */
> +#define PKT_RX_QINQ_PKT PKT_RX_QINQ_STRIPPED
> +
> /* add new RX flags here */
>
> /* add new TX flags here */
> @@ -761,7 +796,10 @@ struct rte_mbuf {
>
> /*
> * The packet type, which is the combination of outer/inner L2, L3, L4
> - * and tunnel types.
> + * and tunnel types. The packet_type is about data really present in the
> + * mbuf. Example: if vlan stripping is enabled, a received vlan packet
> + * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
> + * vlan is stripped from the data.
> */
> union {
> uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
> @@ -778,7 +816,8 @@ struct rte_mbuf {
>
> uint32_t pkt_len; /**< Total pkt len: sum of all segments. */
> uint16_t data_len; /**< Amount of data in segment buffer. */
> - uint16_t vlan_tci; /**< VLAN Tag Control Identifier (CPU order) */
> + /** VLAN TCI (CPU order), valid if PKT_RX_VLAN_STRIPPED is set. */
> + uint16_t vlan_tci;
>
> union {
> uint32_t rss; /**< RSS hash result if RSS enabled */
> @@ -804,7 +843,8 @@ struct rte_mbuf {
>
> uint32_t seqn; /**< Sequence number. See also rte_reorder_insert() */
>
> - uint16_t vlan_tci_outer; /**< Outer VLAN Tag Control Identifier (CPU order) */
> + /** Outer VLAN TCI (CPU order), valid if PKT_RX_QINQ_STRIPPED is set. */
> + uint16_t vlan_tci_outer;
>
> /* second cache line - fields only used in slow path or on TX */
> MARKER cacheline1 __rte_cache_min_aligned;
> --
> 2.8.0.rc3
^ permalink raw reply [relevance 0%]
* [dpdk-dev] [PATCH] mbuf: new flag when Vlan is stripped
@ 2016-05-23 8:46 2% ` Olivier Matz
2016-05-23 8:59 0% ` Ananyev, Konstantin
` (2 more replies)
0 siblings, 3 replies; 200+ results
From: Olivier Matz @ 2016-05-23 8:46 UTC (permalink / raw)
To: dev
Cc: johndale, konstantin.ananyev, helin.zhang, adrien.mazarguil,
rahul.lakkireddy, alejandro.lucero, sony.chacko
The behavior of PKT_RX_VLAN_PKT was not very well defined, resulting in
PMDs not advertising the same flags in similar conditions.
Following discussion in [1], introduce 2 new flags PKT_RX_VLAN_STRIPPED
and PKT_RX_QINQ_STRIPPED that are better defined:
PKT_RX_VLAN_STRIPPED: a vlan has been stripped by the hardware and its
tci is saved in mbuf->vlan_tci. This can only happen if vlan stripping
is enabled in the RX configuration of the PMD.
For now, the old flag PKT_RX_VLAN_PKT is kept but marked as deprecated.
It should be removed from applications and PMDs in a future revision.
This patch also updates the drivers. For PKT_RX_VLAN_PKT:
- e1000, enic, i40e, mlx5, nfp, vmxnet3: done, PKT_RX_VLAN_PKT already
had the same meaning as PKT_RX_VLAN_STRIPPED, minor update is
required.
- fm10k: done, PKT_RX_VLAN_PKT already had the same meaning as
PKT_RX_VLAN_STRIPPED, and vlan stripping is always enabled on fm10k.
- ixgbe: modification done for standard mode (vector does not support
vlan stripping)
- the other drivers do not support vlan stripping.
For PKT_RX_QINQ_PKT, it was only supported on i40e, and the meaning was
already correct, so we can reuse the same value for PKT_RX_QINQ_STRIPPED.
[1] http://dpdk.org/ml/archives/dev/2016-April/037837.html
Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
---
RFC -> v1:
- fix checkpatch and check-git-log.sh issues
- add a deprecation notice for the old vlan flags
- rebase on head
app/test-pmd/rxonly.c | 4 +--
doc/guides/rel_notes/deprecation.rst | 5 ++++
drivers/net/e1000/em_rxtx.c | 3 ++-
drivers/net/e1000/igb_rxtx.c | 3 ++-
drivers/net/enic/enic_rx.c | 2 +-
drivers/net/i40e/i40e_rxtx.c | 2 +-
drivers/net/ixgbe/ixgbe_ethdev.c | 7 +++++
drivers/net/ixgbe/ixgbe_rxtx.c | 21 +++++++++++----
drivers/net/ixgbe/ixgbe_rxtx.h | 1 +
drivers/net/mlx5/mlx5_rxtx.c | 6 +++--
drivers/net/nfp/nfp_net.c | 2 +-
drivers/net/vmxnet3/vmxnet3_rxtx.c | 2 +-
lib/librte_mbuf/rte_mbuf.c | 2 ++
lib/librte_mbuf/rte_mbuf.h | 50 ++++++++++++++++++++++++++++++++----
14 files changed, 90 insertions(+), 20 deletions(-)
diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c
index 14555ab..c69b344 100644
--- a/app/test-pmd/rxonly.c
+++ b/app/test-pmd/rxonly.c
@@ -156,9 +156,9 @@ pkt_burst_receive(struct fwd_stream *fs)
printf("hash=0x%x ID=0x%x ",
mb->hash.fdir.hash, mb->hash.fdir.id);
}
- if (ol_flags & PKT_RX_VLAN_PKT)
+ if (ol_flags & PKT_RX_VLAN_STRIPPED)
printf(" - VLAN tci=0x%x", mb->vlan_tci);
- if (ol_flags & PKT_RX_QINQ_PKT)
+ if (ol_flags & PKT_RX_QINQ_STRIPPED)
printf(" - QinQ VLAN tci=0x%x, VLAN tci outer=0x%x",
mb->vlan_tci, mb->vlan_tci_outer);
if (mb->packet_type) {
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index ad05eba..2233a90 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -57,3 +57,8 @@ Deprecation Notices
a handle, like the way kernel exposes an fd to user for locating a
specific file, and to keep all major structures internally, so that
we are likely to be free from ABI violations in future.
+
+* The mbuf flags PKT_RX_VLAN_PKT and PKT_RX_QINQ_PKT are deprecated and
+ are respectively replaced by PKT_RX_VLAN_STRIPPED and
+ PKT_RX_QINQ_STRIPPED, that are better described. The old flags and
+ their behavior will be kept in 16.07 and will be removed in 16.11.
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 3d36f21..6d8750a 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -629,7 +629,8 @@ rx_desc_status_to_pkt_flags(uint32_t rx_status)
uint64_t pkt_flags;
/* Check if VLAN present */
- pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0);
+ pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
+ PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED : 0);
return pkt_flags;
}
diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
index 18aeead..9d80a0b 100644
--- a/drivers/net/e1000/igb_rxtx.c
+++ b/drivers/net/e1000/igb_rxtx.c
@@ -729,7 +729,8 @@ rx_desc_status_to_pkt_flags(uint32_t rx_status)
uint64_t pkt_flags;
/* Check if VLAN present */
- pkt_flags = (rx_status & E1000_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0;
+ pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
+ PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED : 0);
#if defined(RTE_LIBRTE_IEEE1588)
if (rx_status & E1000_RXD_STAT_TMST)
diff --git a/drivers/net/enic/enic_rx.c b/drivers/net/enic/enic_rx.c
index f92f6bc..6459e97 100644
--- a/drivers/net/enic/enic_rx.c
+++ b/drivers/net/enic/enic_rx.c
@@ -197,7 +197,7 @@ enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf)
/* VLAN stripping */
if (bwflags & CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) {
- pkt_flags |= PKT_RX_VLAN_PKT;
+ pkt_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
mbuf->vlan_tci = enic_cq_rx_desc_vlan(cqrd);
} else {
mbuf->vlan_tci = 0;
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index c833aa3..aa161a9 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -99,7 +99,7 @@ i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp)
#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
if (rte_le_to_cpu_16(rxdp->wb.qword2.ext_status) &
(1 << I40E_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) {
- mb->ol_flags |= PKT_RX_QINQ_PKT;
+ mb->ol_flags |= PKT_RX_QINQ_STRIPPED;
mb->vlan_tci_outer = mb->vlan_tci;
mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_2);
PMD_RX_LOG(DEBUG, "Descriptor l2tag2_1: %u, l2tag2_2: %u",
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index a2b170b..e7717e3 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -1636,6 +1636,7 @@ ixgbe_vlan_hw_strip_bitmap_set(struct rte_eth_dev *dev, uint16_t queue, bool on)
{
struct ixgbe_hwstrip *hwstrip =
IXGBE_DEV_PRIVATE_TO_HWSTRIP_BITMAP(dev->data->dev_private);
+ struct ixgbe_rx_queue *rxq;
if (queue >= IXGBE_MAX_RX_QUEUE_NUM)
return;
@@ -1644,6 +1645,12 @@ ixgbe_vlan_hw_strip_bitmap_set(struct rte_eth_dev *dev, uint16_t queue, bool on)
IXGBE_SET_HWSTRIP(hwstrip, queue);
else
IXGBE_CLEAR_HWSTRIP(hwstrip, queue);
+
+ if (queue >= dev->data->nb_rx_queues)
+ return;
+
+ rxq = dev->data->rx_queues[queue];
+ rxq->vlan_strip = on;
}
static void
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 9c6eaf2..3d740df 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -1221,16 +1221,23 @@ ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
}
static inline uint64_t
-rx_desc_status_to_pkt_flags(uint32_t rx_status)
+rx_desc_status_to_pkt_flags(uint32_t rx_status, uint8_t vlan_strip)
{
uint64_t pkt_flags;
+ uint64_t vlan_flags;
+
+ /* if vlan is stripped, set the proper flag */
+ if (vlan_strip)
+ vlan_flags = PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
+ else
+ vlan_flags = PKT_RX_VLAN_PKT;
/*
* Check if VLAN present only.
* Do not check whether L3/L4 rx checksum done by NIC or not,
* That can be found from rte_eth_rxmode.hw_ip_checksum flag
*/
- pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0;
+ pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ? vlan_flags : 0;
#ifdef RTE_LIBRTE_IEEE1588
if (rx_status & IXGBE_RXD_STAT_TMST)
@@ -1287,6 +1294,7 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
uint32_t pkt_info[LOOK_AHEAD];
int i, j, nb_rx = 0;
uint32_t status;
+ uint8_t vlan_strip = rxq->vlan_strip;
/* get references to current descriptor and S/W ring entry */
rxdp = &rxq->rx_ring[rxq->rx_tail];
@@ -1328,7 +1336,8 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
/* convert descriptor fields to rte mbuf flags */
- pkt_flags = rx_desc_status_to_pkt_flags(s[j]);
+ pkt_flags = rx_desc_status_to_pkt_flags(s[j],
+ vlan_strip);
pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
((uint16_t)pkt_info[j]);
@@ -1544,6 +1553,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_rx;
uint16_t nb_hold;
uint64_t pkt_flags;
+ uint8_t vlan_strip;
nb_rx = 0;
nb_hold = 0;
@@ -1551,6 +1561,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
rx_id = rxq->rx_tail;
rx_ring = rxq->rx_ring;
sw_ring = rxq->sw_ring;
+ vlan_strip = rxq->vlan_strip;
while (nb_rx < nb_pkts) {
/*
* The order of operations here is important as the DD status
@@ -1660,7 +1671,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
/* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
- pkt_flags = rx_desc_status_to_pkt_flags(staterr);
+ pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_strip);
pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
pkt_flags = pkt_flags |
ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
@@ -1753,7 +1764,7 @@ ixgbe_fill_cluster_head_buf(
*/
head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
- pkt_flags = rx_desc_status_to_pkt_flags(staterr);
+ pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_strip);
pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
head->ol_flags = pkt_flags;
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
index 3691a19..9ca0e8b 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx.h
@@ -146,6 +146,7 @@ struct ixgbe_rx_queue {
uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
uint8_t rx_deferred_start; /**< not in global dev start. */
+ uint8_t vlan_strip; /**< 1 if vlan stripping enabled. */
/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
struct rte_mbuf fake_mbuf;
/** hold packets to return to application */
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 13c8d71..ac96fc9 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1051,7 +1051,8 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
pkt_buf->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
#ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS
if (flags & IBV_EXP_CQ_RX_CVLAN_STRIPPED_V1) {
- pkt_buf->ol_flags |= PKT_RX_VLAN_PKT;
+ pkt_buf->ol_flags |= PKT_RX_VLAN_PKT |
+ PKT_RX_VLAN_STRIPPED;
pkt_buf->vlan_tci = vlan_tci;
}
#endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */
@@ -1207,7 +1208,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
seg->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
#ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS
if (flags & IBV_EXP_CQ_RX_CVLAN_STRIPPED_V1) {
- seg->ol_flags |= PKT_RX_VLAN_PKT;
+ seg->ol_flags |= PKT_RX_VLAN_PKT |
+ PKT_RX_VLAN_STRIPPED;
seg->vlan_tci = vlan_tci;
}
#endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */
diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
index ea5a2a3..5c9f350 100644
--- a/drivers/net/nfp/nfp_net.c
+++ b/drivers/net/nfp/nfp_net.c
@@ -1800,7 +1800,7 @@ nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
if ((rxds->rxd.flags & PCIE_DESC_RX_VLAN) &&
(hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) {
mb->vlan_tci = rte_cpu_to_le_32(rxds->rxd.vlan);
- mb->ol_flags |= PKT_RX_VLAN_PKT;
+ mb->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
}
/* Adding the mbuff to the mbuff array passed by the app */
diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c
index 9fe8752..ccafc0c 100644
--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
@@ -579,7 +579,7 @@ vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
/* Check for hardware stripped VLAN tag */
if (rcd->ts) {
- rxm->ol_flags |= PKT_RX_VLAN_PKT;
+ rxm->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
}
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index eec1456..2ece742 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -258,8 +258,10 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask)
/* case PKT_RX_HBUF_OVERFLOW: return "PKT_RX_HBUF_OVERFLOW"; */
/* case PKT_RX_RECIP_ERR: return "PKT_RX_RECIP_ERR"; */
/* case PKT_RX_MAC_ERR: return "PKT_RX_MAC_ERR"; */
+ case PKT_RX_VLAN_STRIPPED: return "PKT_RX_VLAN_STRIPPED";
case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP";
case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST";
+ case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED";
default: return NULL;
}
}
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 48911a6..5b8a11a 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -79,7 +79,16 @@ extern "C" {
* Keep these flags synchronized with rte_get_rx_ol_flag_name() and
* rte_get_tx_ol_flag_name().
*/
-#define PKT_RX_VLAN_PKT (1ULL << 0) /**< RX packet is a 802.1q VLAN packet. */
+
+/**
+ * Deprecated.
+ * RX packet is a 802.1q VLAN packet. This flag was set by PMDs when
+ * the packet is recognized as a VLAN, but the behavior between PMDs
+ * was not the same. This flag is kept for some time to avoid breaking
+ * applications and should be replaced by PKT_RX_VLAN_STRIPPED.
+ */
+#define PKT_RX_VLAN_PKT (1ULL << 0)
+
#define PKT_RX_RSS_HASH (1ULL << 1) /**< RX packet with RSS hash result. */
#define PKT_RX_FDIR (1ULL << 2) /**< RX packet with FDIR match indicate. */
#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) /**< L4 cksum of RX pkt. is not OK. */
@@ -89,11 +98,37 @@ extern "C" {
#define PKT_RX_HBUF_OVERFLOW (0ULL << 0) /**< Header buffer overflow. */
#define PKT_RX_RECIP_ERR (0ULL << 0) /**< Hardware processing error. */
#define PKT_RX_MAC_ERR (0ULL << 0) /**< MAC error. */
+
+/**
+ * A vlan has been stripped by the hardware and its tci is saved in
+ * mbuf->vlan_tci. This can only happen if vlan stripping is enabled
+ * in the RX configuration of the PMD.
+ */
+#define PKT_RX_VLAN_STRIPPED (1ULL << 6)
+
+/* hole, some bits can be reused here */
+
#define PKT_RX_IEEE1588_PTP (1ULL << 9) /**< RX IEEE1588 L2 Ethernet PT Packet. */
#define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/
#define PKT_RX_FDIR_ID (1ULL << 13) /**< FD id reported if FDIR match. */
#define PKT_RX_FDIR_FLX (1ULL << 14) /**< Flexible bytes reported if FDIR match. */
-#define PKT_RX_QINQ_PKT (1ULL << 15) /**< RX packet with double VLAN stripped. */
+
+/**
+ * The 2 vlans have been stripped by the hardware and their tci are
+ * saved in mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer).
+ * This can only happen if vlan stripping is enabled in the RX
+ * configuration of the PMD. If this flag is set, PKT_RX_VLAN_STRIPPED
+ * must also be set.
+ */
+#define PKT_RX_QINQ_STRIPPED (1ULL << 15)
+
+/**
+ * Deprecated.
+ * RX packet with double VLAN stripped.
+ * This flag is replaced by PKT_RX_QINQ_STRIPPED.
+ */
+#define PKT_RX_QINQ_PKT PKT_RX_QINQ_STRIPPED
+
/* add new RX flags here */
/* add new TX flags here */
@@ -761,7 +796,10 @@ struct rte_mbuf {
/*
* The packet type, which is the combination of outer/inner L2, L3, L4
- * and tunnel types.
+ * and tunnel types. The packet_type is about data really present in the
+ * mbuf. Example: if vlan stripping is enabled, a received vlan packet
+ * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
+ * vlan is stripped from the data.
*/
union {
uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
@@ -778,7 +816,8 @@ struct rte_mbuf {
uint32_t pkt_len; /**< Total pkt len: sum of all segments. */
uint16_t data_len; /**< Amount of data in segment buffer. */
- uint16_t vlan_tci; /**< VLAN Tag Control Identifier (CPU order) */
+ /** VLAN TCI (CPU order), valid if PKT_RX_VLAN_STRIPPED is set. */
+ uint16_t vlan_tci;
union {
uint32_t rss; /**< RSS hash result if RSS enabled */
@@ -804,7 +843,8 @@ struct rte_mbuf {
uint32_t seqn; /**< Sequence number. See also rte_reorder_insert() */
- uint16_t vlan_tci_outer; /**< Outer VLAN Tag Control Identifier (CPU order) */
+ /** Outer VLAN TCI (CPU order), valid if PKT_RX_QINQ_STRIPPED is set. */
+ uint16_t vlan_tci_outer;
/* second cache line - fields only used in slow path or on TX */
MARKER cacheline1 __rte_cache_min_aligned;
--
2.8.0.rc3
^ permalink raw reply [relevance 2%]
* Re: [dpdk-dev] Suggestions for the dpdk stable tree
@ 2016-05-23 2:21 3% ` Yuanhan Liu
2016-06-01 19:01 0% ` Mcnamara, John
0 siblings, 1 reply; 200+ results
From: Yuanhan Liu @ 2016-05-23 2:21 UTC (permalink / raw)
To: Mcnamara, John
Cc: Christian Ehrhardt, dev, Stephen Hemminger, Thomas Monjalon
On Fri, May 20, 2016 at 02:49:31PM +0000, Mcnamara, John wrote:
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Christian Ehrhardt
> > Sent: Friday, May 20, 2016 9:07 AM
> > To: dev <dev@dpdk.org>; Stephen Hemminger <stephen@networkplumber.org>
> > Subject: Re: [dpdk-dev] Suggestions for the dpdk stable tree
> >
> > Hi,
> > I guess over time/releases less people mind the 2.2-stable.
> > But I still see a lot of people referring to 2.2 - so why not giving this
> > thread a ping again.
> >
> > ack / nack / opinions ?
>
> Hi Christian,
>
> We are interested in having a LTS/Stable tree.
I didn't notice this thread, otherwise, I would have commented earlier:
TBH, I have also thought of LTS tree few months before. But I was thinking,
hmm, it's just a library, what's the big deal of maintaining a stable
tree for it. I then hide it deep inside of my mind, silently.
> We have been looking at identifying a maintainer and validation engineer internally to support the effort but haven't be able to finalize that. Once we do we will come back to the mailing list with a proposal and a request for comments.
I would nominate myself as the LTS tree maintainer, if it makes sense
to have one.
> We would probably be looking at 16.04 or even 16.07 as the basis for the LTS at this stage.
Just one opinion from the view of vhost: since 16.07 is a vhost ABI/API
refactoring release, I'd suggest to base on 16.07, and then we could
have less conflicts to apply later bug fix patches.
However, I'm very open to choose any others as the base, say, even v2.2.
--yliu
> It would be great if we could get support from you or others as well.
>
> John.
> --
>
^ permalink raw reply [relevance 3%]
* [dpdk-dev] [PATCH v4] i40e: configure MTU
@ 2016-05-20 15:17 4% ` Beilei Xing
0 siblings, 0 replies; 200+ results
From: Beilei Xing @ 2016-05-20 15:17 UTC (permalink / raw)
To: jingjing.wu; +Cc: dev, Beilei Xing
This patch enables configuring MTU for i40e.
Since changing MTU needs to reconfigure queue, stop port first
before configuring MTU.
Signed-off-by: Beilei Xing <beilei.xing@intel.com>
---
v4 changes:
Revert v2 change, if the port is running, return -EBUSY.
doc/guides/rel_notes/release_16_07.rst | 3 +++
drivers/net/i40e/i40e_ethdev.c | 34 ++++++++++++++++++++++++++++++++++
lib/librte_ether/rte_ethdev.h | 1 +
3 files changed, 38 insertions(+)
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 30e78d4..4b1c176 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -116,6 +116,9 @@ API Changes
ibadcrc, ibadlen, imcasts, fdirmatch, fdirmiss,
tx_pause_xon, rx_pause_xon, tx_pause_xoff, rx_pause_xoff.
+* The function ``rte_eth_dev_set_mtu`` adds a new return value ``-EBUSY``, which
+ indicates the operation is forbidden because the port is running.
+
ABI Changes
-----------
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 24777d5..ffccaae 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -447,6 +447,8 @@ static int i40e_get_eeprom(struct rte_eth_dev *dev,
static void i40e_set_default_mac_addr(struct rte_eth_dev *dev,
struct ether_addr *mac_addr);
+static int i40e_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
+
static const struct rte_pci_id pci_id_i40e_map[] = {
#define RTE_PCI_DEV_ID_DECL_I40E(vend, dev) {RTE_PCI_DEVICE(vend, dev)},
#include "rte_pci_dev_ids.h"
@@ -520,6 +522,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = {
.get_eeprom_length = i40e_get_eeprom_length,
.get_eeprom = i40e_get_eeprom,
.mac_addr_set = i40e_set_default_mac_addr,
+ .mtu_set = i40e_dev_mtu_set,
};
/* store statistics names and its offset in stats structure */
@@ -9108,3 +9111,34 @@ static void i40e_set_default_mac_addr(struct rte_eth_dev *dev,
/* Flags: 0x3 updates port address */
i40e_aq_mac_address_write(hw, 0x3, mac_addr->addr_bytes, NULL);
}
+
+static int
+i40e_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+ struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+ struct rte_eth_dev_data *dev_data = pf->dev_data;
+ uint32_t frame_size = mtu + ETHER_HDR_LEN
+ + ETHER_CRC_LEN + I40E_VLAN_TAG_SIZE;
+ int ret = 0;
+
+ /* check if mtu is within the allowed range */
+ if ((mtu < ETHER_MIN_MTU) || (frame_size > I40E_FRAME_SIZE_MAX))
+ return -EINVAL;
+
+ /* mtu setting is forbidden if port is start */
+ if (dev_data->dev_started) {
+ PMD_DRV_LOG(ERR,
+ "port %d must be stopped before configuration\n",
+ dev_data->port_id);
+ return -EBUSY;
+ }
+
+ if (frame_size > ETHER_MAX_LEN)
+ dev_data->dev_conf.rxmode.jumbo_frame = 1;
+ else
+ dev_data->dev_conf.rxmode.jumbo_frame = 0;
+
+ dev_data->dev_conf.rxmode.max_rx_pkt_len = frame_size;
+
+ return ret;
+}
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 2757510..a8d9963 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -2398,6 +2398,7 @@ int rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu);
* - (-ENOTSUP) if operation is not supported.
* - (-ENODEV) if *port_id* invalid.
* - (-EINVAL) if *mtu* invalid.
+ * - (-EBUSY) if operation is not allowed when the port is running
*/
int rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu);
--
2.5.0
^ permalink raw reply [relevance 4%]
* [dpdk-dev] [PATCH v2] doc: fix code section in abi versioning doc
@ 2016-05-20 14:08 13% John McNamara
0 siblings, 0 replies; 200+ results
From: John McNamara @ 2016-05-20 14:08 UTC (permalink / raw)
To: dev; +Cc: John McNamara
Fix broken console directive in the ABI validator section of the
ABI versioning docs.
Fixes: f1ef9794f9bd ("doc: add ABI guidelines")
Signed-off-by: John McNamara <john.mcnamara@intel.com>
---
v2: Added fixline.
doc/guides/contributing/versioning.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/doc/guides/contributing/versioning.rst b/doc/guides/contributing/versioning.rst
index ae10a98..92b4d7c 100644
--- a/doc/guides/contributing/versioning.rst
+++ b/doc/guides/contributing/versioning.rst
@@ -475,7 +475,7 @@ Where ``REV1`` and ``REV2`` are valid gitrevisions(7)
https://www.kernel.org/pub/software/scm/git/docs/gitrevisions.html
on the local repo and target is the usual DPDK compilation target.
-For example:
+For example::
# Check between the previous and latest commit:
./scripts/validate-abi.sh HEAD~1 HEAD x86_64-native-linuxapp-gcc
--
2.5.0
^ permalink raw reply [relevance 13%]
* [dpdk-dev] [PATCH v1] doc: fix code section in abi versioning doc
@ 2016-05-20 13:51 13% John McNamara
0 siblings, 0 replies; 200+ results
From: John McNamara @ 2016-05-20 13:51 UTC (permalink / raw)
To: dev; +Cc: John McNamara
Fix broken console directive in the ABI validator section of the
ABI versioning docs.
Signed-off-by: John McNamara <john.mcnamara@intel.com>
---
doc/guides/contributing/versioning.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/doc/guides/contributing/versioning.rst b/doc/guides/contributing/versioning.rst
index ae10a98..92b4d7c 100644
--- a/doc/guides/contributing/versioning.rst
+++ b/doc/guides/contributing/versioning.rst
@@ -475,7 +475,7 @@ Where ``REV1`` and ``REV2`` are valid gitrevisions(7)
https://www.kernel.org/pub/software/scm/git/docs/gitrevisions.html
on the local repo and target is the usual DPDK compilation target.
-For example:
+For example::
# Check between the previous and latest commit:
./scripts/validate-abi.sh HEAD~1 HEAD x86_64-native-linuxapp-gcc
--
2.5.0
^ permalink raw reply [relevance 13%]
* Re: [dpdk-dev] [PATCH v3 00/35] mempool: rework memory allocation
2016-05-19 12:47 0% ` [dpdk-dev] [PATCH v3 00/35] mempool: rework memory allocation Thomas Monjalon
@ 2016-05-20 8:42 0% ` Panu Matilainen
0 siblings, 0 replies; 200+ results
From: Panu Matilainen @ 2016-05-20 8:42 UTC (permalink / raw)
To: Thomas Monjalon, Olivier Matz; +Cc: dev, bruce.richardson, stephen, keith.wiles
On 05/19/2016 03:47 PM, Thomas Monjalon wrote:
> 2016-05-18 13:04, Olivier Matz:
>> This series is a rework of mempool. For those who don't want to read
>> all the cover letter, here is a summary:
>>
>> - it is not possible to allocate large mempools if there is not enough
>> contiguous memory, this series solves this issue
>> - introduce new APIs with less arguments: "create, populate, obj_init"
>> - allow to free a mempool
>> - split code in smaller functions, will ease the introduction of ext_handler
>> - remove test-pmd anonymous mempool creation
>> - remove most of dom0-specific mempool code
>> - opens the door for a eal_memory rework: we probably don't need large
>> contiguous memory area anymore, working with pages would work.
>>
>> This breaks the ABI as it was indicated in the deprecation for 16.04.
>> The API stays almost the same, no modification is needed in examples app
>> or in test-pmd. Only kni and mellanox drivers are slightly modified.
>
> Applied with a small change you sent me to fix mlx build in the middle of the patchset
> and update the removed Xen files in MAINTAINERS file.
>
> Thanks for the big rework!
>
Just noticed this series "breaks" --no-huge as a regular user, commit
593a084afc2b to be exact:
mmap(NULL, 4194304, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_LOCKED, 0, 0) = -1 EAGAIN (Resource
temporarily unavailable)
write(1, "EAL: rte_eal_hugepage_init: mmap"..., 76EAL:
rte_eal_hugepage_init: mmap() failed: Resource temporarily unavailable
"Breaks" in quotes because I guess it always was broken (as the
non-locked pages might not be in physical memory) and because it's
possible to adjust resource limits to allow the operation to succeed.
If you're root, that is.
I was just looking into making the test-suite runnable by a regular user
with no special privileges, primarily to make it possible to run the
testsuite as part of rpm package builds (in %check), and no special
setup or extra privileges can be assumed there. Such tests are of course
of limited coverage but still better than nothing, and --no-huge was my
ticket there. Talk about bad timing :)
It'd be fine to have limited subset of tests to run when non-privileged
but since this one lives inside rte_eal_init() it practically prevents
everything, unless I'm missing some other magic switch or such. Thoughts?
- Panu -
^ permalink raw reply [relevance 0%]
* [dpdk-dev] [PATCH v2] doc: announce ABI change of struct rte_port_source_params and rte_port_sink_params
2016-05-16 13:18 9% [dpdk-dev] [PATCH 0/2] doc: announce ABI change of struct rte_port_source_params Fan Zhang
2016-05-16 13:18 18% ` [dpdk-dev] [PATCH 1/2] " Fan Zhang
2016-05-16 13:18 18% ` [dpdk-dev] [PATCH 2/2] doc: announce ABI change of struct rte_port_sink_params Fan Zhang
@ 2016-05-19 14:18 20% ` Fan Zhang
2 siblings, 0 replies; 200+ results
From: Fan Zhang @ 2016-05-19 14:18 UTC (permalink / raw)
To: dev
The ABI changes are planned for rte_port_source_params and
rte_port_sink_params, which will be supported from release 16.11. Here
announces that ABI changes in detail.
Signed-off-by: Fan Zhang <roy.fan.zhang@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
doc/guides/rel_notes/deprecation.rst | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index fffe9c7..4f3fefe 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -74,3 +74,11 @@ Deprecation Notices
a handle, like the way kernel exposes an fd to user for locating a
specific file, and to keep all major structures internally, so that
we are likely to be free from ABI violations in future.
+
+* ABI will change for rte_port_source_params struct. The member file_name
+ data type will be changed from char * to const char *. This change targets
+ release 16.11
+
+* ABI will change for rte_port_sink_params struct. The member file_name
+ data type will be changed from char * to const char *. This change targets
+ release 16.11
--
2.5.5
^ permalink raw reply [relevance 20%]
* [dpdk-dev] mempool: external mempool manager
2016-04-14 13:57 2% ` [dpdk-dev] [PATCH v4 0/3] " Olivier Matz
@ 2016-05-19 13:44 2% ` David Hunt
2016-06-01 16:19 2% ` [dpdk-dev] [PATCH v6 0/5] mempool: add external mempool manager David Hunt
0 siblings, 2 replies; 200+ results
From: David Hunt @ 2016-05-19 13:44 UTC (permalink / raw)
To: dev; +Cc: olivier.matz, yuanhan.liu, pmatilai
Here's the latest version of the External Mempool Manager patchset.
It's re-based on top of the latest head as of 19/5/2016, including
Olivier's 35-part patch series on mempool re-org [1]
[1] http://dpdk.org/ml/archives/dev/2016-May/039229.html
v5 changes:
* rebasing, as it is dependent on another patch series [1]
v4 changes (Olivier Matz):
* remove the rte_mempool_create_ext() function. To change the handler, the
user has to do the following:
- mp = rte_mempool_create_empty()
- rte_mempool_set_handler(mp, "my_handler")
- rte_mempool_populate_default(mp)
This avoids adding another function with more than 10 arguments, duplicating
the doxygen comments
* change the api of rte_mempool_alloc_t: only the mempool pointer is required
as all information is available in it
* change the api of rte_mempool_free_t: remove return value
* move inline wrapper functions from the .c to the .h (else they won't be
inlined). This implies to have one header file (rte_mempool.h), or it
would have generate cross dependencies issues.
* remove now unused MEMPOOL_F_INT_HANDLER (note: it was misused anyway due
to the use of && instead of &)
* fix build in debug mode (__MEMPOOL_STAT_ADD(mp, put_pool, n) remaining)
* fix build with shared libraries (global handler has to be declared in
the .map file)
* rationalize #include order
* remove unused function rte_mempool_get_handler_name()
* rename some structures, fields, functions
* remove the static in front of rte_tailq_elem rte_mempool_tailq (comment
from Yuanhan)
* test the ext mempool handler in the same file than standard mempool tests,
avoiding to duplicate the code
* rework the custom handler in mempool_test
* rework a bit the patch selecting default mbuf pool handler
* fix some doxygen comments
v3 changes:
* simplified the file layout, renamed to rte_mempool_handler.[hc]
* moved the default handlers into rte_mempool_default.c
* moved the example handler out into app/test/test_ext_mempool.c
* removed is_mc/is_mp change, slight perf degradation on sp cached operation
* removed stack handler, may re-introduce at a later date
* Changes out of code reviews
v2 changes:
* There was a lot of duplicate code between rte_mempool_xmem_create and
rte_mempool_create_ext. This has now been refactored and is now
hopefully cleaner.
* The RTE_NEXT_ABI define is now used to allow building of the library
in a format that is compatible with binaries built against previous
versions of DPDK.
* Changes out of code reviews. Hopefully I've got most of them included.
The External Mempool Manager is an extension to the mempool API that allows
users to add and use an external mempool manager, which allows external memory
subsystems such as external hardware memory management systems and software
based memory allocators to be used with DPDK.
The existing API to the internal DPDK mempool manager will remain unchanged
and will be backward compatible. However, there will be an ABI breakage, as
the mempool struct is changing. These changes are all contained within
RTE_NEXT_ABI defs, and the current or next code can be changed with
the CONFIG_RTE_NEXT_ABI config setting
There are two aspects to external mempool manager.
1. Adding the code for your new mempool handler. This is achieved by adding a
new mempool handler source file into the librte_mempool library, and
using the REGISTER_MEMPOOL_HANDLER macro.
2. Using the new API to call rte_mempool_create_empty and
rte_mempool_set_handler to create a new mempool
using the name parameter to identify which handler to use.
New API calls added
1. A new rte_mempool_create_empty() function
2. rte_mempool_set_handler() which sets the mempool's handler
3. An rte_mempool_populate_default() and rte_mempool_populate_anon() functions
which populates the mempool using the relevant handler
Several external mempool managers may be used in the same application. A new
mempool can then be created by using the new 'create' function, providing the
mempool handler name to point the mempool to the relevant mempool manager
callback structure.
The old 'create' function can still be called by legacy programs, and will
internally work out the mempool handle based on the flags provided (single
producer, single consumer, etc). By default handles are created internally to
implement the built-in DPDK mempool manager and mempool types.
The external mempool manager needs to provide the following functions.
1. alloc - allocates the mempool memory, and adds each object onto a ring
2. put - puts an object back into the mempool once an application has
finished with it
3. get - gets an object from the mempool for use by the application
4. get_count - gets the number of available objects in the mempool
5. free - frees the mempool memory
Every time a get/put/get_count is called from the application/PMD, the
callback for that mempool is called. These functions are in the fastpath,
and any unoptimised handlers may limit performance.
The new APIs are as follows:
1. rte_mempool_create_empty
struct rte_mempool *
rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
unsigned cache_size, unsigned private_data_size,
int socket_id, unsigned flags);
2. rte_mempool_set_handler()
int
rte_mempool_set_handler(struct rte_mempool *mp, const char *name);
3. rte_mempool_populate_default()
int rte_mempool_populate_default(struct rte_mempool *mp);
4. rte_mempool_populate_anon()
int rte_mempool_populate_anon(struct rte_mempool *mp);
Please see rte_mempool.h for further information on the parameters.
The important thing to note is that the mempool handler is passed by name
to rte_mempool_set_handler, which looks through the handler array to
get the handler index, which is then stored in the rte_mempool structure. This
allows multiple processes to use the same mempool, as the function pointers
are accessed via handler index.
The mempool handler structure contains callbacks to the implementation of
the handler, and is set up for registration as follows:
static const struct rte_mempool_handler handler_sp_mc = {
.name = "ring_sp_mc",
.alloc = rte_mempool_common_ring_alloc,
.put = common_ring_sp_put,
.get = common_ring_mc_get,
.get_count = common_ring_get_count,
.free = common_ring_free,
};
And then the following macro will register the handler in the array of handlers
REGISTER_MEMPOOL_HANDLER(handler_sp_mc);
For an example of a simple malloc based mempool manager, see
lib/librte_mempool/custom_mempool.c
For an example of API usage, please see app/test/test_mempool.c, which
implements a rudimentary "custom_handler" mempool manager using simple mallocs
for each mempool object. This file also contains the callbacks and self
registration for the new handler.
David Hunt (2):
mempool: support external handler
mbuf: get default mempool handler from configuration
Olivier Matz (1):
app/test: test external mempool handler
^ permalink raw reply [relevance 2%]
* Re: [dpdk-dev] [PATCH] mbuf: make rearm_data address naturally aligned
2016-05-19 12:18 0% ` Ananyev, Konstantin
@ 2016-05-19 13:35 0% ` Jerin Jacob
0 siblings, 0 replies; 200+ results
From: Jerin Jacob @ 2016-05-19 13:35 UTC (permalink / raw)
To: Ananyev, Konstantin
Cc: Richardson, Bruce, dev, thomas.monjalon, viktorin, jianbo.liu
On Thu, May 19, 2016 at 12:18:57PM +0000, Ananyev, Konstantin wrote:
>
> Hi everyone,
>
> > On Thu, May 19, 2016 at 12:20:16AM +0530, Jerin Jacob wrote:
> > > On Wed, May 18, 2016 at 05:43:00PM +0100, Bruce Richardson wrote:
> > > > On Wed, May 18, 2016 at 07:27:43PM +0530, Jerin Jacob wrote:
> > > > > To avoid multiple stores on fast path, Ethernet drivers
> > > > > aggregate the writes to data_off, refcnt, nb_segs and port
> > > > > to an uint64_t data and write the data in one shot
> > > > > with uint64_t* at &mbuf->rearm_data address.
> > > > >
> > > > > Some of the non-IA platforms have store operation overhead
> > > > > if the store address is not naturally aligned.This patch
> > > > > fixes the performance issue on those targets.
> > > > >
> > > > > Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> > > > > ---
> > > > >
> > > > > Tested this patch on IA and non-IA(ThunderX) platforms.
> > > > > This patch shows 400Kpps/core improvement on ThunderX + ixgbe + vector environment.
> > > > > and this patch does not have any overhead on IA platform.
> > > > >
> > > > > Have tried an another similar approach by replacing "buf_len" with "pad"
> > > > > (in this patch context),
> > > > > Since it has additional overhead on read and then mask to keep "buf_len" intact,
> > > > > > not much improvement is shown.
> > > > > ref: http://dpdk.org/ml/archives/dev/2016-May/038914.html
> > > > >
> > > > > ---
> > > > While this will work and from your tests doesn't seem to have a performance
> > > > impact, I'm not sure I particularly like it. It's extending out the end of
> > > > cacheline0 of the mbuf by 16 bytes, though I suppose it's not technically using
> > > > up any more space of it.
> > >
> > > Extending by 2 bytes. Right ?. Yes, I guess, Now we using only 56 out of 64 bytes
> > > in the first 64-byte cache line.
> > >
> > > >
> > > > What I'm wondering about though, is do we have any usecases where we need a
> > > > variable buf_len for packets for RX. These mbufs come directly from a mempool,
> > > > which is generally understood to be a set of fixed-sized buffers. I realise that
> > > > this change was made in the past after some discussion, but one of the key points
> > > > there [at least to my reading] was that - even though nobody actually made a
> > > > concrete case where they had variable-sized buffers - having support for them
> > > > made no performance difference.
>
> I was going to point to vhost zcp support, but as Thomas pointed out
> that functionality was removed from dpdk.org recently.
> So I am not aware does such case exist right now in the 'real world' or not.
> Though I still think RX function should leave buf_len field intact.
>
> > > >
> > > > The latter part of that has now changed, and supporting variable-sized mbufs
> > > > from an mbuf pool has a perf impact. Do we definitely need that functionality,
> > > > because the easiest fix here is just to move the rxrearm marker back above
> > > > mbuf_len as it was originally in releases like 1.8?
> > >
> > > And initialize the buf_len with mp->elt_size - sizeof(struct rte_mbuf).
> > > Right?
> > >
> > > I don't have a strong opinion on this, I can do this if there is no
> > > objection on this. Let me know.
> > >
> > > However, I do see in future, "buf_len" may belong at the end of the first 64 byte
> > > cache line as currently "port" is defined as uint8_t, IMO, that is less.
> > > We may need to increase that uint16_t. The reason why I think that
> > > because, Currently in ThunderX HW, we do have 128VFs per socket for
> > > built-in NIC, So, the two node configuration and one external PCIe NW card
> > > configuration can easily go beyond 256 ports.
>
> I wonder does anyone really use mbuf port field?
> My thought was - could we drop it completely?
> Actually, after discussing it with Bruce offline, an interesting idea came out:
> if we'll drop port and make mbuf_prefree() to reset nb_segs=1, then
> we can reduce RX rearm_data to 4B. So with that layout:
>
> struct rte_mbuf {
>
> MARKER cacheline0;
>
> void *buf_addr;
> phys_addr_t buf_physaddr;
> uint16_t buf_len;
> uint8_t nb_segs;
> uint8_t reserved_1byte; /* former port */
>
> MARKER32 rearm_data;
> uint16_t data_off;
> uint16_t refcnt;
>
> uint64_t ol_flags;
> ...
>
> We can keep buf_len at its place and avoid 2B gap, while making rearm_data
> 4B long and 4B aligned.
Couple of comments,
- IMO, It is good if nb_segs can move under rearm_data, as some
drivers(not in ixgbe may be) can write nb_segs in one shot also
in segmented rx handler case
- I think, it makes sense to keep port in mbuf so that application
can make use of it(Not sure what real application developers think of
this)
- if Writing 4B and 8B consume same cycles(at least in arm64) then I think it
makes sense to make it as 8B wide with maximum pre-built constants are possible.
>
> Another similar alternative, is to make mbuf_prefree() to set refcnt=1
> (as it update it anyway). Then we can remove refcnt from the RX rearm_data,
> and again make rearm_data 4B long and 4B aligned:
>
> struct rte_mbuf {
>
> MARKER cacheline0;
>
> void *buf_addr;
> phys_addr_t buf_physaddr;
> uint16_t buf_len;
> uint16_t refcnt;
>
> MARKER32 rearm_data;
> uint16_t data_off;
> uint8_t nb_segs;
> uint8_t port;
The only problem I think with this approach is that, port data type cannot be
extended to uint16_t in future.
>
> uint64_t ol_flags;
> ..
>
> As additional plus, __rte_mbuf_raw_alloc() wouldn't need to modify mbuf contents at all -
> which probably is a good thing.
> As a drawback - we'll have a free mbufs in pool with refcnt==1, which probably reduce
> debug ability of the mbuf code.
>
> Konstantin
>
> > >
> > Ok, good point. If you think it's needed, and if we are changing the mbuf
> > structure, it might be a good time to extend that field while you are at it, save
> > a second ABI break later on.
> >
> > /Bruce
> >
> > > >
> > > > Regards,
> > > > /Bruce
> > > >
> > > > Ref: http://dpdk.org/ml/archives/dev/2014-December/009432.html
> > > >
^ permalink raw reply [relevance 0%]
* [dpdk-dev] [PATCH v3] ci: Add the class_id support in pci probe
2016-05-19 12:25 7% ` [dpdk-dev] [PATCH v2] ci: " Ziye Yang
@ 2016-05-19 13:17 7% ` Ziye Yang
2016-05-24 12:50 7% ` [dpdk-dev] [PATCH v4] Pci: Add the class_id support Ziye Yang
0 siblings, 1 reply; 200+ results
From: Ziye Yang @ 2016-05-19 13:17 UTC (permalink / raw)
To: dev
This patch is used to add the class_id (class_code,
subclass_code, programming_interface) support for
pci_device probe. With this patch, it will be
flexible for users to probe a class of devices
by class_id.
Signed-off-by: Ziye Yang <ziye.yang@intel.com>
---
doc/guides/rel_notes/deprecation.rst | 6 ------
lib/librte_eal/bsdapp/eal/eal_pci.c | 5 +++++
lib/librte_eal/common/eal_common_pci.c | 3 +++
lib/librte_eal/common/include/rte_pci.h | 8 ++++++--
lib/librte_eal/linuxapp/eal/eal_pci.c | 10 ++++++++++
5 files changed, 24 insertions(+), 8 deletions(-)
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index 7d94ba5..28f9c61 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -20,12 +20,6 @@ Deprecation Notices
do not need to care about the kind of devices that are being used, making it
easier to add new buses later.
-* ABI changes are planned for struct rte_pci_id, i.e., add new field ``class``.
- This new added ``class`` field can be used to probe pci device by class
- related info. This change should impact size of struct rte_pci_id and struct
- rte_pci_device. The release 16.04 does not contain these ABI changes, but
- release 16.07 will.
-
* The xstats API and rte_eth_xstats struct will be changed to allow retrieval
of values without any string copies or parsing.
No backwards compatibility is planned, as it would require code duplication
diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c
index 2d16d78..7fdd6f1 100644
--- a/lib/librte_eal/bsdapp/eal/eal_pci.c
+++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
@@ -278,6 +278,11 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
/* get subsystem_device id */
dev->id.subsystem_device_id = conf->pc_subdevice;
+ /* get class id */
+ dev->id.class_id = (conf->pc_class << 16) |
+ (conf->pc_subclass << 8) |
+ (conf->pc_progif);
+
/* TODO: get max_vfs */
dev->max_vfs = 0;
diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
index 3cae4cb..6c3117d 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -162,6 +162,9 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *d
if (id_table->subsystem_device_id != dev->id.subsystem_device_id &&
id_table->subsystem_device_id != PCI_ANY_ID)
continue;
+ if (id_table->class_id != dev->id.class_id &&
+ id_table->class_id != RTE_CLASS_ANY_ID)
+ continue;
struct rte_pci_addr *loc = &dev->addr;
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index 8fa2712..c30adaf 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -125,6 +125,7 @@ struct rte_pci_resource {
* table of these IDs for each device that it supports.
*/
struct rte_pci_id {
+ uint32_t class_id; /**< Class ID (class, subclass, pi) or RTE_CLASS_ANY_ID. */
uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */
uint16_t device_id; /**< Device ID or PCI_ANY_ID. */
uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */
@@ -170,6 +171,7 @@ struct rte_pci_device {
/** Any PCI device identifier (vendor, device, ...) */
#define PCI_ANY_ID (0xffff)
+#define RTE_CLASS_ANY_ID (0xffffff)
#ifdef __cplusplus
/** C++ macro used to help building up tables of device IDs */
@@ -177,14 +179,16 @@ struct rte_pci_device {
(vend), \
(dev), \
PCI_ANY_ID, \
- PCI_ANY_ID
+ PCI_ANY_ID, \
+ RTE_CLASS_ANY_ID
#else
/** Macro used to help building up tables of device IDs */
#define RTE_PCI_DEVICE(vend, dev) \
.vendor_id = (vend), \
.device_id = (dev), \
.subsystem_vendor_id = PCI_ANY_ID, \
- .subsystem_device_id = PCI_ANY_ID
+ .subsystem_device_id = PCI_ANY_ID, \
+ .class_id = RTE_CLASS_ANY_ID
#endif
struct rte_pci_driver;
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index bdc08a0..e6f0f13 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -306,6 +306,16 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
}
dev->id.subsystem_device_id = (uint16_t)tmp;
+ /* get class_id */
+ snprintf(filename, sizeof(filename), "%s/class",
+ dirname);
+ if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+ free(dev);
+ return -1;
+ }
+ /* the least 24 bits are valid: class, subclass, program interface */
+ dev->id.class_id = (uint32_t)tmp & RTE_CLASS_ANY_ID;
+
/* get max_vfs */
dev->max_vfs = 0;
snprintf(filename, sizeof(filename), "%s/max_vfs", dirname);
--
1.9.3
^ permalink raw reply [relevance 7%]
* Re: [dpdk-dev] [PATCH v3 00/35] mempool: rework memory allocation
2016-05-18 11:04 2% ` [dpdk-dev] [PATCH v3 00/35] " Olivier Matz
2016-05-18 11:04 10% ` [dpdk-dev] [PATCH v3 35/35] doc: update release notes about mempool allocation Olivier Matz
@ 2016-05-19 12:47 0% ` Thomas Monjalon
2016-05-20 8:42 0% ` Panu Matilainen
1 sibling, 1 reply; 200+ results
From: Thomas Monjalon @ 2016-05-19 12:47 UTC (permalink / raw)
To: Olivier Matz; +Cc: dev, bruce.richardson, stephen, keith.wiles
2016-05-18 13:04, Olivier Matz:
> This series is a rework of mempool. For those who don't want to read
> all the cover letter, here is a summary:
>
> - it is not possible to allocate large mempools if there is not enough
> contiguous memory, this series solves this issue
> - introduce new APIs with less arguments: "create, populate, obj_init"
> - allow to free a mempool
> - split code in smaller functions, will ease the introduction of ext_handler
> - remove test-pmd anonymous mempool creation
> - remove most of dom0-specific mempool code
> - opens the door for a eal_memory rework: we probably don't need large
> contiguous memory area anymore, working with pages would work.
>
> This breaks the ABI as it was indicated in the deprecation for 16.04.
> The API stays almost the same, no modification is needed in examples app
> or in test-pmd. Only kni and mellanox drivers are slightly modified.
Applied with a small change you sent me to fix mlx build in the middle of the patchset
and update the removed Xen files in MAINTAINERS file.
Thanks for the big rework!
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH 1/2] mbuf: new NSH packet type
@ 2016-05-19 12:26 4% ` Olivier Matz
0 siblings, 0 replies; 200+ results
From: Olivier Matz @ 2016-05-19 12:26 UTC (permalink / raw)
To: Jingjing Wu, helin.zhang; +Cc: dev
Hi Jingjing,
On 05/03/2016 07:51 AM, Jingjing Wu wrote:
> Signed-off-by: Jingjing Wu <jingjing.wu@intel.com>
> ---
> lib/librte_mbuf/rte_mbuf.h | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index 529debb..79edae3 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -274,6 +274,13 @@ extern "C" {
> */
> #define RTE_PTYPE_L2_ETHER_LLDP 0x00000004
> /**
> + * NSH (Network Service Header) packet type.
> + *
> + * Packet format:
> + * <'ether type'=0x894F>
> + */
> +#define RTE_PTYPE_L2_ETHER_NSH 0x00000005
> +/**
> * Mask of layer 2 packet types.
> * It is used for outer packet for tunneling cases.
> */
>
Acked-by: Olivier Matz <olivier.matz@6wind.com>
I have no objection to this patch, but it makes me think about
2 things:
- we have room for 16 types for each layer, maybe we should
start to be careful about which types should be supported to
avoid running out of types in the future.
- The types supported in outer and inner have diverged. It would
have been better to have something like:
#define RTE_PTYPE_INNER_$type (RTE_PTYPE_$type << 16)
Because it would make the software using the packet types
simpler.
It's maybe a bit late now because it would break the ABI, but
this is something we could keep in mind in case we change the
ABI for another reason.
Regards,
Olivier
^ permalink raw reply [relevance 4%]
* [dpdk-dev] [PATCH v2] ci: Add the class_id support in pci probe
2016-05-11 15:21 3% ` Stephen Hemminger
@ 2016-05-19 12:25 7% ` Ziye Yang
2016-05-19 13:17 7% ` [dpdk-dev] [PATCH v3] " Ziye Yang
1 sibling, 1 reply; 200+ results
From: Ziye Yang @ 2016-05-19 12:25 UTC (permalink / raw)
To: dev
This patch is used to add the class_id (class_code,
subclass_code, programming_interface) support for
pci_device probe. With this patch, it will be
flexible for users to probe a class of devices
by class_id.
Signed-off-by: Ziye Yang <ziye.yang@intel.com>
---
doc/guides/rel_notes/deprecation.rst | 6 ------
lib/librte_eal/bsdapp/eal/eal_pci.c | 5 +++++
lib/librte_eal/common/eal_common_pci.c | 3 +++
lib/librte_eal/common/include/rte_pci.h | 8 ++++++--
lib/librte_eal/linuxapp/eal/eal_pci.c | 9 +++++++++
5 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index 7d94ba5..28f9c61 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -20,12 +20,6 @@ Deprecation Notices
do not need to care about the kind of devices that are being used, making it
easier to add new buses later.
-* ABI changes are planned for struct rte_pci_id, i.e., add new field ``class``.
- This new added ``class`` field can be used to probe pci device by class
- related info. This change should impact size of struct rte_pci_id and struct
- rte_pci_device. The release 16.04 does not contain these ABI changes, but
- release 16.07 will.
-
* The xstats API and rte_eth_xstats struct will be changed to allow retrieval
of values without any string copies or parsing.
No backwards compatibility is planned, as it would require code duplication
diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c
index 2d16d78..7fdd6f1 100644
--- a/lib/librte_eal/bsdapp/eal/eal_pci.c
+++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
@@ -278,6 +278,11 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
/* get subsystem_device id */
dev->id.subsystem_device_id = conf->pc_subdevice;
+ /* get class id */
+ dev->id.class_id = (conf->pc_class << 16) |
+ (conf->pc_subclass << 8) |
+ (conf->pc_progif);
+
/* TODO: get max_vfs */
dev->max_vfs = 0;
diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
index 3cae4cb..6c3117d 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -162,6 +162,9 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *d
if (id_table->subsystem_device_id != dev->id.subsystem_device_id &&
id_table->subsystem_device_id != PCI_ANY_ID)
continue;
+ if (id_table->class_id != dev->id.class_id &&
+ id_table->class_id != RTE_CLASS_ANY_ID)
+ continue;
struct rte_pci_addr *loc = &dev->addr;
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index 8fa2712..c30adaf 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -125,6 +125,7 @@ struct rte_pci_resource {
* table of these IDs for each device that it supports.
*/
struct rte_pci_id {
+ uint32_t class_id; /**< Class ID (class, subclass, pi) or RTE_CLASS_ANY_ID. */
uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */
uint16_t device_id; /**< Device ID or PCI_ANY_ID. */
uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */
@@ -170,6 +171,7 @@ struct rte_pci_device {
/** Any PCI device identifier (vendor, device, ...) */
#define PCI_ANY_ID (0xffff)
+#define RTE_CLASS_ANY_ID (0xffffff)
#ifdef __cplusplus
/** C++ macro used to help building up tables of device IDs */
@@ -177,14 +179,16 @@ struct rte_pci_device {
(vend), \
(dev), \
PCI_ANY_ID, \
- PCI_ANY_ID
+ PCI_ANY_ID, \
+ RTE_CLASS_ANY_ID
#else
/** Macro used to help building up tables of device IDs */
#define RTE_PCI_DEVICE(vend, dev) \
.vendor_id = (vend), \
.device_id = (dev), \
.subsystem_vendor_id = PCI_ANY_ID, \
- .subsystem_device_id = PCI_ANY_ID
+ .subsystem_device_id = PCI_ANY_ID, \
+ .class_id = RTE_CLASS_ANY_ID
#endif
struct rte_pci_driver;
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index bdc08a0..ff255b4 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -306,6 +306,15 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
}
dev->id.subsystem_device_id = (uint16_t)tmp;
+ /* get class_id */
+ snprintf(filename, sizeof(filename), "%s/class",
+ dirname);
+ if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+ free(dev);
+ return -1;
+ }
+ dev->id.class_id = (uint32_t)tmp & RTE_CLASS_ANY_ID;
+
/* get max_vfs */
dev->max_vfs = 0;
snprintf(filename, sizeof(filename), "%s/max_vfs", dirname);
--
1.9.3
^ permalink raw reply [relevance 7%]
* Re: [dpdk-dev] [PATCH] mbuf: make rearm_data address naturally aligned
2016-05-19 8:50 3% ` Bruce Richardson
2016-05-19 11:54 0% ` Jan Viktorin
@ 2016-05-19 12:18 0% ` Ananyev, Konstantin
2016-05-19 13:35 0% ` Jerin Jacob
1 sibling, 1 reply; 200+ results
From: Ananyev, Konstantin @ 2016-05-19 12:18 UTC (permalink / raw)
To: Richardson, Bruce, Jerin Jacob; +Cc: dev, thomas.monjalon, viktorin, jianbo.liu
Hi everyone,
> On Thu, May 19, 2016 at 12:20:16AM +0530, Jerin Jacob wrote:
> > On Wed, May 18, 2016 at 05:43:00PM +0100, Bruce Richardson wrote:
> > > On Wed, May 18, 2016 at 07:27:43PM +0530, Jerin Jacob wrote:
> > > > To avoid multiple stores on fast path, Ethernet drivers
> > > > aggregate the writes to data_off, refcnt, nb_segs and port
> > > > to an uint64_t data and write the data in one shot
> > > > with uint64_t* at &mbuf->rearm_data address.
> > > >
> > > > Some of the non-IA platforms have store operation overhead
> > > > if the store address is not naturally aligned.This patch
> > > > fixes the performance issue on those targets.
> > > >
> > > > Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> > > > ---
> > > >
> > > > Tested this patch on IA and non-IA(ThunderX) platforms.
> > > > This patch shows 400Kpps/core improvement on ThunderX + ixgbe + vector environment.
> > > > and this patch does not have any overhead on IA platform.
> > > >
> > > > Have tried an another similar approach by replacing "buf_len" with "pad"
> > > > (in this patch context),
> > > > Since it has additional overhead on read and then mask to keep "buf_len" intact,
> > > > not much improvement is not shown.
> > > > ref: http://dpdk.org/ml/archives/dev/2016-May/038914.html
> > > >
> > > > ---
> > > While this will work and from your tests doesn't seem to have a performance
> > > impact, I'm not sure I particularly like it. It's extending out the end of
> > > cacheline0 of the mbuf by 16 bytes, though I suppose it's not technically using
> > > up any more space of it.
> >
> > Extending by 2 bytes. Right ?. Yes, I guess, Now we using only 56 out of 64 bytes
> > in the first 64-byte cache line.
> >
> > >
> > > What I'm wondering about though, is do we have any usecases where we need a
> > > variable buf_len for packets for RX. These mbufs come directly from a mempool,
> > > which is generally understood to be a set of fixed-sized buffers. I realise that
> > > this change was made in the past after some discussion, but one of the key points
> > > there [at least to my reading] was that - even though nobody actually made a
> > > concrete case where they had variable-sized buffers - having support for them
> > > made no performance difference.
I was going to point to vhost zcp support, but as Thomas pointed out
that functionality was removed from dpdk.org recently.
So I am not aware whether such a case exists right now in the 'real world' or not.
Though I still think RX function should leave buf_len field intact.
> > >
> > > The latter part of that has now changed, and supporting variable-sized mbufs
> > > from an mbuf pool has a perf impact. Do we definitely need that functionality,
> > > because the easiest fix here is just to move the rxrearm marker back above
> > > mbuf_len as it was originally in releases like 1.8?
> >
> > And initialize the buf_len with mp->elt_size - sizeof(struct rte_mbuf).
> > Right?
> >
> > I don't have a strong opinion on this, I can do this if there is no
> > objection on this. Let me know.
> >
> > However, I do see in future, "buf_len" may belong at the end of the first 64 byte
> > cache line as currently "port" is defined as uint8_t, IMO, that is less.
> > We may need to increase that uint16_t. The reason why I think that
> > because, Currently in ThunderX HW, we do have 128VFs per socket for
> > built-in NIC, So, the two node configuration and one external PCIe NW card
> > configuration can easily go beyond 256 ports.
I wonder whether anyone really uses the mbuf port field?
My thought was - could we drop it completely?
Actually, after discussing it with Bruce offline, an interesting idea came out:
if we drop port and make mbuf_prefree() reset nb_segs=1, then
we can reduce RX rearm_data to 4B. So with that layout:
struct rte_mbuf {
MARKER cacheline0;
void *buf_addr;
phys_addr_t buf_physaddr;
uint16_t buf_len;
uint8_t nb_segs;
uint8_t reserved_1byte; /* former port */
MARKER32 rearm_data;
uint16_t data_off;
uint16_t refcnt;
uint64_t ol_flags;
...
We can keep buf_len at its place and avoid 2B gap, while making rearm_data
4B long and 4B aligned.
Another similar alternative is to make mbuf_prefree() set refcnt=1
(as it updates it anyway). Then we can remove refcnt from the RX rearm_data,
and again make rearm_data 4B long and 4B aligned:
struct rte_mbuf {
MARKER cacheline0;
void *buf_addr;
phys_addr_t buf_physaddr;
uint16_t buf_len;
uint16_t refcnt;
MARKER32 rearm_data;
uint16_t data_off;
uint8_t nb_segs;
uint8_t port;
uint64_t ol_flags;
..
As an additional plus, __rte_mbuf_raw_alloc() wouldn't need to modify mbuf contents at all -
which probably is a good thing.
As a drawback - we'll have free mbufs in the pool with refcnt==1, which probably reduces
the debuggability of the mbuf code.
Konstantin
> >
> Ok, good point. If you think it's needed, and if we are changing the mbuf
> structure, it might be a good time to extend that field while you are at it, save
> a second ABI break later on.
>
> /Bruce
>
> > >
> > > Regards,
> > > /Bruce
> > >
> > > Ref: http://dpdk.org/ml/archives/dev/2014-December/009432.html
> > >
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH] mbuf: make rearm_data address naturally aligned
2016-05-19 8:50 3% ` Bruce Richardson
@ 2016-05-19 11:54 0% ` Jan Viktorin
2016-05-19 12:18 0% ` Ananyev, Konstantin
1 sibling, 0 replies; 200+ results
From: Jan Viktorin @ 2016-05-19 11:54 UTC (permalink / raw)
To: Bruce Richardson
Cc: Jerin Jacob, dev, thomas.monjalon, konstantin.ananyev, jianbo.liu
On Thu, 19 May 2016 09:50:48 +0100
Bruce Richardson <bruce.richardson@intel.com> wrote:
> On Thu, May 19, 2016 at 12:20:16AM +0530, Jerin Jacob wrote:
> > On Wed, May 18, 2016 at 05:43:00PM +0100, Bruce Richardson wrote:
> > > On Wed, May 18, 2016 at 07:27:43PM +0530, Jerin Jacob wrote:
> > > > To avoid multiple stores on fast path, Ethernet drivers
> > > > aggregate the writes to data_off, refcnt, nb_segs and port
> > > > to an uint64_t data and write the data in one shot
> > > > with uint64_t* at &mbuf->rearm_data address.
> > > >
> > > > Some of the non-IA platforms have store operation overhead
> > > > if the store address is not naturally aligned.This patch
> > > > fixes the performance issue on those targets.
> > > >
> > > > Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> > > > ---
> > > >
> > > > Tested this patch on IA and non-IA(ThunderX) platforms.
> > > > This patch shows 400Kpps/core improvement on ThunderX + ixgbe + vector environment.
> > > > and this patch does not have any overhead on IA platform.
Hello,
I can confirm a very small improvement in our synthetic tests based on the PMD
null (ARM Cortex-A9). For a single-core (1C) test, there is now a lower overhead
and it is more stable with different packet lengths. However, when running dual-core
(2C), the result is slightly slower but again, it seems to be more stable.
Without this patch (cycles per packet):
length: 64 128 256 512 1024 1280 1518
1C 488 544 487 454 543 488 515
2C 433 433 431 433 433 461 443
Applied this patch (cycles per packet):
length: 64 128 256 512 1024 1280 1518
1C 472 472 472 472 473 472 473
2C 435 435 435 435 436 436 436
Regards
Jan
> > > >
> > > > Have tried an another similar approach by replacing "buf_len" with "pad"
> > > > (in this patch context),
> > > > Since it has additional overhead on read and then mask to keep "buf_len" intact,
> > > > not much improvement is not shown.
> > > > ref: http://dpdk.org/ml/archives/dev/2016-May/038914.html
> > > >
> > > > ---
> > > While this will work and from your tests doesn't seem to have a performance
> > > impact, I'm not sure I particularly like it. It's extending out the end of
> > > cacheline0 of the mbuf by 16 bytes, though I suppose it's not technically using
> > > up any more space of it.
> >
> > Extending by 2 bytes. Right ?. Yes, I guess, Now we using only 56 out of 64 bytes
> > in the first 64-byte cache line.
> >
> > >
> > > What I'm wondering about though, is do we have any usecases where we need a
> > > variable buf_len for packets for RX. These mbufs come directly from a mempool,
> > > which is generally understood to be a set of fixed-sized buffers. I realise that
> > > this change was made in the past after some discussion, but one of the key points
> > > there [at least to my reading] was that - even though nobody actually made a
> > > concrete case where they had variable-sized buffers - having support for them
> > > made no performance difference.
> > >
> > > The latter part of that has now changed, and supporting variable-sized mbufs
> > > from an mbuf pool has a perf impact. Do we definitely need that functionality,
> > > because the easiest fix here is just to move the rxrearm marker back above
> > > mbuf_len as it was originally in releases like 1.8?
> >
> > And initialize the buf_len with mp->elt_size - sizeof(struct rte_mbuf).
> > Right?
> >
> > I don't have a strong opinion on this, I can do this if there is no
> > objection on this. Let me know.
> >
> > However, I do see in future, "buf_len" may belong at the end of the first 64 byte
> > cache line as currently "port" is defined as uint8_t, IMO, that is less.
> > We may need to increase that uint16_t. The reason why I think that
> > because, Currently in ThunderX HW, we do have 128VFs per socket for
> > built-in NIC, So, the two node configuration and one external PCIe NW card
> > configuration can easily go beyond 256 ports.
> >
> Ok, good point. If you think it's needed, and if we are changing the mbuf
> structure, it might be a good time to extend that field while you are at it, save
> a second ABI break later on.
>
> /Bruce
>
> > >
> > > Regards,
> > > /Bruce
> > >
> > > Ref: http://dpdk.org/ml/archives/dev/2014-December/009432.html
> > >
--
Jan Viktorin E-mail: Viktorin@RehiveTech.com
System Architect Web: www.RehiveTech.com
RehiveTech
Brno, Czech Republic
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH] mbuf: make rearm_data address naturally aligned
@ 2016-05-19 8:50 3% ` Bruce Richardson
2016-05-19 11:54 0% ` Jan Viktorin
2016-05-19 12:18 0% ` Ananyev, Konstantin
0 siblings, 2 replies; 200+ results
From: Bruce Richardson @ 2016-05-19 8:50 UTC (permalink / raw)
To: Jerin Jacob
Cc: dev, thomas.monjalon, konstantin.ananyev, viktorin, jianbo.liu
On Thu, May 19, 2016 at 12:20:16AM +0530, Jerin Jacob wrote:
> On Wed, May 18, 2016 at 05:43:00PM +0100, Bruce Richardson wrote:
> > On Wed, May 18, 2016 at 07:27:43PM +0530, Jerin Jacob wrote:
> > > To avoid multiple stores on fast path, Ethernet drivers
> > > aggregate the writes to data_off, refcnt, nb_segs and port
> > > to an uint64_t data and write the data in one shot
> > > with uint64_t* at &mbuf->rearm_data address.
> > >
> > > Some of the non-IA platforms have store operation overhead
> > > if the store address is not naturally aligned.This patch
> > > fixes the performance issue on those targets.
> > >
> > > Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> > > ---
> > >
> > > Tested this patch on IA and non-IA(ThunderX) platforms.
> > > This patch shows 400Kpps/core improvement on ThunderX + ixgbe + vector environment.
> > > and this patch does not have any overhead on IA platform.
> > >
> > > Have tried an another similar approach by replacing "buf_len" with "pad"
> > > (in this patch context),
> > > Since it has additional overhead on read and then mask to keep "buf_len" intact,
> > > not much improvement is not shown.
> > > ref: http://dpdk.org/ml/archives/dev/2016-May/038914.html
> > >
> > > ---
> > While this will work and from your tests doesn't seem to have a performance
> > impact, I'm not sure I particularly like it. It's extending out the end of
> > cacheline0 of the mbuf by 16 bytes, though I suppose it's not technically using
> > up any more space of it.
>
> Extending by 2 bytes. Right ?. Yes, I guess, Now we using only 56 out of 64 bytes
> in the first 64-byte cache line.
>
> >
> > What I'm wondering about though, is do we have any usecases where we need a
> > variable buf_len for packets for RX. These mbufs come directly from a mempool,
> > which is generally understood to be a set of fixed-sized buffers. I realise that
> > this change was made in the past after some discussion, but one of the key points
> > there [at least to my reading] was that - even though nobody actually made a
> > concrete case where they had variable-sized buffers - having support for them
> > made no performance difference.
> >
> > The latter part of that has now changed, and supporting variable-sized mbufs
> > from an mbuf pool has a perf impact. Do we definitely need that functionality,
> > because the easiest fix here is just to move the rxrearm marker back above
> > mbuf_len as it was originally in releases like 1.8?
>
> And initialize the buf_len with mp->elt_size - sizeof(struct rte_mbuf).
> Right?
>
> I don't have a strong opinion on this, I can do this if there is no
> objection on this. Let me know.
>
> However, I do see in future, "buf_len" may belong at the end of the first 64 byte
> cache line as currently "port" is defined as uint8_t, IMO, that is less.
> We may need to increase that uint16_t. The reason why I think that
> because, Currently in ThunderX HW, we do have 128VFs per socket for
> built-in NIC, So, the two node configuration and one external PCIe NW card
> configuration can easily go beyond 256 ports.
>
Ok, good point. If you think it's needed, and if we are changing the mbuf
structure, it might be a good time to extend that field while you are at it, save
a second ABI break later on.
/Bruce
> >
> > Regards,
> > /Bruce
> >
> > Ref: http://dpdk.org/ml/archives/dev/2014-December/009432.html
> >
^ permalink raw reply [relevance 3%]
* [dpdk-dev] [PATCH v3 35/35] doc: update release notes about mempool allocation
2016-05-18 11:04 2% ` [dpdk-dev] [PATCH v3 00/35] " Olivier Matz
@ 2016-05-18 11:04 10% ` Olivier Matz
2016-05-19 12:47 0% ` [dpdk-dev] [PATCH v3 00/35] mempool: rework memory allocation Thomas Monjalon
1 sibling, 0 replies; 200+ results
From: Olivier Matz @ 2016-05-18 11:04 UTC (permalink / raw)
To: dev; +Cc: bruce.richardson, stephen, keith.wiles
Remove the deprecation notice and add an entry in the release note
for the changes in mempool allocation.
Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
---
doc/guides/rel_notes/deprecation.rst | 8 --------
doc/guides/rel_notes/release_16_07.rst | 9 +++++++++
2 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index 7d94ba5..ad05eba 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -51,14 +51,6 @@ Deprecation Notices
functions added to facilitate the creation of mempools using an external
handler. The 16.07 release will contain these changes.
-* The rte_mempool allocation will be changed in 16.07:
- allocation of large mempool in several virtual memory chunks, new API
- to populate a mempool, new API to free a mempool, allocation in
- anonymous mapping, drop of specific dom0 code. These changes will
- induce a modification of the rte_mempool structure, plus a
- modification of the API of rte_mempool_obj_iter(), implying a breakage
- of the ABI.
-
* A librte_vhost public structures refactor is planned for DPDK 16.07
that requires both ABI and API change.
The proposed refactor would expose DPDK vhost dev to applications as
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 58c8ef9..6cb5304 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -38,6 +38,15 @@ New Features
The size of the mempool structure is reduced if the per-lcore cache is disabled.
+* **Changed the memory allocation in mempool library.**
+
+ * Added ability to allocate a large mempool in virtually fragmented memory.
+ * Added new APIs to populate a mempool with memory.
+ * Added an API to free a mempool.
+ * Modified the API of rte_mempool_obj_iter() function.
+ * Dropped specific Xen Dom0 code.
+ * Dropped specific anonymous mempool code in testpmd.
+
Resolved Issues
---------------
--
2.8.0.rc3
^ permalink raw reply [relevance 10%]
* [dpdk-dev] [PATCH v3 00/35] mempool: rework memory allocation
2016-04-14 10:19 2% ` [dpdk-dev] [PATCH 00/36] " Olivier Matz
2016-04-14 13:50 0% ` Wiles, Keith
@ 2016-05-18 11:04 2% ` Olivier Matz
2016-05-18 11:04 10% ` [dpdk-dev] [PATCH v3 35/35] doc: update release notes about mempool allocation Olivier Matz
2016-05-19 12:47 0% ` [dpdk-dev] [PATCH v3 00/35] mempool: rework memory allocation Thomas Monjalon
1 sibling, 2 replies; 200+ results
From: Olivier Matz @ 2016-05-18 11:04 UTC (permalink / raw)
To: dev; +Cc: bruce.richardson, stephen, keith.wiles
This series is a rework of mempool. For those who don't want to read
all the cover letter, here is a summary:
- it is not possible to allocate large mempools if there is not enough
contiguous memory, this series solves this issue
- introduce new APIs with fewer arguments: "create, populate, obj_init"
- allow to free a mempool
- split code in smaller functions, will ease the introduction of ext_handler
- remove test-pmd anonymous mempool creation
- remove most of dom0-specific mempool code
- opens the door for a eal_memory rework: we probably don't need large
contiguous memory area anymore, working with pages would work.
This breaks the ABI as it was indicated in the deprecation for 16.04.
The API stays almost the same, no modification is needed in examples app
or in test-pmd. Only kni and mellanox drivers are slightly modified.
Changes v2 -> v3:
- fix some checkpatch issues
- rework titles and commit logs
- fix compilation with debug + shared libraries:
rte_mempool_check_cookies() must be exported
- rebase on head
Changes v1 -> v2:
- do not change the place of __rte_unused in txq_mp2mr_mbuf_check(),
as suggested by Keith.
Changes RFC -> v1:
- remove the rte_deconst macro, and remove some const qualifier in
dump/audit functions
- rework modifications in mellanox drivers to ensure the mempool is
virtually contiguous
- fix mempool memory chunk iteration (bad pointer was used)
- fix compilation on freebsd: replace MAP_LOCKED flag by mlock()
- fix compilation on tilera (pointer arithmetics)
- slightly rework and clean the mempool autotest
- fix mempool autotest on bsd
- more validation (especially mellanox drivers and kni that were not
tested in RFC)
- passed autotests (x86_64-native-linuxapp-gcc and x86_64-native-bsdapp-gcc)
- rebase on head, reorder the patches a bit and fix minor split issues
Description of the initial issue
--------------------------------
The allocation of mbuf pool can fail even if there is enough memory.
The problem is related to the way the memory is allocated and used in
dpdk. It is particularly annoying with mbuf pools, but it can also fail
in other use cases allocating a large amount of memory.
- rte_malloc() allocates physically contiguous memory, which is needed
for mempools, but useless most of the time.
Allocating a large physically contiguous zone is often impossible
because the system provides hugepages which may not be contiguous.
- rte_mempool_create() (and therefore rte_pktmbuf_pool_create())
requires a physically contiguous zone.
- rte_mempool_xmem_create() does not solve the issue as it still
needs the memory to be virtually contiguous, and there is no
way in dpdk to allocate a virtually contiguous memory that is
not also physically contiguous.
How to reproduce the issue
--------------------------
- start the dpdk with some 2MB hugepages (it can also occur with 1GB)
- allocate a large mempool
- even if there is enough memory, the allocation can fail
Example:
git clone http://dpdk.org/git/dpdk
cd dpdk
make config T=x86_64-native-linuxapp-gcc
make -j32
mkdir -p /mnt/huge
mount -t hugetlbfs nodev /mnt/huge
echo 256 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
# we try to allocate a mempool whose size is ~450MB, it fails
./build/app/testpmd -l 2,4 -- --total-num-mbufs=200000 -i
The EAL logs "EAL: Virtual area found at..." shows that there are
several zones, but all smaller than 450MB.
Workarounds:
- Use 1GB hugepages: it sometimes works, but for very large
pools (millions of mbufs) there is the same issue. Moreover,
it would consume 1GB memory at least which can be a lot
in some cases.
- Reboot the machine or allocate hugepages at boot time: this increases
the chances to have more contiguous memory, but does not completely
solve the issue
Solutions
---------
Below is a list of proposed solutions. I implemented a quick and dirty
PoC of solution 1, but it's not working in all conditions and it's
really an ugly hack. This series implements solution 4, which looks
the best to me, knowing it does not prevent further enhancements
in dpdk memory in the future (solution 3 for instance).
Solution 1: in application
--------------------------
- allocate several hugepages using rte_malloc() or rte_memzone_reserve()
(only keeping complete hugepages)
- parse memsegs and /proc/maps to check which files mmaps these pages
- mmap the files in a contiguous virtual area
- use rte_mempool_xmem_create()
Cons:
- 1a. parsing the memsegs of rte config in the application does not
use a public API, and can be broken if internal dpdk code changes
- 1b. some memory is lost due to malloc headers. Also, if the memory is
very fragmented (ex: all 2MB pages are physically separated), it does
not work at all because we cannot get any complete page. It is not
possible to use a lower level allocator since commit fafcc11985a.
- 1c. we cannot use rte_pktmbuf_pool_create(), so we need to use mempool
api and do a part of the job manually
- 1d. it breaks secondary processes as the virtual addresses won't be
mmap'd at the same place in secondary process
- 1e. it only fixes the issue for the mbuf pool of the application,
internal pools in dpdk libraries are not modified
- 1f. this is a pure linux solution (rte_map files)
- 1g. The application has to be aware of RTE_EAL_SINGLE_SEGMENTS option
that changes the way hugepages are mapped. By the way, it's strange
to have such a compile-time option, we should probably have only
one behavior that works all the time.
Solution 2: in dpdk memory allocator
------------------------------------
- do the same as solution 1 in a new function rte_malloc_non_contig():
allocate several chunks and mmap them in a contiguous virtual memory
- a flag has to be added in malloc header to do the proper cleanup in
rte_free() (free all the chunks, munmap the memory)
- introduce a new rte_mem_get_physmap(*physmap,addr, len) that returns
the virt2phys mapping of a virtual area in dpdk
- add a mempool flag MEMPOOL_F_NON_PHYS_CONTIG to use
rte_malloc_non_contig() to allocate the area storing the objects
Cons:
- 2a. same as 1d: it breaks secondary processes if the mempool flag is
used.
- 2b. same as 1b: some memory is lost due to malloc headers, and it
cannot work if memory is too fragmented.
- 2c. rte_malloc_virt2phy() cannot be used on these zones. It would
return the physical address of the first page. It would be better to
return an error in this case.
- 2d. need to check how to implement this on bsd (TBD)
Solution 3: in dpdk eal memory
------------------------------
- Rework the way hugepages are mmap'd in dpdk: instead of having several
rte_map* files, just mmap one file per node. It may drastically
simplify EAL memory management in dpdk.
- An API should be added to retrieve the physical mapping of a virtual
area (ex: rte_mem_get_physmap(*physmap, addr, len))
- rte_malloc() and rte_memzone_reserve() won't allocate physically
contiguous memory anymore (TBD)
- Update mempool to always use the rte_mempool_xmem_create() version
Cons:
- 3a. lot of rework in eal memory, it will induce some behavior changes
and maybe api changes
- 3b. possible conflicts with xen_dom0 mempool
Solution 4: in mempool
----------------------
- Introduce a new API to fill a mempool with zones that are not
virtually contiguous. This requires adding new functions to create and
populate a mempool. Example (TBD):
- rte_mempool_create_empty(name, n, elt_size, cache_size, priv_size)
- rte_mempool_populate(mp, addr, len): add virtual memory for objects
- rte_mempool_obj_iter(mp, obj_cb, arg): call a cb for each object
- update rte_mempool_create() to allocate objects in several memory
chunks by default if there is no large enough physically contiguous
memory.
Tests done
----------
Compilation
~~~~~~~~~~~
The following targets:
x86_64-native-linuxapp-gcc
i686-native-linuxapp-gcc
x86_x32-native-linuxapp-gcc
x86_64-native-linuxapp-clang
x86_64-native-bsdapp-gcc
ppc_64-power8-linuxapp-gcc
tile-tilegx-linuxapp-gcc (only the mempool files, the target does not compile)
Libraries with and without debug, in static and shared mode + examples.
autotests
~~~~~~~~~
Passed all autotests on x86_64-native-linuxapp-gcc (including kni) and
mempool-related autotests on x86_64-native-bsdapp-gcc.
test-pmd
~~~~~~~~
# now starts fine, was failing before if mempool was too fragmented
./x86_64-native-linuxapp-gcc/app/testpmd -l 0,2,4 -n 4 -- -i --port-topology=chained
# still ok
./x86_64-native-linuxapp-gcc/app/testpmd -l 0,2,4 -n 4 -m 256 -- -i --port-topology=chained --mp-anon
set fwd txonly
start
stop
# fail, but was failing before too. The problem is because the physical
# addresses are not properly set when using --no-huge. The mempool phys addr
# are now correct, but the zones allocated through memzone_reserve() are
# still wrong. This could be fixed in a future series.
./x86_64-native-linuxapp-gcc/app/testpmd -l 0,2,4 -n 4 -m 256 --no-huge -- -i --port-topology=chained
set fwd txonly
start
stop
*** BLURB HERE ***
Olivier Matz (35):
mempool: rework comments and style
mempool: rename element size variables
mempool: uninline function to check cookies
mempool: use sizeof to get the size of header and trailer
mempool: rename object constructor typedef
mempool: list objects when added
mempool: remove const qualifier when browsing pools
mempool: remove const qualifier in dump and audit
mempool: use the list to iterate the elements
mempool: use the list to audit all elements
mempool: use the list to initialize objects
mempool: create internal ring in a specific function
mempool: store physical address in objects
mempool: remove macro to check if contiguous
mempool: store memory chunks in a list
mempool: add function to iterate the memory chunks
mempool: simplify the memory usage calculation
mempool: introduce a free callback for memory chunks
mempool: get memory size with unspecified page size
mempool: allocate in several memory chunks by default
eal: lock memory when not using hugepages
mempool: support no hugepage mode
mempool: replace physical address by a memzone pointer
mempool: introduce a function to free a pool
mempool: introduce a function to create an empty pool
eal/xen: return machine address without knowing memseg id
mempool: rework support of Xen dom0
mempool: create the internal ring when populating
mempool: populate with anonymous memory
mempool: make mempool populate and free api public
app/testpmd: remove anonymous mempool code
mem: avoid memzone/mempool/ring name truncation
mempool: add flag for removing phys contiguous constraint
app/test: rework mempool test
doc: update release notes about mempool allocation
app/test-pmd/Makefile | 4 -
app/test-pmd/mempool_anon.c | 201 -----
app/test-pmd/mempool_osdep.h | 54 --
app/test-pmd/testpmd.c | 23 +-
app/test/test_mempool.c | 243 +++---
doc/guides/rel_notes/deprecation.rst | 8 -
doc/guides/rel_notes/release_16_07.rst | 9 +
drivers/net/mlx4/mlx4.c | 140 ++--
drivers/net/mlx5/mlx5_rxtx.c | 140 ++--
drivers/net/mlx5/mlx5_rxtx.h | 4 +-
drivers/net/xenvirt/rte_eth_xenvirt.h | 2 +-
drivers/net/xenvirt/rte_mempool_gntalloc.c | 4 +-
lib/librte_eal/common/eal_common_log.c | 2 +-
lib/librte_eal/common/eal_common_memzone.c | 10 +-
lib/librte_eal/common/include/rte_memory.h | 11 +-
lib/librte_eal/linuxapp/eal/eal_memory.c | 2 +-
lib/librte_eal/linuxapp/eal/eal_xen_memory.c | 18 +-
lib/librte_kni/rte_kni.c | 12 +-
lib/librte_mempool/Makefile | 3 -
lib/librte_mempool/rte_dom0_mempool.c | 133 ----
lib/librte_mempool/rte_mempool.c | 1061 +++++++++++++++++---------
lib/librte_mempool/rte_mempool.h | 597 +++++++--------
lib/librte_mempool/rte_mempool_version.map | 19 +-
lib/librte_ring/rte_ring.c | 16 +-
24 files changed, 1402 insertions(+), 1314 deletions(-)
delete mode 100644 app/test-pmd/mempool_anon.c
delete mode 100644 app/test-pmd/mempool_osdep.h
delete mode 100644 lib/librte_mempool/rte_dom0_mempool.c
--
2.8.0.rc3
^ permalink raw reply [relevance 2%]
* [dpdk-dev] [PATCH v3 8/8] doc: update doc for packet capture framework
@ 2016-05-17 16:37 6% ` Reshma Pattan
2016-05-23 21:38 3% ` [dpdk-dev] [PATCH v4 0/9] add " Reshma Pattan
1 sibling, 0 replies; 200+ results
From: Reshma Pattan @ 2016-05-17 16:37 UTC (permalink / raw)
To: dev; +Cc: Reshma Pattan
Added programmers guide for librte_pdump.
Added sample application guide for app/pdump application.
Updated release note for packet capture framework changes.
Signed-off-by: Reshma Pattan <reshma.pattan@intel.com>
---
MAINTAINERS | 3 +
doc/guides/prog_guide/index.rst | 1 +
doc/guides/prog_guide/pdump_library.rst | 121 ++++++++++++++++++++++++++++++++
doc/guides/rel_notes/release_16_07.rst | 6 ++
doc/guides/sample_app_ug/index.rst | 1 +
doc/guides/sample_app_ug/pdump.rst | 109 ++++++++++++++++++++++++++++
6 files changed, 241 insertions(+)
create mode 100644 doc/guides/prog_guide/pdump_library.rst
create mode 100644 doc/guides/sample_app_ug/pdump.rst
diff --git a/MAINTAINERS b/MAINTAINERS
index 58f5ba4..d4d0630 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -439,6 +439,9 @@ Pdump
M: Reshma Pattan <reshma.pattan@intel.com>
F: lib/librte_pdump/
F: app/pdump/
+F: doc/guides/prog_guide/pdump_library.rst
+F: doc/guides/sample_app_ug/pdump.rst
+
Hierarchical scheduler
M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
diff --git a/doc/guides/prog_guide/index.rst b/doc/guides/prog_guide/index.rst
index b862d0c..4caf969 100644
--- a/doc/guides/prog_guide/index.rst
+++ b/doc/guides/prog_guide/index.rst
@@ -71,6 +71,7 @@ Programmer's Guide
writing_efficient_code
profile_app
glossary
+ pdump_library
**Figures**
diff --git a/doc/guides/prog_guide/pdump_library.rst b/doc/guides/prog_guide/pdump_library.rst
new file mode 100644
index 0000000..6af77b9
--- /dev/null
+++ b/doc/guides/prog_guide/pdump_library.rst
@@ -0,0 +1,121 @@
+.. BSD LICENSE
+ Copyright(c) 2016 Intel Corporation. All rights reserved.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+.. _Pdump_Library:
+
+pdump Library
+=============
+
+Pdump library provides framework for packet capturing on DPDK.
+
+Operation
+---------
+
+Pdump library provides APIs to support packet capturing on dpdk ethernet devices.
+Library provides APIs to initialize the packet capture framework, enable/disable
+the packet capture and uninitialize the packet capture framework.
+
+Pdump library works on a server and client based model.
+
+Server is responsible for enabling/disabling the packet captures.
+Clients are responsible for requesting enable/disable of the
+packet captures.
+
+As part of packet capture framework initialization, a pthread and
+the server socket are created. Only one server socket is allowed on the system.
+As part of enabling/disabling the packet capture, client sockets are created
+and multiple client sockets are allowed.
+Whoever calls initialization first will succeed with the initialization;
+subsequent calls of initialization are not allowed. So subsequent users can only
+request enabling/disabling the packet capture.
+
+Library provides below APIs
+
+``rte_pdump_init()``
+This API initializes the packet capture framework.
+
+``rte_pdump_enable()``
+This API enables the packet capturing on a given port and queue.
+Note: filter option in the API is a placeholder for future use.
+
+``rte_pdump_enable_by_deviceid()``
+This API enables the packet capturing on a given device id
+(device name or pci address) and queue.
+Note: filter option in the API is a placeholder for future use.
+
+``rte_pdump_disable()``
+This API disables the packet capturing on a given port and queue.
+
+``rte_pdump_disable_by_deviceid()``
+This API disables the packet capturing on a given device_id and queue.
+
+``rte_pdump_uninit()``
+This API uninitializes the packet capture framework.
+
+
+Implementation Details
+----------------------
+
+On a call to library API ``rte_pdump_init()``, library creates a pthread and a server socket.
+Server socket in pthread context will be listening to the client requests to enable/disable
+the packet capture.
+
+Whoever calls this API first will have the server socket created;
+subsequent calls to this API will not create any further server sockets, i.e. only one server
+socket is allowed.
+
+On each call to library APIs ``rte_pdump_enable()/rte_pdump_enable_by_deviceid()``
+to enable the packet capture, library creates separate client sockets,
+builds up enable request and sends the request to the server.
+Server listening on the socket will serve the request, enable the packet capture
+by registering ethernet rx/tx callbacks for the given port/device_id and queue combinations.
+Server mirrors the packets to a new mempool and enqueues them to the ring that the client has passed
+in these APIs.
+Server sends the response back to the client about the status of the request that was processed.
+After the response is received from the server, client sockets will be closed.
+
+On each call to library APIs ``rte_pdump_disable()/rte_pdump_disable_by_deviceid()``
+to disable packet capture, library creates separate client sockets,
+builds up disable request and sends the request to the server.
+Server listening on the socket will serve the request, disable the packet capture
+by removing the ethernet rx/tx callbacks for the given port/device_id and queue combinations.
+Server sends the response back to the client about the status of the request that was processed.
+After the response is received from the server, client sockets will be closed.
+
+On a call to library API ``rte_pdump_uninit()``, library closes the pthread and the server socket.
+
+
+Use Case: Packet Capturing
+--------------------------
+
+app/pdump tool is developed based on this library to capture the packets
+in DPDK.
+Users can develop their own packet capturing application using new library
+if they wish to do so.
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 58c8ef9..7275a35 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -38,6 +38,9 @@ New Features
The size of the mempool structure is reduced if the per-lcore cache is disabled.
+* **Added packet capturing support.**
+ Users can now capture packets on dpdk ports using librte_pdump
+ and the app/pdump tool.
Resolved Issues
---------------
@@ -101,6 +104,7 @@ API Changes
ibadcrc, ibadlen, imcasts, fdirmatch, fdirmiss,
tx_pause_xon, rx_pause_xon, tx_pause_xoff, rx_pause_xoff.
+* The function ``rte_eth_dev_get_port_by_name`` is now a public API.
ABI Changes
-----------
@@ -112,6 +116,8 @@ ABI Changes
* The ``rte_port_source_params`` structure has new fields to support PCAP file.
It was already in release 16.04 with ``RTE_NEXT_ABI`` flag.
+* The ``rte_eth_dev_info`` structure has new fields ``nb_rx_queues`` and ``nb_tx_queues``
+ to support number of queues configured by software.
Shared Library Versions
-----------------------
diff --git a/doc/guides/sample_app_ug/index.rst b/doc/guides/sample_app_ug/index.rst
index 930f68c..96bb317 100644
--- a/doc/guides/sample_app_ug/index.rst
+++ b/doc/guides/sample_app_ug/index.rst
@@ -76,6 +76,7 @@ Sample Applications User Guide
ptpclient
performance_thread
ipsec_secgw
+ pdump
**Figures**
diff --git a/doc/guides/sample_app_ug/pdump.rst b/doc/guides/sample_app_ug/pdump.rst
new file mode 100644
index 0000000..60ea1f6
--- /dev/null
+++ b/doc/guides/sample_app_ug/pdump.rst
@@ -0,0 +1,109 @@
+
+.. BSD LICENSE
+ Copyright(c) 2016 Intel Corporation. All rights reserved.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+dpdk_pdump Application
+======================
+The dpdk_pdump application is a Data Plane Development Kit (DPDK) application
+that runs as a DPDK secondary process and is capable of enabling packet capturing
+on dpdk ports and capturing the packets.
+
+Running the Application
+-----------------------
+The application has a pdump command line option with various sub arguments inside.
+Parameters inside parentheses are mandatory.
+Parameters inside square brackets are optional.
+The user has to pass the packet capture parameters under the --pdump option; multiple
+--pdump options can be passed to capture packets on different port and queue combinations.
+
+.. code-block:: console
+
+ ./$(RTE_TARGET)/app/pdump -- --pdump '(port=<port_id> |
+ device_id=<pci address or device name>),
+ (queue=2), (rx-dev=<iface/path to pcap file> |
+ tx-dev=<iface/path to pcap file> |
+ rxtx-dev=<iface/path to pcap file>),
+ [ring-size=1024], [mbuf-size=2048], [total-num-mbufs=8191]'
+
+Parameters
+~~~~~~~~~~
+``--pdump``: Specifies arguments needed for packet capturing.
+
+``port``
+Port id of the eth device on which packets should be captured.
+
+``device_id``
+PCI address (or) name of the eth device on which packets should be captured.
+
+``queue``
+Queue id of the eth device on which packets should be captured.
+User can pass the queue value as ‘*’ if packet capturing has to be enabled
+on all queues of the eth device.
+
+``rx-dev``
+Can be either pcap file name or any linux iface onto which ingress side packets of
+dpdk eth device will be sent on for users to view.
+
+``tx-dev``
+Can be either pcap file name or any linux iface onto which egress side packets of
+dpdk eth device will be sent on for users to view.
+
+``rxtx-dev``
+Can be either pcap file name or any linux iface onto which both ingress &
+egress side packets of dpdk eth device will be sent on for users to view.
+
+Note:
+To receive ingress packets only, rx-dev should be passed.
+To receive egress packets only, tx-dev should be passed.
+To receive ingress and egress packets separately, both rx-dev and tx-dev should be passed.
+To receive both ingress and egress packets on the same device, only rxtx-dev should be passed.
+
+Pdump tool uses these devices internally to create PCAPPMD vdev having ``tx_stream``
+as either of these devices.
+
+``ring-size``
+Size of the ring. This value is used internally for ring creation.
+The ring will be used to enqueue the packets from primary application to secondary.
+
+``mbuf-size``
+Size of the mbuf data room. This is used internally for mempool creation.
+Ideally this value must be the same as that of the primary application's mempool which is used for
+packet rx.
+
+``total-num-mbufs``
+Total number of mbufs in the mempool. This is used internally for mempool creation.
+
+Example
+-------
+
+.. code-block:: console
+
+ $ sudo ./x86_64-native-linuxapp-gcc/app/dpdk_pdump -- --pdump 'port=0,queue=*,rxtx-dev=/tmp/rxtx-file.pcap'
--
2.5.0
^ permalink raw reply [relevance 6%]
* Re: [dpdk-dev] [PATCH v5] mempool: reduce rte_mempool structure size
2016-04-14 9:42 2% ` [dpdk-dev] [PATCH v5] " Olivier Matz
2016-04-14 13:28 0% ` Wiles, Keith
2016-04-14 13:53 0% ` Wiles, Keith
@ 2016-05-17 5:31 0% ` Thomas Monjalon
2 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2016-05-17 5:31 UTC (permalink / raw)
To: Olivier Matz, keith.wiles; +Cc: dev, pmatilai
2016-04-14 11:42, Olivier Matz:
> From: Keith Wiles <keith.wiles@intel.com>
>
> The rte_mempool structure is changed, which will cause an ABI change
> for this structure. Providing backward compat is not reasonable
> here as this structure is used in multiple defines/inlines.
The deprecation notice must be removed by this patch.
[...]
> +/**
> * Calculate the size of the mempool header.
> *
> * @param mp
> @@ -254,9 +256,9 @@ struct rte_mempool {
> * @param pgn
> * Number of pages used to store mempool objects.
A new parameter has been forgotten:
* @param cs
* Size of the per-lcore cache.
> */
> -#define MEMPOOL_HEADER_SIZE(mp, pgn) (sizeof(*(mp)) + \
> - RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
> - sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
> +#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
> + (sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
> + (sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
Applied with above changes
^ permalink raw reply [relevance 0%]
* [dpdk-dev] [PATCH 1/4] pmdinfo: Add buildtools and pmdinfo utility
@ 2016-05-16 20:41 2% ` Neil Horman
0 siblings, 0 replies; 200+ results
From: Neil Horman @ 2016-05-16 20:41 UTC (permalink / raw)
To: dev
Cc: Neil Horman, Bruce Richardson, Thomas Monjalon,
Stephen Hemminger, Panu Matilainen
pmdinfo is a tool used to parse object files and build json strings for use in
later determining hardware support in a dso or application binary. pmdinfo
looks for the non-exported symbol names this_pmd_name<n> and this_pmd_tbl<n>
(where n is an integer counter). It records the name of each of these tuples,
using the latter to find the symbolic name of the pci_table for physical devices
that the object supports. With this information, it outputs a C file with a
single line of the form:
static char *<pmd_name>_driver_info[] __attribute__((used)) = " \
PMD_DRIVER_INFO=<json string>";
Where <pmd_name> is the arbitrary name of the pmd, and <json string> is the json
encoded string that holds relevant pmd information, including the pmd name, type
and optional array of pci device/vendor ids that the driver supports.
This c file is suitable for compiling to object code, then relocatably linking
into the parent file from which the C was generated. This creates an entry in
the string table of the object that can inform a later tool about hardware
support.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Bruce Richardson <bruce.richardson@intel.com>
CC: Thomas Monjalon <thomas.monjalon@6wind.com>
CC: Stephen Hemminger <stephen@networkplumber.org>
CC: Panu Matilainen <pmatilai@redhat.com>
---
GNUmakefile | 2 +-
buildtools/Makefile | 36 ++++
buildtools/pmdinfo/Makefile | 48 +++++
buildtools/pmdinfo/pmdinfo.c | 435 +++++++++++++++++++++++++++++++++++++++++++
buildtools/pmdinfo/pmdinfo.h | 210 +++++++++++++++++++++
mk/rte.buildtools.mk | 148 +++++++++++++++
mk/rte.sdkbuild.mk | 3 +-
7 files changed, 880 insertions(+), 2 deletions(-)
create mode 100644 buildtools/Makefile
create mode 100644 buildtools/pmdinfo/Makefile
create mode 100644 buildtools/pmdinfo/pmdinfo.c
create mode 100644 buildtools/pmdinfo/pmdinfo.h
create mode 100644 mk/rte.buildtools.mk
diff --git a/GNUmakefile b/GNUmakefile
index b59e4b6..00fe0db 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -40,6 +40,6 @@ export RTE_SDK
# directory list
#
-ROOTDIRS-y := lib drivers app
+ROOTDIRS-y := buildtools lib drivers app
include $(RTE_SDK)/mk/rte.sdkroot.mk
diff --git a/buildtools/Makefile b/buildtools/Makefile
new file mode 100644
index 0000000..0f15d58
--- /dev/null
+++ b/buildtools/Makefile
@@ -0,0 +1,36 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-y += pmdinfo
+
+include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/buildtools/pmdinfo/Makefile b/buildtools/pmdinfo/Makefile
new file mode 100644
index 0000000..3dea68b
--- /dev/null
+++ b/buildtools/pmdinfo/Makefile
@@ -0,0 +1,48 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+APP = pmdinfo
+
+#
+# all sources are stored in SRCS-y
+#
+SRCS-y += pmdinfo.c
+
+#CFLAGS += $(WERROR_FLAGS) -g
+CFLAGS += -g
+
+include $(RTE_SDK)/mk/rte.buildtools.mk
+
diff --git a/buildtools/pmdinfo/pmdinfo.c b/buildtools/pmdinfo/pmdinfo.c
new file mode 100644
index 0000000..5e705ab
--- /dev/null
+++ b/buildtools/pmdinfo/pmdinfo.c
@@ -0,0 +1,435 @@
+/* Postprocess pmd object files to export hw support
+ *
+ * Copyright 2016 Neil Horman <nhorman@tuxdriver.com>
+ * Based in part on modpost.c from the linux kernel
+ *
+ * This software may be used and distributed according to the terms
+ * of the GNU General Public License V2, incorporated herein by reference.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <errno.h>
+#include "pmdinfo.h"
+
+
+/*
+ * Look up the name of a symbol in the ELF string table.
+ * A NULL symbol yields the placeholder string "(unknown)".
+ */
+static const char *sym_name(struct elf_info *elf, Elf_Sym *sym)
+{
+	return sym ? elf->strtab + sym->st_name : "(unknown)";
+}
+
+/*
+ * Map a whole file into memory as a private, writable mapping.
+ *
+ * On success the file length is stored in *size and the mapping is
+ * returned. NULL is returned when the file cannot be opened, stat'ed
+ * or mapped. The descriptor is closed before returning in every case.
+ */
+void *grab_file(const char *filename, unsigned long *size)
+{
+	struct stat sb;
+	void *addr = MAP_FAILED;
+	int fd = open(filename, O_RDONLY);
+
+	if (fd < 0)
+		return NULL;
+
+	if (fstat(fd, &sb) == 0) {
+		*size = sb.st_size;
+		addr = mmap(NULL, *size, PROT_READ|PROT_WRITE, MAP_PRIVATE,
+			    fd, 0);
+	}
+	close(fd);
+
+	return addr == MAP_FAILED ? NULL : addr;
+}
+
+/**
+ * Return a copy of the next line in a mmap'ed file.
+ * Whitespace at the beginning of the line (including blank lines) is
+ * trimmed away.
+ *
+ * @param pos   in/out byte offset into the file; left on the '\n' that
+ *              ended the returned line, so the next call skips it.
+ * @param file  start of the mapped file.
+ * @param size  number of valid bytes at file.
+ * @return pointer to a static buffer holding the line without its
+ *         newline, or NULL when the end of the buffer is reached before
+ *         a newline or when the line does not fit in the static buffer.
+ *         The buffer is overwritten by the next call.
+ **/
+char *get_next_line(unsigned long *pos, void *file, unsigned long size)
+{
+	static char line[4096];
+	/* unsigned char: <ctype.h> functions are undefined for negative values */
+	const unsigned char *p = (const unsigned char *)file + *pos;
+	size_t len = 0;
+	int skip = 1;
+
+	for (; *pos < size; (*pos)++, p++) {
+		if (skip && isspace(*p))
+			continue;
+		skip = 0;
+		if (*p == '\n') {
+			/* End of string */
+			line[len] = '\0';
+			return line;
+		}
+		line[len++] = (char)*p;
+		if (len >= sizeof(line))
+			break; /* Too long, stop */
+	}
+	/* End of buffer */
+	return NULL;
+}
+
+/*
+ * Unmap size bytes starting at file with munmap().
+ * parse_elf_finish() uses this to drop the mapping made by grab_file().
+ * The munmap() result is not checked.
+ */
+void release_file(void *file, unsigned long size)
+{
+ munmap(file, size);
+}
+
+
+/*
+ * Translate a symbol into a pointer to its data inside the mapped file:
+ * file base + file offset of the symbol's section + st_value.
+ * st_shndx is used directly as the section index; no special handling
+ * of reserved or extended section indices is done here.
+ */
+static void *get_sym_value(struct elf_info *info, const Elf_Sym *sym)
+{
+	char *base = (char *)info->hdr;
+
+	return base + info->sechdrs[sym->st_shndx].sh_offset + sym->st_value;
+}
+
+/*
+ * Find the next symbol whose name starts with name.
+ *
+ * The search begins right after last, or at the first symbol when last
+ * is NULL, and stops at info->symtab_stop. The comparison is a prefix
+ * match (strncmp over strlen(name) bytes), so "foo1" also matches a
+ * symbol named "foo10". Returns NULL when nothing matches.
+ */
+static Elf_Sym *find_sym_in_symtab(struct elf_info *info,
+				   const char *name, Elf_Sym *last)
+{
+	const size_t prefix_len = strlen(name);
+	Elf_Sym *cur = last ? last + 1 : info->symtab_start;
+
+	while (cur < info->symtab_stop) {
+		if (strncmp(sym_name(info, cur), name, prefix_len) == 0)
+			return cur;
+		cur++;
+	}
+	return NULL;
+}
+
+/*
+ * Map an object file and index its ELF section headers, string tables
+ * and symbol table into *info.
+ *
+ * Returns 1 when the file was parsed and 0 when it is too small, is not
+ * an ELF file, or is malformed. The process exits if the file cannot be
+ * mapped at all. Whenever this function returns, info->hdr/info->size
+ * describe the mapping, which parse_elf_finish() releases.
+ */
+static int parse_elf(struct elf_info *info, const char *filename)
+{
+	unsigned int i;
+	unsigned long max_sections;
+	Elf_Ehdr *hdr;
+	Elf_Shdr *sechdrs;
+	Elf_Sym *sym;
+	const char *secstrings;
+	unsigned int symtab_idx = ~0U, symtab_shndx_idx = ~0U;
+
+	/*
+	 * The caller may hand in an uninitialized struct. Zero it so that
+	 * every "not found" field (symtab_start, modinfo, drivers, ...)
+	 * really is NULL/0 instead of stack garbage.
+	 */
+	memset(info, 0, sizeof(*info));
+
+	hdr = grab_file(filename, &info->size);
+	if (!hdr) {
+		perror(filename);
+		exit(1);
+	}
+	info->hdr = hdr;
+	if (info->size < sizeof(*hdr)) {
+		/* file too small, assume this is an empty .o file */
+		return 0;
+	}
+	/* Is this a valid ELF file? */
+	if ((hdr->e_ident[EI_MAG0] != ELFMAG0) ||
+	    (hdr->e_ident[EI_MAG1] != ELFMAG1) ||
+	    (hdr->e_ident[EI_MAG2] != ELFMAG2) ||
+	    (hdr->e_ident[EI_MAG3] != ELFMAG3)) {
+		/* Not an ELF file - silently ignore it */
+		return 0;
+	}
+	/* Fix endianness in ELF header */
+	hdr->e_type      = TO_NATIVE(hdr->e_type);
+	hdr->e_machine   = TO_NATIVE(hdr->e_machine);
+	hdr->e_version   = TO_NATIVE(hdr->e_version);
+	hdr->e_entry     = TO_NATIVE(hdr->e_entry);
+	hdr->e_phoff     = TO_NATIVE(hdr->e_phoff);
+	hdr->e_shoff     = TO_NATIVE(hdr->e_shoff);
+	hdr->e_flags     = TO_NATIVE(hdr->e_flags);
+	hdr->e_ehsize    = TO_NATIVE(hdr->e_ehsize);
+	hdr->e_phentsize = TO_NATIVE(hdr->e_phentsize);
+	hdr->e_phnum     = TO_NATIVE(hdr->e_phnum);
+	hdr->e_shentsize = TO_NATIVE(hdr->e_shentsize);
+	hdr->e_shnum     = TO_NATIVE(hdr->e_shnum);
+	hdr->e_shstrndx  = TO_NATIVE(hdr->e_shstrndx);
+
+	/* Check if file offset is correct */
+	if (hdr->e_shoff > info->size) {
+		fprintf(stderr, "section header offset=%lu in file '%s' is bigger than "
+			"filesize=%lu\n", (unsigned long)hdr->e_shoff,
+			filename, info->size);
+		return 0;
+	}
+
+	/* Number of whole section headers that fit between e_shoff and EOF */
+	max_sections = (info->size - hdr->e_shoff) / sizeof(*sechdrs);
+	if (max_sections == 0) {
+		fprintf(stderr, "%s is truncated. No section header fits after "
+			"offset=%lu\n", filename, (unsigned long)hdr->e_shoff);
+		return 0;
+	}
+	sechdrs = (void *)hdr + hdr->e_shoff;
+	info->sechdrs = sechdrs;
+
+	if (hdr->e_shnum == SHN_UNDEF) {
+		/*
+		 * There are more than 64k sections,
+		 * read count from .sh_size.
+		 */
+		info->num_sections = TO_NATIVE(sechdrs[0].sh_size);
+	} else {
+		info->num_sections = hdr->e_shnum;
+	}
+	if (hdr->e_shstrndx == SHN_XINDEX)
+		info->secindex_strings = TO_NATIVE(sechdrs[0].sh_link);
+	else
+		info->secindex_strings = hdr->e_shstrndx;
+
+	/* The loops below write to every header, so the table must fit */
+	if (info->num_sections > max_sections) {
+		fprintf(stderr, "%s is truncated. %u section headers do not fit in "
+			"filesize=%lu\n", filename, info->num_sections,
+			info->size);
+		return 0;
+	}
+	if (info->secindex_strings >= info->num_sections) {
+		fprintf(stderr, "%s has bad section name table index %u\n",
+			filename, info->secindex_strings);
+		return 0;
+	}
+
+	/* Fix endianness in section headers */
+	for (i = 0; i < info->num_sections; i++) {
+		sechdrs[i].sh_name      = TO_NATIVE(sechdrs[i].sh_name);
+		sechdrs[i].sh_type      = TO_NATIVE(sechdrs[i].sh_type);
+		sechdrs[i].sh_flags     = TO_NATIVE(sechdrs[i].sh_flags);
+		sechdrs[i].sh_addr      = TO_NATIVE(sechdrs[i].sh_addr);
+		sechdrs[i].sh_offset    = TO_NATIVE(sechdrs[i].sh_offset);
+		sechdrs[i].sh_size      = TO_NATIVE(sechdrs[i].sh_size);
+		sechdrs[i].sh_link      = TO_NATIVE(sechdrs[i].sh_link);
+		sechdrs[i].sh_info      = TO_NATIVE(sechdrs[i].sh_info);
+		sechdrs[i].sh_addralign = TO_NATIVE(sechdrs[i].sh_addralign);
+		sechdrs[i].sh_entsize   = TO_NATIVE(sechdrs[i].sh_entsize);
+	}
+	/* Find symbol table. */
+	secstrings = (void *)hdr + sechdrs[info->secindex_strings].sh_offset;
+	for (i = 1; i < info->num_sections; i++) {
+		const char *secname;
+		int nobits = sechdrs[i].sh_type == SHT_NOBITS;
+
+		if (!nobits && sechdrs[i].sh_offset > info->size) {
+			/* Report the size that was actually compared against */
+			fprintf(stderr, "%s is truncated. sechdrs[i].sh_offset=%lu > "
+				"filesize=%lu\n", filename,
+				(unsigned long)sechdrs[i].sh_offset,
+				info->size);
+			return 0;
+		}
+		secname = secstrings + sechdrs[i].sh_name;
+		if (strcmp(secname, ".modinfo") == 0) {
+			if (nobits)
+				fprintf(stderr, "%s has NOBITS .modinfo\n", filename);
+			info->modinfo = (void *)hdr + sechdrs[i].sh_offset;
+			info->modinfo_len = sechdrs[i].sh_size;
+		} else if (strcmp(secname, "__ksymtab") == 0)
+			info->export_sec = i;
+		else if (strcmp(secname, "__ksymtab_unused") == 0)
+			info->export_unused_sec = i;
+		else if (strcmp(secname, "__ksymtab_gpl") == 0)
+			info->export_gpl_sec = i;
+		else if (strcmp(secname, "__ksymtab_unused_gpl") == 0)
+			info->export_unused_gpl_sec = i;
+		else if (strcmp(secname, "__ksymtab_gpl_future") == 0)
+			info->export_gpl_future_sec = i;
+
+		if (sechdrs[i].sh_type == SHT_SYMTAB) {
+			unsigned int sh_link_idx = sechdrs[i].sh_link;
+
+			/* Validate before publishing any symtab pointer */
+			if (sh_link_idx >= info->num_sections) {
+				fprintf(stderr, "%s: SYMTAB has bad sh_link: %u\n",
+					filename, sh_link_idx);
+				return 0;
+			}
+			if (sechdrs[i].sh_size >
+			    info->size - sechdrs[i].sh_offset) {
+				fprintf(stderr, "%s is truncated. symtab does not fit in "
+					"filesize=%lu\n", filename, info->size);
+				return 0;
+			}
+			symtab_idx = i;
+			info->symtab_start = (void *)hdr +
+				sechdrs[i].sh_offset;
+			info->symtab_stop = (void *)hdr +
+				sechdrs[i].sh_offset + sechdrs[i].sh_size;
+			info->strtab = (void *)hdr +
+				sechdrs[sh_link_idx].sh_offset;
+		}
+
+		/* 32bit section no. table? ("more than 64k sections") */
+		if (sechdrs[i].sh_type == SHT_SYMTAB_SHNDX) {
+			symtab_shndx_idx = i;
+			info->symtab_shndx_start = (void *)hdr +
+				sechdrs[i].sh_offset;
+			info->symtab_shndx_stop = (void *)hdr +
+				sechdrs[i].sh_offset + sechdrs[i].sh_size;
+		}
+	}
+	if (!info->symtab_start)
+		fprintf(stderr, "%s has no symtab?\n", filename);
+
+	/* Fix endianness in symbols (no-op when there is no symtab) */
+	for (sym = info->symtab_start; sym < info->symtab_stop; sym++) {
+		sym->st_shndx = TO_NATIVE(sym->st_shndx);
+		sym->st_name  = TO_NATIVE(sym->st_name);
+		sym->st_value = TO_NATIVE(sym->st_value);
+		sym->st_size  = TO_NATIVE(sym->st_size);
+	}
+
+	if (symtab_shndx_idx != ~0U) {
+		Elf32_Word *p;
+
+		if (symtab_idx != sechdrs[symtab_shndx_idx].sh_link)
+			fprintf(stderr, "%s: SYMTAB_SHNDX has bad sh_link: %u!=%u\n",
+				filename, sechdrs[symtab_shndx_idx].sh_link,
+				symtab_idx);
+		/* Fix endianness */
+		for (p = info->symtab_shndx_start; p < info->symtab_shndx_stop;
+		     p++)
+			*p = TO_NATIVE(*p);
+	}
+
+	return 1;
+}
+
+/*
+ * Release everything owned by *info: the file mapping and every node
+ * of the info->drivers list.
+ */
+static void parse_elf_finish(struct elf_info *info)
+{
+	struct pmd_driver *drv = info->drivers;
+	struct pmd_driver *next;
+
+	release_file(info->hdr, info->size);
+	for (; drv != NULL; drv = next) {
+		/* read the link before the node is freed */
+		next = drv->next;
+		free(drv);
+	}
+}
+
+/*
+ * Return the name of section secindex, read from the section name
+ * string table (the section at index elf->secindex_strings).
+ */
+static const char *sec_name(struct elf_info *elf, int secindex)
+{
+	const Elf_Shdr *strsec = &elf->sechdrs[elf->secindex_strings];
+	const char *names = (const char *)elf->hdr + strsec->sh_offset;
+
+	return names + elf->sechdrs[secindex].sh_name;
+}
+
+/*
+ * Extract the numeric index embedded in a symbol name: the decimal
+ * number starting at the first digit found in the name.
+ *
+ * Returns that number, or -1 when the name contains no digit or the
+ * number does not fit in an int.
+ */
+static int get_symbol_index(struct elf_info *info, Elf64_Sym *sym)
+{
+	const char *idx;
+
+	/* Stop at the terminating NUL; the pointer itself is never NULL */
+	for (idx = sym_name(info, sym); *idx != '\0'; idx++) {
+		long val;
+
+		if (!isdigit((unsigned char)*idx))
+			continue;
+
+		errno = 0;
+		val = strtol(idx, NULL, 10);
+		if (errno == ERANGE || val > INT_MAX)
+			return -1;
+		return (int)val;
+	}
+	return -1;
+}
+
+/*
+ * Fill in a pmd_driver whose name_sym has already been located.
+ *
+ * The index N embedded in the name symbol selects the companion symbol
+ * "this_pmd_driver<N>". Its data is read as a string naming the PCI id
+ * table symbol. When no "this_pmd_driver<N>" symbol exists the entry
+ * is treated as a virtual device and pci_tbl is set to NULL.
+ *
+ * NOTE(review): find_sym_in_symtab() does a prefix match, so
+ * "this_pmd_driver1" can also match "this_pmd_driver10" - confirm
+ * whether objects with ten or more drivers can occur.
+ *
+ * Returns 0 on success, -ENOENT when the index or the PCI table symbol
+ * cannot be resolved.
+ */
+static int complete_pmd_entry(struct elf_info *info, struct pmd_driver *drv)
+{
+	const char *tname;
+	int i = get_symbol_index(info, drv->name_sym);
+	char drvsym[128];
+
+	if (i == -1)
+		return -ENOENT;
+
+	drv->name = get_sym_value(info, drv->name_sym);
+
+	/* Bounded formatting; drvsym is always NUL-terminated */
+	snprintf(drvsym, sizeof(drvsym), "this_pmd_driver%d", i);
+
+	drv->driver = find_sym_in_symtab(info, drvsym, NULL);
+
+	/*
+	 * If this returns NULL, then this is a PMD_VDEV, because
+	 * it has no pci table reference
+	 */
+	if (!drv->driver) {
+		drv->pci_tbl_sym = NULL;
+		drv->pci_tbl = NULL;
+		return 0;
+	}
+
+	tname = get_sym_value(info, drv->driver);
+	drv->pci_tbl_sym = find_sym_in_symtab(info, tname, NULL);
+
+	if (!drv->pci_tbl_sym)
+		return -ENOENT;
+
+	drv->pci_tbl = (struct rte_pci_id *)get_sym_value(info, drv->pci_tbl_sym);
+	if (!drv->pci_tbl)
+		return -ENOENT;
+
+	return 0;
+}
+
+/*
+ * Build info->drivers: one pmd_driver per symbol whose name starts
+ * with "this_pmd_name". Entries that cannot be completed are reported
+ * on stderr and dropped; the scan then continues with the next symbol.
+ *
+ * Returns 0 when the scan finished, -ENOMEM when an entry could not be
+ * allocated (entries found so far stay on the list).
+ */
+static int locate_pmd_entries(struct elf_info *info)
+{
+	Elf_Sym *last = NULL;
+	struct pmd_driver *new;
+
+	info->drivers = NULL;
+
+	/* Allocate only once a matching symbol is known to exist */
+	while ((last = find_sym_in_symtab(info, "this_pmd_name", last)) != NULL) {
+		new = calloc(1, sizeof(*new));
+		if (!new) {
+			fprintf(stderr, "Unable to allocate pmd entry\n");
+			return -ENOMEM;
+		}
+		new->name_sym = last;
+
+		if (complete_pmd_entry(info, new)) {
+			fprintf(stderr, "Failed to complete pmd entry\n");
+			free(new);
+			continue;
+		}
+		/* push on the head of the list */
+		new->next = info->drivers;
+		info->drivers = new;
+	}
+	return 0;
+}
+
+/*
+ * Write one C string definition per located driver to outfile.
+ *
+ * Each definition is named <driver name>_pmd_info and holds a
+ * "PMD_INFO_STRING= {...}" record with the driver name, its type
+ * (PMD_PDEV when a PCI table was found, PMD_VDEV otherwise) and the
+ * PCI ids. The id list ends at the first entry whose device_id is 0.
+ * If outfile cannot be opened an error is printed and nothing is
+ * written.
+ */
+static void output_pmd_info_string(struct elf_info *info, char *outfile)
+{
+	const struct pmd_driver *drv;
+	const struct rte_pci_id *id;
+	FILE *ofd = fopen(outfile, "w+");
+
+	if (!ofd) {
+		fprintf(stderr, "Unable to open output file\n");
+		return;
+	}
+
+	for (drv = info->drivers; drv != NULL; drv = drv->next) {
+		const char *type = drv->pci_tbl ? "PMD_PDEV" : "PMD_VDEV";
+
+		fprintf(ofd, "const char %s_pmd_info[] __attribute__((used)) = \"PMD_INFO_STRING= {",
+			drv->name);
+		fprintf(ofd,"\\\"name\\\" : \\\"%s\\\", ", drv->name);
+		fprintf(ofd,"\\\"type\\\" : \\\"%s\\\", ", type);
+		fprintf(ofd, "\\\"pci_ids\\\" : [");
+
+		for (id = drv->pci_tbl; id && id->device_id; id++) {
+			fprintf(ofd, "[%d, %d, %d, %d]",
+				id->vendor_id, id->device_id,
+				id->subsystem_vendor_id,
+				id->subsystem_device_id);
+			/* "," between entries, " " after the last one */
+			fputs(id[1].device_id ? "," : " ", ofd);
+		}
+		fprintf(ofd, "]}\";");
+	}
+
+	fclose(ofd);
+}
+
+/*
+ * pmdinfo <object file> <c output file>
+ *
+ * Parse the object file, collect its PMD entries and write their info
+ * strings to the output file.
+ *
+ * Exit status: 0 when at least one driver was written, 1 when the
+ * object could not be parsed or holds no drivers, 127 on bad usage.
+ */
+int main(int argc, char **argv)
+{
+	/* Zeroed so that cleanup is safe whatever parse_elf() filled in */
+	struct elf_info info = {0};
+	int rc = 1;
+
+	if (argc < 3) {
+		fprintf(stderr, "usage: pmdinfo <object file> <c output file>\n");
+		exit(127);
+	}
+
+	/* Do not walk the symbol table of something that did not parse */
+	if (!parse_elf(&info, argv[1])) {
+		fprintf(stderr, "%s: not a usable ELF object\n", argv[1]);
+		parse_elf_finish(&info);
+		exit(rc);
+	}
+
+	locate_pmd_entries(&info);
+
+	if (info.drivers) {
+		output_pmd_info_string(&info, argv[2]);
+		rc = 0;
+	} else {
+		fprintf(stderr, "Hmm, Appears to be a driver but no drivers registered\n");
+	}
+
+	parse_elf_finish(&info);
+	exit(rc);
+}
diff --git a/buildtools/pmdinfo/pmdinfo.h b/buildtools/pmdinfo/pmdinfo.h
new file mode 100644
index 0000000..5dafb67
--- /dev/null
+++ b/buildtools/pmdinfo/pmdinfo.h
@@ -0,0 +1,210 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <elf.h>
+
+
+/* On BSD-alike OSes elf.h defines these according to host's word size */
+#undef ELF_ST_BIND
+#undef ELF_ST_TYPE
+#undef ELF_R_SYM
+#undef ELF_R_TYPE
+
+#if 0
+
+#define Elf_Ehdr Elf32_Ehdr
+#define Elf_Shdr Elf32_Shdr
+#define Elf_Sym Elf32_Sym
+#define Elf_Addr Elf32_Addr
+#define Elf_Sword Elf64_Sword
+#define Elf_Section Elf32_Half
+#define ELF_ST_BIND ELF32_ST_BIND
+#define ELF_ST_TYPE ELF32_ST_TYPE
+
+#define Elf_Rel Elf32_Rel
+#define Elf_Rela Elf32_Rela
+#define ELF_R_SYM ELF32_R_SYM
+#define ELF_R_TYPE ELF32_R_TYPE
+#else
+
+#define Elf_Ehdr Elf64_Ehdr
+#define Elf_Shdr Elf64_Shdr
+#define Elf_Sym Elf64_Sym
+#define Elf_Addr Elf64_Addr
+#define Elf_Sword Elf64_Sxword
+#define Elf_Section Elf64_Half
+#define ELF_ST_BIND ELF64_ST_BIND
+#define ELF_ST_TYPE ELF64_ST_TYPE
+
+#define Elf_Rel Elf64_Rel
+#define Elf_Rela Elf64_Rela
+#define ELF_R_SYM ELF64_R_SYM
+#define ELF_R_TYPE ELF64_R_TYPE
+#endif
+
+/* The 64-bit MIPS ELF ABI uses an unusual reloc format. */
+typedef struct
+{
+ Elf32_Word r_sym; /* Symbol index */
+ unsigned char r_ssym; /* Special symbol for 2nd relocation */
+ unsigned char r_type3; /* 3rd relocation type */
+ unsigned char r_type2; /* 2nd relocation type */
+ unsigned char r_type1; /* 1st relocation type */
+} _Elf64_Mips_R_Info;
+
+typedef union
+{
+ Elf64_Xword r_info_number;
+ _Elf64_Mips_R_Info r_info_fields;
+} _Elf64_Mips_R_Info_union;
+
+#define ELF64_MIPS_R_SYM(i) \
+ ((__extension__ (_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_sym)
+
+#define ELF64_MIPS_R_TYPE(i) \
+ ((__extension__ (_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_type1)
+
+#if 0
+
+static inline void __endian(const void *src, void *dest, unsigned int size)
+{
+ unsigned int i;
+ for (i = 0; i < size; i++)
+ ((unsigned char*)dest)[i] = ((unsigned char*)src)[size - i-1];
+}
+
+#define TO_NATIVE(x) \
+({ \
+ typeof(x) __x; \
+ __endian(&(x), &(__x), sizeof(__x)); \
+ __x; \
+})
+
+#else /* endianness matches */
+
+#define TO_NATIVE(x) (x)
+
+#endif
+
+#define NOFAIL(ptr) do_nofail((ptr), #ptr)
+void *do_nofail(void *ptr, const char *expr);
+
+struct buffer {
+ char *p;
+ int pos;
+ int size;
+};
+
+void __attribute__((format(printf, 2, 3)))
+buf_printf(struct buffer *buf, const char *fmt, ...);
+
+void
+buf_write(struct buffer *buf, const char *s, int len);
+
+struct module {
+ struct module *next;
+ const char *name;
+ int gpl_compatible;
+ struct symbol *unres;
+ int seen;
+ int skip;
+ int has_init;
+ int has_cleanup;
+ struct buffer dev_table_buf;
+ char srcversion[25];
+ int is_dot_o;
+};
+
+struct rte_pci_id {
+ uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */
+ uint16_t device_id; /**< Device ID or PCI_ANY_ID. */
+ uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */
+ uint16_t subsystem_device_id; /**< Subsystem device ID or PCI_ANY_ID. */
+};
+
+/*
+ * One PMD found in the object file. Nodes are allocated by
+ * locate_pmd_entries(), linked through next into elf_info.drivers and
+ * freed by parse_elf_finish().
+ */
+struct pmd_driver {
+ /* symbol matching "this_pmd_driver<N>"; NULL when none was found */
+ Elf_Sym *driver;
+ /* symbol whose name starts with the string stored in driver's data */
+ Elf_Sym *pci_tbl_sym;
+ /* symbol whose name starts with "this_pmd_name" */
+ Elf_Sym *name_sym;
+ /* data of pci_tbl_sym inside the mapped file; NULL when driver is NULL */
+ struct rte_pci_id *pci_tbl;
+ /* next entry in the list, NULL at the end */
+ struct pmd_driver *next;
+ /* data of name_sym inside the mapped file, printed as the driver name */
+ const char *name;
+};
+
+/*
+ * State for one parsed object file. Filled in by parse_elf(); the
+ * mapping and the drivers list are released by parse_elf_finish().
+ */
+struct elf_info {
+ /* length of the mapped file in bytes, as stored by grab_file() */
+ unsigned long size;
+ /* start of the file mapping, which is also the ELF header */
+ Elf_Ehdr *hdr;
+ /* section header table: hdr + e_shoff */
+ Elf_Shdr *sechdrs;
+ /* first symbol / one past the last symbol of the SHT_SYMTAB section */
+ Elf_Sym *symtab_start;
+ Elf_Sym *symtab_stop;
+ /* indices of the __ksymtab* sections; recorded by parse_elf() only */
+ Elf_Section export_sec;
+ Elf_Section export_unused_sec;
+ Elf_Section export_gpl_sec;
+ Elf_Section export_unused_gpl_sec;
+ Elf_Section export_gpl_future_sec;
+ /* string table of the symtab (the section named by its sh_link) */
+ char *strtab;
+ /* contents and length of the .modinfo section, when present */
+ char *modinfo;
+ unsigned int modinfo_len;
+
+ /* support for 32bit section numbers */
+
+ unsigned int num_sections; /* max_secindex + 1 */
+ /* index of the section holding the section names */
+ unsigned int secindex_strings;
+ /* if Nth symbol table entry has .st_shndx = SHN_XINDEX,
+ * take shndx from symtab_shndx_start[N] instead */
+ Elf32_Word *symtab_shndx_start;
+ Elf32_Word *symtab_shndx_stop;
+
+ /* head of the list built by locate_pmd_entries() */
+ struct pmd_driver *drivers;
+};
+
+/*
+ * Tell whether i is a reserved section index, i.e. lies in
+ * SHN_LORESERVE..SHN_HIRESERVE. SHN_XINDEX is deliberately excluded.
+ */
+static inline int is_shndx_special(unsigned int i)
+{
+	if (i == SHN_XINDEX)
+		return 0;
+	return i >= SHN_LORESERVE && i <= SHN_HIRESERVE;
+}
+
+/*
+ * Move reserved section indices SHN_LORESERVE..SHN_HIRESERVE out of
+ * the way to -256..-1, to avoid conflicting with real section
+ * indices.
+ */
+#define SPECIAL(i) ((i) - (SHN_HIRESERVE + 1))
+
+/*
+ * Accessor for sym->st_shndx that hides the "64k sections" encoding:
+ *  - SHN_XINDEX: the real index is read from the symtab_shndx table at
+ *    the symbol's position in the symbol table;
+ *  - other reserved indices are remapped through SPECIAL();
+ *  - anything else is returned unchanged.
+ */
+static inline unsigned int get_secindex(const struct elf_info *info,
+					 const Elf_Sym *sym)
+{
+	const unsigned int shndx = sym->st_shndx;
+
+	if (shndx == SHN_XINDEX)
+		return info->symtab_shndx_start[sym - info->symtab_start];
+
+	return is_shndx_special(shndx) ? SPECIAL(shndx) : shndx;
+}
+
+/* file2alias.c */
+extern unsigned int cross_build;
+void handle_moddevtable(struct module *mod, struct elf_info *info,
+ Elf_Sym *sym, const char *symname);
+void add_moddevtable(struct buffer *buf, struct module *mod);
+
+/* sumversion.c */
+void maybe_frob_rcs_version(const char *modfilename,
+ char *version,
+ void *modinfo,
+ unsigned long modinfo_offset);
+void get_src_version(const char *modname, char sum[], unsigned sumlen);
+
+/* from modpost.c */
+void *grab_file(const char *filename, unsigned long *size);
+char* get_next_line(unsigned long *pos, void *file, unsigned long size);
+void release_file(void *file, unsigned long size);
+
+void fatal(const char *fmt, ...);
+void warn(const char *fmt, ...);
+void merror(const char *fmt, ...);
diff --git a/mk/rte.buildtools.mk b/mk/rte.buildtools.mk
new file mode 100644
index 0000000..e8bfcef
--- /dev/null
+++ b/mk/rte.buildtools.mk
@@ -0,0 +1,148 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+# Copyright(c) 2014-2015 6WIND S.A.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/internal/rte.compile-pre.mk
+include $(RTE_SDK)/mk/internal/rte.install-pre.mk
+include $(RTE_SDK)/mk/internal/rte.clean-pre.mk
+include $(RTE_SDK)/mk/internal/rte.build-pre.mk
+include $(RTE_SDK)/mk/internal/rte.depdirs-pre.mk
+
+# VPATH contains at least SRCDIR
+VPATH += $(SRCDIR)
+
+_BUILD = $(APP)
+_INSTALL = $(INSTALL-FILES-y) $(SYMLINK-FILES-y)
+_INSTALL += $(RTE_OUTPUT)/buildtools/$(APP) $(RTE_OUTPUT)/buildtools/$(APP).map
+POSTINSTALL += target-appinstall
+_CLEAN = doclean
+POSTCLEAN += target-appclean
+
+.PHONY: all
+all: install
+
+.PHONY: install
+install: build _postinstall
+
+_postinstall: build
+
+.PHONY: build
+build: _postbuild
+
+exe2cmd = $(strip $(call dotfile,$(patsubst %,%.cmd,$(1))))
+
+ifeq ($(LINK_USING_CC),1)
+override EXTRA_LDFLAGS := $(call linkerprefix,$(EXTRA_LDFLAGS))
+O_TO_EXE = $(CC) $(CFLAGS) $(LDFLAGS_$(@)) \
+ -Wl,-Map=$(@).map,--cref -o $@ $(OBJS-y) $(call linkerprefix,$(LDFLAGS)) \
+ $(EXTRA_LDFLAGS) $(call linkerprefix,$(LDLIBS))
+else
+O_TO_EXE = $(LD) $(LDFLAGS) $(LDFLAGS_$(@)) $(EXTRA_LDFLAGS) \
+ -Map=$(@).map --cref -o $@ $(OBJS-y) $(LDLIBS)
+endif
+O_TO_EXE_STR = $(subst ','\'',$(O_TO_EXE)) #'# fix syntax highlight
+O_TO_EXE_DISP = $(if $(V),"$(O_TO_EXE_STR)"," LD $(@)")
+O_TO_EXE_CMD = "cmd_$@ = $(O_TO_EXE_STR)"
+O_TO_EXE_DO = @set -e; \
+ echo $(O_TO_EXE_DISP); \
+ $(O_TO_EXE) && \
+ echo $(O_TO_EXE_CMD) > $(call exe2cmd,$(@))
+
+-include .$(APP).cmd
+
+# path where libraries are retrieved
+LDLIBS_PATH := $(subst -Wl$(comma)-L,,$(filter -Wl$(comma)-L%,$(LDLIBS)))
+LDLIBS_PATH += $(subst -L,,$(filter -L%,$(LDLIBS)))
+
+# list of .a files that are linked to this application
+LDLIBS_NAMES := $(patsubst -l%,lib%.a,$(filter -l%,$(LDLIBS)))
+LDLIBS_NAMES += $(patsubst -Wl$(comma)-l%,lib%.a,$(filter -Wl$(comma)-l%,$(LDLIBS)))
+
+# list of found libraries files (useful for deps). If not found, the
+# library is silently ignored and dep won't be checked
+LDLIBS_FILES := $(wildcard $(foreach dir,$(LDLIBS_PATH),\
+ $(addprefix $(dir)/,$(LDLIBS_NAMES))))
+
+#
+# Compile executable file if needed
+#
+$(APP): $(OBJS-y) $(LDLIBS_FILES) $(DEP_$(APP)) $(LDSCRIPT) FORCE
+ @[ -d $(dir $@) ] || mkdir -p $(dir $@)
+ $(if $(D),\
+ @echo -n "$< -> $@ " ; \
+ echo -n "file_missing=$(call boolean,$(file_missing)) " ; \
+ echo -n "cmdline_changed=$(call boolean,$(call cmdline_changed,$(O_TO_EXE_STR))) " ; \
+ echo -n "depfile_missing=$(call boolean,$(depfile_missing)) " ; \
+ echo "depfile_newer=$(call boolean,$(depfile_newer)) ")
+ $(if $(or \
+ $(file_missing),\
+ $(call cmdline_changed,$(O_TO_EXE_STR)),\
+ $(depfile_missing),\
+ $(depfile_newer)),\
+ $(O_TO_EXE_DO))
+
+#
+# install app in $(RTE_OUTPUT)/buildtools
+#
+$(RTE_OUTPUT)/buildtools/$(APP): $(APP)
+ @echo " INSTALL-APP $(APP)"
+ @[ -d $(RTE_OUTPUT)/buildtools ] || mkdir -p $(RTE_OUTPUT)/buildtools
+ $(Q)cp -f $(APP) $(RTE_OUTPUT)/buildtools
+
+#
+# install app map file in $(RTE_OUTPUT)/buildtools
+#
+$(RTE_OUTPUT)/buildtools/$(APP).map: $(APP)
+ @echo " INSTALL-MAP $(APP).map"
+ @[ -d $(RTE_OUTPUT)/buildtools ] || mkdir -p $(RTE_OUTPUT)/buildtools
+ $(Q)cp -f $(APP).map $(RTE_OUTPUT)/buildtools
+
+#
+# Clean all generated files
+#
+.PHONY: clean
+clean: _postclean
+ $(Q)rm -f $(_BUILD_TARGETS) $(_INSTALL_TARGETS) $(_CLEAN_TARGETS)
+
+.PHONY: doclean
+doclean:
+ $(Q)rm -rf $(APP) $(OBJS-all) $(DEPS-all) $(DEPSTMP-all) \
+ $(CMDS-all) $(INSTALL-FILES-all) .$(APP).cmd
+
+
+include $(RTE_SDK)/mk/internal/rte.compile-post.mk
+include $(RTE_SDK)/mk/internal/rte.install-post.mk
+include $(RTE_SDK)/mk/internal/rte.clean-post.mk
+include $(RTE_SDK)/mk/internal/rte.build-post.mk
+include $(RTE_SDK)/mk/internal/rte.depdirs-post.mk
+
+.PHONY: FORCE
+FORCE:
diff --git a/mk/rte.sdkbuild.mk b/mk/rte.sdkbuild.mk
index eec5241..fb68af2 100644
--- a/mk/rte.sdkbuild.mk
+++ b/mk/rte.sdkbuild.mk
@@ -64,7 +64,8 @@ build: $(ROOTDIRS-y)
clean: $(CLEANDIRS)
@rm -rf $(RTE_OUTPUT)/include $(RTE_OUTPUT)/app \
$(RTE_OUTPUT)/hostapp $(RTE_OUTPUT)/lib \
- $(RTE_OUTPUT)/hostlib $(RTE_OUTPUT)/kmod
+ $(RTE_OUTPUT)/hostlib $(RTE_OUTPUT)/kmod \
+ $(RTE_OUTPUT)/buildtools
@[ -d $(RTE_OUTPUT)/include ] || mkdir -p $(RTE_OUTPUT)/include
@$(RTE_SDK)/scripts/gen-config-h.sh $(RTE_OUTPUT)/.config \
> $(RTE_OUTPUT)/include/rte_config.h
--
2.5.5
^ permalink raw reply [relevance 2%]
* Re: [dpdk-dev] [PATCH 2/2] doc: announce ABI change of struct rte_port_sink_params
2016-05-16 13:18 18% ` [dpdk-dev] [PATCH 2/2] doc: announce ABI change of struct rte_port_sink_params Fan Zhang
@ 2016-05-16 13:57 9% ` Panu Matilainen
0 siblings, 0 replies; 200+ results
From: Panu Matilainen @ 2016-05-16 13:57 UTC (permalink / raw)
To: Fan Zhang, dev
On 05/16/2016 04:18 PM, Fan Zhang wrote:
> The ABI changes are planned for rte_port_sink_params, which will be
> supported from release 16.11. Here announces that ABI changes in detail.
>
> Signed-off-by: Fan Zhang <roy.fan.zhang@intel.com>
> Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
> ---
> doc/guides/rel_notes/deprecation.rst | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
> index d228bae..d2f7306 100644
> --- a/doc/guides/rel_notes/deprecation.rst
> +++ b/doc/guides/rel_notes/deprecation.rst
> @@ -78,3 +78,7 @@ Deprecation Notices
> * ABI will change for rte_port_source_params struct. The member file_name
> data type will be changed from char * to const char *. This change targets
> release 16.11.
> +
> +* ABI will change for rte_port_sink_params struct. The member file_name
> + data type will be changed from char * to const char *. This change targets
> + release 16.11.
>
Surely such a minor change doesn't require two separate announcements or
patches for that matter. In fact I doubt it's an ABI change at all
(although it is an API change certainly).
However to me the bigger issue is that a change like this alone doesn't
seem like worth breaking the ABI. The ABI was just broken in 16.04 to
introduce these struct members (among other things) and to break the ABI
again just to fixup a const-correctness issue seems a bit much. Could it
maybe wait until there's some actually compelling reason to break the ABI?
Note that I'm not against the change as such, const-correctness is a
good thing.
- Panu -
^ permalink raw reply [relevance 9%]
* [dpdk-dev] [PATCH 2/2] doc: announce ABI change of struct rte_port_sink_params
2016-05-16 13:18 9% [dpdk-dev] [PATCH 0/2] doc: announce ABI change of struct rte_port_source_params Fan Zhang
2016-05-16 13:18 18% ` [dpdk-dev] [PATCH 1/2] " Fan Zhang
@ 2016-05-16 13:18 18% ` Fan Zhang
2016-05-16 13:57 9% ` Panu Matilainen
2016-05-19 14:18 20% ` [dpdk-dev] [PATCH v2] doc: announce ABI change of struct rte_port_source_params and rte_port_sink_params Fan Zhang
2 siblings, 1 reply; 200+ results
From: Fan Zhang @ 2016-05-16 13:18 UTC (permalink / raw)
To: dev
ABI changes are planned for rte_port_sink_params and will take effect
in release 16.11. This patch announces those ABI changes in detail.
Signed-off-by: Fan Zhang <roy.fan.zhang@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
doc/guides/rel_notes/deprecation.rst | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index d228bae..d2f7306 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -78,3 +78,7 @@ Deprecation Notices
* ABI will change for rte_port_source_params struct. The member file_name
data type will be changed from char * to const char *. This change targets
release 16.11.
+
+* ABI will change for rte_port_sink_params struct. The member file_name
+ data type will be changed from char * to const char *. This change targets
+ release 16.11.
--
2.5.5
^ permalink raw reply [relevance 18%]
* [dpdk-dev] [PATCH 1/2] doc: announce ABI change of struct rte_port_source_params
2016-05-16 13:18 9% [dpdk-dev] [PATCH 0/2] doc: announce ABI change of struct rte_port_source_params Fan Zhang
@ 2016-05-16 13:18 18% ` Fan Zhang
2016-05-16 13:18 18% ` [dpdk-dev] [PATCH 2/2] doc: announce ABI change of struct rte_port_sink_params Fan Zhang
2016-05-19 14:18 20% ` [dpdk-dev] [PATCH v2] doc: announce ABI change of struct rte_port_source_params and rte_port_sink_params Fan Zhang
2 siblings, 0 replies; 200+ results
From: Fan Zhang @ 2016-05-16 13:18 UTC (permalink / raw)
To: dev
ABI changes are planned for rte_port_source_params and will take effect
in release 16.11. This patch announces those ABI changes in detail.
Signed-off-by: Fan Zhang <roy.fan.zhang@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
doc/guides/rel_notes/deprecation.rst | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index fffe9c7..d228bae 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -74,3 +74,7 @@ Deprecation Notices
a handle, like the way kernel exposes an fd to user for locating a
specific file, and to keep all major structures internally, so that
we are likely to be free from ABI violations in future.
+
+* ABI will change for rte_port_source_params struct. The member file_name
+ data type will be changed from char * to const char *. This change targets
+ release 16.11.
--
2.5.5
^ permalink raw reply [relevance 18%]
* [dpdk-dev] [PATCH 0/2] doc: announce ABI change of struct rte_port_source_params
@ 2016-05-16 13:18 9% Fan Zhang
2016-05-16 13:18 18% ` [dpdk-dev] [PATCH 1/2] " Fan Zhang
` (2 more replies)
0 siblings, 3 replies; 200+ results
From: Fan Zhang @ 2016-05-16 13:18 UTC (permalink / raw)
To: dev
ABI changes are planned for rte_port_source_params and
rte_port_sink_params and will take effect in release 16.11. This series
announces those ABI changes in detail.
Fan Zhang (2):
doc: announce ABI change of struct rte_port_source_params
doc: announce ABI change of struct rte_port_sink_params
doc/guides/rel_notes/deprecation.rst | 8 ++++++++
1 file changed, 8 insertions(+)
--
2.5.5
^ permalink raw reply [relevance 9%]
* Re: [dpdk-dev] Ring PMD: why are stats counters atomic?
@ 2016-05-16 13:16 3% ` Bruce Richardson
0 siblings, 0 replies; 200+ results
From: Bruce Richardson @ 2016-05-16 13:16 UTC (permalink / raw)
To: Mauricio Vásquez; +Cc: dev
On Mon, May 16, 2016 at 03:12:10PM +0200, Mauricio Vásquez wrote:
> Hello Bruce,
>
> Although having this support does not harm anyone, I am not convinced that
> it is useful, mainly because there exists the single-thread limitation in
> other PMDs. Then, if an application has to use different kind of NICs (i.e,
> different PMDs) it has to implement the locking strategies. On the other
> hand, if an application only uses rte_rings, it could just use the
> rte_ring library.
>
> Thanks, Mauricio V
>
I agree.
If you want, please submit a patch to remove this behaviour and see
if anyone objects to it. If there are no objections, I have no problem accepting
the patch.
However, since this is a behaviour change to existing functionality, we may
need to implement function versioning for this for ABI compatibility. Please
take that into account when drafting any patch.
Regards,
/Bruce
> On Tue, May 10, 2016 at 11:36 AM, Bruce Richardson <
> bruce.richardson@intel.com> wrote:
>
> > On Tue, May 10, 2016 at 11:13:08AM +0200, Mauricio Vásquez wrote:
> > > Hello,
> > >
> > > Per-queue stats counters are defined as rte_atomic64_t, in the tx/rx
> > > functions, they are atomically increased if the rings have the multiple
> > > consumers/producer flag enabled.
> > >
> > > According to the design principles, the application should not invoke
> > those
> > > functions on the same queue on different cores, then I think that atomic
> > > increasing is not necessary.
> > >
> > > Is there something wrong with my reasoning?, If not, I am willing to
> > send a
> > > patch.
> > >
> > > Thank you very much,
> > >
> > Since the rte_rings, on which the ring pmd is obviously based, have
> > multi-producer
> > and multi-consumer support built-in, I thought it might be useful in the
> > ring
> > PMD itself to allow multiple threads to access the ring queues at the same
> > time,
> > if the underlying rings are marked as MP/MC safe. When doing enqueues and
> > dequeue
> > from the ring, the stats are either incremented atomically, or
> > non-atomically,
> > depending on the underlying queue type.
> >
> > const uint16_t nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng,
> > ptrs, nb_bufs);
> > if (r->rng->flags & RING_F_SC_DEQ)
> > r->rx_pkts.cnt += nb_rx;
> > else
> > rte_atomic64_add(&(r->rx_pkts), nb_rx);
> >
> > If people don't think this behaviour is worthwhile keeping, I'm ok with
> > removing
> > it, since all other PMDs have the restriction that the queues are
> > single-thread
> > only.
> >
> > Regards,
> > /Bruce
> >
^ permalink raw reply [relevance 3%]
* [dpdk-dev] [PATCH] doc: move rel_notes instructions as comments
@ 2016-05-13 13:27 16% Olivier Matz
0 siblings, 0 replies; 200+ results
From: Olivier Matz @ 2016-05-13 13:27 UTC (permalink / raw)
To: dev; +Cc: john.mcnamara
We don't want to have these instructions in the generated docs, so use
comments. It's also less confusing for people adding entries in the
documentation.
Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
---
doc/guides/rel_notes/release_16_07.rst | 86 +++++++++++++++++-----------------
1 file changed, 43 insertions(+), 43 deletions(-)
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index f6d543c..71d3540 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -1,50 +1,50 @@
DPDK Release 16.07
==================
-**Read this first.**
+.. **Read this first.**
-The text below explains how to update the release notes.
+ The text below explains how to update the release notes.
-Use proper spelling, capitalization and punctuation in all sections.
+ Use proper spelling, capitalization and punctuation in all sections.
-Variable and config names should be quoted as fixed width text: ``LIKE_THIS``.
+ Variable and config names should be quoted as fixed width text: ``LIKE_THIS``.
-Build the docs and view the output file to ensure the changes are correct::
+ Build the docs and view the output file to ensure the changes are correct::
- make doc-guides-html
+ make doc-guides-html
- firefox build/doc/html/guides/rel_notes/release_16_07.html
+ firefox build/doc/html/guides/rel_notes/release_16_07.html
New Features
------------
-This section should contain new features added in this release. Sample format:
+.. This section should contain new features added in this release. Sample format:
-* **Add a title in the past tense with a full stop.**
+ * **Add a title in the past tense with a full stop.**
- Add a short 1-2 sentence description in the past tense. The description
- should be enough to allow someone scanning the release notes to understand
- the new feature.
+ Add a short 1-2 sentence description in the past tense. The description
+ should be enough to allow someone scanning the release notes to understand
+ the new feature.
- If the feature adds a lot of sub-features you can use a bullet list like this.
+ If the feature adds a lot of sub-features you can use a bullet list like this.
- * Added feature foo to do something.
- * Enhanced feature bar to do something else.
+ * Added feature foo to do something.
+ * Enhanced feature bar to do something else.
- Refer to the previous release notes for examples.
+ Refer to the previous release notes for examples.
Resolved Issues
---------------
-This section should contain bug fixes added to the relevant sections. Sample format:
+.. This section should contain bug fixes added to the relevant sections. Sample format:
-* **code/section Fixed issue in the past tense with a full stop.**
+ * **code/section Fixed issue in the past tense with a full stop.**
- Add a short 1-2 sentence description of the resolved issue in the past tense.
- The title should contain the code/lib section like a commit message.
- Add the entries in alphabetic order in the relevant sections below.
+ Add a short 1-2 sentence description of the resolved issue in the past tense.
+ The title should contain the code/lib section like a commit message.
+ Add the entries in alphabetic order in the relevant sections below.
EAL
@@ -77,21 +77,21 @@ Other
Known Issues
------------
-This section should contain new known issues in this release. Sample format:
+.. This section should contain new known issues in this release. Sample format:
-* **Add title in present tense with full stop.**
+ * **Add title in present tense with full stop.**
- Add a short 1-2 sentence description of the known issue in the present
- tense. Add information on any known workarounds.
+ Add a short 1-2 sentence description of the known issue in the present
+ tense. Add information on any known workarounds.
API Changes
-----------
-This section should contain API changes. Sample format:
+.. This section should contain API changes. Sample format:
-* Add a short 1-2 sentence description of the API change. Use fixed width
- quotes for ``rte_function_names`` or ``rte_struct_names``. Use the past tense.
+ * Add a short 1-2 sentence description of the API change. Use fixed width
+ quotes for ``rte_function_names`` or ``rte_struct_names``. Use the past tense.
* The following counters are removed from ``rte_eth_stats`` structure:
ibadcrc, ibadlen, imcasts, fdirmatch, fdirmiss,
@@ -101,9 +101,9 @@ This section should contain API changes. Sample format:
ABI Changes
-----------
-* Add a short 1-2 sentence description of the ABI change that was announced in
- the previous releases and made in this release. Use fixed width quotes for
- ``rte_function_names`` or ``rte_struct_names``. Use the past tense.
+.. * Add a short 1-2 sentence description of the ABI change that was announced in
+ the previous releases and made in this release. Use fixed width quotes for
+ ``rte_function_names`` or ``rte_struct_names``. Use the past tense.
* The ``rte_port_source_params`` structure has new fields to support PCAP file.
It was already in release 16.04 with ``RTE_NEXT_ABI`` flag.
@@ -112,7 +112,7 @@ ABI Changes
Shared Library Versions
-----------------------
-Update any library version updated in this release and prepend with a ``+`` sign.
+.. Update any library version updated in this release and prepend with a ``+`` sign.
The libraries prepended with a plus sign were incremented in this version.
@@ -150,25 +150,25 @@ The libraries prepended with a plus sign were incremented in this version.
Tested Platforms
----------------
-This section should contain a list of platforms that were tested with this
-release.
+.. This section should contain a list of platforms that were tested with this
+ release.
-The format is:
+ The format is:
-#. Platform name.
+ #. Platform name.
- - Platform details.
- - Platform details.
+ - Platform details.
+ - Platform details.
Tested NICs
-----------
-This section should contain a list of NICs that were tested with this release.
+.. This section should contain a list of NICs that were tested with this release.
-The format is:
+ The format is:
-#. NIC name.
+ #. NIC name.
- - NIC details.
- - NIC details.
+ - NIC details.
+ - NIC details.
--
2.8.0.rc3
^ permalink raw reply [relevance 16%]
* Re: [dpdk-dev] [PATCH] sched: fix useless call
@ 2016-05-13 11:04 4% ` Dumitrescu, Cristian
0 siblings, 0 replies; 200+ results
From: Dumitrescu, Cristian @ 2016-05-13 11:04 UTC (permalink / raw)
To: Thomas Monjalon; +Cc: dev, Yigit, Ferruh, Mrzyglod, DanielX T
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Friday, May 13, 2016 11:12 AM
> To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: dev@dpdk.org; Yigit, Ferruh <ferruh.yigit@intel.com>; Mrzyglod, DanielX
> T <danielx.t.mrzyglod@intel.com>
> Subject: Re: [dpdk-dev] [PATCH] sched: fix useless call
>
> 2016-05-11 10:46, Ferruh Yigit:
> > On 5/10/2016 6:18 PM, Dumitrescu, Cristian wrote:
> > > As previously discussed on this email list, the rte_bitmap_free() is an API
> function that works as a placeholder for any resource freeing that needs to
> be done for the bitmap. The API function should not be removed and also
> the call to this function from the rte_sched_port_free() should not be
> removed either.
> > >
> >
> > Right now it isn't required and doesn't do anything.
> > Why not add this function when it is required?
>
> I don't understand why we keep a function which does nothing.
Every data type/class/object should have a constructor/create and destructor/free function. This is standard programming practice, right?
This API function is the free function for the bitmap object. Right now there are no internally allocated resources to be freed, but as code evolves, some other internal memory could be allocated by the bitmap, which needs to be freed in the bitmap free function.
This function should be kept in order to have a stable API. We should not go back and forth with adding / removing API functions as code evolves. It does not make any sense to go through the ABI change process to remove this API function now just to come back later on and go again through ABI change to add back this API function later.
I think each DPDK object should have its create and free functions clearly identified in the API.
^ permalink raw reply [relevance 4%]
* [dpdk-dev] [PATCH v2 0/6] vhost: add vhost-user client mode and reconnect ability
2016-05-07 6:40 3% [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
2016-05-10 3:23 3% ` Xu, Qian Q
@ 2016-05-13 6:16 3% ` Yuanhan Liu
2016-06-07 4:05 3% ` [dpdk-dev] [PATCH v3 " Yuanhan Liu
1 sibling, 1 reply; 200+ results
From: Yuanhan Liu @ 2016-05-13 6:16 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Traynor Kevin, marcandre.lureau, Yuanhan Liu
v2: - added release doc
- do not remove socket file for the client mode
- create only one thread to handle all reconnects
NOTE: I created a branch at dpdk.org [0] for more convenient testing:
[0]: git://dpdk.org/next/dpdk-next-virtio for-testing
When the DPDK vhost-user application (such as OVS) restarts (due to
crash, or update), the vhost-user connection between DPDK and QEMU
won't be established automatically again. In other words, the virtio
net is broken.
The reason it doesn't work is that DPDK just acts as server only.
A restart of the server needs a reconnection from the client (QEMU).
However, reconnecting is not supported by QEMU.
Adding the support of client mode and let DPDK be the client somehow
would resolve above issue a bit easier: DPDK vhost-user as the client,
a restart of DPDK would naturally try to connect to the server (QEMU)
automatically.
Therefore, this patchset implements the DPDK vhost-user client mode, by
introducing a new arg (flags) for API rte_vhost_driver_register(). And the
client mode is enabled when RTE_VHOST_USER_CLIENT is given. Note that this
implies an API breakage. However, since this release deals with ABI/API
refactoring, it should not be an issue.
Another interesting thing to make it work is that you not only have
to consider the case that the DPDK vhost-user app might restart, but also
have to consider that QEMU might restart as well: the guest OS sometimes
just reboots. In such a case, when the server is down, the client has
to keep reconnecting with the server until the server is back and the
connection is established again. And that's what the "reconnect" patch is for.
Note that current QEMU does not support a second connection
from the client, thus a restart of DPDK vhost-user will not work. This is
because current QEMU won't be able to detect the disconnect caused by the
restart, thus it will not listen for later connections. Patches [1] have
been sent; they are just not merged yet. But unlike the vhost-user multiple
queue case, where we critically depend on the QEMU implementation, here
we have no such dependency; therefore, I think it's okay to make DPDK
be ready for the "reconnect" stuff first. (Note that I also mentioned
this fact in the release doc.)
[1]: http://lists.nongnu.org/archive/html/qemu-devel/2016-05/msg01507.html
Thanks.
--yliu
---
Yuanhan Liu (6):
vhost: rename structs for enabling client mode
vhost: add vhost-user client mode
vhost: add reconnect ability
vhost: workaround stale vring base
examples/vhost: add client and reconnect option
vhost: add pmd client and reconnect option
doc/guides/rel_notes/release_16_07.rst | 23 ++
drivers/net/vhost/rte_eth_vhost.c | 54 +++-
examples/vhost/main.c | 23 +-
lib/librte_vhost/rte_virtio_net.h | 12 +-
lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 8 +-
lib/librte_vhost/vhost_user/vhost-net-user.c | 403 ++++++++++++++++++---------
lib/librte_vhost/vhost_user/vhost-net-user.h | 6 -
lib/librte_vhost/virtio-net.c | 9 +
8 files changed, 390 insertions(+), 148 deletions(-)
--
1.9.0
^ permalink raw reply [relevance 3%]
* [dpdk-dev] [PATCH v2 17/19] vhost: reserve few more space for future extension
2016-05-13 5:24 8% ` [dpdk-dev] [PATCH v2 00/19] vhost ABI/API refactoring Yuanhan Liu
` (2 preceding siblings ...)
2016-05-13 5:25 12% ` [dpdk-dev] [PATCH v2 13/19] vhost: export vid as the only interface to applications Yuanhan Liu
@ 2016-05-13 5:25 4% ` Yuanhan Liu
2016-05-26 17:04 4% ` [dpdk-dev] [PATCH v2 00/19] vhost ABI/API refactoring Rich Lane
2016-06-07 3:51 9% ` [dpdk-dev] [PATCH v3 00/20] " Yuanhan Liu
5 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-05-13 5:25 UTC (permalink / raw)
To: dev
Cc: Thomas Monjalon, huawei.xie, Panu Matilainen, Tetsuya Mukawa,
Traynor Kevin, Rich Lane, Yuanhan Liu
"virtio_net_device_ops" is the only remaining open struct that an application
can access; therefore, it's the only place where a future extension might
introduce an ABI break.
So, reserve some space in it. 5 should be quite enough, considering
that we have barely touched it for a long while. Another reason to
choose 5 is for cache alignment: 5 makes the struct 64 bytes on a 64-bit
machine.
With this, we can say with confidence that we might be able to be free from
ABI violations forever.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
lib/librte_vhost/rte_virtio_net.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index fc1d799..bc2b74b 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -66,6 +66,8 @@ struct virtio_net_device_ops {
void (*destroy_device)(int vid); /**< Remove device. */
int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */
+
+ void *reserved[5]; /**< Reserved for future extension */
};
/**
--
1.9.0
^ permalink raw reply [relevance 4%]
* [dpdk-dev] [PATCH v2 13/19] vhost: export vid as the only interface to applications
2016-05-13 5:24 8% ` [dpdk-dev] [PATCH v2 00/19] vhost ABI/API refactoring Yuanhan Liu
2016-05-13 5:25 7% ` [dpdk-dev] [PATCH v2 11/19] vhost: introduce new API to export queue free entries Yuanhan Liu
2016-05-13 5:25 3% ` [dpdk-dev] [PATCH v2 12/19] vhost: remove dependency on priv field Yuanhan Liu
@ 2016-05-13 5:25 12% ` Yuanhan Liu
2016-05-13 5:25 4% ` [dpdk-dev] [PATCH v2 17/19] vhost: reserve few more space for future extension Yuanhan Liu
` (2 subsequent siblings)
5 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-05-13 5:25 UTC (permalink / raw)
To: dev
Cc: Thomas Monjalon, huawei.xie, Panu Matilainen, Tetsuya Mukawa,
Traynor Kevin, Rich Lane, Yuanhan Liu
With all the previous preparation work, we are just one step away from
the final ABI refactoring. That is, to change the current APIs so that they
use vid instead of the old virtio_net dev.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
v2: update release note
---
doc/guides/rel_notes/release_16_07.rst | 7 ++++
drivers/net/vhost/rte_eth_vhost.c | 47 +++++++++------------------
examples/vhost/main.c | 25 +++++++-------
lib/librte_vhost/rte_virtio_net.h | 18 +++++-----
lib/librte_vhost/vhost_rxtx.c | 15 +++++++--
lib/librte_vhost/vhost_user/virtio-net-user.c | 14 ++++----
lib/librte_vhost/virtio-net.c | 17 ++++++----
7 files changed, 75 insertions(+), 68 deletions(-)
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index d293eda..ebc507b 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -99,6 +99,10 @@ This section should contain API changes. Sample format:
* ``rte_vring_available_entries`` is renamed to ``rte_vhost_avail_entries``.
+* All existing vhost APIs and callbacks with ``virtio_net`` struct pointer
+ as the parameter have been changed due to the ABI refactoring mentioned
+ below: it's replaced by "int vid".
+
ABI Changes
-----------
@@ -110,6 +114,9 @@ ABI Changes
* The ``rte_port_source_params`` structure has new fields to support PCAP file.
It was already in release 16.04 with ``RTE_NEXT_ABI`` flag.
+* vhost ABI refactoring has been made: ``virtio_net`` structure is never
+ exported to application any more. Instead, a handle, ``vid``, has been
+ used to represent this structure internally.
Shared Library Versions
-----------------------
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index de0f25e..56c1c36 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -71,9 +71,9 @@ static struct ether_addr base_eth_addr = {
};
struct vhost_queue {
+ int vid;
rte_atomic32_t allow_queuing;
rte_atomic32_t while_queuing;
- struct virtio_net *device;
struct pmd_internal *internal;
struct rte_mempool *mb_pool;
uint8_t port;
@@ -139,7 +139,7 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
goto out;
/* Dequeue packets from guest TX queue */
- nb_rx = rte_vhost_dequeue_burst(r->device,
+ nb_rx = rte_vhost_dequeue_burst(r->vid,
r->virtqueue_id, r->mb_pool, bufs, nb_bufs);
r->rx_pkts += nb_rx;
@@ -170,7 +170,7 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
goto out;
/* Enqueue packets to guest RX queue */
- nb_tx = rte_vhost_enqueue_burst(r->device,
+ nb_tx = rte_vhost_enqueue_burst(r->vid,
r->virtqueue_id, bufs, nb_bufs);
r->tx_pkts += nb_tx;
@@ -222,7 +222,7 @@ find_internal_resource(char *ifname)
}
static int
-new_device(struct virtio_net *dev)
+new_device(int vid)
{
struct rte_eth_dev *eth_dev;
struct internal_list *list;
@@ -234,12 +234,7 @@ new_device(struct virtio_net *dev)
int newnode;
#endif
- if (dev == NULL) {
- RTE_LOG(INFO, PMD, "Invalid argument\n");
- return -1;
- }
-
- rte_vhost_get_ifname(dev->vid, ifname, sizeof(ifname));
+ rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
list = find_internal_resource(ifname);
if (list == NULL) {
RTE_LOG(INFO, PMD, "Invalid device name: %s\n", ifname);
@@ -250,7 +245,7 @@ new_device(struct virtio_net *dev)
internal = eth_dev->data->dev_private;
#ifdef RTE_LIBRTE_VHOST_NUMA
- newnode = rte_vhost_get_numa_node(dev->vid);
+ newnode = rte_vhost_get_numa_node(vid);
if (newnode > 0)
eth_dev->data->numa_node = newnode;
#endif
@@ -259,7 +254,7 @@ new_device(struct virtio_net *dev)
vq = eth_dev->data->rx_queues[i];
if (vq == NULL)
continue;
- vq->device = dev;
+ vq->vid = vid;
vq->internal = internal;
vq->port = eth_dev->data->port_id;
}
@@ -267,13 +262,13 @@ new_device(struct virtio_net *dev)
vq = eth_dev->data->tx_queues[i];
if (vq == NULL)
continue;
- vq->device = dev;
+ vq->vid = vid;
vq->internal = internal;
vq->port = eth_dev->data->port_id;
}
- for (i = 0; i < rte_vhost_get_queue_num(dev->vid) * VIRTIO_QNUM; i++)
- rte_vhost_enable_guest_notification(dev, i, 0);
+ for (i = 0; i < rte_vhost_get_queue_num(vid) * VIRTIO_QNUM; i++)
+ rte_vhost_enable_guest_notification(vid, i, 0);
eth_dev->data->dev_link.link_status = ETH_LINK_UP;
@@ -298,7 +293,7 @@ new_device(struct virtio_net *dev)
}
static void
-destroy_device(volatile struct virtio_net *dev)
+destroy_device(int vid)
{
struct rte_eth_dev *eth_dev;
struct vhost_queue *vq;
@@ -306,12 +301,7 @@ destroy_device(volatile struct virtio_net *dev)
char ifname[PATH_MAX];
unsigned i;
- if (dev == NULL) {
- RTE_LOG(INFO, PMD, "Invalid argument\n");
- return;
- }
-
- rte_vhost_get_ifname(dev->vid, ifname, sizeof(ifname));
+ rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
list = find_internal_resource(ifname);
if (list == NULL) {
RTE_LOG(ERR, PMD, "Invalid interface name: %s\n", ifname);
@@ -343,13 +333,13 @@ destroy_device(volatile struct virtio_net *dev)
vq = eth_dev->data->rx_queues[i];
if (vq == NULL)
continue;
- vq->device = NULL;
+ vq->vid = -1;
}
for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
vq = eth_dev->data->tx_queues[i];
if (vq == NULL)
continue;
- vq->device = NULL;
+ vq->vid = -1;
}
RTE_LOG(INFO, PMD, "Connection closed\n");
@@ -358,19 +348,14 @@ destroy_device(volatile struct virtio_net *dev)
}
static int
-vring_state_changed(struct virtio_net *dev, uint16_t vring, int enable)
+vring_state_changed(int vid, uint16_t vring, int enable)
{
struct rte_vhost_vring_state *state;
struct rte_eth_dev *eth_dev;
struct internal_list *list;
char ifname[PATH_MAX];
- if (dev == NULL) {
- RTE_LOG(ERR, PMD, "Invalid argument\n");
- return -1;
- }
-
- rte_vhost_get_ifname(dev->vid, ifname, sizeof(ifname));
+ rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
list = find_internal_resource(ifname);
if (list == NULL) {
RTE_LOG(ERR, PMD, "Invalid interface name: %s\n", ifname);
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index c408577..f3a6277 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -808,7 +808,7 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
{
uint16_t ret;
- ret = rte_vhost_enqueue_burst(dst_vdev->dev, VIRTIO_RXQ, &m, 1);
+ ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
if (enable_stats) {
rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
@@ -1054,7 +1054,6 @@ static inline void __attribute__((always_inline))
drain_eth_rx(struct vhost_dev *vdev)
{
uint16_t rx_count, enqueue_count;
- struct virtio_net *dev = vdev->dev;
struct rte_mbuf *pkts[MAX_PKT_BURST];
rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
@@ -1068,19 +1067,19 @@ drain_eth_rx(struct vhost_dev *vdev)
* to diminish packet loss.
*/
if (enable_retry &&
- unlikely(rx_count > rte_vhost_avail_entries(dev->vid,
+ unlikely(rx_count > rte_vhost_avail_entries(vdev->vid,
VIRTIO_RXQ))) {
uint32_t retry;
for (retry = 0; retry < burst_rx_retry_num; retry++) {
rte_delay_us(burst_rx_delay_time);
- if (rx_count <= rte_vhost_avail_entries(dev->vid,
+ if (rx_count <= rte_vhost_avail_entries(vdev->vid,
VIRTIO_RXQ))
break;
}
}
- enqueue_count = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ,
+ enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
pkts, rx_count);
if (enable_stats) {
rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
@@ -1097,7 +1096,7 @@ drain_virtio_tx(struct vhost_dev *vdev)
uint16_t count;
uint16_t i;
- count = rte_vhost_dequeue_burst(vdev->dev, VIRTIO_TXQ, mbuf_pool,
+ count = rte_vhost_dequeue_burst(vdev->vid, VIRTIO_TXQ, mbuf_pool,
pkts, MAX_PKT_BURST);
/* setup VMDq for the first packet */
@@ -1183,12 +1182,12 @@ switch_worker(void *arg __rte_unused)
* of dev->remove=1 which can cause an infinite loop in the rte_pause loop.
*/
static void
-destroy_device (volatile struct virtio_net *dev)
+destroy_device(int vid)
{
struct vhost_dev *vdev;
int lcore;
- vdev = find_vhost_dev_by_vid(dev->vid);
+ vdev = find_vhost_dev_by_vid(vid);
if (!vdev)
return;
/*set the remove flag. */
@@ -1228,12 +1227,11 @@ destroy_device (volatile struct virtio_net *dev)
* and the allocated to a specific data core.
*/
static int
-new_device (struct virtio_net *dev)
+new_device(int vid)
{
int lcore, core_add = 0;
uint32_t device_num_min = num_devices;
struct vhost_dev *vdev;
- int vid = dev->vid;
vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
if (vdev == NULL) {
@@ -1242,7 +1240,6 @@ new_device (struct virtio_net *dev)
vid);
return -1;
}
- vdev->dev = dev;
vdev->vid = vid;
TAILQ_INSERT_TAIL(&vhost_dev_list, vdev, next);
@@ -1265,8 +1262,8 @@ new_device (struct virtio_net *dev)
lcore_info[vdev->coreid].device_num++;
/* Disable notifications. */
- rte_vhost_enable_guest_notification(dev, VIRTIO_RXQ, 0);
- rte_vhost_enable_guest_notification(dev, VIRTIO_TXQ, 0);
+ rte_vhost_enable_guest_notification(vid, VIRTIO_RXQ, 0);
+ rte_vhost_enable_guest_notification(vid, VIRTIO_TXQ, 0);
RTE_LOG(INFO, VHOST_DATA,
"(%d) device has been added to data core %d\n",
@@ -1322,7 +1319,7 @@ print_stats(void)
"RX total: %" PRIu64 "\n"
"RX dropped: %" PRIu64 "\n"
"RX successful: %" PRIu64 "\n",
- vdev->dev->vid,
+ vdev->vid,
tx_total, tx_dropped, tx,
rx_total, rx_dropped, rx);
}
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 0427461..370345e 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -178,10 +178,10 @@ struct virtio_memory {
*
*/
struct virtio_net_device_ops {
- int (*new_device)(struct virtio_net *); /**< Add device. */
- void (*destroy_device)(volatile struct virtio_net *); /**< Remove device. */
+ int (*new_device)(int vid); /**< Add device. */
+ void (*destroy_device)(int vid); /**< Remove device. */
- int (*vring_state_changed)(struct virtio_net *dev, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */
+ int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */
};
/**
@@ -220,7 +220,7 @@ int rte_vhost_feature_enable(uint64_t feature_mask);
/* Returns currently supported vhost features */
uint64_t rte_vhost_feature_get(void);
-int rte_vhost_enable_guest_notification(struct virtio_net *dev, uint16_t queue_id, int enable);
+int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
/* Register vhost driver. dev_name could be different for multiple instance support. */
int rte_vhost_driver_register(const char *dev_name);
@@ -291,8 +291,8 @@ uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
* be received from the physical port or from another virtual device. A packet
* count is returned to indicate the number of packets that were succesfully
* added to the RX queue.
- * @param dev
- * virtio-net device
+ * @param vid
+ * virtio-net device ID
* @param queue_id
* virtio queue index in mq case
* @param pkts
@@ -302,14 +302,14 @@ uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
* @return
* num of packets enqueued
*/
-uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
+uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count);
/**
* This function gets guest buffers from the virtio device TX virtqueue,
* construct host mbufs, copies guest buffer content to host mbufs and
* store them in pkts to be processed.
- * @param dev
+ * @param vid
* virtio-net device
* @param queue_id
* virtio queue index in mq case
@@ -322,7 +322,7 @@ uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
* @return
* num of packets dequeued
*/
-uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
+uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
#endif /* _VIRTIO_NET_H_ */
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 8d87508..08cab08 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -46,6 +46,7 @@
#include <rte_arp.h>
#include "vhost-net.h"
+#include "virtio-net.h"
#define MAX_PKT_BURST 32
#define VHOST_LOG_PAGE 4096
@@ -587,9 +588,14 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
}
uint16_t
-rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
+rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count)
{
+ struct virtio_net *dev = get_device(vid);
+
+ if (!dev)
+ return 0;
+
if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
return virtio_dev_merge_rx(dev, queue_id, pkts, count);
else
@@ -815,9 +821,10 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
uint16_t
-rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
+rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
{
+ struct virtio_net *dev;
struct rte_mbuf *rarp_mbuf = NULL;
struct vhost_virtqueue *vq;
uint32_t desc_indexes[MAX_PKT_BURST];
@@ -826,6 +833,10 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
uint16_t free_entries;
uint16_t avail_idx;
+ dev = get_device(vid);
+ if (!dev)
+ return 0;
+
if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
dev->vid, __func__, queue_id);
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
index 9385af1..7fa69a7 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -117,7 +117,7 @@ user_set_mem_table(int vid, struct VhostUserMsg *pmsg)
/* Remove from the data plane. */
if (dev->flags & VIRTIO_DEV_RUNNING) {
dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(dev);
+ notify_ops->destroy_device(vid);
}
if (dev->mem) {
@@ -279,6 +279,9 @@ user_set_vring_kick(int vid, struct VhostUserMsg *pmsg)
struct vhost_vring_file file;
struct virtio_net *dev = get_device(vid);
+ if (!dev)
+ return;
+
file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
file.fd = VIRTIO_INVALID_EVENTFD;
@@ -289,7 +292,7 @@ user_set_vring_kick(int vid, struct VhostUserMsg *pmsg)
vhost_set_vring_kick(vid, &file);
if (virtio_is_ready(dev) && !(dev->flags & VIRTIO_DEV_RUNNING)) {
- if (notify_ops->new_device(dev) == 0)
+ if (notify_ops->new_device(vid) == 0)
dev->flags |= VIRTIO_DEV_RUNNING;
}
}
@@ -307,7 +310,7 @@ user_get_vring_base(int vid,
return -1;
/* We have to stop the queue (virtio) if it is running. */
if (dev->flags & VIRTIO_DEV_RUNNING)
- notify_ops->destroy_device(dev);
+ notify_ops->destroy_device(vid);
/* Here we are safe to get the last used index */
vhost_get_vring_base(vid, state->index, state);
@@ -342,9 +345,8 @@ user_set_vring_enable(int vid,
"set queue enable: %d to qp idx: %d\n",
enable, state->index);
- if (notify_ops->vring_state_changed) {
- notify_ops->vring_state_changed(dev, state->index, enable);
- }
+ if (notify_ops->vring_state_changed)
+ notify_ops->vring_state_changed(vid, state->index, enable);
dev->virtqueue[state->index]->enabled = enable;
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 115eba4..ea216c0 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -296,7 +296,7 @@ vhost_destroy_device(int vid)
if (dev->flags & VIRTIO_DEV_RUNNING) {
dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(dev);
+ notify_ops->destroy_device(vid);
}
cleanup_device(dev, 1);
@@ -354,7 +354,7 @@ vhost_reset_owner(int vid)
if (dev->flags & VIRTIO_DEV_RUNNING) {
dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(dev);
+ notify_ops->destroy_device(vid);
}
cleanup_device(dev, 0);
@@ -718,13 +718,13 @@ vhost_set_backend(int vid, struct vhost_vring_file *file)
if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
if (dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED &&
dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED) {
- if (notify_ops->new_device(dev) < 0)
+ if (notify_ops->new_device(vid) < 0)
return -1;
dev->flags |= VIRTIO_DEV_RUNNING;
}
} else if (file->fd == VIRTIO_DEV_STOPPED) {
dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(dev);
+ notify_ops->destroy_device(vid);
}
return 0;
@@ -800,9 +800,14 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id)
return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx_res;
}
-int rte_vhost_enable_guest_notification(struct virtio_net *dev,
- uint16_t queue_id, int enable)
+int
+rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
{
+ struct virtio_net *dev = get_device(vid);
+
+ if (dev == NULL)
+ return -1;
+
if (enable) {
RTE_LOG(ERR, VHOST_CONFIG,
"guest notification isn't supported.\n");
--
1.9.0
^ permalink raw reply [relevance 12%]
* [dpdk-dev] [PATCH v2 12/19] vhost: remove dependency on priv field
2016-05-13 5:24 8% ` [dpdk-dev] [PATCH v2 00/19] vhost ABI/API refactoring Yuanhan Liu
2016-05-13 5:25 7% ` [dpdk-dev] [PATCH v2 11/19] vhost: introduce new API to export queue free entries Yuanhan Liu
@ 2016-05-13 5:25 3% ` Yuanhan Liu
2016-05-13 5:25 12% ` [dpdk-dev] [PATCH v2 13/19] vhost: export vid as the only interface to applications Yuanhan Liu
` (3 subsequent siblings)
5 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-05-13 5:25 UTC (permalink / raw)
To: dev
Cc: Thomas Monjalon, huawei.xie, Panu Matilainen, Tetsuya Mukawa,
Traynor Kevin, Rich Lane, Yuanhan Liu
This change lets us avoid the dependency on the "virtio_net"
struct, to prepare for the ABI refactoring.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
drivers/net/vhost/rte_eth_vhost.c | 13 +++++++------
examples/vhost/main.c | 18 ++++++++++++++++--
2 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 6fa9f6b..de0f25e 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -275,7 +275,6 @@ new_device(struct virtio_net *dev)
for (i = 0; i < rte_vhost_get_queue_num(dev->vid) * VIRTIO_QNUM; i++)
rte_vhost_enable_guest_notification(dev, i, 0);
- dev->priv = eth_dev;
eth_dev->data->dev_link.link_status = ETH_LINK_UP;
for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
@@ -303,6 +302,8 @@ destroy_device(volatile struct virtio_net *dev)
{
struct rte_eth_dev *eth_dev;
struct vhost_queue *vq;
+ struct internal_list *list;
+ char ifname[PATH_MAX];
unsigned i;
if (dev == NULL) {
@@ -310,11 +311,13 @@ destroy_device(volatile struct virtio_net *dev)
return;
}
- eth_dev = (struct rte_eth_dev *)dev->priv;
- if (eth_dev == NULL) {
- RTE_LOG(INFO, PMD, "Failed to find a ethdev\n");
+ rte_vhost_get_ifname(dev->vid, ifname, sizeof(ifname));
+ list = find_internal_resource(ifname);
+ if (list == NULL) {
+ RTE_LOG(ERR, PMD, "Invalid interface name: %s\n", ifname);
return;
}
+ eth_dev = list->eth_dev;
/* Wait until rx/tx_pkt_burst stops accessing vhost device */
for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
@@ -336,8 +339,6 @@ destroy_device(volatile struct virtio_net *dev)
eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
- dev->priv = NULL;
-
for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
vq = eth_dev->data->rx_queues[i];
if (vq == NULL)
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 67ef0ad..c408577 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -700,6 +700,19 @@ find_vhost_dev(struct ether_addr *mac)
return NULL;
}
+static inline struct vhost_dev *__attribute__((always_inline))
+find_vhost_dev_by_vid(int vid)
+{
+ struct vhost_dev *vdev;
+
+ TAILQ_FOREACH(vdev, &vhost_dev_list, next) {
+ if (vdev->ready == DEVICE_RX && vdev->vid == vid)
+ return vdev;
+ }
+
+ return NULL;
+}
+
/*
* This function learns the MAC address of the device and registers this along with a
* vlan tag to a VMDQ.
@@ -1175,7 +1188,9 @@ destroy_device (volatile struct virtio_net *dev)
struct vhost_dev *vdev;
int lcore;
- vdev = (struct vhost_dev *)dev->priv;
+ vdev = find_vhost_dev_by_vid(dev->vid);
+ if (!vdev)
+ return;
/*set the remove flag. */
vdev->remove = 1;
while(vdev->ready != DEVICE_SAFE_REMOVE) {
@@ -1228,7 +1243,6 @@ new_device (struct virtio_net *dev)
return -1;
}
vdev->dev = dev;
- dev->priv = vdev;
vdev->vid = vid;
TAILQ_INSERT_TAIL(&vhost_dev_list, vdev, next);
--
1.9.0
^ permalink raw reply [relevance 3%]
* [dpdk-dev] [PATCH v2 11/19] vhost: introduce new API to export queue free entries
2016-05-13 5:24 8% ` [dpdk-dev] [PATCH v2 00/19] vhost ABI/API refactoring Yuanhan Liu
@ 2016-05-13 5:25 7% ` Yuanhan Liu
2016-05-13 5:25 3% ` [dpdk-dev] [PATCH v2 12/19] vhost: remove dependency on priv field Yuanhan Liu
` (4 subsequent siblings)
5 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-05-13 5:25 UTC (permalink / raw)
To: dev
Cc: Thomas Monjalon, huawei.xie, Panu Matilainen, Tetsuya Mukawa,
Traynor Kevin, Rich Lane, Yuanhan Liu
The new API rte_vhost_avail_entries() is actually a rename of
rte_vring_available_entries(), with the "vring" to "vhost" name
change to keep the consistency of other vhost exported APIs.
This change could let us avoid the dependency of "virtio_net"
struct, to prepare for the ABI refactoring.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
doc/guides/rel_notes/release_16_07.rst | 2 ++
examples/vhost/main.c | 4 ++--
lib/librte_vhost/rte_vhost_version.map | 1 +
lib/librte_vhost/rte_virtio_net.h | 24 +++++++++++++-----------
lib/librte_vhost/virtio-net.c | 17 +++++++++++++++++
5 files changed, 35 insertions(+), 13 deletions(-)
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index f6d543c..d293eda 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -97,6 +97,8 @@ This section should contain API changes. Sample format:
ibadcrc, ibadlen, imcasts, fdirmatch, fdirmiss,
tx_pause_xon, rx_pause_xon, tx_pause_xoff, rx_pause_xoff.
+* ``rte_vring_available_entries`` is renamed to ``rte_vhost_avail_entries``.
+
ABI Changes
-----------
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index cb04585..67ef0ad 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -1055,13 +1055,13 @@ drain_eth_rx(struct vhost_dev *vdev)
* to diminish packet loss.
*/
if (enable_retry &&
- unlikely(rx_count > rte_vring_available_entries(dev,
+ unlikely(rx_count > rte_vhost_avail_entries(dev->vid,
VIRTIO_RXQ))) {
uint32_t retry;
for (retry = 0; retry < burst_rx_retry_num; retry++) {
rte_delay_us(burst_rx_delay_time);
- if (rx_count <= rte_vring_available_entries(dev,
+ if (rx_count <= rte_vhost_avail_entries(dev->vid,
VIRTIO_RXQ))
break;
}
diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
index 4608e3b..93f1188 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -24,6 +24,7 @@ DPDK_2.1 {
DPDK_16.07 {
global:
+ rte_vhost_avail_entries;
rte_vhost_get_ifname;
rte_vhost_get_numa_node;
rte_vhost_get_queue_num;
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 0898e8b..0427461 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -184,17 +184,6 @@ struct virtio_net_device_ops {
int (*vring_state_changed)(struct virtio_net *dev, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */
};
-static inline uint16_t __attribute__((always_inline))
-rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id)
-{
- struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
-
- if (!vq->enabled)
- return 0;
-
- return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx_res;
-}
-
/**
* Function to convert guest physical addresses to vhost virtual addresses.
* This is used to convert guest virtio buffer addresses.
@@ -285,6 +274,19 @@ uint32_t rte_vhost_get_queue_num(int vid);
int rte_vhost_get_ifname(int vid, char *buf, size_t len);
/**
+ * Get how many avail entries are left in the queue
+ *
+ * @param vid
+ * virtio-net device ID
+ * @param queue_id
+ * virtio queue index
+ *
+ * @return
+ * num of avail entries left
+ */
+uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
+
+/**
* This function adds buffers to the virtio devices RX virtqueue. Buffers can
* be received from the physical port or from another virtual device. A packet
* count is returned to indicate the number of packets that were succesfully
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 375c9d4..115eba4 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -783,6 +783,23 @@ rte_vhost_get_ifname(int vid, char *buf, size_t len)
return 0;
}
+uint16_t
+rte_vhost_avail_entries(int vid, uint16_t queue_id)
+{
+ struct virtio_net *dev;
+ struct vhost_virtqueue *vq;
+
+ dev = get_device(vid);
+ if (!dev)
+ return 0;
+
+ vq = dev->virtqueue[queue_id];
+ if (!vq->enabled)
+ return 0;
+
+ return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx_res;
+}
+
int rte_vhost_enable_guest_notification(struct virtio_net *dev,
uint16_t queue_id, int enable)
{
--
1.9.0
^ permalink raw reply [relevance 7%]
* [dpdk-dev] [PATCH v2 00/19] vhost ABI/API refactoring
2016-05-02 22:25 9% [dpdk-dev] [PATCH 00/16] vhost ABI/API refactoring Yuanhan Liu
2016-05-02 22:25 2% ` [dpdk-dev] [PATCH 10/16] vhost: export vid as the only interface to applications Yuanhan Liu
2016-05-02 22:25 4% ` [dpdk-dev] [PATCH 14/16] vhost: reserve few more space for future extension Yuanhan Liu
@ 2016-05-13 5:24 8% ` Yuanhan Liu
2016-05-13 5:25 7% ` [dpdk-dev] [PATCH v2 11/19] vhost: introduce new API to export queue free entries Yuanhan Liu
` (5 more replies)
2 siblings, 6 replies; 200+ results
From: Yuanhan Liu @ 2016-05-13 5:24 UTC (permalink / raw)
To: dev
Cc: Thomas Monjalon, huawei.xie, Panu Matilainen, Tetsuya Mukawa,
Traynor Kevin, Rich Lane, Yuanhan Liu
v2: - exported ifname as well to fix a vhost-pmd issue reported
by Rich
- separated the big patch that introduces several new APIs
into some small patches.
- updated release note
- updated version.map
NOTE: I created a branch at dpdk.org [0] for more convenient
testing:
[0]: git://dpdk.org/next/dpdk-next-virtio for-testing
Every time we introduce a new feature to vhost, we are likely
to break ABI. Moreover, some cleanups (such as the one from Ilya
to remove vec_buf from vhost_virtqueue struct) also break ABI.
This patch set is meant to resolve the above issue ultimately, by
hiding the virtio_net structure (as well as a few others) internally,
and exporting the virtio_net dev struct to applications by a number,
vid, the way the kernel exposes an fd to user space.
Back to the patch set, the first part of this set makes some
changes to vhost example, vhost-pmd and vhost, bit by bit, to
remove the dependence to "virtio_net" struct. And then do the
final change to make the current APIs to adapt to using "vid".
After that, "virtio_net_device_ops" is the only open struct left
that an application can access; therefore, it's the only place
that might introduce potential ABI breakage in future for
extension. Hence, I reserved a few more (5) spaces, to
make sure we will not break ABI for a long time, and hopefully,
forever.
The last bit of this patch set is some cleanups, including the
one from Ilya.
Note that this refactoring breaks the tep_termination example.
Well, it's just another copy of the original messy vhost example,
and I have no interest to cleanup it again. Therefore, I might
consider to remove that example later, and add the vxlan bits
into vhost example.
Thanks.
--yliu
---
Ilya Maximets (1):
vhost: make buf vector for scatter Rx local
Yuanhan Liu (18):
vhost: declare backend with int type
vhost: set/reset dev flags internally
vhost: declare device fh as int
examples/vhost: make a copy of virtio device id
vhost: rename device fh to vid
vhost: get device by vid only
vhost: move vhost device ctx to cuse
vhost: introduce new API to export numa node
vhost: introduce new API to export number of queues
vhost: introduce new API to export ifname
vhost: introduce new API to export queue free entries
vhost: remove dependency on priv field
vhost: export vid as the only interface to applications
vhost: hide internal structs/macros/functions
vhost: remove unnecessary fields
vhost: remove virtio-net.h
vhost: reserve few more space for future extension
vhost: per device virtio net header len
doc/guides/rel_notes/release_16_07.rst | 9 +
drivers/net/vhost/rte_eth_vhost.c | 79 ++++-----
examples/vhost/main.c | 124 +++++++-------
examples/vhost/main.h | 1 +
lib/librte_vhost/rte_vhost_version.map | 10 ++
lib/librte_vhost/rte_virtio_net.h | 223 +++++++------------------
lib/librte_vhost/vhost-net.h | 201 ++++++++++++++++++----
lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 83 +++++-----
lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 30 ++--
lib/librte_vhost/vhost_cuse/virtio-net-cdev.h | 12 +-
lib/librte_vhost/vhost_rxtx.c | 133 ++++++++-------
lib/librte_vhost/vhost_user/vhost-net-user.c | 53 +++---
lib/librte_vhost/vhost_user/vhost-net-user.h | 2 +
lib/librte_vhost/vhost_user/virtio-net-user.c | 64 +++----
lib/librte_vhost/vhost_user/virtio-net-user.h | 18 +-
lib/librte_vhost/virtio-net.c | 229 +++++++++++++++++---------
lib/librte_vhost/virtio-net.h | 43 -----
17 files changed, 702 insertions(+), 612 deletions(-)
delete mode 100644 lib/librte_vhost/virtio-net.h
--
1.9.0
^ permalink raw reply [relevance 8%]
* Re: [dpdk-dev] [PATCH] pci: Add the class_id support in pci probe
2016-05-11 15:21 3% ` Stephen Hemminger
@ 2016-05-11 15:34 3% ` Richardson, Bruce
0 siblings, 0 replies; 200+ results
From: Richardson, Bruce @ 2016-05-11 15:34 UTC (permalink / raw)
To: Stephen Hemminger, Yang, Ziye; +Cc: dev
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Stephen Hemminger
> Sent: Wednesday, May 11, 2016 4:21 PM
> To: Yang, Ziye <ziye.yang@intel.com>
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] pci: Add the class_id support in pci probe
>
> On Wed, 11 May 2016 14:08:15 +0800
> Ziye Yang <ziye.yang@intel.com> wrote:
>
> > This patch is used to add the class_id (class_code, subclass_code,
> > programming_interface) support for pci_device probe. With this patch,
> > it will be flexible for users to probe a class of devices by class_id.
> >
> > Signed-off-by: Ziye Yang <ziye.yang@intel.com>
>
> I like this, and it is necessary but since rte_pci_id is a visible data
> structure it causes ABI breakage.
A notice was published for this change in 16.04, so we should be ok ABI-wise.
/Bruce
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH] pci: Add the class_id support in pci probe
@ 2016-05-11 15:21 3% ` Stephen Hemminger
2016-05-11 15:34 3% ` Richardson, Bruce
2016-05-19 12:25 7% ` [dpdk-dev] [PATCH v2] ci: " Ziye Yang
1 sibling, 1 reply; 200+ results
From: Stephen Hemminger @ 2016-05-11 15:21 UTC (permalink / raw)
To: Ziye Yang; +Cc: dev
On Wed, 11 May 2016 14:08:15 +0800
Ziye Yang <ziye.yang@intel.com> wrote:
> This patch is used to add the class_id (class_code,
> subclass_code, programming_interface) support for
> pci_device probe. With this patch, it will be
> flexible for users to probe a class of devices
> by class_id.
>
> Signed-off-by: Ziye Yang <ziye.yang@intel.com>
I like this, and it is necessary but since rte_pci_id is a visible
data structure it causes ABI breakage.
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH 0/3] [RFC] vhost: micro vhost optimization
2016-05-10 21:49 0% ` Rich Lane
@ 2016-05-10 22:08 0% ` Yuanhan Liu
0 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-05-10 22:08 UTC (permalink / raw)
To: Rich Lane; +Cc: dev, huawei.xie
On Tue, May 10, 2016 at 02:49:43PM -0700, Rich Lane wrote:
> I see a significant performance improvement with these patches, around 5% at 64
> bytes.
Thanks for testing.
>
> The one patch that didn't give any performance boost for me was "vhost: arrange
> virtio_net fields for better cache sharing".
Yeah, same here from my test. I mean, in theory, it should give us a
tiny boost, it doesn't in real life though. And since it (should) do
no harm, I would still include this patch in this set. Maybe I should
have noted at first that no real perf gain from the 3rd patch.
--yliu
>
> Tested-by: Rich Lane <rich.lane@bigswitch.com>
>
> On Mon, May 2, 2016 at 5:46 PM, Yuanhan Liu <yuanhan.liu@linux.intel.com>
> wrote:
>
> Here is a small patch set does the micro optimization, which brings about
> 10% performance boost in my 64B packet testing, with the following topo:
>
> pkt generator <----> NIC <-----> Virtio NIC
>
> Patch 1 pre updates the used ring and update them in batch. It should be
> feasible from my understanding: there will be no issue, guest driver will
> not start processing them as far as we haven't updated the "used->idx"
> yet. I could miss something though.
>
> Patch 2 saves one check for small packets (that can be hold in one desc
> buf and mbuf).
>
> Patch 3 moves several frequently used fields into one cache line, for
> better cache sharing.
>
> Note that this patch set is based on my latest vhost ABI refactoring
> patchset.
>
>
> ---
> Yuanhan Liu (3):
> vhost: pre update used ring for Tx and Rx
> vhost: optimize dequeue for small packets
> vhost: arrange virtio_net fields for better cache sharing
>
> lib/librte_vhost/vhost-net.h | 8 +--
> lib/librte_vhost/vhost_rxtx.c | 110
> ++++++++++++++++++++++++------------------
> 2 files changed, 68 insertions(+), 50 deletions(-)
>
> --
> 1.9.0
>
>
>
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH 0/3] [RFC] vhost: micro vhost optimization
2016-05-03 0:46 3% [dpdk-dev] [PATCH 0/3] [RFC] vhost: micro vhost optimization Yuanhan Liu
@ 2016-05-10 21:49 0% ` Rich Lane
2016-05-10 22:08 0% ` Yuanhan Liu
0 siblings, 1 reply; 200+ results
From: Rich Lane @ 2016-05-10 21:49 UTC (permalink / raw)
To: Yuanhan Liu; +Cc: dev, huawei.xie
I see a significant performance improvement with these patches, around 5%
at 64 bytes.
The one patch that didn't give any performance boost for me was "vhost:
arrange virtio_net fields for better cache sharing".
Tested-by: Rich Lane <rich.lane@bigswitch.com>
On Mon, May 2, 2016 at 5:46 PM, Yuanhan Liu <yuanhan.liu@linux.intel.com>
wrote:
> Here is a small patch set does the micro optimization, which brings about
> 10% performance boost in my 64B packet testing, with the following topo:
>
> pkt generator <----> NIC <-----> Virtio NIC
>
> Patch 1 pre updates the used ring and update them in batch. It should be
> feasible from my understanding: there will be no issue, guest driver will
> not start processing them as far as we haven't updated the "used->idx"
> yet. I could miss something though.
>
> Patch 2 saves one check for small packets (that can be hold in one desc
> buf and mbuf).
>
> Patch 3 moves several frequently used fields into one cache line, for
> better cache sharing.
>
> Note that this patch set is based on my latest vhost ABI refactoring
> patchset.
>
>
> ---
> Yuanhan Liu (3):
> vhost: pre update used ring for Tx and Rx
> vhost: optimize dequeue for small packets
> vhost: arrange virtio_net fields for better cache sharing
>
> lib/librte_vhost/vhost-net.h | 8 +--
> lib/librte_vhost/vhost_rxtx.c | 110
> ++++++++++++++++++++++++------------------
> 2 files changed, 68 insertions(+), 50 deletions(-)
>
> --
> 1.9.0
>
>
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability
2016-05-10 3:23 3% ` Xu, Qian Q
@ 2016-05-10 17:41 0% ` Yuanhan Liu
0 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-05-10 17:41 UTC (permalink / raw)
To: Xu, Qian Q; +Cc: dev, Xie, Huawei, marcandre.lureau
On Tue, May 10, 2016 at 03:23:15AM +0000, Xu, Qian Q wrote:
> Do we need patch qemu for the reconnect case?
Yes, we need some support from QEMU: currently QEMU will not be able
to detect disconnection and hence will not establish the connection
when DPDK vhost restarts.
Following patchset from Marc resolves above issue.
http://lists.nongnu.org/archive/html/qemu-devel/2016-05/msg01507.html
And note that unlike the vhost-user multiple queue support that depends
on some new vhost-user message from QEMU, this patchset does not depend
on QEMU.
--yliu
>
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Yuanhan Liu
> Sent: Saturday, May 07, 2016 2:40 PM
> To: dev@dpdk.org
> Cc: Xie, Huawei; Yuanhan Liu
> Subject: [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability
>
> Both the vhost-user backend (DPDK here) and frontend (QEMU) could be server, as well as client. DPDK just acts as server so far. This patch set would make it possible to act as both.
>
> A new arg (flags) is introduced for API rte_vhost_driver_register(). And the client mode is enabled when RTE_VHOST_USER_CLIENT is given. Note that this implies an API breakage. However, since this release deals with ABI/API refactoring, it should not be an issue.
>
> With the DPDK as client, it's easier to implement the "reconnect" ability, which means we could still make vhost-user work after DPDK restarts.
>
>
> ---
> Yuanhan Liu (6):
> vhost: rename structs for enabling client mode
> vhost: add vhost-user client mode
> vhost: add reconnect ability
> vhost: workaround stale vring base
> examples/vhost: add client and reconnect option
> vhost: add pmd client and reconnect option
>
> drivers/net/vhost/rte_eth_vhost.c | 54 +++-
> examples/vhost/main.c | 23 +-
> lib/librte_vhost/rte_virtio_net.h | 12 +-
> lib/librte_vhost/vhost_user/vhost-net-user.c | 355 ++++++++++++++++++---------
> lib/librte_vhost/vhost_user/vhost-net-user.h | 6 -
> lib/librte_vhost/virtio-net.c | 8 +
> 6 files changed, 313 insertions(+), 145 deletions(-)
>
> --
> 1.9.0
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH 10/16] vhost: export vid as the only interface to applications
2016-05-10 16:17 0% ` Rich Lane
@ 2016-05-10 16:39 0% ` Yuanhan Liu
0 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-05-10 16:39 UTC (permalink / raw)
To: Rich Lane
Cc: dev, huawei.xie, Thomas Monjalon, Panu Matilainen,
Tetsuya Mukawa, Traynor Kevin
On Tue, May 10, 2016 at 09:17:23AM -0700, Rich Lane wrote:
> On Mon, May 2, 2016 at 3:25 PM, Yuanhan Liu <yuanhan.liu@linux.intel.com>
> wrote:
>
> With all the previous prepare works, we are just one step away from
> the final ABI refactoring. That is, to change current API to let them
> stick to vid instead of the old virtio_net dev.
>
>
> This patch removes the only assignment to internal->vid in the PMD. It's
> initialized to zero, so only the first vhost connection will work.
I assume you meant the following diff:
- if (dev == NULL) {
- RTE_LOG(INFO, PMD, "Invalid argument\n");
- return -1;
- }
-
- list = find_internal_resource(dev->vid);
+ list = find_internal_resource(vid);
if (list == NULL) {
- RTE_LOG(INFO, PMD, "Invalid vid %d\n", dev->vid);
+ RTE_LOG(INFO, PMD, "Invalid vid %d\n", vid);
return -1;
}
eth_dev = list->eth_dev;
internal = eth_dev->data->dev_private;
- internal->vid = dev->vid;
Then yes, I have no idea why I did that; it's a careless and
hard-to-catch issue. So, thanks a lot for catching it!
Rich, would you help try by adding following line there and
do a test? It would be great if this patch has your Tested-by :)
internal->vid = vid;
Thanks.
--yliu
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH 10/16] vhost: export vid as the only interface to applications
2016-05-02 22:25 2% ` [dpdk-dev] [PATCH 10/16] vhost: export vid as the only interface to applications Yuanhan Liu
@ 2016-05-10 16:17 0% ` Rich Lane
2016-05-10 16:39 0% ` Yuanhan Liu
0 siblings, 1 reply; 200+ results
From: Rich Lane @ 2016-05-10 16:17 UTC (permalink / raw)
To: Yuanhan Liu
Cc: dev, huawei.xie, Thomas Monjalon, Panu Matilainen,
Tetsuya Mukawa, Traynor Kevin
On Mon, May 2, 2016 at 3:25 PM, Yuanhan Liu <yuanhan.liu@linux.intel.com>
wrote:
> With all the previous prepare works, we are just one step away from
> the final ABI refactoring. That is, to change current API to let them
> stick to vid instead of the old virtio_net dev.
>
This patch removes the only assignment to internal->vid in the PMD. It's
initialized to zero, so only the first vhost connection will work.
^ permalink raw reply [relevance 0%]
* [dpdk-dev] [PATCHv2 5/5] doc: update doc for packet capture framework
@ 2016-05-10 9:40 6% ` Reshma Pattan
1 sibling, 0 replies; 200+ results
From: Reshma Pattan @ 2016-05-10 9:40 UTC (permalink / raw)
To: dev; +Cc: Reshma Pattan
added programmers guide for librte_pdump.
added sample application guide for app/pdump application.
updated release note for packet capture framework changes.
Signed-off-by: Reshma Pattan <reshma.pattan@intel.com>
---
MAINTAINERS | 3 +
doc/guides/prog_guide/index.rst | 1 +
doc/guides/prog_guide/pdump_library.rst | 121 ++++++++++++++++++++++++++++++++
doc/guides/rel_notes/release_16_07.rst | 7 ++
doc/guides/sample_app_ug/index.rst | 1 +
doc/guides/sample_app_ug/pdump.rst | 109 ++++++++++++++++++++++++++++
6 files changed, 242 insertions(+)
create mode 100644 doc/guides/prog_guide/pdump_library.rst
create mode 100644 doc/guides/sample_app_ug/pdump.rst
diff --git a/MAINTAINERS b/MAINTAINERS
index b6a39c7..6ddc818 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -432,6 +432,9 @@ Pdump
M: Reshma Pattan <reshma.pattan@intel.com>
F: lib/librte_pdump/
F: app/pdump/
+F: doc/guides/prog_guide/pdump_library.rst
+F: doc/guides/sample_app_ug/pdump.rst
+
Hierarchical scheduler
M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
diff --git a/doc/guides/prog_guide/index.rst b/doc/guides/prog_guide/index.rst
index b862d0c..4caf969 100644
--- a/doc/guides/prog_guide/index.rst
+++ b/doc/guides/prog_guide/index.rst
@@ -71,6 +71,7 @@ Programmer's Guide
writing_efficient_code
profile_app
glossary
+ pdump_library
**Figures**
diff --git a/doc/guides/prog_guide/pdump_library.rst b/doc/guides/prog_guide/pdump_library.rst
new file mode 100644
index 0000000..6af77b9
--- /dev/null
+++ b/doc/guides/prog_guide/pdump_library.rst
@@ -0,0 +1,121 @@
+.. BSD LICENSE
+ Copyright(c) 2016 Intel Corporation. All rights reserved.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+.. _Pdump_Library:
+
+pdump Library
+=============
+
+Pdump library provides framework for packet capturing on DPDK.
+
+Operation
+---------
+
+Pdump library provides APIs to support packet capturing on dpdk ethernet devices.
+Library provides APIs to initialize the packet capture framework, enable/disable
+the packet capture and uninitialize the packet capture framework.
+
+Pdump library works on server and client based model.
+
+Server is responsible for enabling/disabling the packet captures.
+Clients are responsible for requesting enable/disable of the
+packet captures.
+
+As part of packet capture framework initialization, pthread and
+the server socket is created. Only one server socket is allowed on the system.
+As part of enabling/disabling the packet capture, client sockets are created
+and multiple client sockets are allowed.
+Whoever calls initialization first will succeed with the initialization;
+subsequent calls of initialization are not allowed. So later users can only
+request enabling/disabling the packet capture.
+
+Library provides below APIs
+
+``rte_pdump_init()``
+This API initializes the packet capture framework.
+
+``rte_pdump_enable()``
+This API enables the packet capturing on a given port and queue.
+Note: filter option in the API is place holder for future use.
+
+``rte_pdump_enable_by_deviceid()``
+This API enables the packet capturing on a given device id
+(device name or pci address) and queue.
+Note: filter option in the API is place holder for future use.
+
+``rte_pdump_disable()``
+This API disables the packet capturing on a given port and queue.
+
+``rte_pdump_disable_by_deviceid()``
+This API disables the packet capturing on a given device_id and queue.
+
+``rte_pdump_uninit()``
+This API uninitializes the packet capture framework.
+
+
+Implementation Details
+----------------------
+
+On a call to library API ``rte_pdump_init()``, library creates pthread and server socket.
+Server socket in pthread context will be listening to the client requests to enable/disable
+the packet capture.
+
+Whoever calls this API first will have the server socket created;
+subsequent calls to this API will not create any further server sockets, i.e. only one server
+socket is allowed.
+
+On each call to library APIs ``rte_pdump_enable()/rte_pdump_enable_by_deviceid()``
+to enable the packet capture, library creates separate client sockets,
+builds up enable request and sends the request to the server.
+Server listening on the socket will serve the request, enable the packet capture
+by registering ethernet rx/tx callbacks for the given port/device_id and queue combinations.
+Server mirrors the packets to a new mempool and enqueues them to the ring that clients have passed
+in these APIs.
+Server sends the response back to the client about the status of the request that was processed.
+After the response is received from the server, client sockets will be closed.
+
+On each call to library APIs ``rte_pdump_disable()/rte_pdump_disable_by_deviceid()``
+to disable packet capture, library creates separate client sockets,
+builds up disable request and sends the request to the server.
+Server listening on the socket will serve the request, disable the packet capture
+by removing the ethernet rx/tx callbacks for the given port/device_id and queue combinations.
+Server sends the response back to the client about the status of the request that was processed.
+After the response is received from the server, client sockets will be closed.
+
+On a call to library API ``rte_pdump_uninit()``, library closes the pthread and the server socket.
+
+
+Use Case: Packet Capturing
+--------------------------
+
+app/pdump tool is developed based on this library to capture the packets
+in DPDK.
+Users can develop their own packet capturing application using new library
+if they wish to do so.
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 83c841b..4d6ab10 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -34,6 +34,10 @@ This section should contain new features added in this release. Sample format:
Refer to the previous release notes for examples.
+* **Added packet capturing support.**
+
+ Now users have facility to capture packets on dpdk ports using librte_pdump
+ and app/pdump tool.
Resolved Issues
---------------
@@ -90,6 +94,7 @@ This section should contain API changes. Sample format:
ibadcrc, ibadlen, imcasts, fdirmatch, fdirmiss,
tx_pause_xon, rx_pause_xon, tx_pause_xoff, rx_pause_xoff.
+* Now function ``rte_eth_dev_get_port_by_name`` changed to public API.
ABI Changes
-----------
@@ -101,6 +106,8 @@ ABI Changes
* The ``rte_port_source_params`` structure has new fields to support PCAP file.
It was already in release 16.04 with ``RTE_NEXT_ABI`` flag.
+* The ``rte_eth_dev_info`` structure has new fields ``nb_rx_queues`` and ``nb_tx_queues``
+ to support number of queues configured by software.
Shared Library Versions
-----------------------
diff --git a/doc/guides/sample_app_ug/index.rst b/doc/guides/sample_app_ug/index.rst
index 930f68c..96bb317 100644
--- a/doc/guides/sample_app_ug/index.rst
+++ b/doc/guides/sample_app_ug/index.rst
@@ -76,6 +76,7 @@ Sample Applications User Guide
ptpclient
performance_thread
ipsec_secgw
+ pdump
**Figures**
diff --git a/doc/guides/sample_app_ug/pdump.rst b/doc/guides/sample_app_ug/pdump.rst
new file mode 100644
index 0000000..c185550
--- /dev/null
+++ b/doc/guides/sample_app_ug/pdump.rst
@@ -0,0 +1,109 @@
+
+.. BSD LICENSE
+ Copyright(c) 2016 Intel Corporation. All rights reserved.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+dpdk_pdump Application
+======================
+The dpdk_pdump application is a Data Plane Development Kit (DPDK) application
+that runs as a DPDK secondary process and is capable of enabling packet capturing
+on dpdk ports and capturing the packets.
+
+Running the Application
+-----------------------
+The application has a pdump command line option with various sub arguments inside:
+Parameters inside parentheses are mandatory.
+Parameters inside square brackets are optional.
+The user has to pass the packet capture parameters under the --pdump option; multiple
+--pdump options can be passed to capture packets on different port and queue combinations.
+
+.. code-block:: console
+
+ ./$(RTE_TARGET)/app/pdump -- --pdump '(port=<port_id> |
+ device_id=<pci address or device name>),
+ (queue=2), (rx-dev=<iface/path to pcap file> |
+ tx-dev=<iface/path to pcap file> |
+ rxtx-dev=<iface/path to pcap file>),
+ [ring-size=1024], [mbuf-size=2048], [total-num-mbufs=8191]'
+
+Parameters
+~~~~~~~~~~
+``--pdump``: Specifies arguments needed for packet capturing.
+
+``port``
+Port id of the eth device on which packets should be captured.
+
+``device_id``
+PCI address (or) name of the eth device on which packets should be captured.
+
+``queue``
+Queue id of the eth device on which packets should be captured.
+The user can pass the queue value as ``*`` if packet capturing has to be enabled
+on all queues of the eth device.
+
+``rx-dev``
+Can be either pcap file name or any linux iface onto which ingress side packets of
+dpdk eth device will be sent on for users to view.
+
+``tx-dev``
+Can be either pcap file name or any linux iface onto which egress side packets of
+dpdk eth device will be sent on for users to view.
+
+``rxtx-dev``
+Can be either pcap file name or any linux iface onto which both ingress &
+egress side packets of dpdk eth device will be sent on for users to view.
+
+Note:
+To receive ingress packets only, rx-dev should be passed.
+To receive egress packets only, tx-dev should be passed.
+To receive ingress and egress packets separately, pass both rx-dev and tx-dev.
+To receive both ingress and egress packets on the same device, pass only rxtx-dev.
+
+Pdump tool uses these devices internally to create PCAPPMD vdev having ``tx_stream``
+as either of these devices.
+
+``ring-size``
+Size of the ring. This value is used internally for ring creation.
+The ring will be used to enqueue the packets from primary application to secondary.
+
+``mbuf-size``
+Size of the mbuf data room size. This is used internally for mempool creation.
+Ideally this value must be same as primary application's mempool which is used for
+packet rx.
+
+``total-num-mbufs``
+Total number of mbufs in the mempool. This is used internally for mempool creation.
+
+Example
+-------
+
+.. code-block:: console
+
+ $ sudo ./x86_64-native-linuxapp-gcc/app/dpdk_pdump -- --pdump 'device_id=0000:02:00.0,queue=*,rx-dev=/tmp/rx-file.pcap,tx-dev=/tmp/tx-file.pcap,ring-size=8192,mbuf-size=2176,total-num-mbufs=16384' --pdump 'device_id=0000:01:00.0,queue=*,rx-dev=/tmp/rx2-file.pcap,tx-dev=/tmp/tx2-file.pcap,ring-size=16384,mbuf-size=2176,total-num-mbufs=32768'
--
2.5.0
^ permalink raw reply [relevance 6%]
* Re: [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability
2016-05-07 6:40 3% [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
@ 2016-05-10 3:23 3% ` Xu, Qian Q
2016-05-10 17:41 0% ` Yuanhan Liu
2016-05-13 6:16 3% ` [dpdk-dev] [PATCH v2 " Yuanhan Liu
1 sibling, 1 reply; 200+ results
From: Xu, Qian Q @ 2016-05-10 3:23 UTC (permalink / raw)
To: Yuanhan Liu, dev; +Cc: Xie, Huawei
Do we need patch qemu for the reconnect case?
Thanks
Qian
-----Original Message-----
From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Yuanhan Liu
Sent: Saturday, May 07, 2016 2:40 PM
To: dev@dpdk.org
Cc: Xie, Huawei; Yuanhan Liu
Subject: [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability
Both the vhost-user backend (DPDK here) and frontend (QEMU) could be server, as well as client. DPDK just acts as server so far. This patch set would make it possible to act as both.
A new arg (flags) is introduced for API rte_vhost_driver_register(). And the client mode is enabled when RTE_VHOST_USER_CLIENT is given. Note that this implies an API breakage. However, since this release deals with ABI/API refactoring, it should not be an issue.
With the DPDK as client, it's easier to implement the "reconnect" ability, which means we could still make vhost-user work after DPDK restarts.
---
Yuanhan Liu (6):
vhost: rename structs for enabling client mode
vhost: add vhost-user client mode
vhost: add reconnect ability
vhost: workaround stale vring base
examples/vhost: add client and reconnect option
vhost: add pmd client and reconnect option
drivers/net/vhost/rte_eth_vhost.c | 54 +++-
examples/vhost/main.c | 23 +-
lib/librte_vhost/rte_virtio_net.h | 12 +-
lib/librte_vhost/vhost_user/vhost-net-user.c | 355 ++++++++++++++++++---------
lib/librte_vhost/vhost_user/vhost-net-user.h | 6 -
lib/librte_vhost/virtio-net.c | 8 +
6 files changed, 313 insertions(+), 145 deletions(-)
--
1.9.0
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH v2 0/8] vhost/example cleanup/fix
[not found] ` <1462224230-19460-1-git-send-email-yuanhan.liu@linux.intel.com>
2016-05-02 21:23 2% ` [dpdk-dev] [PATCH v2 7/8] examples/vhost: switch_worker cleanup Yuanhan Liu
@ 2016-05-09 18:06 0% ` Yuanhan Liu
1 sibling, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-05-09 18:06 UTC (permalink / raw)
To: dev; +Cc: huawei.xie
Series applied to dpdk-next-virtio.
--yliu
On Mon, May 02, 2016 at 02:23:42PM -0700, Yuanhan Liu wrote:
> I'm starting to work on the vhost ABI refactoring, that I also have to
> touch the vhost example code. The vhost example code, however, is very
> messy, full of __very__ long lines. This would make a later diff to
> apply the new vhost API be very ugly, therefore, not friendly for review.
> This is how this cleanup comes.
>
> Besides that, there is one enhancement patch, which handles the broadcast
> packets so that we could relay the ARP request packet, to let vhost-switch
> be more like a real switch. There is another patch that (hopefully) would
> fix the mbuf allocation failure ultimately. I also added some guidelines
> there as comments to show how to count how many mbuf entries is enough for
> our usage.
>
> In other words, an example is meant to be clean/simple and with good
> coding style so that people can get the usage easily. So, one way or
> another, this patch is good to have, even without this ABI refactoring
> stuff.
>
> Note that I'm going to apply it before the end of this week, if no objections.
>
>
> v2: - some checkpatch fixes
>
> - cleaned the code about device statistics
>
> ---
> Yuanhan Liu (8):
> examples/vhost: remove the non-working zero copy code
> examples/vhost: remove unused macro and struct
> examples/vhost: use tailq to link vhost devices
> examples/vhost: use mac compare helper function directly
> examples/vhost: handle broadcast packet
> examples/vhost: fix mbuf allocation failure
> examples/vhost: switch_worker cleanup
> examples/vhost: embed statistics into vhost_dev struct
>
> doc/guides/sample_app_ug/vhost.rst | 36 +-
> examples/vhost/main.c | 2394 ++++++------------------------------
> examples/vhost/main.h | 56 +-
> 3 files changed, 391 insertions(+), 2095 deletions(-)
>
> --
> 1.9.3
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH v1] hash: add tsx support for cuckoo hash
2016-05-07 4:56 3% ` Stephen Hemminger
@ 2016-05-09 16:51 4% ` Shen, Wei1
0 siblings, 0 replies; 200+ results
From: Shen, Wei1 @ 2016-05-09 16:51 UTC (permalink / raw)
To: Stephen Hemminger
Cc: dev, De Lara Guarch, Pablo, Maciocco, Christian, Gobriel, Sameh
Hi Stephen,
Greetings. Thanks for your great feedback. Let me address your concerns here.
1) It changes ABI, so it breaks old programs
The patch uses the extra_flag field in the rte_hash_parameters struct to set the default insertion behavior. Today there is only one bit used by this flag (RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT 0x1) and we used the next unused bit (RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 0x2) in this patch. So the ABI is maintained.
2) What about older processors, need to detect and handle them at runtime.
Correct. This patch is based on the previous Transactional Memory patches. Since those patches already assume that the user checks for the presence of TSX, we build on top of this assumption. But I personally agree with you that handling the TSX check should be made easier.
http://dpdk.org/ml/archives/dev/2015-June/018571.html
http://dpdk.org/ml/archives/dev/2015-June/018566.html
3) Why can't this just be the default behavior with correct fallback to locking on older processors.
This is an excellent point. We discussed this before. Our thought at that time was that, since insertion with TSX is a bit slower than insertion without any protection (TSX or other locks), keeping it optional benefits apps that are designed to have a single writer to the hash table (for instance in some master-slave model). We might need more feedback from users about whether making it the default is more desirable if most apps are designed in a multi-writer manner.
Thanks,
--
Best,
Wei Shen.
On 5/6/16, 9:56 PM, "Stephen Hemminger" <stephen@networkplumber.org> wrote:
>On Fri, 6 May 2016 21:05:02 +0100
>Shen Wei <wei1.shen@intel.com> wrote:
>
>> --- a/lib/librte_hash/rte_cuckoo_hash.c
>> +++ b/lib/librte_hash/rte_cuckoo_hash.c
>> @@ -1,7 +1,7 @@
>> /*-
>> * BSD LICENSE
>> *
>> - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
>> + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
>> * All rights reserved.
>> *
>> * Redistribution and use in source and binary forms, with or without
>> @@ -100,7 +100,9 @@ EAL_REGISTER_TAILQ(rte_hash_tailq)
>>
>> #define KEY_ALIGNMENT 16
>>
>> -#define LCORE_CACHE_SIZE 8
>> +#define LCORE_CACHE_SIZE 64
>> +
>> +#define RTE_HASH_BFS_QUEUE_MAX_LEN 5000
>>
>> #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
>> /*
>> @@ -190,6 +192,7 @@ struct rte_hash {
>> memory support */
>> struct lcore_cache *local_free_slots;
>> /**< Local cache per lcore, storing some indexes of the free slots */
>> + uint8_t multiwrite_add; /**< Multi-write safe hash add behavior */
>> } __rte_cache_aligned;
>>
>
>I like the idea of using TSX to allow multi-writer safety, but there are
>several problems with this patch.
>
>1) It changes ABI, so it breaks old programs
>2) What about older processors, need to detect and handle them at runtime.
>3) Why can't this just be the default behavior with correct
> fallback to locking on older processors.
>
>Actually lock elision in DPDK is an interesting topic in general that
>needs to be addressed.
^ permalink raw reply [relevance 4%]
* [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability
@ 2016-05-07 6:40 3% Yuanhan Liu
2016-05-10 3:23 3% ` Xu, Qian Q
2016-05-13 6:16 3% ` [dpdk-dev] [PATCH v2 " Yuanhan Liu
0 siblings, 2 replies; 200+ results
From: Yuanhan Liu @ 2016-05-07 6:40 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Yuanhan Liu
Both the vhost-user backend (DPDK here) and frontend (QEMU) could be
server, as well as client. DPDK just acts as server so far. This patch
set would make it possible to act as both.
A new arg (flags) is introduced for API rte_vhost_driver_register(). And the
client mode is enabled when RTE_VHOST_USER_CLIENT is given. Note that this
implies an API breakage. However, since this release deals with ABI/API
refactoring, it should not be an issue.
With the DPDK as client, it's easier to implement the "reconnect" ability,
which means we could still make vhost-user work after DPDK restarts.
---
Yuanhan Liu (6):
vhost: rename structs for enabling client mode
vhost: add vhost-user client mode
vhost: add reconnect ability
vhost: workaround stale vring base
examples/vhost: add client and reconnect option
vhost: add pmd client and reconnect option
drivers/net/vhost/rte_eth_vhost.c | 54 +++-
examples/vhost/main.c | 23 +-
lib/librte_vhost/rte_virtio_net.h | 12 +-
lib/librte_vhost/vhost_user/vhost-net-user.c | 355 ++++++++++++++++++---------
lib/librte_vhost/vhost_user/vhost-net-user.h | 6 -
lib/librte_vhost/virtio-net.c | 8 +
6 files changed, 313 insertions(+), 145 deletions(-)
--
1.9.0
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH v1] hash: add tsx support for cuckoo hash
@ 2016-05-07 4:56 3% ` Stephen Hemminger
2016-05-09 16:51 4% ` Shen, Wei1
0 siblings, 1 reply; 200+ results
From: Stephen Hemminger @ 2016-05-07 4:56 UTC (permalink / raw)
To: Shen Wei; +Cc: dev, pablo.de.lara.guarch, christian.maciocco, Sameh Gobriel
On Fri, 6 May 2016 21:05:02 +0100
Shen Wei <wei1.shen@intel.com> wrote:
> --- a/lib/librte_hash/rte_cuckoo_hash.c
> +++ b/lib/librte_hash/rte_cuckoo_hash.c
> @@ -1,7 +1,7 @@
> /*-
> * BSD LICENSE
> *
> - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
> + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
> * All rights reserved.
> *
> * Redistribution and use in source and binary forms, with or without
> @@ -100,7 +100,9 @@ EAL_REGISTER_TAILQ(rte_hash_tailq)
>
> #define KEY_ALIGNMENT 16
>
> -#define LCORE_CACHE_SIZE 8
> +#define LCORE_CACHE_SIZE 64
> +
> +#define RTE_HASH_BFS_QUEUE_MAX_LEN 5000
>
> #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
> /*
> @@ -190,6 +192,7 @@ struct rte_hash {
> memory support */
> struct lcore_cache *local_free_slots;
> /**< Local cache per lcore, storing some indexes of the free slots */
> + uint8_t multiwrite_add; /**< Multi-write safe hash add behavior */
> } __rte_cache_aligned;
>
I like the idea of using TSX to allow multi-writer safety, but there are
several problems with this patch.
1) It changes ABI, so it breaks old programs
2) What about older processors, need to detect and handle them at runtime.
3) Why can't this just be the default behavior with correct
fallback to locking on older processors.
Actually lock elision in DPDK is an interesting topic in general that
needs to be addressed.
^ permalink raw reply [relevance 3%]
* [dpdk-dev] [PATCH 5/5] doc: update doc for packet capture framework
@ 2016-05-06 10:55 6% ` Reshma Pattan
1 sibling, 0 replies; 200+ results
From: Reshma Pattan @ 2016-05-06 10:55 UTC (permalink / raw)
To: dev; +Cc: Reshma Pattan
added programmers guide for librte_pdump.
added sample application guide for app/pdump application.
updated release note for packet capture framework changes.
Signed-off-by: Reshma Pattan <reshma.pattan@intel.com>
---
MAINTAINERS | 3 +
doc/guides/prog_guide/index.rst | 1 +
doc/guides/prog_guide/pdump_library.rst | 121 ++++++++++++++++++++++++++++++++
doc/guides/rel_notes/release_16_07.rst | 7 ++
doc/guides/sample_app_ug/index.rst | 1 +
doc/guides/sample_app_ug/pdump.rst | 109 ++++++++++++++++++++++++++++
6 files changed, 242 insertions(+)
create mode 100644 doc/guides/prog_guide/pdump_library.rst
create mode 100644 doc/guides/sample_app_ug/pdump.rst
diff --git a/MAINTAINERS b/MAINTAINERS
index b6a39c7..6ddc818 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -432,6 +432,9 @@ Pdump
M: Reshma Pattan <reshma.pattan@intel.com>
F: lib/librte_pdump/
F: app/pdump/
+F: doc/guides/prog_guide/pdump_library.rst
+F: doc/guides/sample_app_ug/pdump.rst
+
Hierarchical scheduler
M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
diff --git a/doc/guides/prog_guide/index.rst b/doc/guides/prog_guide/index.rst
index b862d0c..4caf969 100644
--- a/doc/guides/prog_guide/index.rst
+++ b/doc/guides/prog_guide/index.rst
@@ -71,6 +71,7 @@ Programmer's Guide
writing_efficient_code
profile_app
glossary
+ pdump_library
**Figures**
diff --git a/doc/guides/prog_guide/pdump_library.rst b/doc/guides/prog_guide/pdump_library.rst
new file mode 100644
index 0000000..6af77b9
--- /dev/null
+++ b/doc/guides/prog_guide/pdump_library.rst
@@ -0,0 +1,121 @@
+.. BSD LICENSE
+ Copyright(c) 2016 Intel Corporation. All rights reserved.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+.. _Pdump_Library:
+
+pdump Library
+=============
+
+Pdump library provides framework for packet capturing on DPDK.
+
+Operation
+---------
+
+Pdump library provides APIs to support packet capturing on dpdk ethernet devices.
+Library provides APIs to initialize the packet capture framework, enable/disable
+the packet capture and uninitialize the packet capture framework.
+
+Pdump library works on server and client based model.
+
+The server is responsible for enabling/disabling the packet captures.
+Clients are responsible for requesting enable/disable of the
+packet captures.
+
+As part of packet capture framework initialization, a pthread and
+the server socket are created. Only one server socket is allowed on the system.
+As part of enabling/disabling the packet capture, client sockets are created,
+and multiple client sockets are allowed.
+Whoever calls initialization first will succeed with the initialization;
+subsequent calls of initialization are not allowed. So later users can only
+request enabling/disabling the packet capture.
+
+Library provides below APIs
+
+``rte_pdump_init()``
+This API initializes the packet capture framework.
+
+``rte_pdump_enable()``
+This API enables the packet capturing on a given port and queue.
+Note: the filter option in the API is a placeholder for future use.
+
+``rte_pdump_enable_by_deviceid()``
+This API enables the packet capturing on a given device id
+(device name or pci address) and queue.
+Note: the filter option in the API is a placeholder for future use.
+
+``rte_pdump_disable()``
+This API disables the packet capturing on a given port and queue.
+
+``rte_pdump_disable_by_deviceid()``
+This API disables the packet capturing on a given device_id and queue.
+
+``rte_pdump_uninit()``
+This API uninitializes the packet capture framework.
+
+
+Implementation Details
+----------------------
+
+On a call to library API ``rte_pdump_init()``, library creates pthread and server socket.
+Server socket in pthread context will be listening to the client requests to enable/disable
+the packet capture.
+
+Whoever calls this API first will have the server socket created;
+subsequent calls to this API will not create any further server sockets, i.e. only one server
+socket is allowed.
+
+On each call to library APIs ``rte_pdump_enable()/rte_pdump_enable_by_deviceid()``
+to enable the packet capture, library creates separate client sockets,
+builds up enable request and sends the request to the server.
+Server listening on the socket will serve the request, enable the packet capture
+by registering ethernet rx/tx callbacks for the given port/device_id and queue combinations.
+The server mirrors the packets to a new mempool and enqueues them to the ring that the client has passed
+in these APIs.
+Server sends the response back to the client about the status of the request that was processed.
+After the response is received from the server, client sockets will be closed.
+
+On each call to library APIs ``rte_pdump_disable()/rte_pdump_disable_by_deviceid()``
+to disable packet capture, library creates separate client sockets,
+builds up disable request and sends the request to the server.
+Server listening on the socket will serve the request, disable the packet capture
+by removing the ethernet rx/tx callbacks for the given port/device_id and queue combinations.
+Server sends the response back to the client about the status of the request that was processed.
+After the response is received from the server, client sockets will be closed.
+
+On a call to library API ``rte_pdump_uninit()``, library closes the pthread and the server socket.
+
+
+Use Case: Packet Capturing
+--------------------------
+
+app/pdump tool is developed based on this library to capture the packets
+in DPDK.
+Users can develop their own packet capturing application using new library
+if they wish to do so.
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 83c841b..4d6ab10 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -34,6 +34,10 @@ This section should contain new features added in this release. Sample format:
Refer to the previous release notes for examples.
+* **Added packet capturing support.**
+
+ Users can now capture packets on DPDK ports using the librte_pdump library
+ and the app/pdump tool.
Resolved Issues
---------------
@@ -90,6 +94,7 @@ This section should contain API changes. Sample format:
ibadcrc, ibadlen, imcasts, fdirmatch, fdirmiss,
tx_pause_xon, rx_pause_xon, tx_pause_xoff, rx_pause_xoff.
+* The function ``rte_eth_dev_get_port_by_name`` is now a public API.
ABI Changes
-----------
@@ -101,6 +106,8 @@ ABI Changes
* The ``rte_port_source_params`` structure has new fields to support PCAP file.
It was already in release 16.04 with ``RTE_NEXT_ABI`` flag.
+* The ``rte_eth_dev_info`` structure has new fields ``nb_rx_queues`` and ``nb_tx_queues``
+ to support number of queues configured by software.
Shared Library Versions
-----------------------
diff --git a/doc/guides/sample_app_ug/index.rst b/doc/guides/sample_app_ug/index.rst
index 930f68c..96bb317 100644
--- a/doc/guides/sample_app_ug/index.rst
+++ b/doc/guides/sample_app_ug/index.rst
@@ -76,6 +76,7 @@ Sample Applications User Guide
ptpclient
performance_thread
ipsec_secgw
+ pdump
**Figures**
diff --git a/doc/guides/sample_app_ug/pdump.rst b/doc/guides/sample_app_ug/pdump.rst
new file mode 100644
index 0000000..c185550
--- /dev/null
+++ b/doc/guides/sample_app_ug/pdump.rst
@@ -0,0 +1,109 @@
+
+.. BSD LICENSE
+ Copyright(c) 2016 Intel Corporation. All rights reserved.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+dpdk_pdump Application
+======================
+The dpdk_pdump application is a Data Plane Development Kit (DPDK) application
+that runs as a DPDK secondary process and is capable of enabling packet capturing
+on dpdk ports and capturing the packets.
+
+Running the Application
+-----------------------
+The application has a pdump command line option with various sub arguments inside:
+Parameters inside parentheses are mandatory.
+Parameters inside square brackets are optional.
+The user has to pass the packet capture parameters under the --pdump option; multiple
+--pdump options can be passed to capture packets on different port and queue combinations.
+
+.. code-block:: console
+
+ ./$(RTE_TARGET)/app/pdump -- --pdump '(port=<port_id> |
+ device_id=<pci address or device name>),
+ (queue=2), (rx-dev=<iface/path to pcap file> |
+ tx-dev=<iface/path to pcap file> |
+ rxtx-dev=<iface/path to pcap file>),
+ [ring-size=1024], [mbuf-size=2048], [total-num-mbufs=8191]'
+
+Parameters
+~~~~~~~~~~
+``--pdump``: Specifies arguments needed for packet capturing.
+
+``port``
+Port id of the eth device on which packets should be captured.
+
+``device_id``
+PCI address (or) name of the eth device on which packets should be captured.
+
+``queue``
+Queue id of the eth device on which packets should be captured.
+The user can pass the queue value as ``*`` if packet capturing has to be enabled
+on all queues of the eth device.
+
+``rx-dev``
+Can be either pcap file name or any linux iface onto which ingress side packets of
+dpdk eth device will be sent on for users to view.
+
+``tx-dev``
+Can be either pcap file name or any linux iface onto which egress side packets of
+dpdk eth device will be sent on for users to view.
+
+``rxtx-dev``
+Can be either pcap file name or any linux iface onto which both ingress &
+egress side packets of dpdk eth device will be sent on for users to view.
+
+Note:
+To receive ingress packets only, rx-dev should be passed.
+To receive egress packets only, tx-dev should be passed.
+To receive ingress and egress packets separately, pass both rx-dev and tx-dev.
+To receive both ingress and egress packets on the same device, pass only rxtx-dev.
+
+Pdump tool uses these devices internally to create PCAPPMD vdev having ``tx_stream``
+as either of these devices.
+
+``ring-size``
+Size of the ring. This value is used internally for ring creation.
+The ring will be used to enqueue the packets from primary application to secondary.
+
+``mbuf-size``
+Size of the mbuf data room size. This is used internally for mempool creation.
+Ideally this value must be same as primary application's mempool which is used for
+packet rx.
+
+``total-num-mbufs``
+Total number of mbufs in the mempool. This is used internally for mempool creation.
+
+Example
+-------
+
+.. code-block:: console
+
+ $ sudo ./x86_64-native-linuxapp-gcc/app/dpdk_pdump -- --pdump 'device_id=0000:02:00.0,queue=*,rx-dev=/tmp/rx-file.pcap,tx-dev=/tmp/tx-file.pcap,ring-size=8192,mbuf-size=2176,total-num-mbufs=16384' --pdump 'device_id=0000:01:00.0,queue=*,rx-dev=/tmp/rx2-file.pcap,tx-dev=/tmp/tx2-file.pcap,ring-size=16384,mbuf-size=2176,total-num-mbufs=32768'
--
2.5.0
^ permalink raw reply [relevance 6%]
* [dpdk-dev] [PATCH 0/3] [RFC] vhost: micro vhost optimization
@ 2016-05-03 0:46 3% Yuanhan Liu
2016-05-10 21:49 0% ` Rich Lane
0 siblings, 1 reply; 200+ results
From: Yuanhan Liu @ 2016-05-03 0:46 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Yuanhan Liu
Here is a small patch set that does some micro optimization, which brings about a
10% performance boost in my 64B packet testing, with the following topology:
pkt generator <----> NIC <-----> Virtio NIC
Patch 1 pre-updates the used ring entries and then updates them in batch. It should be
feasible from my understanding: there will be no issue, as the guest driver will
not start processing them as long as we haven't updated the "used->idx"
yet. I could be missing something though.
Patch 2 saves one check for small packets (that can be held in one desc
buf and mbuf).
Patch 3 moves several frequently used fields into one cache line, for
better cache sharing.
Note that this patch set is based on my latest vhost ABI refactoring patchset.
---
Yuanhan Liu (3):
vhost: pre update used ring for Tx and Rx
vhost: optimize dequeue for small packets
vhost: arrange virtio_net fields for better cache sharing
lib/librte_vhost/vhost-net.h | 8 +--
lib/librte_vhost/vhost_rxtx.c | 110 ++++++++++++++++++++++++------------------
2 files changed, 68 insertions(+), 50 deletions(-)
--
1.9.0
^ permalink raw reply [relevance 3%]
* [dpdk-dev] [PATCH 14/16] vhost: reserve few more space for future extension
2016-05-02 22:25 9% [dpdk-dev] [PATCH 00/16] vhost ABI/API refactoring Yuanhan Liu
2016-05-02 22:25 2% ` [dpdk-dev] [PATCH 10/16] vhost: export vid as the only interface to applications Yuanhan Liu
@ 2016-05-02 22:25 4% ` Yuanhan Liu
2016-05-13 5:24 8% ` [dpdk-dev] [PATCH v2 00/19] vhost ABI/API refactoring Yuanhan Liu
2 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-05-02 22:25 UTC (permalink / raw)
To: dev
Cc: huawei.xie, Thomas Monjalon, Panu Matilainen, Tetsuya Mukawa,
Traynor Kevin, Yuanhan Liu
"virtio_net_device_ops" is the only remaining open struct that an application
can access, therefore, it's the only place that might introduce potential
ABI break in future for extension.
So, reserve some space in it. 5 entries should be enough, considering
that we have barely touched it for a long while. Another reason to
choose 5 is for cache alignment: 5 makes the struct 64 bytes on a 64-bit
machine.
With this, we can say with confidence that we might be able to be free from
ABI violations forever.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
lib/librte_vhost/rte_virtio_net.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 388621e..4e50425 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -77,6 +77,8 @@ struct virtio_net_device_ops {
void (*destroy_device)(int vid); /**< Remove device. */
int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */
+
+ void *reserved[5]; /**< Reserved for future extension */
};
/**
--
1.9.0
^ permalink raw reply [relevance 4%]
* [dpdk-dev] [PATCH 10/16] vhost: export vid as the only interface to applications
2016-05-02 22:25 9% [dpdk-dev] [PATCH 00/16] vhost ABI/API refactoring Yuanhan Liu
@ 2016-05-02 22:25 2% ` Yuanhan Liu
2016-05-10 16:17 0% ` Rich Lane
2016-05-02 22:25 4% ` [dpdk-dev] [PATCH 14/16] vhost: reserve few more space for future extension Yuanhan Liu
2016-05-13 5:24 8% ` [dpdk-dev] [PATCH v2 00/19] vhost ABI/API refactoring Yuanhan Liu
2 siblings, 1 reply; 200+ results
From: Yuanhan Liu @ 2016-05-02 22:25 UTC (permalink / raw)
To: dev
Cc: huawei.xie, Thomas Monjalon, Panu Matilainen, Tetsuya Mukawa,
Traynor Kevin, Yuanhan Liu
With all the previous preparation work, we are just one step away from
the final ABI refactoring. That is, to change the current APIs so that they
take a vid instead of the old virtio_net dev.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
drivers/net/vhost/rte_eth_vhost.c | 61 ++++++++++-----------------
examples/vhost/main.c | 41 ++++++++++++------
lib/librte_vhost/rte_virtio_net.h | 30 +++++--------
lib/librte_vhost/vhost_rxtx.c | 15 ++++++-
lib/librte_vhost/vhost_user/virtio-net-user.c | 14 +++---
lib/librte_vhost/virtio-net.c | 17 +++++---
6 files changed, 91 insertions(+), 87 deletions(-)
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 9763cd4..a9dada5 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -68,9 +68,9 @@ static struct ether_addr base_eth_addr = {
};
struct vhost_queue {
+ int vid;
rte_atomic32_t allow_queuing;
rte_atomic32_t while_queuing;
- struct virtio_net *device;
struct pmd_internal *internal;
struct rte_mempool *mb_pool;
uint8_t port;
@@ -137,7 +137,7 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
goto out;
/* Dequeue packets from guest TX queue */
- nb_rx = rte_vhost_dequeue_burst(r->device,
+ nb_rx = rte_vhost_dequeue_burst(r->vid,
r->virtqueue_id, r->mb_pool, bufs, nb_bufs);
r->rx_pkts += nb_rx;
@@ -168,7 +168,7 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
goto out;
/* Enqueue packets to guest RX queue */
- nb_tx = rte_vhost_enqueue_burst(r->device,
+ nb_tx = rte_vhost_enqueue_burst(r->vid,
r->virtqueue_id, bufs, nb_bufs);
r->tx_pkts += nb_tx;
@@ -217,7 +217,7 @@ find_internal_resource(int vid)
}
static int
-new_device(struct virtio_net *dev)
+new_device(int vid)
{
struct rte_eth_dev *eth_dev;
struct internal_list *list;
@@ -228,23 +228,17 @@ new_device(struct virtio_net *dev)
int newnode;
#endif
- if (dev == NULL) {
- RTE_LOG(INFO, PMD, "Invalid argument\n");
- return -1;
- }
-
- list = find_internal_resource(dev->vid);
+ list = find_internal_resource(vid);
if (list == NULL) {
- RTE_LOG(INFO, PMD, "Invalid vid %d\n", dev->vid);
+ RTE_LOG(INFO, PMD, "Invalid vid %d\n", vid);
return -1;
}
eth_dev = list->eth_dev;
internal = eth_dev->data->dev_private;
- internal->vid = dev->vid;
#ifdef RTE_LIBRTE_VHOST_NUMA
- newnode = rte_vhost_get_numa_node(dev->vid);
+ newnode = rte_vhost_get_numa_node(vid);
if (newnode > 0)
eth_dev->data->numa_node = newnode;
#endif
@@ -253,7 +247,7 @@ new_device(struct virtio_net *dev)
vq = eth_dev->data->rx_queues[i];
if (vq == NULL)
continue;
- vq->device = dev;
+ vq->vid = vid;
vq->internal = internal;
vq->port = eth_dev->data->port_id;
}
@@ -261,15 +255,14 @@ new_device(struct virtio_net *dev)
vq = eth_dev->data->tx_queues[i];
if (vq == NULL)
continue;
- vq->device = dev;
+ vq->vid = vid;
vq->internal = internal;
vq->port = eth_dev->data->port_id;
}
- for (i = 0; i < rte_vhost_get_queue_num(dev->vid) * VIRTIO_QNUM; i++)
- rte_vhost_enable_guest_notification(dev, i, 0);
+ for (i = 0; i < rte_vhost_get_queue_num(vid) * VIRTIO_QNUM; i++)
+ rte_vhost_enable_guest_notification(vid, i, 0);
- dev->priv = eth_dev;
eth_dev->data->dev_link.link_status = ETH_LINK_UP;
for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
@@ -293,22 +286,19 @@ new_device(struct virtio_net *dev)
}
static void
-destroy_device(volatile struct virtio_net *dev)
+destroy_device(int vid)
{
+ struct internal_list *list;
struct rte_eth_dev *eth_dev;
struct vhost_queue *vq;
unsigned i;
- if (dev == NULL) {
- RTE_LOG(INFO, PMD, "Invalid argument\n");
- return;
- }
-
- eth_dev = (struct rte_eth_dev *)dev->priv;
- if (eth_dev == NULL) {
- RTE_LOG(INFO, PMD, "Failed to find a ethdev\n");
+ list = find_internal_resource(vid);
+ if (list == NULL) {
+ RTE_LOG(INFO, PMD, "Invalid vid %d\n", vid);
return;
}
+ eth_dev = list->eth_dev;
/* Wait until rx/tx_pkt_burst stops accessing vhost device */
for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
@@ -330,19 +320,17 @@ destroy_device(volatile struct virtio_net *dev)
eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
- dev->priv = NULL;
-
for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
vq = eth_dev->data->rx_queues[i];
if (vq == NULL)
continue;
- vq->device = NULL;
+ vq->vid = -1;
}
for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
vq = eth_dev->data->tx_queues[i];
if (vq == NULL)
continue;
- vq->device = NULL;
+ vq->vid = -1;
}
RTE_LOG(INFO, PMD, "Connection closed\n");
@@ -351,20 +339,15 @@ destroy_device(volatile struct virtio_net *dev)
}
static int
-vring_state_changed(struct virtio_net *dev, uint16_t vring, int enable)
+vring_state_changed(int vid, uint16_t vring, int enable)
{
struct rte_vhost_vring_state *state;
struct rte_eth_dev *eth_dev;
struct internal_list *list;
- if (dev == NULL) {
- RTE_LOG(ERR, PMD, "Invalid argument\n");
- return -1;
- }
-
- list = find_internal_resource(dev->vid);
+ list = find_internal_resource(vid);
if (list == NULL) {
- RTE_LOG(ERR, PMD, "Invalid vid %d\n", dev->vid);
+ RTE_LOG(ERR, PMD, "Invalid vid %d\n", vid);
return -1;
}
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 145fa6f..bbf0d28 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -701,6 +701,19 @@ find_vhost_dev(struct ether_addr *mac)
return NULL;
}
+static inline struct vhost_dev *__attribute__((always_inline))
+find_vhost_dev_by_vid(int vid)
+{
+ struct vhost_dev *vdev;
+
+ TAILQ_FOREACH(vdev, &vhost_dev_list, next) {
+ if (vdev->ready == DEVICE_RX && vdev->vid == vid)
+ return vdev;
+ }
+
+ return NULL;
+}
+
/*
* This function learns the MAC address of the device and registers this along with a
* vlan tag to a VMDQ.
@@ -796,7 +809,7 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
{
uint16_t ret;
- ret = rte_vhost_enqueue_burst(dst_vdev->dev, VIRTIO_RXQ, &m, 1);
+ ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
if (enable_stats) {
rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
@@ -1042,7 +1055,6 @@ static inline void __attribute__((always_inline))
drain_eth_rx(struct vhost_dev *vdev)
{
uint16_t rx_count, enqueue_count;
- struct virtio_net *dev = vdev->dev;
struct rte_mbuf *pkts[MAX_PKT_BURST];
rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
@@ -1068,7 +1080,7 @@ drain_eth_rx(struct vhost_dev *vdev)
}
}
- enqueue_count = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ,
+ enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
pkts, rx_count);
if (enable_stats) {
rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
@@ -1085,7 +1097,7 @@ drain_virtio_tx(struct vhost_dev *vdev)
uint16_t count;
uint16_t i;
- count = rte_vhost_dequeue_burst(vdev->dev, VIRTIO_TXQ, mbuf_pool,
+ count = rte_vhost_dequeue_burst(vdev->vid, VIRTIO_TXQ, mbuf_pool,
pkts, MAX_PKT_BURST);
/* setup VMDq for the first packet */
@@ -1171,13 +1183,17 @@ switch_worker(void *arg __rte_unused)
* of dev->remove=1 which can cause an infinite loop in the rte_pause loop.
*/
static void
-destroy_device (volatile struct virtio_net *dev)
+destroy_device(int vid)
{
struct vhost_dev *vdev;
int lcore;
- vdev = (struct vhost_dev *)dev->priv;
- /*set the remove flag. */
+ vdev = find_vhost_dev_by_vid(vid);
+ if (!vdev) {
+ RTE_LOG(ERR, VHOST_CONFIG, "(%d) failed to find device\n", vid);
+ return;
+ }
+
vdev->remove = 1;
while(vdev->ready != DEVICE_SAFE_REMOVE) {
rte_pause();
@@ -1214,12 +1230,11 @@ destroy_device (volatile struct virtio_net *dev)
* and the allocated to a specific data core.
*/
static int
-new_device (struct virtio_net *dev)
+new_device(int vid)
{
int lcore, core_add = 0;
uint32_t device_num_min = num_devices;
struct vhost_dev *vdev;
- int vid = dev->vid;
vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
if (vdev == NULL) {
@@ -1228,8 +1243,6 @@ new_device (struct virtio_net *dev)
vid);
return -1;
}
- vdev->dev = dev;
- dev->priv = vdev;
vdev->vid = vid;
TAILQ_INSERT_TAIL(&vhost_dev_list, vdev, next);
@@ -1252,8 +1265,8 @@ new_device (struct virtio_net *dev)
lcore_info[vdev->coreid].device_num++;
/* Disable notifications. */
- rte_vhost_enable_guest_notification(dev, VIRTIO_RXQ, 0);
- rte_vhost_enable_guest_notification(dev, VIRTIO_TXQ, 0);
+ rte_vhost_enable_guest_notification(vid, VIRTIO_RXQ, 0);
+ rte_vhost_enable_guest_notification(vid, VIRTIO_TXQ, 0);
RTE_LOG(INFO, VHOST_DATA,
"(%d) device has been added to data core %d\n",
@@ -1309,7 +1322,7 @@ print_stats(void)
"RX total: %" PRIu64 "\n"
"RX dropped: %" PRIu64 "\n"
"RX successful: %" PRIu64 "\n",
- vdev->dev->vid,
+ vdev->vid,
tx_total, tx_dropped, tx,
rx_total, rx_dropped, rx);
}
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 27f6847..0a26df9 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -178,23 +178,12 @@ struct virtio_memory {
*
*/
struct virtio_net_device_ops {
- int (*new_device)(struct virtio_net *); /**< Add device. */
- void (*destroy_device)(volatile struct virtio_net *); /**< Remove device. */
+ int (*new_device)(int vid); /**< Add device. */
+ void (*destroy_device)(int vid); /**< Remove device. */
- int (*vring_state_changed)(struct virtio_net *dev, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */
+ int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */
};
-static inline uint16_t __attribute__((always_inline))
-rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id)
-{
- struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
-
- if (!vq->enabled)
- return 0;
-
- return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx_res;
-}
-
/**
* Function to convert guest physical addresses to vhost virtual addresses.
* This is used to convert guest virtio buffer addresses.
@@ -231,7 +220,7 @@ int rte_vhost_feature_enable(uint64_t feature_mask);
/* Returns currently supported vhost features */
uint64_t rte_vhost_feature_get(void);
-int rte_vhost_enable_guest_notification(struct virtio_net *dev, uint16_t queue_id, int enable);
+int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
/* Register vhost driver. dev_name could be different for multiple instance support. */
int rte_vhost_driver_register(const char *dev_name);
@@ -286,8 +275,8 @@ int rte_vhost_get_numa_node(int vid);
* be received from the physical port or from another virtual device. A packet
* count is returned to indicate the number of packets that were succesfully
* added to the RX queue.
- * @param dev
- * virtio-net device
+ * @param vid
+ * virtio-net device ID
* @param queue_id
* virtio queue index in mq case
* @param pkts
@@ -297,14 +286,14 @@ int rte_vhost_get_numa_node(int vid);
* @return
* num of packets enqueued
*/
-uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
+uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count);
/**
* This function gets guest buffers from the virtio device TX virtqueue,
* construct host mbufs, copies guest buffer content to host mbufs and
* store them in pkts to be processed.
- * @param dev
+ * @param vid
* virtio-net device
* @param queue_id
* virtio queue index in mq case
@@ -317,7 +306,8 @@ uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
* @return
* num of packets dequeued
*/
-uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
+uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
+
#endif /* _VIRTIO_NET_H_ */
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 8d87508..08cab08 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -46,6 +46,7 @@
#include <rte_arp.h>
#include "vhost-net.h"
+#include "virtio-net.h"
#define MAX_PKT_BURST 32
#define VHOST_LOG_PAGE 4096
@@ -587,9 +588,14 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
}
uint16_t
-rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
+rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count)
{
+ struct virtio_net *dev = get_device(vid);
+
+ if (!dev)
+ return 0;
+
if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
return virtio_dev_merge_rx(dev, queue_id, pkts, count);
else
@@ -815,9 +821,10 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
uint16_t
-rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
+rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
{
+ struct virtio_net *dev;
struct rte_mbuf *rarp_mbuf = NULL;
struct vhost_virtqueue *vq;
uint32_t desc_indexes[MAX_PKT_BURST];
@@ -826,6 +833,10 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
uint16_t free_entries;
uint16_t avail_idx;
+ dev = get_device(vid);
+ if (!dev)
+ return 0;
+
if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
dev->vid, __func__, queue_id);
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
index 9385af1..7fa69a7 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -117,7 +117,7 @@ user_set_mem_table(int vid, struct VhostUserMsg *pmsg)
/* Remove from the data plane. */
if (dev->flags & VIRTIO_DEV_RUNNING) {
dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(dev);
+ notify_ops->destroy_device(vid);
}
if (dev->mem) {
@@ -279,6 +279,9 @@ user_set_vring_kick(int vid, struct VhostUserMsg *pmsg)
struct vhost_vring_file file;
struct virtio_net *dev = get_device(vid);
+ if (!dev)
+ return;
+
file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
file.fd = VIRTIO_INVALID_EVENTFD;
@@ -289,7 +292,7 @@ user_set_vring_kick(int vid, struct VhostUserMsg *pmsg)
vhost_set_vring_kick(vid, &file);
if (virtio_is_ready(dev) && !(dev->flags & VIRTIO_DEV_RUNNING)) {
- if (notify_ops->new_device(dev) == 0)
+ if (notify_ops->new_device(vid) == 0)
dev->flags |= VIRTIO_DEV_RUNNING;
}
}
@@ -307,7 +310,7 @@ user_get_vring_base(int vid,
return -1;
/* We have to stop the queue (virtio) if it is running. */
if (dev->flags & VIRTIO_DEV_RUNNING)
- notify_ops->destroy_device(dev);
+ notify_ops->destroy_device(vid);
/* Here we are safe to get the last used index */
vhost_get_vring_base(vid, state->index, state);
@@ -342,9 +345,8 @@ user_set_vring_enable(int vid,
"set queue enable: %d to qp idx: %d\n",
enable, state->index);
- if (notify_ops->vring_state_changed) {
- notify_ops->vring_state_changed(dev, state->index, enable);
- }
+ if (notify_ops->vring_state_changed)
+ notify_ops->vring_state_changed(vid, state->index, enable);
dev->virtqueue[state->index]->enabled = enable;
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 6bf4d87..9fd80a8 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -296,7 +296,7 @@ vhost_destroy_device(int vid)
if (dev->flags & VIRTIO_DEV_RUNNING) {
dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(dev);
+ notify_ops->destroy_device(vid);
}
cleanup_device(dev, 1);
@@ -353,7 +353,7 @@ vhost_reset_owner(int vid)
if (dev->flags & VIRTIO_DEV_RUNNING) {
dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(dev);
+ notify_ops->destroy_device(vid);
}
cleanup_device(dev, 0);
@@ -717,21 +717,26 @@ vhost_set_backend(int vid, struct vhost_vring_file *file)
if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
if (dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED &&
dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED) {
- if (notify_ops->new_device(dev) < 0)
+ if (notify_ops->new_device(vid) < 0)
return -1;
dev->flags |= VIRTIO_DEV_RUNNING;
}
} else if (file->fd == VIRTIO_DEV_STOPPED) {
dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(dev);
+ notify_ops->destroy_device(vid);
}
return 0;
}
-int rte_vhost_enable_guest_notification(struct virtio_net *dev,
- uint16_t queue_id, int enable)
+int
+rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
{
+ struct virtio_net *dev = get_device(vid);
+
+ if (!dev)
+ return -1;
+
if (enable) {
RTE_LOG(ERR, VHOST_CONFIG,
"guest notification isn't supported.\n");
--
1.9.0
^ permalink raw reply [relevance 2%]
* [dpdk-dev] [PATCH 00/16] vhost ABI/API refactoring
@ 2016-05-02 22:25 9% Yuanhan Liu
2016-05-02 22:25 2% ` [dpdk-dev] [PATCH 10/16] vhost: export vid as the only interface to applications Yuanhan Liu
` (2 more replies)
0 siblings, 3 replies; 200+ results
From: Yuanhan Liu @ 2016-05-02 22:25 UTC (permalink / raw)
To: dev
Cc: huawei.xie, Thomas Monjalon, Panu Matilainen, Tetsuya Mukawa,
Traynor Kevin, Yuanhan Liu
Every time we introduce a new feature to vhost, we are likely
to break ABI. Moreover, some cleanups (such as the one from Ilya
to remove vec_buf from vhost_virtqueue struct) also break ABI.
This patch set is meant to resolve the above issue for good, by
hiding the virtio_net structure (as well as a few others) internally,
and exposing the virtio_net dev struct to applications as a number,
vid, like the way the kernel exposes an fd to user space.
Back to the patch set, the first part of this set makes some
changes to the vhost example, vhost-pmd and vhost, bit by bit, to
remove the dependence on the "virtio_net" struct. It then makes the
final change to adapt the current APIs to use "vid".
After that, "virtio_net_device_ops" is the only open struct left
that an application can access; therefore, it's the only place
that might introduce potential ABI breakage in the future when it is
extended. Hence, I reserved a few more (5) slots in it, to
make sure we will not break ABI for a long time, and hopefully,
forever.
The last bit of this patch set is some cleanups, including the
one from Ilya.
Note that this refactoring breaks the tep_termination example.
Well, it's just another copy of the original messy vhost example,
and I have no interest in cleaning it up again. Therefore, I might
consider removing that example later, and adding the vxlan bits
to the vhost example.
Few more TODOs: update release note, update lib version, update
version.map
Thanks.
--yliu
---
Ilya Maximets (1):
vhost: make buf vector for scatter Rx local
Yuanhan Liu (15):
vhost: declare backend with int type
vhost: set/reset dev flags internally
vhost: declare device_fh as int
example/vhost: make a copy of virtio device id
vhost: rename device_fh to vid
vhost: get device by vid only
vhost: move vhost_device_ctx to cuse
vhost: query pmd internal by vid
vhost: add few more functions
vhost: export vid as the only interface to applications
vhost: hide internal structs/macros/functions
vhost: remove unnecessary fields
vhost: remove virtio-net.h
vhost: reserve few more space for future extension
vhost: per device vhost_hlen
drivers/net/vhost/rte_eth_vhost.c | 86 ++++-------
examples/vhost/main.c | 126 ++++++++-------
examples/vhost/main.h | 1 +
lib/librte_vhost/rte_virtio_net.h | 197 ++++++------------------
lib/librte_vhost/vhost-net.h | 195 +++++++++++++++++++----
lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 83 +++++-----
lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 30 ++--
lib/librte_vhost/vhost_cuse/virtio-net-cdev.h | 12 +-
lib/librte_vhost/vhost_rxtx.c | 133 ++++++++--------
lib/librte_vhost/vhost_user/vhost-net-user.c | 53 +++----
lib/librte_vhost/vhost_user/virtio-net-user.c | 64 ++++----
lib/librte_vhost/vhost_user/virtio-net-user.h | 18 +--
lib/librte_vhost/virtio-net.c | 213 ++++++++++++++++----------
lib/librte_vhost/virtio-net.h | 43 ------
14 files changed, 644 insertions(+), 610 deletions(-)
delete mode 100644 lib/librte_vhost/virtio-net.h
--
1.9.0
^ permalink raw reply [relevance 9%]
* [dpdk-dev] [PATCH v2 7/8] examples/vhost: switch_worker cleanup
[not found] ` <1462224230-19460-1-git-send-email-yuanhan.liu@linux.intel.com>
@ 2016-05-02 21:23 2% ` Yuanhan Liu
2016-05-09 18:06 0% ` [dpdk-dev] [PATCH v2 0/8] vhost/example cleanup/fix Yuanhan Liu
1 sibling, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-05-02 21:23 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Yuanhan Liu
switch_worker() is the last piece of messy code that touches the
virtio/vhost device.
Do a cleanup here, so that the later vhost ABI refactoring will be
less painful.
The cleanup is straightforward: break long lines, move some code into
functions. Lastly, comment a bit on switch_worker().
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
examples/vhost/main.c | 253 +++++++++++++++++++++++++++-----------------------
1 file changed, 136 insertions(+), 117 deletions(-)
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index dbb42ee..66d3bf2 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -213,6 +213,8 @@ struct mbuf_table {
/* TX queue for each data core. */
struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
+#define MBUF_TABLE_DRAIN_TSC ((rte_get_tsc_hz() + US_PER_S - 1) \
+ / US_PER_S * BURST_TX_DRAIN_US)
#define VLAN_HLEN 4
/* Per-device statistics struct */
@@ -915,16 +917,35 @@ static void virtio_tx_offload(struct rte_mbuf *m)
tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
}
+static inline void
+free_pkts(struct rte_mbuf **pkts, uint16_t n)
+{
+ while (n--)
+ rte_pktmbuf_free(pkts[n]);
+}
+
+static inline void __attribute__((always_inline))
+do_drain_mbuf_table(struct mbuf_table *tx_q)
+{
+ uint16_t count;
+
+ count = rte_eth_tx_burst(ports[0], tx_q->txq_id,
+ tx_q->m_table, tx_q->len);
+ if (unlikely(count < tx_q->len))
+ free_pkts(&tx_q->m_table[count], tx_q->len - count);
+
+ tx_q->len = 0;
+}
+
/*
- * This function routes the TX packet to the correct interface. This may be a local device
- * or the physical port.
+ * This function routes the TX packet to the correct interface. This
+ * may be a local device or the physical port.
*/
static inline void __attribute__((always_inline))
virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
{
struct mbuf_table *tx_q;
- struct rte_mbuf **m_table;
- unsigned len, ret, offset = 0;
+ unsigned offset = 0;
const uint16_t lcore_id = rte_lcore_id();
struct virtio_net *dev = vdev->dev;
struct ether_hdr *nh;
@@ -960,7 +981,6 @@ queue2nic:
/*Add packet to the port tx queue*/
tx_q = &lcore_tx_queue[lcore_id];
- len = tx_q->len;
nh = rte_pktmbuf_mtod(m, struct ether_hdr *);
if (unlikely(nh->ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN))) {
@@ -998,55 +1018,130 @@ queue2nic:
if (m->ol_flags & PKT_TX_TCP_SEG)
virtio_tx_offload(m);
- tx_q->m_table[len] = m;
- len++;
+ tx_q->m_table[tx_q->len++] = m;
if (enable_stats) {
dev_statistics[dev->device_fh].tx_total++;
dev_statistics[dev->device_fh].tx++;
}
- if (unlikely(len == MAX_PKT_BURST)) {
- m_table = (struct rte_mbuf **)tx_q->m_table;
- ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id, m_table, (uint16_t) len);
- /* Free any buffers not handled by TX and update the port stats. */
- if (unlikely(ret < len)) {
- do {
- rte_pktmbuf_free(m_table[ret]);
- } while (++ret < len);
+ if (unlikely(tx_q->len == MAX_PKT_BURST))
+ do_drain_mbuf_table(tx_q);
+}
+
+
+static inline void __attribute__((always_inline))
+drain_mbuf_table(struct mbuf_table *tx_q)
+{
+ static uint64_t prev_tsc;
+ uint64_t cur_tsc;
+
+ if (tx_q->len == 0)
+ return;
+
+ cur_tsc = rte_rdtsc();
+ if (unlikely(cur_tsc - prev_tsc > MBUF_TABLE_DRAIN_TSC)) {
+ prev_tsc = cur_tsc;
+
+ RTE_LOG(DEBUG, VHOST_DATA,
+ "TX queue drained after timeout with burst size %u\n",
+ tx_q->len);
+ do_drain_mbuf_table(tx_q);
+ }
+}
+
+static inline void __attribute__((always_inline))
+drain_eth_rx(struct vhost_dev *vdev)
+{
+ uint16_t rx_count, enqueue_count;
+ struct virtio_net *dev = vdev->dev;
+ struct rte_mbuf *pkts[MAX_PKT_BURST];
+
+ rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
+ pkts, MAX_PKT_BURST);
+ if (!rx_count)
+ return;
+
+ /*
+ * When "enable_retry" is set, here we wait and retry when there
+ * is no enough free slots in the queue to hold @rx_count packets,
+ * to diminish packet loss.
+ */
+ if (enable_retry &&
+ unlikely(rx_count > rte_vring_available_entries(dev,
+ VIRTIO_RXQ))) {
+ uint32_t retry;
+
+ for (retry = 0; retry < burst_rx_retry_num; retry++) {
+ rte_delay_us(burst_rx_delay_time);
+ if (rx_count <= rte_vring_available_entries(dev,
+ VIRTIO_RXQ))
+ break;
}
+ }
+
+ enqueue_count = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ,
+ pkts, rx_count);
+ if (enable_stats) {
+ uint64_t fh = dev->device_fh;
+
+ rte_atomic64_add(&dev_statistics[fh].rx_total_atomic, rx_count);
+ rte_atomic64_add(&dev_statistics[fh].rx_atomic, enqueue_count);
+ }
- len = 0;
+ free_pkts(pkts, rx_count);
+}
+
+static inline void __attribute__((always_inline))
+drain_virtio_tx(struct vhost_dev *vdev)
+{
+ struct rte_mbuf *pkts[MAX_PKT_BURST];
+ uint16_t count;
+ uint16_t i;
+
+ count = rte_vhost_dequeue_burst(vdev->dev, VIRTIO_TXQ, mbuf_pool,
+ pkts, MAX_PKT_BURST);
+
+ /* setup VMDq for the first packet */
+ if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && count) {
+ if (vdev->remove || link_vmdq(vdev, pkts[0]) == -1)
+ free_pkts(pkts, count);
}
- tx_q->len = len;
- return;
+ for (i = 0; i < count; ++i) {
+ virtio_tx_route(vdev, pkts[i],
+ vlan_tags[(uint16_t)vdev->dev->device_fh]);
+ }
}
+
/*
- * This function is called by each data core. It handles all RX/TX registered with the
- * core. For TX the specific lcore linked list is used. For RX, MAC addresses are compared
- * with all devices in the main linked list.
+ * Main function of vhost-switch. It basically does:
+ *
+ * for each vhost device {
+ * - drain_eth_rx()
+ *
+ * Which drains the host eth Rx queue linked to the vhost device,
+ * and deliver all of them to guest virito Rx ring associated with
+ * this vhost device.
+ *
+ * - drain_virtio_tx()
+ *
+ * Which drains the guest virtio Tx queue and deliver all of them
+ * to the target, which could be another vhost device, or the
+ * physical eth dev. The route is done in function "virtio_tx_route".
+ * }
*/
static int
-switch_worker(__attribute__((unused)) void *arg)
+switch_worker(void *arg __rte_unused)
{
- struct virtio_net *dev = NULL;
- struct vhost_dev *vdev = NULL;
- struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ unsigned i;
+ unsigned lcore_id = rte_lcore_id();
+ struct vhost_dev *vdev;
struct mbuf_table *tx_q;
- const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
- uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;
- unsigned ret, i;
- const uint16_t lcore_id = rte_lcore_id();
- const uint16_t num_cores = (uint16_t)rte_lcore_count();
- uint16_t rx_count = 0;
- uint16_t tx_count;
- uint32_t retry = 0;
RTE_LOG(INFO, VHOST_DATA, "Procesing on Core %u started\n", lcore_id);
- prev_tsc = 0;
tx_q = &lcore_tx_queue[lcore_id];
- for (i = 0; i < num_cores; i ++) {
+ for (i = 0; i < rte_lcore_count(); i++) {
if (lcore_ids[i] == lcore_id) {
tx_q->txq_id = i;
break;
@@ -1054,34 +1149,7 @@ switch_worker(__attribute__((unused)) void *arg)
}
while(1) {
- cur_tsc = rte_rdtsc();
- /*
- * TX burst queue drain
- */
- diff_tsc = cur_tsc - prev_tsc;
- if (unlikely(diff_tsc > drain_tsc)) {
-
- if (tx_q->len) {
- RTE_LOG(DEBUG, VHOST_DATA,
- "TX queue drained after timeout with burst size %u\n",
- tx_q->len);
-
- /*Tx any packets in the queue*/
- ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id,
- (struct rte_mbuf **)tx_q->m_table,
- (uint16_t)tx_q->len);
- if (unlikely(ret < tx_q->len)) {
- do {
- rte_pktmbuf_free(tx_q->m_table[ret]);
- } while (++ret < tx_q->len);
- }
-
- tx_q->len = 0;
- }
-
- prev_tsc = cur_tsc;
-
- }
+ drain_mbuf_table(tx_q);
/*
* Inform the configuration core that we have exited the
@@ -1091,69 +1159,20 @@ switch_worker(__attribute__((unused)) void *arg)
lcore_info[lcore_id].dev_removal_flag = ACK_DEV_REMOVAL;
/*
- * Process devices
+ * Process vhost devices
*/
TAILQ_FOREACH(vdev, &lcore_info[lcore_id].vdev_list, next) {
- uint64_t fh;
-
- dev = vdev->dev;
- fh = dev->device_fh;
-
if (unlikely(vdev->remove)) {
unlink_vmdq(vdev);
vdev->ready = DEVICE_SAFE_REMOVE;
continue;
}
- if (likely(vdev->ready == DEVICE_RX)) {
- /*Handle guest RX*/
- rx_count = rte_eth_rx_burst(ports[0],
- vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
-
- if (rx_count) {
- /*
- * Retry is enabled and the queue is full then we wait and retry to avoid packet loss
- * Here MAX_PKT_BURST must be less than virtio queue size
- */
- if (enable_retry && unlikely(rx_count > rte_vring_available_entries(dev, VIRTIO_RXQ))) {
- for (retry = 0; retry < burst_rx_retry_num; retry++) {
- rte_delay_us(burst_rx_delay_time);
- if (rx_count <= rte_vring_available_entries(dev, VIRTIO_RXQ))
- break;
- }
- }
- ret_count = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ, pkts_burst, rx_count);
- if (enable_stats) {
- rte_atomic64_add(
- &dev_statistics[fh].rx_total_atomic,
- rx_count);
- rte_atomic64_add(
- &dev_statistics[fh].rx_atomic,
- ret_count);
- }
- while (likely(rx_count)) {
- rx_count--;
- rte_pktmbuf_free(pkts_burst[rx_count]);
- }
-
- }
- }
+ if (likely(vdev->ready == DEVICE_RX))
+ drain_eth_rx(vdev);
- if (likely(!vdev->remove)) {
- /* Handle guest TX*/
- tx_count = rte_vhost_dequeue_burst(dev, VIRTIO_TXQ, mbuf_pool, pkts_burst, MAX_PKT_BURST);
- /* If this is the first received packet we need to learn the MAC and setup VMDQ */
- if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && tx_count) {
- if (vdev->remove || (link_vmdq(vdev, pkts_burst[0]) == -1)) {
- while (tx_count)
- rte_pktmbuf_free(pkts_burst[--tx_count]);
- }
- }
- for (i = 0; i < tx_count; ++i) {
- virtio_tx_route(vdev, pkts_burst[i],
- vlan_tags[(uint16_t)dev->device_fh]);
- }
- }
+ if (likely(!vdev->remove))
+ drain_virtio_tx(vdev);
}
}
--
1.9.3
^ permalink raw reply [relevance 2%]
* Re: [dpdk-dev] perfomance of rte_lpm rule subsystem
2016-04-19 15:46 3% ` Stephen Hemminger
2016-04-19 20:46 0% ` Vladimir Medvedkin
@ 2016-05-02 19:38 0% ` Александр Киселев
1 sibling, 0 replies; 200+ results
From: Александр Киселев @ 2016-05-02 19:38 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: dev
Stephen, what was the main reason you use red-black tree instead of dir-24-8?
Did you switch to using trees because of too big memory working set of
dir-24-8 algorithm?
2016-04-19 18:46 GMT+03:00 Stephen Hemminger <stephen@networkplumber.org>:
> On Tue, 19 Apr 2016 14:11:11 +0300
> Александр Киселев <kiselev99@gmail.com> wrote:
>
> > Hi.
> >
> > Doing some test with rte_lpm (adding/deleting bgp full table rules) I
> > noticed that
> > rule subsystem is very slow even considering that probably it was never
> > designed for using
> > in a data forwarding plane. So I want to propose some changes to the
> "rule"
> > subsystem.
> >
> > I reimplemented rule part ot the lib using rte_hash, and perfomance of
> > adding/deleted routes have increased dramatically.
> > If increasing speed of adding deleting routes makes sence for anybody
> else
> > I would like to discuss my patch.
> > The patch also include changes that make next_hop 64 bit, so please just
> > ignore them. The rule changes are in the following
> > functions only:
> >
> > rte_lpm2_create
> >
> > rule_find
> > rule_add
> > rule_delete
> > find_previous_rule
> > delete_depth_small
> > delete_depth_big
> >
> > rte_lpm2_add
> > rte_lpm2_delete
> > rte_lpm2_is_rule_present
> > rte_lpm2_delete_all
> >
>
> We forked LPM back several versions ago.
> I sent the patches to use BSD red-black tree for rules but the patches were
> ignored. mostly because it broke ABI.
>
--
--
Kiselev Alexander
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] ovs crash when running traffic from VM to VM over DPDK and vhostuser
@ 2016-05-02 17:40 3% ` Yi Ba
0 siblings, 0 replies; 200+ results
From: Yi Ba @ 2016-05-02 17:40 UTC (permalink / raw)
To: Yuanhan Liu; +Cc: dev
Running with dpdk 16.04 and latest ovs from git, and removing "mrg_rxbuf=off" from virtio params, the crash is no longer observed. However, we are witnessing ovs getting stuck, and will post to the ovs mailing list:
2016-05-02T17:26:18.804Z|00111|ovs_rcu|WARN|blocked 1000 ms waiting for pmd145 to quiesce
2016-05-02T17:26:19.805Z|00112|ovs_rcu|WARN|blocked 2001 ms waiting for pmd145 to quiesce
2016-05-02T17:26:21.804Z|00113|ovs_rcu|WARN|blocked 4000 ms waiting for pmd145 to quiesce
2016-05-02T17:26:25.805Z|00114|ovs_rcu|WARN|blocked 8001 ms waiting for pmd145 to quiesce
2016-05-02T17:26:33.805Z|00115|ovs_rcu|WARN|blocked 16001 ms waiting for pmd145 to quiesce
2016-05-02T17:26:49.805Z|00116|ovs_rcu|WARN|blocked 32001 ms waiting for pmd145 to quiesce
2016-05-02T17:27:14.354Z|00072|ovs_rcu(vhost_thread2)|WARN|blocked 128000 ms waiting for pmd145 to quiesce
2016-05-02T17:27:15.841Z|00008|ovs_rcu(urcu3)|WARN|blocked 128001 ms waiting for pmd145 to quiesce
2016-05-02T17:27:21.805Z|00117|ovs_rcu|WARN|blocked 64000 ms waiting for pmd145 to quiesce
2016-05-02T17:28:25.804Z|00118|ovs_rcu|WARN|blocked 128000 ms waiting for pmd145 to quiesce
On Wednesday, 6 April 2016 10:56 AM, Yuanhan Liu <yuanhan.liu@linux.intel.com> wrote:
On Tue, Apr 05, 2016 at 08:36:19PM +0000, Yi Ba wrote:
>
> Program received signal SIGSEGV, Segmentation fault.
> [Switching to Thread 0x7ff1ddffb700 (LWP 21287)]
> 0x0000000000450da7 in update_secure_len (vec_idx=0x7ff1ddff27f8, secure_len=0x7ff1ddff27fc, id=13948, vq=0x7fe7992c8940)
> at /home/stack/ovs-dpdk/dpdk-2.2.0/lib/librte_vhost/vhost_rxtx.c:452
> 452 /home/stack/ovs-dpdk/dpdk-2.2.0/lib/librte_vhost/vhost_rxtx.c: No such file or directory.
> (gdb) bt
> #0 0x0000000000450da7 in update_secure_len (vec_idx=0x7ff1ddff27f8, secure_len=0x7ff1ddff27fc, id=13948, vq=0x7fe7992c8940)
It looks like a known issue, which has been fixed in this release. So,
could you please just try again with the latest DPDK code? It should
be able to solve your issue.
--yliu
From yuanhan.liu@linux.intel.com Mon May 2 23:20:28 2016
Return-Path: <yuanhan.liu@linux.intel.com>
Received: from mga04.intel.com (mga04.intel.com [192.55.52.120])
by dpdk.org (Postfix) with ESMTP id 1A22639EA
for <dev@dpdk.org>; Mon, 2 May 2016 23:20:27 +0200 (CEST)
Received: from fmsmga004.fm.intel.com ([10.253.24.48])
by fmsmga104.fm.intel.com with ESMTP; 02 May 2016 14:20:28 -0700
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.24,569,1455004800"; d="scan'208";a="95880071"
Received: from yliu-dev.sh.intel.com ([10.239.67.162])
by fmsmga004.fm.intel.com with ESMTP; 02 May 2016 14:20:26 -0700
From: Yuanhan Liu <yuanhan.liu@linux.intel.com>
To: dev@dpdk.org
Cc: huawei.xie@intel.com,
Yuanhan Liu <yuanhan.liu@linux.intel.com>
Date: Mon, 2 May 2016 14:23:42 -0700
Message-Id: <1462224230-19460-1-git-send-email-yuanhan.liu@linux.intel.com>
X-Mailer: git-send-email 1.9.0
In-Reply-To: <1461645951-14603-1-git-send-email-yuanhan.liu@linux.intel.com>
References: <1461645951-14603-1-git-send-email-yuanhan.liu@linux.intel.com>
Subject: [dpdk-dev] [PATCH v2 0/8] vhost/example cleanup/fix
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: patches and discussions about DPDK <dev.dpdk.org>
List-Unsubscribe: <http://dpdk.org/ml/options/dev>,
<mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://dpdk.org/ml/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <http://dpdk.org/ml/listinfo/dev>,
<mailto:dev-request@dpdk.org?subject=subscribe>
X-List-Received-Date: Mon, 02 May 2016 21:20:28 -0000
I'm starting to work on the vhost ABI refactoring, which means I also have to
touch the vhost example code. The vhost example code, however, is very
messy and full of __very__ long lines. This would make a later diff that
applies the new vhost API very ugly and therefore not friendly for review.
That is the motivation for this cleanup.
Besides that, there is one enhancement patch, which handles the broadcast
packets so that we could relay the ARP request packet, to let vhost-switch
be more like a real switch. There is another patch that (hopefully) would
fix the mbuf allocation failure ultimately. I also added some guidelines
there as comments to show how to count how many mbuf entries are enough for
our usage.
In other words, an example is meant to be clean/simple and with good
coding style so that people can get the usage easily. So, one way or
another, this patch is good to have, even without this ABI refactoring
stuff.
Note that I'm going to apply it before the end of this week, if no objections.
v2: - some checkpatch fixes
- cleaned the code about device statistics
---
Yuanhan Liu (8):
examples/vhost: remove the non-working zero copy code
examples/vhost: remove unused macro and struct
examples/vhost: use tailq to link vhost devices
examples/vhost: use mac compare helper function directly
examples/vhost: handle broadcast packet
examples/vhost: fix mbuf allocation failure
examples/vhost: switch_worker cleanup
examples/vhost: embed statistics into vhost_dev struct
doc/guides/sample_app_ug/vhost.rst | 36 +-
examples/vhost/main.c | 2394 ++++++------------------------------
examples/vhost/main.h | 56 +-
3 files changed, 391 insertions(+), 2095 deletions(-)
--
1.9.3
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH] mk: do not enforce any specific ARM ABI
2016-04-15 22:33 15% [dpdk-dev] [PATCH] mk: do not enforce any specific ARM ABI Jan Viktorin
@ 2016-05-02 15:47 4% ` Thomas Monjalon
0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2016-05-02 15:47 UTC (permalink / raw)
To: Jan Viktorin; +Cc: dev
2016-04-16 00:33, Jan Viktorin:
> The dpdk build system passes -mfloat-abi=softfp, which makes the build fail
> when the selected ABI is EABIhf. The dpdk build system should not make
> assumptions on the selected ARM ABI.
>
> Signed-off-by: Jan Viktorin <viktorin@rehivetech.com>
> Reported-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Applied, thanks
^ permalink raw reply [relevance 4%]
* Re: [dpdk-dev] [RFC PATCH v1 0/3] Remove string operations from xstats
2016-04-15 14:44 3% [dpdk-dev] [RFC PATCH v1 0/3] Remove string operations from xstats Remy Horton
2016-04-20 16:03 0% ` David Harton (dharton)
@ 2016-04-29 12:52 0% ` David Harton (dharton)
1 sibling, 0 replies; 200+ results
From: David Harton (dharton) @ 2016-04-29 12:52 UTC (permalink / raw)
To: Remy Horton, dev
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Remy Horton
> Sent: Friday, April 15, 2016 10:44 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [RFC PATCH v1 0/3] Remove string operations from
> xstats
>
> The current extended ethernet statistics fetching involve doing several
> string operations, which causes performance issues if there are lots of
> statistics and/or network interfaces. This RFC patchset changes the API
> for xstats to use integer identifiers instead of strings and implements
> this new API for the ixgbe driver. Others drivers to follow.
>
> --
>
> Since this will involve API & ABI breakage as previously advertised,
> there are several design assumptions that need consideration:
>
> *) id-name & id-value pairs for both lookup and query
> Permits out-of-order and non-contigious returning of names/ids/values,
> even though expected implmentations would in practice return items in
> sorted order by id. Is this sufficent/desirable future proofing? Idea
> is to allow possibility of drivers returning partial statistics.
I think the key is that the order of the stats must always be honored
and if that's the case then an id isn't necessary. However, if others
want an id, it certainly doesn't hurt.
I don't see drivers autonomously providing a subset of stats and users
can filter out stats they don't want to their presentation layers.
>
> *) Bulk name-id mapping lookup only
> At the moment individual lookup is not supported, as this would impose
> extra overheads on drivers. The assumption is that any end user would
> fetch all this data once on startup and then cache the mappings.
Agreed. Similarly there is no need to return a partial list of stats
as the presentation layers can filter.
>
> *) Replacement or additional API
> This patch replaces the current xstats API, but there is no inherant
> reason beyond maintainability why this funtionality could not be in
> addition rather than a replacement. What is consensus on this?
I suggest 3 new functions are added:
- get number of xstats
- get xstats names
- get xstats values
This facilitates:
- parallel development within the release without breaking current usage
- possibility of removing rte_eth_xstats_get() in following release
Thanks for moving this forward,
Dave
>
> Comments welcome.
>
> Remy Horton (3):
> rte: change xstats to use integer keys
> drivers/net/ixgbe: change xstats to use integer keys
> examples/ethtool: add xstats display command
>
> drivers/net/ixgbe/ixgbe_ethdev.c | 87
> +++++++++++++++++++++++++++++++----
> examples/ethtool/ethtool-app/ethapp.c | 57 +++++++++++++++++++++++
> lib/librte_ether/rte_ethdev.c | 87
> +++++++++++++++++++++++++++++++----
> lib/librte_ether/rte_ethdev.h | 38 +++++++++++++++
> 4 files changed, 252 insertions(+), 17 deletions(-)
>
> --
> 2.5.5
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC PATCH v1 0/3] Remove string operations from xstats
2016-04-28 14:56 0% ` Tahhan, Maryam
@ 2016-04-28 15:58 0% ` David Harton (dharton)
0 siblings, 0 replies; 200+ results
From: David Harton (dharton) @ 2016-04-28 15:58 UTC (permalink / raw)
To: Tahhan, Maryam, Horton, Remy, dev; +Cc: Mcnamara, John, Van Haaren, Harry
> -----Original Message-----
> From: Tahhan, Maryam [mailto:maryam.tahhan@intel.com]
> Sent: Thursday, April 28, 2016 10:56 AM
> To: David Harton (dharton) <dharton@cisco.com>; Horton, Remy
> <remy.horton@intel.com>; dev@dpdk.org
> Cc: Mcnamara, John <john.mcnamara@intel.com>; Van Haaren, Harry
> <harry.van.haaren@intel.com>
> Subject: RE: [dpdk-dev] [RFC PATCH v1 0/3] Remove string operations from
> xstats
>
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of David Harton
> > (dharton)
> > Sent: Wednesday, April 20, 2016 5:04 PM
> > To: Horton, Remy <remy.horton@intel.com>; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [RFC PATCH v1 0/3] Remove string operations
> > from xstats
> >
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Remy Horton
> > > Sent: Friday, April 15, 2016 10:44 AM
> > > To: dev@dpdk.org
> > > Subject: [dpdk-dev] [RFC PATCH v1 0/3] Remove string operations from
> > > xstats
> > >
> > > The current extended ethernet statistics fetching involve doing
> > > several string operations, which causes performance issues if there
> > > are lots of statistics and/or network interfaces. This RFC patchset
> > > changes the API for xstats to use integer identifiers instead of
> > > strings and implements this new API for the ixgbe driver. Others
> > drivers to follow.
> > >
> > > --
> > >
> > > Since this will involve API & ABI breakage as previously advertised,
> > > there are several design assumptions that need consideration:
> > >
> > > *) id-name & id-value pairs for both lookup and query Permits
> > > out-of-order and non-contigious returning of names/ids/values, even
> > > though expected implmentations would in practice return items in
> > > sorted order by id. Is this sufficent/desirable future proofing?
> > > Idea is to allow possibility of drivers returning partial statistics.
> >
> > I believe forcing drivers to match to a common id-space will become
> > burdensome. If the stats id-space isn't common then matching strings
> > is probably just as sufficient as long as drivers don't add/remove
> > stats ad hoc between the time the device is initialized and removed.
>
> I'm not aware of drivers adding/removing the stats ad hoc? The idea is to
> have a common-id space otherwise it will be a free for all and we won't
> have alignment across the drivers. I don't see it being any more
> burdensome than having a common register naming across the board which is
> what is there today. The advantage being that you don't have to pull the
> strings every time.
>
> >
> > >
> > > *) Bulk name-id mapping lookup only
> > > At the moment individual lookup is not supported, as this would
> > > impose extra overheads on drivers. The assumption is that any end
> > > user would fetch all this data once on startup and then cache the
> mappings.
> >
> > I'm not sure I see the value of looking up a single stat from a user
> > perspective. I can see where the drivers might say that some stats
> > are less disruptive/etc but the user doesn't have that knowledge and
> > wouldn't know how to take advantage. Usually all stats are grabbed
> > multiple times and the changes noted during debug sessions.
> >
>
> I believe Remy's change doesn't suggest/support individual lookup. It is
> just a statement that we don't want to burden drivers with individual
> stats lookups.
>
> > >
> > > *) Replacement or additional API
> > > This patch replaces the current xstats API, but there is no inherant
> > > reason beyond maintainability why this funtionality could not be in
> > > addition rather than a replacement. What is consensus on this?
> >
> > I came to the conclusion that replacing the existing API isn't
> > necessary but rather extending it so backwards compatibility could be
> > maintained during the previous discussions on this topic. However, if
> > we want to go forward with cleaning up in order to reduce the support
> > drivers provide I'm all for it.
> >
> > I still believe the API we develop should follow an "ethtool stats like"
> > format as suggested earlier this year:
> >
> > extern int rte_eth_xstats_names_get(uint8_t port_id,
> > struct rte_eth_xstats_name *names, unsigned n); extern int
> > rte_eth_xstats_values_get(uint8_t port_id,
> > uint64_t *values, unsigned n);
> >
> > Again, these could be provided alongside the existing API or replace it.
>
> I'm struggling a bit here. This is really what Remy has posted
> http://dpdk.org/dev/patchwork/patch/12094/ or am I missing something
> obvious?
Maybe I misread the patch series or missed one but I don't see where
stats can be obtained without copying strings? This is the real issue I
raised originally.
Having the ability to get the names without stats is useful, but,
the real gain would be obtaining the stats without the names.
>
> >
> > I also like the idea you provided of a separate API to obtain the
> > xstats count rather than deriving the count by calling one of the
> > above functions with "dummy" values.
>
> +1
>
> >
> > Again, I can provide the patches for the changes I've made that align
> > with this proposed API. I just never got any feedback on it when
> > requested previously.
>
> I believe time is not in our favour on this front. If you have patches can
> you post them, otherwise can you please review the patchset that Remy has
> posted?
I'm working on it but I have some process I'm navigating. I'm hopeful
I'll have the green light within a week if not sooner. I apologize...
I'm pushing as hard as I can. If you need to proceed go ahead I
completely understand.
All I can say is that I have implemented the API above and converted all
drivers that supported xstats in v2.2. Any new drivers that have added
xstats support since would need to be added.
I did not add "get the count" because it wasn't provided in the current API
and instead followed the existing convention, but I do believe that explicitly
getting the count is the better approach.
Thanks,
Dave
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC PATCH v1 0/3] Remove string operations from xstats
2016-04-20 16:03 0% ` David Harton (dharton)
@ 2016-04-28 14:56 0% ` Tahhan, Maryam
2016-04-28 15:58 0% ` David Harton (dharton)
0 siblings, 1 reply; 200+ results
From: Tahhan, Maryam @ 2016-04-28 14:56 UTC (permalink / raw)
To: David Harton (dharton), Horton, Remy, dev
Cc: Mcnamara, John, Van Haaren, Harry
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of David Harton
> (dharton)
> Sent: Wednesday, April 20, 2016 5:04 PM
> To: Horton, Remy <remy.horton@intel.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC PATCH v1 0/3] Remove string operations
> from xstats
>
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Remy Horton
> > Sent: Friday, April 15, 2016 10:44 AM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [RFC PATCH v1 0/3] Remove string operations from
> > xstats
> >
> > The current extended ethernet statistics fetching involve doing
> > several string operations, which causes performance issues if there
> > are lots of statistics and/or network interfaces. This RFC patchset
> > changes the API for xstats to use integer identifiers instead of
> > strings and implements this new API for the ixgbe driver. Others
> drivers to follow.
> >
> > --
> >
> > Since this will involve API & ABI breakage as previously advertised,
> > there are several design assumptions that need consideration:
> >
> > *) id-name & id-value pairs for both lookup and query Permits
> > out-of-order and non-contigious returning of names/ids/values, even
> > though expected implmentations would in practice return items in
> > sorted order by id. Is this sufficent/desirable future proofing? Idea
> > is to allow possibility of drivers returning partial statistics.
>
> I believe forcing drivers to match to a common id-space will become
> burdensome. If the stats id-space isn't common then matching strings is
> probably just as sufficient as long as drivers don't add/remove stats ad
> hoc between the time the device is initialized and removed.
I'm not aware of drivers adding/removing the stats ad hoc? The idea is to have a common-id space otherwise it will be a free for all and we won't have alignment across the drivers. I don't see it being any more burdensome than having a common register naming across the board which is what is there today. The advantage being that you don't have to pull the strings every time.
>
> >
> > *) Bulk name-id mapping lookup only
> > At the moment individual lookup is not supported, as this would impose
> > extra overheads on drivers. The assumption is that any end user would
> > fetch all this data once on startup and then cache the mappings.
>
> I'm not sure I see the value of looking up a single stat from a user
> perspective. I can see where the drivers might say that some stats are
> less disruptive/etc but the user doesn't have that knowledge and
> wouldn't know how to take advantage. Usually all stats are grabbed
> multiple times and the changes noted during debug sessions.
>
I believe Remy's change doesn't suggest/support individual lookup. It is just a statement that we don't want to burden drivers with individual stats lookups.
> >
> > *) Replacement or additional API
> > This patch replaces the current xstats API, but there is no inherant
> > reason beyond maintainability why this funtionality could not be in
> > addition rather than a replacement. What is consensus on this?
>
> I came to the conclusion that replacing the existing API isn't necessary
> but rather extending it so backwards compatibility could be maintained
> during the previous discussions on this topic. However, if we want to go
> forward with cleaning up in order to reduce the support drivers provide
> I'm all for it.
>
> I still believe the API we develop should follow an "ethtool stats like"
> format as suggested earlier this year:
>
> extern int rte_eth_xstats_names_get(uint8_t port_id,
> struct rte_eth_xstats_name *names, unsigned n); extern int
> rte_eth_xstats_values_get(uint8_t port_id,
> uint64_t *values, unsigned n);
>
> Again, these could be provided alongside the existing API or replace it.
I'm struggling a bit here. This is really what Remy has posted http://dpdk.org/dev/patchwork/patch/12094/ or am I missing something obvious?
>
> I also like the idea you provided of a separate API to obtain the xstats
> count rather than deriving the count by calling one of the above
> functions with "dummy" values.
+1
>
> Again, I can provide the patches for the changes I've made that align
> with this proposed API. I just never got any feedback on it when
> requested previously.
I believe time is not in our favour on this front. If you have patches can you post them, otherwise can you please review the patchset that Remy has posted?
Thanks in advance.
BR
Maryam
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH 0/7] vhost/example cleanup/fix
2016-04-28 5:45 0% ` [dpdk-dev] [PATCH 0/7] vhost/example cleanup/fix Wang, Zhihong
@ 2016-04-28 6:09 0% ` Yuanhan Liu
0 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-04-28 6:09 UTC (permalink / raw)
To: Wang, Zhihong; +Cc: dev, Xie, Huawei
On Thu, Apr 28, 2016 at 05:45:16AM +0000, Wang, Zhihong wrote:
>
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Yuanhan Liu
> > Sent: Tuesday, April 26, 2016 12:46 PM
> > To: dev@dpdk.org
> > Cc: Xie, Huawei <huawei.xie@intel.com>; Yuanhan Liu
> > <yuanhan.liu@linux.intel.com>
> > Subject: [dpdk-dev] [PATCH 0/7] vhost/example cleanup/fix
> >
> > I'm starting to work on the vhost ABI refactoring, that I also have to
> > touch the vhost example code, to make it work. The vhost example code,
> > however, is very messy, full of __very__ long lines. This would make
> > a later diff to apply the new vhost API be very ugly, therefore, not
> > friendly for review. This is how this cleanup comes.
>
>
> I think this patch is great effort to clean the messy code and make clearer
> logic, only one suggestion: do you think a complete cleanup would help more?
Yes, but I will stop here, and maybe do the rest in the near future, as I
have more important things to do now. I even thought about making the
VMDq and VLAN stuff optional, so that our example is not so tightly coupled
to those hardware features.
So, feel free to make patches to clean it further if you have time.
--yliu
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH 0/7] vhost/example cleanup/fix
2016-04-26 4:45 3% [dpdk-dev] [PATCH 0/7] vhost/example cleanup/fix Yuanhan Liu
2016-04-26 4:45 2% ` [dpdk-dev] [PATCH 7/7] examples/vhost: switch_worker cleanup Yuanhan Liu
@ 2016-04-28 5:45 0% ` Wang, Zhihong
2016-04-28 6:09 0% ` Yuanhan Liu
[not found] ` <1462224230-19460-1-git-send-email-yuanhan.liu@linux.intel.com>
2 siblings, 1 reply; 200+ results
From: Wang, Zhihong @ 2016-04-28 5:45 UTC (permalink / raw)
To: Yuanhan Liu, dev; +Cc: Xie, Huawei
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Yuanhan Liu
> Sent: Tuesday, April 26, 2016 12:46 PM
> To: dev@dpdk.org
> Cc: Xie, Huawei <huawei.xie@intel.com>; Yuanhan Liu
> <yuanhan.liu@linux.intel.com>
> Subject: [dpdk-dev] [PATCH 0/7] vhost/example cleanup/fix
>
> I'm starting to work on the vhost ABI refactoring, that I also have to
> touch the vhost example code, to make it work. The vhost example code,
> however, is very messy, full of __very__ long lines. This would make
> a later diff to apply the new vhost API be very ugly, therefore, not
> friendly for review. This is how this cleanup comes.
I think this patch is a great effort to clean up the messy code and make the
logic clearer. Only one suggestion: do you think a complete cleanup, in terms of
code style and function organization, would help more? There'll be further work
on it, and it's a small file anyway. Currently some parts still seem messy to me,
which compromises the effort of this patch.
>
> Besides that, there is one enhancement patch, which handles the broadcast
> packets so that we could rely the ARP request packet, to let vhost-switch
> be more like a real switch. There is another patch that (hopefully) would
> fix the mbuf allocation failure ultimately. I also added some guidelines
> there as comments to show how to count how many mbuf entries is enough for
> our usage.
>
> ---
> Yuanhan Liu (7):
> examples/vhost: remove the non-working zero copy code
> examples/vhost: remove unused macro and struct
> examples/vhost: use tailq to link vhost devices
> examples/vhost: use mac compare helper function directly
> examples/vhost: handle broadcast packet
> examples/vhost: fix mbuf allocation failures
> examples/vhost: switch_worker cleanup
>
> doc/guides/sample_app_ug/vhost.rst | 36 +-
> examples/vhost/main.c | 2319 ++++++------------------------------
> examples/vhost/main.h | 49 +-
> 3 files changed, 375 insertions(+), 2029 deletions(-)
>
> --
> 1.9.0
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC] eal: provide option to set vhost_user socket owner/permissions
2016-04-26 4:16 0% ` Yuanhan Liu
@ 2016-04-26 7:24 0% ` Christian Ehrhardt
0 siblings, 0 replies; 200+ results
From: Christian Ehrhardt @ 2016-04-26 7:24 UTC (permalink / raw)
To: Yuanhan Liu
Cc: Aaron Conole, dev, Xie, Huawei, Thomas Monjalon, David Marchand,
Panu Matilainen
Thanks,
great that you added more on CC for a wider discussion - I think that is
the only right way to go.
Just to "defend" a bit - solution a) was created under the special
circumstance that I wanted a workaround that would work today.
But that is/was special to what I package with DPDK 2.2 + OVS 2.5 as of
today - and therefore was the right place for a fast interim fix for me.
I totally agree that the A in EAL was meant for abstraction and we might
want to avoid vhost-specific things in there in the long run.
I like your suggestion of a new API as a proper long term solution, but I
don't feel deeply enough involved yet on the API level to give it any
judgement.
So I look forward for more opinions on it.
P.S. the patch bot hit me hard with 2 pages of space/bracket issues, sorry
for that - but it was only meant as RFC after all :-)
Christian Ehrhardt
Software Engineer, Ubuntu Server
Canonical Ltd
On Tue, Apr 26, 2016 at 6:16 AM, Yuanhan Liu <yuanhan.liu@linux.intel.com>
wrote:
> On Mon, Apr 25, 2016 at 11:18:16AM +0200, Christian Ehrhardt wrote:
> > The API doesn't hold a way to specify a owner/permission set for
> vhost_user
> > created sockets.
>
> Yes, it's kind of like a known issue. So, thanks for bringing it, with
> a solution, for dicussion (cc'ed more people).
>
> > I don't even think an API change would make that much sense.
> >
> > Projects consuming DPDK start to do 'their own workarounds' like
> openvswitch
> > https://patchwork.ozlabs.org/patch/559043/
> > https://patchwork.ozlabs.org/patch/559045/
> > But for this specific example they are blocked/stalled behind a bigger
> > rework (https://patchwork.ozlabs.org/patch/604898/).
> > Also one could ask why each project would need their own workaround.
> >
> > At the same time - as I want it for existing code linking against DPDK I
> > wanted to avoid changing API/ABI. That way I want to provide something
> existing
> > users could utilize. So I created a DPDK EAL commandline option based
> ideas in
> > the former patches.
> >
> > For myself I consider this a nice interim solution for existing released
> > Openvswitch+DPDK solution. And I intend to put it as delta into the DPDK
> 2.2
> > currently packaged in Ubuntu to get it working more smoothly with
> > openvswitch 2.5.
> >
> > But I'd be interested if DPDK in general would be interested in:
> > a) an approach like this?
>
> You were trying to add a vhost specific stuff as EAL command option,
> which is something we might should try to avoid.
>
> > b) would prefer a change of the API?
>
> Adding a new option to the current register API might will not work well,
> either. It gives you no ability to do a dynamic change later. I mean,
> taking OVS as an example, OVS provides you the flexible ability to do all
> kinds of configuration in a dynamic way, say number of rx queues. If we
> do the permissions setup in the register time, there would be no way to
> change it later, right?
>
> So, I'm thinking that we may could add a new API for that? It then would
> allow applications to change it at anytime.
>
> > c) consider it an issue of consuming projects and let them take care?
>
> It's not exactly an issue of consuming projects; we created the socket
> file after all.
>
> And I'd like to hear what others would say.
>
> Thanks.
>
> --yliu
>
^ permalink raw reply [relevance 0%]
* [dpdk-dev] [PATCH 7/7] examples/vhost: switch_worker cleanup
2016-04-26 4:45 3% [dpdk-dev] [PATCH 0/7] vhost/example cleanup/fix Yuanhan Liu
@ 2016-04-26 4:45 2% ` Yuanhan Liu
2016-04-28 5:45 0% ` [dpdk-dev] [PATCH 0/7] vhost/example cleanup/fix Wang, Zhihong
[not found] ` <1462224230-19460-1-git-send-email-yuanhan.liu@linux.intel.com>
2 siblings, 0 replies; 200+ results
From: Yuanhan Liu @ 2016-04-26 4:45 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Yuanhan Liu
switch_worker() is the last piece of messy code that touches the
virtio/vhost device.
Clean it up here, so that the later vhost ABI refactoring will be less
painful.
The cleanup is straightforward: break long lines, move some code into
functions and, lastly, comment a bit on switch_worker().
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
examples/vhost/main.c | 255 +++++++++++++++++++++++++++-----------------------
1 file changed, 138 insertions(+), 117 deletions(-)
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 6a69f34..96d6ab5 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -213,6 +213,8 @@ struct mbuf_table {
/* TX queue for each data core. */
struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
+#define MBUF_TABLE_DRAIN_TSC ((rte_get_tsc_hz() + US_PER_S - 1) \
+ / US_PER_S * BURST_TX_DRAIN_US)
#define VLAN_HLEN 4
/* Per-device statistics struct */
@@ -945,16 +947,35 @@ static void virtio_tx_offload(struct rte_mbuf *m)
tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
}
+static inline void
+free_pkts(struct rte_mbuf **pkts, uint16_t n)
+{
+ while (n--)
+ rte_pktmbuf_free(pkts[n]);
+}
+
+static inline void __attribute__((always_inline))
+do_drain_mbuf_table(struct mbuf_table *tx_q)
+{
+ uint16_t count;
+
+ count = rte_eth_tx_burst(ports[0], tx_q->txq_id,
+ tx_q->m_table, tx_q->len);
+ if (unlikely(count < tx_q->len))
+ free_pkts(&tx_q->m_table[count], tx_q->len - count);
+
+ tx_q->len = 0;
+}
+
/*
- * This function routes the TX packet to the correct interface. This may be a local device
- * or the physical port.
+ * This function routes the TX packet to the correct interface. This
+ * may be a local device or the physical port.
*/
static inline void __attribute__((always_inline))
virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
{
struct mbuf_table *tx_q;
- struct rte_mbuf **m_table;
- unsigned len, ret, offset = 0;
+ unsigned offset = 0;
const uint16_t lcore_id = rte_lcore_id();
struct virtio_net *dev = vdev->dev;
struct ether_hdr *nh;
@@ -990,7 +1011,6 @@ queue2nic:
/*Add packet to the port tx queue*/
tx_q = &lcore_tx_queue[lcore_id];
- len = tx_q->len;
nh = rte_pktmbuf_mtod(m, struct ether_hdr *);
if (unlikely(nh->ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN))) {
@@ -1028,55 +1048,132 @@ queue2nic:
if (m->ol_flags & PKT_TX_TCP_SEG)
virtio_tx_offload(m);
- tx_q->m_table[len] = m;
- len++;
+ tx_q->m_table[tx_q->len++] = m;
if (enable_stats) {
dev_statistics[dev->device_fh].tx_total++;
dev_statistics[dev->device_fh].tx++;
}
- if (unlikely(len == MAX_PKT_BURST)) {
- m_table = (struct rte_mbuf **)tx_q->m_table;
- ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id, m_table, (uint16_t) len);
- /* Free any buffers not handled by TX and update the port stats. */
- if (unlikely(ret < len)) {
- do {
- rte_pktmbuf_free(m_table[ret]);
- } while (++ret < len);
+ if (unlikely(tx_q->len == MAX_PKT_BURST))
+ do_drain_mbuf_table(tx_q);
+
+ return;
+}
+
+
+static inline void __attribute__((always_inline))
+drain_mbuf_table(struct mbuf_table *tx_q)
+{
+ static uint64_t prev_tsc = 0;
+ uint64_t cur_tsc;
+
+ if (tx_q->len == 0)
+ return;
+
+ cur_tsc = rte_rdtsc();
+ if (unlikely(cur_tsc - prev_tsc > MBUF_TABLE_DRAIN_TSC)) {
+ prev_tsc = cur_tsc;
+
+ RTE_LOG(DEBUG, VHOST_DATA,
+ "TX queue drained after timeout with burst size %u\n",
+ tx_q->len);
+ do_drain_mbuf_table(tx_q);
+ }
+}
+
+static inline void __attribute__((always_inline))
+drain_eth_rx(struct vhost_dev *vdev)
+{
+ uint16_t rx_count, enqueue_count;
+ struct virtio_net *dev = vdev->dev;
+ struct rte_mbuf *pkts[MAX_PKT_BURST];
+
+ rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
+ pkts, MAX_PKT_BURST);
+ if (!rx_count)
+ return;
+
+ /*
+ * When "enable_retry" is set, here we wait and retry when there
+ * is no enough free slots in the queue to hold @rx_count packets,
+ * to diminish packet loss.
+ */
+ if (enable_retry &&
+ unlikely(rx_count > rte_vring_available_entries(dev,
+ VIRTIO_RXQ))) {
+ uint32_t retry;
+
+ for (retry = 0; retry < burst_rx_retry_num; retry++) {
+ rte_delay_us(burst_rx_delay_time);
+ if (rx_count <= rte_vring_available_entries(dev,
+ VIRTIO_RXQ))
+ break;
}
+ }
- len = 0;
+ enqueue_count = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ,
+ pkts, rx_count);
+ if (enable_stats) {
+ uint64_t fh = dev->device_fh;
+
+ rte_atomic64_add(&dev_statistics[fh].rx_total_atomic, rx_count);
+ rte_atomic64_add(&dev_statistics[fh].rx_atomic, enqueue_count);
}
- tx_q->len = len;
- return;
+ free_pkts(pkts, rx_count);
}
+
+static inline void __attribute__((always_inline))
+drain_virtio_tx(struct vhost_dev *vdev)
+{
+ struct rte_mbuf *pkts[MAX_PKT_BURST];
+ uint16_t count;
+ uint16_t i;
+
+ count = rte_vhost_dequeue_burst(vdev->dev, VIRTIO_TXQ, mbuf_pool,
+ pkts, MAX_PKT_BURST);
+
+ /* setup VMDq for the first packet */
+ if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && count) {
+ if (vdev->remove || link_vmdq(vdev, pkts[0]) == -1)
+ free_pkts(pkts, count);
+ }
+
+ for (i = 0; i < count; ++i) {
+ virtio_tx_route(vdev, pkts[i],
+ vlan_tags[(uint16_t)vdev->dev->device_fh]);
+ }
+}
+
/*
- * This function is called by each data core. It handles all RX/TX registered with the
- * core. For TX the specific lcore linked list is used. For RX, MAC addresses are compared
- * with all devices in the main linked list.
+ * Main function of vhost-switch. It basically does:
+ *
+ * for each vhost device {
+ * - drain_eth_rx()
+ *
+ * Which drains the host eth Rx queue linked to the vhost device,
+ * and deliver all of them to guest virito Rx ring associated with
+ * this vhost device.
+ *
+ * - drain_virtio_tx()
+ *
+ * Which drains the guest virtio Tx queue and deliver all of them
+ * to the target, which could be another vhost device, or the
+ * physical eth dev. The route is done in function "virtio_tx_route".
+ * }
*/
static int
-switch_worker(__attribute__((unused)) void *arg)
+switch_worker(void *arg __rte_unused)
{
- struct virtio_net *dev = NULL;
- struct vhost_dev *vdev = NULL;
- struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ unsigned i;
+ unsigned lcore_id = rte_lcore_id();
+ struct vhost_dev *vdev;
struct mbuf_table *tx_q;
- const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
- uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;
- unsigned ret, i;
- const uint16_t lcore_id = rte_lcore_id();
- const uint16_t num_cores = (uint16_t)rte_lcore_count();
- uint16_t rx_count = 0;
- uint16_t tx_count;
- uint32_t retry = 0;
RTE_LOG(INFO, VHOST_DATA, "Procesing on Core %u started\n", lcore_id);
- prev_tsc = 0;
tx_q = &lcore_tx_queue[lcore_id];
- for (i = 0; i < num_cores; i ++) {
+ for (i = 0; i < rte_lcore_count(); i ++) {
if (lcore_ids[i] == lcore_id) {
tx_q->txq_id = i;
break;
@@ -1084,34 +1181,7 @@ switch_worker(__attribute__((unused)) void *arg)
}
while(1) {
- cur_tsc = rte_rdtsc();
- /*
- * TX burst queue drain
- */
- diff_tsc = cur_tsc - prev_tsc;
- if (unlikely(diff_tsc > drain_tsc)) {
-
- if (tx_q->len) {
- RTE_LOG(DEBUG, VHOST_DATA,
- "TX queue drained after timeout with burst size %u\n",
- tx_q->len);
-
- /*Tx any packets in the queue*/
- ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id,
- (struct rte_mbuf **)tx_q->m_table,
- (uint16_t)tx_q->len);
- if (unlikely(ret < tx_q->len)) {
- do {
- rte_pktmbuf_free(tx_q->m_table[ret]);
- } while (++ret < tx_q->len);
- }
-
- tx_q->len = 0;
- }
-
- prev_tsc = cur_tsc;
-
- }
+ drain_mbuf_table(tx_q);
/*
* Inform the configuration core that we have exited the
@@ -1121,69 +1191,20 @@ switch_worker(__attribute__((unused)) void *arg)
lcore_info[lcore_id].dev_removal_flag = ACK_DEV_REMOVAL;
/*
- * Process devices
+ * Process vhost devices
*/
TAILQ_FOREACH(vdev, &lcore_info[lcore_id].vdev_list, next) {
- uint64_t fh;
-
- dev = vdev->dev;
- fh = dev->device_fh;
-
if (unlikely(vdev->remove)) {
unlink_vmdq(vdev);
vdev->ready = DEVICE_SAFE_REMOVE;
continue;
}
- if (likely(vdev->ready == DEVICE_RX)) {
- /*Handle guest RX*/
- rx_count = rte_eth_rx_burst(ports[0],
- vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
-
- if (rx_count) {
- /*
- * Retry is enabled and the queue is full then we wait and retry to avoid packet loss
- * Here MAX_PKT_BURST must be less than virtio queue size
- */
- if (enable_retry && unlikely(rx_count > rte_vring_available_entries(dev, VIRTIO_RXQ))) {
- for (retry = 0; retry < burst_rx_retry_num; retry++) {
- rte_delay_us(burst_rx_delay_time);
- if (rx_count <= rte_vring_available_entries(dev, VIRTIO_RXQ))
- break;
- }
- }
- ret_count = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ, pkts_burst, rx_count);
- if (enable_stats) {
- rte_atomic64_add(
- &dev_statistics[fh].rx_total_atomic,
- rx_count);
- rte_atomic64_add(
- &dev_statistics[fh].rx_atomic,
- ret_count);
- }
- while (likely(rx_count)) {
- rx_count--;
- rte_pktmbuf_free(pkts_burst[rx_count]);
- }
-
- }
- }
+ if (likely(vdev->ready == DEVICE_RX))
+ drain_eth_rx(vdev);
- if (likely(!vdev->remove)) {
- /* Handle guest TX*/
- tx_count = rte_vhost_dequeue_burst(dev, VIRTIO_TXQ, mbuf_pool, pkts_burst, MAX_PKT_BURST);
- /* If this is the first received packet we need to learn the MAC and setup VMDQ */
- if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && tx_count) {
- if (vdev->remove || (link_vmdq(vdev, pkts_burst[0]) == -1)) {
- while (tx_count)
- rte_pktmbuf_free(pkts_burst[--tx_count]);
- }
- }
- for (i = 0; i < tx_count; ++i) {
- virtio_tx_route(vdev, pkts_burst[i],
- vlan_tags[(uint16_t)dev->device_fh]);
- }
- }
+ if (likely(!vdev->remove))
+ drain_virtio_tx(vdev);
}
}
--
1.9.0
^ permalink raw reply [relevance 2%]
* [dpdk-dev] [PATCH 0/7] vhost/example cleanup/fix
@ 2016-04-26 4:45 3% Yuanhan Liu
2016-04-26 4:45 2% ` [dpdk-dev] [PATCH 7/7] examples/vhost: switch_worker cleanup Yuanhan Liu
` (2 more replies)
0 siblings, 3 replies; 200+ results
From: Yuanhan Liu @ 2016-04-26 4:45 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Yuanhan Liu
I'm starting to work on the vhost ABI refactoring, for which I also have to
touch the vhost example code to make it work. The vhost example code,
however, is very messy, full of __very__ long lines. This would make
a later diff that applies the new vhost API very ugly and, therefore, not
friendly for review. This is how this cleanup came about.
Besides that, there is one enhancement patch, which handles the broadcast
packets so that we could relay the ARP request packet, to let vhost-switch
be more like a real switch. There is another patch that (hopefully) would
fix the mbuf allocation failure ultimately. I also added some guidelines
there as comments to show how to count how many mbuf entries are enough for
our usage.
---
Yuanhan Liu (7):
examples/vhost: remove the non-working zero copy code
examples/vhost: remove unused macro and struct
examples/vhost: use tailq to link vhost devices
examples/vhost: use mac compare helper function directly
examples/vhost: handle broadcast packet
examples/vhost: fix mbuf allocation failures
examples/vhost: switch_worker cleanup
doc/guides/sample_app_ug/vhost.rst | 36 +-
examples/vhost/main.c | 2319 ++++++------------------------------
examples/vhost/main.h | 49 +-
3 files changed, 375 insertions(+), 2029 deletions(-)
--
1.9.0
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [RFC] eal: provide option to set vhost_user socket owner/permissions
2016-04-25 9:18 2% [dpdk-dev] [RFC] eal: provide option to set vhost_user socket owner/permissions Christian Ehrhardt
@ 2016-04-26 4:16 0% ` Yuanhan Liu
2016-04-26 7:24 0% ` Christian Ehrhardt
0 siblings, 1 reply; 200+ results
From: Yuanhan Liu @ 2016-04-26 4:16 UTC (permalink / raw)
To: Christian Ehrhardt
Cc: Aaron Conole, dev, Xie, Huawei, Thomas Monjalon, David Marchand,
Panu Matilainen
On Mon, Apr 25, 2016 at 11:18:16AM +0200, Christian Ehrhardt wrote:
> The API doesn't hold a way to specify a owner/permission set for vhost_user
> created sockets.
Yes, it's kind of a known issue. So, thanks for bringing it up, with
a solution, for discussion (cc'ed more people).
> I don't even think an API change would make that much sense.
>
> Projects consuming DPDK start to do 'their own workarounds' like openvswitch
> https://patchwork.ozlabs.org/patch/559043/
> https://patchwork.ozlabs.org/patch/559045/
> But for this specific example they are blocked/stalled behind a bigger
> rework (https://patchwork.ozlabs.org/patch/604898/).
> Also one could ask why each project would need their own workaround.
>
> At the same time - as I want it for existing code linking against DPDK I
> wanted to avoid changing API/ABI. That way I want to provide something existing
> users could utilize. So I created a DPDK EAL commandline option based ideas in
> the former patches.
>
> For myself I consider this a nice interim solution for existing released
> Openvswitch+DPDK solution. And I intend to put it as delta into the DPDK 2.2
> currently packaged in Ubuntu to get it working more smoothly with
> openvswitch 2.5.
>
> But I'd be interested if DPDK in general would be interested in:
> a) an approach like this?
You were trying to add vhost-specific stuff as an EAL command option,
which is something we should probably try to avoid.
> b) would prefer a change of the API?
Adding a new option to the current register API might not work well,
either. It gives you no ability to do a dynamic change later. I mean,
taking OVS as an example, OVS provides you the flexible ability to do all
kinds of configuration in a dynamic way, say number of rx queues. If we
do the permissions setup at register time, there would be no way to
change it later, right?
So, I'm thinking that we could perhaps add a new API for that? It would
then allow applications to change it at any time.
> c) consider it an issue of consuming projects and let them take care?
It's not exactly an issue of consuming projects; we created the socket
file after all.
And I'd like to hear what others would say.
Thanks.
--yliu
^ permalink raw reply [relevance 0%]
* [dpdk-dev] [RFC] eal: provide option to set vhost_user socket owner/permissions
@ 2016-04-25 9:18 2% Christian Ehrhardt
2016-04-26 4:16 0% ` Yuanhan Liu
0 siblings, 1 reply; 200+ results
From: Christian Ehrhardt @ 2016-04-25 9:18 UTC (permalink / raw)
To: christian.ehrhardt, Aaron Conole, dev
The API doesn't hold a way to specify a owner/permission set for vhost_user
created sockets. I don't even think an API change would make that much sense.
Projects consuming DPDK start to do 'their own workarounds' like openvswitch
https://patchwork.ozlabs.org/patch/559043/
https://patchwork.ozlabs.org/patch/559045/
But for this specific example they are blocked/stalled behind a bigger
rework (https://patchwork.ozlabs.org/patch/604898/).
Also one could ask why each project would need their own workaround.
At the same time - as I want it for existing code linking against DPDK I
wanted to avoid changing API/ABI. That way I want to provide something existing
users could utilize. So I created a DPDK EAL commandline option based ideas in
the former patches.
For myself I consider this a nice interim solution for existing released
Openvswitch+DPDK solution. And I intend to put it as delta into the DPDK 2.2
currently packaged in Ubuntu to get it working more smoothly with
openvswitch 2.5.
But I'd be interested if DPDK in general would be interested in:
a) an approach like this?
b) would prefer a change of the API?
c) consider it an issue of consuming projects and let them take care?
Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com>
---
doc/guides/testpmd_app_ug/run_app.rst | 19 +++
lib/librte_eal/common/eal_common_options.c | 4 +
lib/librte_eal/common/eal_internal_cfg.h | 2 +
lib/librte_eal/common/eal_options.h | 4 +
lib/librte_eal/common/include/rte_eal.h | 5 +
lib/librte_eal/linuxapp/eal/eal.c | 182 +++++++++++++++++++++++++++
lib/librte_vhost/vhost_user/vhost-net-user.c | 4 +
7 files changed, 220 insertions(+)
diff --git a/doc/guides/testpmd_app_ug/run_app.rst b/doc/guides/testpmd_app_ug/run_app.rst
index f605564..24c9c01 100644
--- a/doc/guides/testpmd_app_ug/run_app.rst
+++ b/doc/guides/testpmd_app_ug/run_app.rst
@@ -156,6 +156,25 @@ See the DPDK Getting Started Guides for more information on these options.
Use malloc instead of hugetlbfs.
+* ``--vhost-owner``
+
+ When creating vhost_user sockets change owner and group to the specified value.
+ This can be given as ``user:group``, but also only ``user`` or ``:group`` are supported.
+
+ Examples::
+
+ --vhost-owner 'libvirt-qemu:kvm'
+ --vhost-owner 'libvirt-qemu'
+ --vhost-owner ':kvm'
+
+* ``--vhost-perm``
+
+ When creating vhost_user sockets set them up with these permissions.
+
+ For example::
+
+ --vhost-perm '0664'
+
Testpmd Command-line Options
----------------------------
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 2b418d5..073198b 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -95,6 +95,8 @@ eal_long_options[] = {
{OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM },
{OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM },
{OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM },
+ {OPT_VHOST_OWNER, 1, NULL, OPT_VHOST_OWNER_NUM },
+ {OPT_VHOST_PERM, 1, NULL, OPT_VHOST_PERM_NUM },
{0, 0, NULL, 0 }
};
@@ -153,6 +155,8 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
#endif
internal_cfg->vmware_tsc_map = 0;
internal_cfg->create_uio_dev = 0;
+ internal_cfg->vhost_sock_owner = NULL;
+ internal_cfg->vhost_sock_perm = NULL;
}
static int
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 5f1367e..bdf34e3 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -83,6 +83,8 @@ struct internal_config {
volatile enum rte_intr_mode vfio_intr_mode;
const char *hugefile_prefix; /**< the base filename of hugetlbfs files */
const char *hugepage_dir; /**< specific hugetlbfs directory to use */
+ const char *vhost_sock_owner; /**< owner:group of vhost_user sockets */
+ const char *vhost_sock_perm; /**< permissions of vhost_user sockets */
unsigned num_hugepage_sizes; /**< how many sizes on this system */
struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index a881c62..1161083 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -83,6 +83,10 @@ enum {
OPT_VMWARE_TSC_MAP_NUM,
#define OPT_XEN_DOM0 "xen-dom0"
OPT_XEN_DOM0_NUM,
+#define OPT_VHOST_OWNER "vhost-owner"
+ OPT_VHOST_OWNER_NUM,
+#define OPT_VHOST_PERM "vhost-perm"
+ OPT_VHOST_PERM_NUM,
OPT_LONG_MAX_NUM
};
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index a71d6f5..506cf24 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -252,6 +252,11 @@ static inline int rte_gettid(void)
return RTE_PER_LCORE(_thread_id);
}
+/**
+ * Set owner/permissions on sockets if requested on EAL commandline
+ */
+void rte_eal_set_socket_permissions(const char *);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 8aafd51..3d0b709 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -53,6 +53,9 @@
#if defined(RTE_ARCH_X86)
#include <sys/io.h>
#endif
+#include <sys/types.h>
+#include <pwd.h>
+#include <grp.h>
#include <rte_common.h>
#include <rte_debug.h>
@@ -343,6 +346,8 @@ eal_usage(const char *prgname)
" --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n"
" --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n"
" --"OPT_XEN_DOM0" Support running on Xen dom0 without hugetlbfs\n"
+ " --"OPT_VHOST_OWNER" Create vhost-user sockets with this owner:group\n"
+ " --"OPT_VHOST_PERM" Create vhost-user sockets with these permissions\n"
"\n");
/* Allow the application to print its usage message too if hook is set */
if ( rte_application_usage_hook ) {
@@ -618,6 +623,14 @@ eal_parse_args(int argc, char **argv)
internal_config.create_uio_dev = 1;
break;
+ case OPT_VHOST_OWNER_NUM:
+ internal_config.vhost_sock_owner = optarg;
+ break;
+
+ case OPT_VHOST_PERM_NUM:
+ internal_config.vhost_sock_perm = optarg;
+ break;
+
default:
if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
RTE_LOG(ERR, EAL, "Option %c is not supported "
@@ -934,3 +947,172 @@ rte_eal_check_module(const char *module_name)
/* Module has been found */
return 1;
}
+
+/* Try to double the size of '*buf', return true
+ * if successful, and '*sizep' will be updated with
+ * the new size. Otherwise, return false. */
+static int
+enlarge_buffer(char **buf, size_t *sizep)
+{
+ size_t newsize = *sizep * 2;
+
+ if (newsize > *sizep) {
+ *buf = realloc(*buf, newsize);
+ *sizep = newsize;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+get_owners_from_str(const char *user_spec, uid_t *uid, gid_t *gid)
+{
+ size_t bufsize = 4096;
+
+ char *pos = strchr(user_spec, ':');
+ user_spec += strspn(user_spec, " \t\r\n");
+ size_t len = pos ? (size_t)(pos - user_spec) : strlen(user_spec);
+
+ char *buf = NULL;
+ struct passwd pwd, *res;
+ int e;
+
+ buf = malloc(bufsize);
+ char *user_search = NULL;
+ if (len) {
+ user_search = malloc(len + 1);
+ memcpy(user_search, user_spec, len);
+ user_search[len] = '\0';
+ while ((e = getpwnam_r(user_search, &pwd, buf, bufsize, &res)) == ERANGE) {
+ if (!enlarge_buffer(&buf, &bufsize)) {
+ break;
+ }
+ }
+
+ if (e != 0) {
+ RTE_LOG(ERR, EAL,"Failed to retrive user %s's uid (%s), aborting.",
+ user_search, strerror(e));
+ goto release;
+ }
+ if (res == NULL) {
+ RTE_LOG(ERR, EAL,"user %s not found, aborting.",
+ user_search);
+ e = -1;
+ goto release;
+ }
+ } else {
+ /* User name is not specified, use current user. */
+ while ((e = getpwuid_r(getuid(), &pwd, buf, bufsize, &res)) == ERANGE) {
+ if (!enlarge_buffer(&buf, &bufsize)) {
+ break;
+ }
+ }
+
+ if (e != 0) {
+ RTE_LOG(ERR, EAL,"Failed to retrive current user's uid "
+ "(%s), aborting.", strerror(e));
+ goto release;
+ }
+ user_search = strdup(pwd.pw_name);
+ }
+
+ if (uid)
+ *uid = pwd.pw_uid;
+
+ free(buf);
+ buf = NULL;
+
+ if (pos) {
+ char *grpstr = pos + 1;
+ grpstr += strspn(grpstr, " \t\r\n");
+
+ if (*grpstr) {
+ struct group grp, *res;
+
+ bufsize = 4096;
+ buf = malloc(bufsize);
+ while ((e = getgrnam_r(grpstr, &grp, buf, bufsize, &res))
+ == ERANGE) {
+ if (!enlarge_buffer(&buf, &bufsize)) {
+ break;
+ }
+ }
+
+ if (e) {
+ RTE_LOG(ERR, EAL,"Failed to get group entry for %s, "
+ "(%s), aborting.", grpstr,
+ strerror(e));
+ goto release;
+ }
+ if (res == NULL) {
+ RTE_LOG(ERR, EAL,"Group %s not found, aborting.",
+ grpstr);
+ e = -1;
+ goto release;
+ }
+
+ if (gid)
+ *gid = grp.gr_gid;
+ }
+ }
+
+ release:
+ free(buf);
+ free(user_search);
+ return e;
+}
+
+static void
+vhost_set_permissions(const char *vhost_sock_location)
+{
+ unsigned long int mode = strtoul(internal_config.vhost_sock_perm, NULL, 0);
+ int err = chmod(vhost_sock_location, (mode_t)mode);
+ if (err) {
+ RTE_LOG(ERR, EAL,"vhost-user socket cannot set"
+ " permissions to %s (%s).\n",
+ internal_config.vhost_sock_perm, strerror(err));
+ return;
+ }
+ RTE_LOG(INFO, EAL,"Socket %s changed permissions"
+ " to %s\n", vhost_sock_location,
+ internal_config.vhost_sock_perm);
+}
+
+static void
+vhost_set_ownership(const char *vhost_sock_location)
+{
+ uid_t vhuid=0;
+ gid_t vhgid=0;
+
+ if (get_owners_from_str(internal_config.vhost_sock_owner, &vhuid, &vhgid)) {
+ RTE_LOG(ERR, EAL,"vhost-user socket unable to get"
+ " specified user/group: %s\n",
+ internal_config.vhost_sock_owner);
+ return;
+ }
+
+ int err = chown(vhost_sock_location, vhuid, vhgid);
+ if (err) {
+ RTE_LOG(ERR, EAL,"vhost-user socket unable to set"
+ " ownership to %s (%s).\n",
+ internal_config.vhost_sock_owner, strerror(err));
+ return;
+ }
+
+ RTE_LOG(INFO, EAL,"Socket %s changed ownership"
+ " to %s.\n", vhost_sock_location,
+ internal_config.vhost_sock_owner);
+}
+
+void
+rte_eal_set_socket_permissions(const char *path)
+{
+ if (internal_config.vhost_sock_perm) {
+ vhost_set_permissions(path);
+ }
+
+ if (internal_config.vhost_sock_owner) {
+ vhost_set_ownership(path);
+ }
+}
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index df2bd64..ea8dee9 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -51,6 +51,8 @@
#include "vhost-net.h"
#include "virtio-net-user.h"
+#include <rte_eal.h>
+
#define MAX_VIRTIO_BACKLOG 128
static void vserver_new_vq_conn(int fd, void *data, int *remove);
@@ -476,6 +478,8 @@ rte_vhost_driver_register(const char *path)
return -1;
}
+ rte_eal_set_socket_permissions(path);
+
vserver->path = strdup(path);
fdset_add(&g_vhost_server.fdset, vserver->listenfd,
--
2.7.4
^ permalink raw reply [relevance 2%]
* Re: [dpdk-dev] [RFC PATCH v1 0/3] Remove string operations from xstats
2016-04-15 14:44 3% [dpdk-dev] [RFC PATCH v1 0/3] Remove string operations from xstats Remy Horton
@ 2016-04-20 16:03 0% ` David Harton (dharton)
2016-04-28 14:56 0% ` Tahhan, Maryam
2016-04-29 12:52 0% ` David Harton (dharton)
1 sibling, 1 reply; 200+ results
From: David Harton (dharton) @ 2016-04-20 16:03 UTC (permalink / raw)
To: Remy Horton, dev
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Remy Horton
> Sent: Friday, April 15, 2016 10:44 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [RFC PATCH v1 0/3] Remove string operations from
> xstats
>
> The current extended ethernet statistics fetching involve doing several
> string operations, which causes performance issues if there are lots of
> statistics and/or network interfaces. This RFC patchset changes the API
> for xstats to use integer identifiers instead of strings and implements
> this new API for the ixgbe driver. Others drivers to follow.
>
> --
>
> Since this will involve API & ABI breakage as previously advertised, there
> are several design assumptions that need consideration:
>
> *) id-name & id-value pairs for both lookup and query Permits out-of-order
> and non-contigious returning of names/ids/values, even though expected
> implmentations would in practice return items in sorted order by id. Is
> this sufficent/desirable future proofing? Idea is to allow possibility of
> drivers returning partial statistics.
I believe forcing drivers to match to a common id-space will become
burdensome. If the stats id-space isn't common then matching strings is
probably just as sufficient as long as drivers don't add/remove stats
ad hoc between the time the device is initialized and removed.
>
> *) Bulk name-id mapping lookup only
> At the moment individual lookup is not supported, as this would impose
> extra overheads on drivers. The assumption is that any end user would
> fetch all this data once on startup and then cache the mappings.
I'm not sure I see the value of looking up a single stat from a user
perspective. I can see where the drivers might say that some stats
are less disruptive/etc but the user doesn't have that knowledge and
wouldn't know how to take advantage. Usually all stats are grabbed
multiple times and the changes noted during debug sessions.
>
> *) Replacement or additional API
> This patch replaces the current xstats API, but there is no inherant
> reason beyond maintainability why this funtionality could not be in
> addition rather than a replacement. What is consensus on this?
I came to the conclusion that replacing the existing API isn't necessary
but rather extending it so backwards compatibility could be maintained
during the previous discussions on this topic. However, if we want to
go forward with cleaning up in order to reduce the support drivers
provide I'm all for it.
I still believe the API we develop should follow an "ethtool stats like"
format as suggested earlier this year:
extern int rte_eth_xstats_names_get(uint8_t port_id,
struct rte_eth_xstats_name *names, unsigned n);
extern int rte_eth_xstats_values_get(uint8_t port_id,
uint64_t *values, unsigned n);
Again, these could be provided alongside the existing API or replace it.
I also like the idea you provided of a separate API to obtain the
xstats count rather than deriving the count by calling one of the
above functions with "dummy" values.
Again, I can provide the patches for the changes I've made that align
with this proposed API. I just never got any feedback on it when
requested previously.
Regards,
Dave
>
> Comments welcome.
>
> Remy Horton (3):
> rte: change xstats to use integer keys
> drivers/net/ixgbe: change xstats to use integer keys
> examples/ethtool: add xstats display command
>
> drivers/net/ixgbe/ixgbe_ethdev.c | 87
> +++++++++++++++++++++++++++++++----
> examples/ethtool/ethtool-app/ethapp.c | 57 +++++++++++++++++++++++
> lib/librte_ether/rte_ethdev.c | 87
> +++++++++++++++++++++++++++++++----
> lib/librte_ether/rte_ethdev.h | 38 +++++++++++++++
> 4 files changed, 252 insertions(+), 17 deletions(-)
>
> --
> 2.5.5
^ permalink raw reply [relevance 0%]
* [dpdk-dev] [PATCH v2 04/17] eal: remove duplicate function declaration
@ 2016-04-20 11:44 3% ` David Marchand
0 siblings, 0 replies; 200+ results
From: David Marchand @ 2016-04-20 11:44 UTC (permalink / raw)
To: dev; +Cc: thomas.monjalon, viktorin
rte_eal_dev_init is declared in both eal_private.h and rte_dev.h since its
introduction.
This function has been exported in ABI, so remove it from eal_private.h
Fixes: e57f20e05177 ("eal: make vdev init path generic for both virtual and pci devices")
Signed-off-by: David Marchand <david.marchand@6wind.com>
---
lib/librte_eal/common/eal_private.h | 7 -------
lib/librte_eal/linuxapp/eal/eal.c | 1 +
2 files changed, 1 insertion(+), 7 deletions(-)
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 2342fa1..855fd25 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -262,13 +262,6 @@ int rte_eal_intr_init(void);
int rte_eal_alarm_init(void);
/**
- * This function initialises any virtual devices
- *
- * This function is private to the EAL.
- */
-int rte_eal_dev_init(void);
-
-/**
* Function is to check if the kernel module(like, vfio, vfio_iommu_type1,
* etc.) loaded.
*
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 8aafd51..f26f8d3 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -70,6 +70,7 @@
#include <rte_cpuflags.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
+#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_common.h>
#include <rte_version.h>
--
1.9.1
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH] port: bump ABI for pcap file support
2016-04-15 10:32 4% ` Dumitrescu, Cristian
@ 2016-04-20 9:55 4% ` Thomas Monjalon
0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2016-04-20 9:55 UTC (permalink / raw)
To: Dumitrescu, Cristian; +Cc: Zhang, Roy Fan, dev, Singh, Jasvinder
> > Support of PCAP file has been added to rte_port in release 16.04
> > as NEXT_ABI. It is in the standard ABI of the release 16.07.
> >
> > Signed-off-by: Thomas Monjalon <thomas.monjalon@6wind.com>
>
> Acked-by: Cristian Dumitrescu <Cristian.Dumitrescu@intel.com>
Applied
^ permalink raw reply [relevance 4%]
* [dpdk-dev] [PATCH v2] ethdev: remove deprecated statistics
2016-04-19 14:03 5% [dpdk-dev] [PATCH] ethdev: remove deprecated statistics Thomas Monjalon
@ 2016-04-20 9:47 4% ` Thomas Monjalon
0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2016-04-20 9:47 UTC (permalink / raw)
To: maryam.tahhan, harry.van.haaren; +Cc: dev
Some statistics were deprecated since release 2.1 (49f386542af4).
The last deprecated counter to be used was imcasts.
The VF loopback statistics are also removed as they are used only
in igb and duplicated in extended statistics.
The new counters should be added to extended statistics.
Signed-off-by: Thomas Monjalon <thomas.monjalon@6wind.com>
---
v2:
- remove VF loopback stats
- remove comment in ethtool example
doc/guides/rel_notes/deprecation.rst | 4 ----
doc/guides/rel_notes/release_16_07.rst | 6 +++++-
drivers/net/bonding/rte_eth_bond_pmd.c | 1 -
drivers/net/cxgbe/cxgbe_ethdev.c | 1 -
drivers/net/e1000/igb_ethdev.c | 5 -----
drivers/net/ena/ena_ethdev.c | 2 --
drivers/net/ena/ena_ethdev.h | 1 -
drivers/net/enic/enic_main.c | 1 -
drivers/net/i40e/i40e_ethdev.c | 1 -
drivers/net/ixgbe/ixgbe_ethdev.c | 4 ----
drivers/net/nfp/nfp_net.c | 18 ------------------
drivers/net/vmxnet3/vmxnet3_ethdev.c | 1 -
examples/ethtool/ethtool-app/ethapp.c | 1 -
lib/librte_ether/Makefile | 2 +-
lib/librte_ether/rte_ethdev.h | 26 --------------------------
15 files changed, 6 insertions(+), 68 deletions(-)
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index c78cde7..fffe9c7 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -26,10 +26,6 @@ Deprecation Notices
rte_pci_device. The release 16.04 does not contain these ABI changes, but
release 16.07 will.
-* The following fields have been deprecated in rte_eth_stats:
- ibadcrc, ibadlen, imcasts, fdirmatch, fdirmiss,
- tx_pause_xon, rx_pause_xon, tx_pause_xoff, rx_pause_xoff
-
* The xstats API and rte_eth_xstats struct will be changed to allow retrieval
of values without any string copies or parsing.
No backwards compatibility is planned, as it would require code duplication
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 001888f..83c841b 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -86,6 +86,10 @@ This section should contain API changes. Sample format:
* Add a short 1-2 sentence description of the API change. Use fixed width
quotes for ``rte_function_names`` or ``rte_struct_names``. Use the past tense.
+* The following counters are removed from ``rte_eth_stats`` structure:
+ ibadcrc, ibadlen, imcasts, fdirmatch, fdirmiss,
+ tx_pause_xon, rx_pause_xon, tx_pause_xoff, rx_pause_xoff.
+
ABI Changes
-----------
@@ -107,7 +111,7 @@ The libraries prepended with a plus sign were incremented in this version.
.. code-block:: diff
- libethdev.so.3
+ + libethdev.so.4
librte_acl.so.2
librte_cfgfile.so.2
librte_cmdline.so.2
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index 54788cf..c897146 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -1836,7 +1836,6 @@ bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
stats->imissed += slave_stats.imissed;
stats->ierrors += slave_stats.ierrors;
stats->oerrors += slave_stats.oerrors;
- stats->imcasts += slave_stats.imcasts;
stats->rx_nombuf += slave_stats.rx_nombuf;
for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
diff --git a/drivers/net/cxgbe/cxgbe_ethdev.c b/drivers/net/cxgbe/cxgbe_ethdev.c
index bb134e5..04eddaf 100644
--- a/drivers/net/cxgbe/cxgbe_ethdev.c
+++ b/drivers/net/cxgbe/cxgbe_ethdev.c
@@ -656,7 +656,6 @@ static void cxgbe_dev_stats_get(struct rte_eth_dev *eth_dev,
/* RX Stats */
eth_stats->ipackets = ps.rx_frames;
eth_stats->ibytes = ps.rx_octets;
- eth_stats->imcasts = ps.rx_mcast_frames;
eth_stats->imissed = ps.rx_ovflow0 + ps.rx_ovflow1 +
ps.rx_ovflow2 + ps.rx_ovflow3 +
ps.rx_trunc0 + ps.rx_trunc1 +
diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index e0053fe..f0921ee 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -1805,11 +1805,6 @@ eth_igbvf_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
rte_stats->ibytes = hw_stats->gorc;
rte_stats->opackets = hw_stats->gptc;
rte_stats->obytes = hw_stats->gotc;
- rte_stats->imcasts = hw_stats->mprc;
- rte_stats->ilbpackets = hw_stats->gprlbc;
- rte_stats->ilbbytes = hw_stats->gorlbc;
- rte_stats->olbpackets = hw_stats->gptlbc;
- rte_stats->olbbytes = hw_stats->gotlbc;
}
static void
diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 02af67a..e157587 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -605,7 +605,6 @@ static void ena_stats_restart(struct rte_eth_dev *dev)
rte_atomic64_init(&adapter->drv_stats->ierrors);
rte_atomic64_init(&adapter->drv_stats->oerrors);
- rte_atomic64_init(&adapter->drv_stats->imcasts);
rte_atomic64_init(&adapter->drv_stats->rx_nombuf);
}
@@ -643,7 +642,6 @@ static void ena_stats_get(struct rte_eth_dev *dev,
/* Driver related stats */
stats->ierrors = rte_atomic64_read(&adapter->drv_stats->ierrors);
stats->oerrors = rte_atomic64_read(&adapter->drv_stats->oerrors);
- stats->imcasts = rte_atomic64_read(&adapter->drv_stats->imcasts);
stats->rx_nombuf = rte_atomic64_read(&adapter->drv_stats->rx_nombuf);
}
diff --git a/drivers/net/ena/ena_ethdev.h b/drivers/net/ena/ena_ethdev.h
index ba6f01e..aca853c 100644
--- a/drivers/net/ena/ena_ethdev.h
+++ b/drivers/net/ena/ena_ethdev.h
@@ -121,7 +121,6 @@ enum ena_adapter_state {
struct ena_driver_stats {
rte_atomic64_t ierrors;
rte_atomic64_t oerrors;
- rte_atomic64_t imcasts;
rte_atomic64_t rx_nombuf;
};
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index e3da51d..60fe765 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -248,7 +248,6 @@ void enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
r_stats->imissed = stats->rx.rx_drop;
- r_stats->imcasts = stats->rx.rx_multicast_frames_ok;
r_stats->rx_nombuf = stats->rx.rx_no_bufs;
}
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index bc28d3c..d8b6bd7 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -2099,7 +2099,6 @@ i40e_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
stats->obytes = ns->eth.tx_bytes;
stats->oerrors = ns->eth.tx_errors +
pf->main_vsi->eth_stats.tx_errors;
- stats->imcasts = pf->main_vsi->eth_stats.rx_multicast;
/* Rx Errors */
stats->imissed = ns->eth.rx_discards +
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 3f1ebc1..eec607c 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -2852,8 +2852,6 @@ ixgbevf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
stats->ibytes = hw_stats->vfgorc;
stats->opackets = hw_stats->vfgptc;
stats->obytes = hw_stats->vfgotc;
- stats->imcasts = hw_stats->vfmprc;
- /* stats->imcasts should be removed as imcasts is deprecated */
}
static void
@@ -2870,8 +2868,6 @@ ixgbevf_dev_stats_reset(struct rte_eth_dev *dev)
hw_stats->vfgorc = 0;
hw_stats->vfgptc = 0;
hw_stats->vfgotc = 0;
- hw_stats->vfmprc = 0;
-
}
static void
diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
index bcf5fa9..bc0a3d8 100644
--- a/drivers/net/nfp/nfp_net.c
+++ b/drivers/net/nfp/nfp_net.c
@@ -902,11 +902,6 @@ nfp_net_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
nfp_dev_stats.obytes -= hw->eth_stats_base.obytes;
- nfp_dev_stats.imcasts =
- nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_MC_FRAMES);
-
- nfp_dev_stats.imcasts -= hw->eth_stats_base.imcasts;
-
/* reading general device stats */
nfp_dev_stats.ierrors =
nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_ERRORS);
@@ -918,12 +913,6 @@ nfp_net_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
nfp_dev_stats.oerrors -= hw->eth_stats_base.oerrors;
- /* Multicast frames received */
- nfp_dev_stats.imcasts =
- nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_MC_FRAMES);
-
- nfp_dev_stats.imcasts -= hw->eth_stats_base.imcasts;
-
/* RX ring mbuf allocation failures */
nfp_dev_stats.rx_nombuf = dev->data->rx_mbuf_alloc_failed;
@@ -985,9 +974,6 @@ nfp_net_stats_reset(struct rte_eth_dev *dev)
hw->eth_stats_base.obytes =
nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_OCTETS);
- hw->eth_stats_base.imcasts =
- nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_MC_FRAMES);
-
/* reading general device stats */
hw->eth_stats_base.ierrors =
nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_ERRORS);
@@ -995,10 +981,6 @@ nfp_net_stats_reset(struct rte_eth_dev *dev)
hw->eth_stats_base.oerrors =
nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_ERRORS);
- /* Multicast frames received */
- hw->eth_stats_base.imcasts =
- nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_MC_FRAMES);
-
/* RX ring mbuf allocation failures */
dev->data->rx_mbuf_alloc_failed = 0;
diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c
index bd7a2bb..29b469c 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
@@ -694,7 +694,6 @@ vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
stats->q_errors[i] = rxStats->pktsRxError;
stats->ierrors += rxStats->pktsRxError;
- stats->imcasts += rxStats->mcastPktsRxOK;
stats->rx_nombuf += rxStats->pktsRxOutOfBuf;
}
}
diff --git a/examples/ethtool/ethtool-app/ethapp.c b/examples/ethtool/ethtool-app/ethapp.c
index 2ed4796..38e466c 100644
--- a/examples/ethtool/ethtool-app/ethapp.c
+++ b/examples/ethtool/ethtool-app/ethapp.c
@@ -535,7 +535,6 @@ static void pcmd_portstats_callback(__rte_unused void *ptr_params,
}
stat = rte_ethtool_net_get_stats64(params->port, &stat_info);
if (stat == 0) {
- /* Most of rte_eth_stats is deprecated.. */
printf("Port %i stats\n", params->port);
printf(" In: %" PRIu64 " (%" PRIu64 " bytes)\n"
" Out: %"PRIu64" (%"PRIu64 " bytes)\n"
diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile
index e810284..0bb5dc9 100644
--- a/lib/librte_ether/Makefile
+++ b/lib/librte_ether/Makefile
@@ -41,7 +41,7 @@ CFLAGS += $(WERROR_FLAGS)
EXPORT_MAP := rte_ether_version.map
-LIBABIVER := 3
+LIBABIVER := 4
SRCS-y += rte_ethdev.c
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 022733e..2757510 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -200,27 +200,9 @@ struct rte_eth_stats {
/**< Total of RX packets dropped by the HW,
* because there are no available mbufs (i.e. RX queues are full).
*/
- uint64_t ibadcrc __rte_deprecated;
- /**< Deprecated; Total of RX packets with CRC error. */
- uint64_t ibadlen __rte_deprecated;
- /**< Deprecated; Total of RX packets with bad length. */
uint64_t ierrors; /**< Total number of erroneous received packets. */
uint64_t oerrors; /**< Total number of failed transmitted packets. */
- uint64_t imcasts;
- /**< Deprecated; Total number of multicast received packets. */
uint64_t rx_nombuf; /**< Total number of RX mbuf allocation failures. */
- uint64_t fdirmatch __rte_deprecated;
- /**< Deprecated; Total number of RX packets matching a filter. */
- uint64_t fdirmiss __rte_deprecated;
- /**< Deprecated; Total number of RX packets not matching any filter. */
- uint64_t tx_pause_xon __rte_deprecated;
- /**< Deprecated; Total nb. of XON pause frame sent. */
- uint64_t rx_pause_xon __rte_deprecated;
- /**< Deprecated; Total nb. of XON pause frame received. */
- uint64_t tx_pause_xoff __rte_deprecated;
- /**< Deprecated; Total nb. of XOFF pause frame sent. */
- uint64_t rx_pause_xoff __rte_deprecated;
- /**< Deprecated; Total nb. of XOFF pause frame received. */
uint64_t q_ipackets[RTE_ETHDEV_QUEUE_STAT_CNTRS];
/**< Total number of queue RX packets. */
uint64_t q_opackets[RTE_ETHDEV_QUEUE_STAT_CNTRS];
@@ -231,14 +213,6 @@ struct rte_eth_stats {
/**< Total number of successfully transmitted queue bytes. */
uint64_t q_errors[RTE_ETHDEV_QUEUE_STAT_CNTRS];
/**< Total number of queue packets received that are dropped. */
- uint64_t ilbpackets;
- /**< Total number of good packets received from loopback,VF Only */
- uint64_t olbpackets;
- /**< Total number of good packets transmitted to loopback,VF Only */
- uint64_t ilbbytes;
- /**< Total number of good bytes received from loopback,VF Only */
- uint64_t olbbytes;
- /**< Total number of good bytes transmitted to loopback,VF Only */
};
/**
--
2.7.0
^ permalink raw reply [relevance 4%]
* Re: [dpdk-dev] perfomance of rte_lpm rule subsystem
2016-04-19 15:46 3% ` Stephen Hemminger
@ 2016-04-19 20:46 0% ` Vladimir Medvedkin
2016-05-02 19:38 0% ` Александр Киселев
1 sibling, 0 replies; 200+ results
From: Vladimir Medvedkin @ 2016-04-19 20:46 UTC (permalink / raw)
To: Stephen Hemminger
Cc: Александр
Киселев,
dev
Hi Alexander,
Why is next_hop 64 bits long?
2016-04-19 18:46 GMT+03:00 Stephen Hemminger <stephen@networkplumber.org>:
> On Tue, 19 Apr 2016 14:11:11 +0300
> Александр Киселев <kiselev99@gmail.com> wrote:
>
> > Hi.
> >
> > Doing some test with rte_lpm (adding/deleting bgp full table rules) I
> > noticed that
> > rule subsystem is very slow even considering that probably it was never
> > designed for using
> > in a data forwarding plane. So I want to propose some changes to the
> "rule"
> > subsystem.
> >
> > I reimplemented the rule part of the lib using rte_hash, and the performance of
> > adding/deleting routes has increased dramatically.
> > If increasing the speed of adding/deleting routes makes sense for anybody
> else,
> > I would like to discuss my patch.
> > The patch also include changes that make next_hop 64 bit, so please just
> > ignore them. The rule changes are in the following
> > functions only:
> >
> > rte_lpm2_create
> >
> > rule_find
> > rule_add
> > rule_delete
> > find_previous_rule
> > delete_depth_small
> > delete_depth_big
> >
> > rte_lpm2_add
> > rte_lpm2_delete
> > rte_lpm2_is_rule_present
> > rte_lpm2_delete_all
> >
>
> We forked LPM back several versions ago.
> I sent the patches to use BSD red-black tree for rules but the patches were
> ignored. mostly because it broke ABI.
>
--
Regards,
Vladimir
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] perfomance of rte_lpm rule subsystem
@ 2016-04-19 15:46 3% ` Stephen Hemminger
2016-04-19 20:46 0% ` Vladimir Medvedkin
2016-05-02 19:38 0% ` Александр Киселев
0 siblings, 2 replies; 200+ results
From: Stephen Hemminger @ 2016-04-19 15:46 UTC (permalink / raw)
To: Александр
Киселев
Cc: dev
On Tue, 19 Apr 2016 14:11:11 +0300
Александр Киселев <kiselev99@gmail.com> wrote:
> Hi.
>
> Doing some test with rte_lpm (adding/deleting bgp full table rules) I
> noticed that
> rule subsystem is very slow even considering that probably it was never
> designed for using
> in a data forwarding plane. So I want to propose some changes to the "rule"
> subsystem.
>
> I reimplemented the rule part of the lib using rte_hash, and the performance of
> adding/deleting routes has increased dramatically.
> If increasing the speed of adding/deleting routes makes sense for anybody else,
> I would like to discuss my patch.
> The patch also include changes that make next_hop 64 bit, so please just
> ignore them. The rule changes are in the following
> functions only:
>
> rte_lpm2_create
>
> rule_find
> rule_add
> rule_delete
> find_previous_rule
> delete_depth_small
> delete_depth_big
>
> rte_lpm2_add
> rte_lpm2_delete
> rte_lpm2_is_rule_present
> rte_lpm2_delete_all
>
We forked LPM back several versions ago.
I sent the patches to use a BSD red-black tree for rules, but the patches were
ignored, mostly because they broke the ABI.
^ permalink raw reply [relevance 3%]
* [dpdk-dev] [PATCH] ethdev: remove deprecated statistics
@ 2016-04-19 14:03 5% Thomas Monjalon
2016-04-20 9:47 4% ` [dpdk-dev] [PATCH v2] " Thomas Monjalon
0 siblings, 1 reply; 200+ results
From: Thomas Monjalon @ 2016-04-19 14:03 UTC (permalink / raw)
To: maryam.tahhan, harry.van.haaren; +Cc: dev
Some statistics were deprecated since release 2.1 (49f386542af4).
The last deprecated counter to be used was imcasts.
The new counters should be added to extended statistics.
Signed-off-by: Thomas Monjalon <thomas.monjalon@6wind.com>
---
doc/guides/rel_notes/deprecation.rst | 4 ----
doc/guides/rel_notes/release_16_07.rst | 6 +++++-
drivers/net/bonding/rte_eth_bond_pmd.c | 1 -
drivers/net/cxgbe/cxgbe_ethdev.c | 1 -
drivers/net/e1000/igb_ethdev.c | 1 -
drivers/net/ena/ena_ethdev.c | 2 --
drivers/net/ena/ena_ethdev.h | 1 -
drivers/net/enic/enic_main.c | 1 -
drivers/net/i40e/i40e_ethdev.c | 1 -
drivers/net/ixgbe/ixgbe_ethdev.c | 4 ----
drivers/net/nfp/nfp_net.c | 18 ------------------
drivers/net/vmxnet3/vmxnet3_ethdev.c | 1 -
lib/librte_ether/Makefile | 2 +-
lib/librte_ether/rte_ethdev.h | 18 ------------------
14 files changed, 6 insertions(+), 55 deletions(-)
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index c78cde7..fffe9c7 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -26,10 +26,6 @@ Deprecation Notices
rte_pci_device. The release 16.04 does not contain these ABI changes, but
release 16.07 will.
-* The following fields have been deprecated in rte_eth_stats:
- ibadcrc, ibadlen, imcasts, fdirmatch, fdirmiss,
- tx_pause_xon, rx_pause_xon, tx_pause_xoff, rx_pause_xoff
-
* The xstats API and rte_eth_xstats struct will be changed to allow retrieval
of values without any string copies or parsing.
No backwards compatibility is planned, as it would require code duplication
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 001888f..83c841b 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -86,6 +86,10 @@ This section should contain API changes. Sample format:
* Add a short 1-2 sentence description of the API change. Use fixed width
quotes for ``rte_function_names`` or ``rte_struct_names``. Use the past tense.
+* The following counters are removed from ``rte_eth_stats`` structure:
+ ibadcrc, ibadlen, imcasts, fdirmatch, fdirmiss,
+ tx_pause_xon, rx_pause_xon, tx_pause_xoff, rx_pause_xoff.
+
ABI Changes
-----------
@@ -107,7 +111,7 @@ The libraries prepended with a plus sign were incremented in this version.
.. code-block:: diff
- libethdev.so.3
+ + libethdev.so.4
librte_acl.so.2
librte_cfgfile.so.2
librte_cmdline.so.2
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index 54788cf..c897146 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -1836,7 +1836,6 @@ bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
stats->imissed += slave_stats.imissed;
stats->ierrors += slave_stats.ierrors;
stats->oerrors += slave_stats.oerrors;
- stats->imcasts += slave_stats.imcasts;
stats->rx_nombuf += slave_stats.rx_nombuf;
for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
diff --git a/drivers/net/cxgbe/cxgbe_ethdev.c b/drivers/net/cxgbe/cxgbe_ethdev.c
index bb134e5..04eddaf 100644
--- a/drivers/net/cxgbe/cxgbe_ethdev.c
+++ b/drivers/net/cxgbe/cxgbe_ethdev.c
@@ -656,7 +656,6 @@ static void cxgbe_dev_stats_get(struct rte_eth_dev *eth_dev,
/* RX Stats */
eth_stats->ipackets = ps.rx_frames;
eth_stats->ibytes = ps.rx_octets;
- eth_stats->imcasts = ps.rx_mcast_frames;
eth_stats->imissed = ps.rx_ovflow0 + ps.rx_ovflow1 +
ps.rx_ovflow2 + ps.rx_ovflow3 +
ps.rx_trunc0 + ps.rx_trunc1 +
diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index e0053fe..e7682da 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -1805,7 +1805,6 @@ eth_igbvf_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
rte_stats->ibytes = hw_stats->gorc;
rte_stats->opackets = hw_stats->gptc;
rte_stats->obytes = hw_stats->gotc;
- rte_stats->imcasts = hw_stats->mprc;
rte_stats->ilbpackets = hw_stats->gprlbc;
rte_stats->ilbbytes = hw_stats->gorlbc;
rte_stats->olbpackets = hw_stats->gptlbc;
diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 02af67a..e157587 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -605,7 +605,6 @@ static void ena_stats_restart(struct rte_eth_dev *dev)
rte_atomic64_init(&adapter->drv_stats->ierrors);
rte_atomic64_init(&adapter->drv_stats->oerrors);
- rte_atomic64_init(&adapter->drv_stats->imcasts);
rte_atomic64_init(&adapter->drv_stats->rx_nombuf);
}
@@ -643,7 +642,6 @@ static void ena_stats_get(struct rte_eth_dev *dev,
/* Driver related stats */
stats->ierrors = rte_atomic64_read(&adapter->drv_stats->ierrors);
stats->oerrors = rte_atomic64_read(&adapter->drv_stats->oerrors);
- stats->imcasts = rte_atomic64_read(&adapter->drv_stats->imcasts);
stats->rx_nombuf = rte_atomic64_read(&adapter->drv_stats->rx_nombuf);
}
diff --git a/drivers/net/ena/ena_ethdev.h b/drivers/net/ena/ena_ethdev.h
index ba6f01e..aca853c 100644
--- a/drivers/net/ena/ena_ethdev.h
+++ b/drivers/net/ena/ena_ethdev.h
@@ -121,7 +121,6 @@ enum ena_adapter_state {
struct ena_driver_stats {
rte_atomic64_t ierrors;
rte_atomic64_t oerrors;
- rte_atomic64_t imcasts;
rte_atomic64_t rx_nombuf;
};
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index e3da51d..60fe765 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -248,7 +248,6 @@ void enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
r_stats->imissed = stats->rx.rx_drop;
- r_stats->imcasts = stats->rx.rx_multicast_frames_ok;
r_stats->rx_nombuf = stats->rx.rx_no_bufs;
}
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index bc28d3c..d8b6bd7 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -2099,7 +2099,6 @@ i40e_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
stats->obytes = ns->eth.tx_bytes;
stats->oerrors = ns->eth.tx_errors +
pf->main_vsi->eth_stats.tx_errors;
- stats->imcasts = pf->main_vsi->eth_stats.rx_multicast;
/* Rx Errors */
stats->imissed = ns->eth.rx_discards +
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 3f1ebc1..eec607c 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -2852,8 +2852,6 @@ ixgbevf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
stats->ibytes = hw_stats->vfgorc;
stats->opackets = hw_stats->vfgptc;
stats->obytes = hw_stats->vfgotc;
- stats->imcasts = hw_stats->vfmprc;
- /* stats->imcasts should be removed as imcasts is deprecated */
}
static void
@@ -2870,8 +2868,6 @@ ixgbevf_dev_stats_reset(struct rte_eth_dev *dev)
hw_stats->vfgorc = 0;
hw_stats->vfgptc = 0;
hw_stats->vfgotc = 0;
- hw_stats->vfmprc = 0;
-
}
static void
diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
index bcf5fa9..bc0a3d8 100644
--- a/drivers/net/nfp/nfp_net.c
+++ b/drivers/net/nfp/nfp_net.c
@@ -902,11 +902,6 @@ nfp_net_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
nfp_dev_stats.obytes -= hw->eth_stats_base.obytes;
- nfp_dev_stats.imcasts =
- nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_MC_FRAMES);
-
- nfp_dev_stats.imcasts -= hw->eth_stats_base.imcasts;
-
/* reading general device stats */
nfp_dev_stats.ierrors =
nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_ERRORS);
@@ -918,12 +913,6 @@ nfp_net_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
nfp_dev_stats.oerrors -= hw->eth_stats_base.oerrors;
- /* Multicast frames received */
- nfp_dev_stats.imcasts =
- nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_MC_FRAMES);
-
- nfp_dev_stats.imcasts -= hw->eth_stats_base.imcasts;
-
/* RX ring mbuf allocation failures */
nfp_dev_stats.rx_nombuf = dev->data->rx_mbuf_alloc_failed;
@@ -985,9 +974,6 @@ nfp_net_stats_reset(struct rte_eth_dev *dev)
hw->eth_stats_base.obytes =
nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_OCTETS);
- hw->eth_stats_base.imcasts =
- nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_MC_FRAMES);
-
/* reading general device stats */
hw->eth_stats_base.ierrors =
nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_ERRORS);
@@ -995,10 +981,6 @@ nfp_net_stats_reset(struct rte_eth_dev *dev)
hw->eth_stats_base.oerrors =
nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_ERRORS);
- /* Multicast frames received */
- hw->eth_stats_base.imcasts =
- nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_MC_FRAMES);
-
/* RX ring mbuf allocation failures */
dev->data->rx_mbuf_alloc_failed = 0;
diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c
index bd7a2bb..29b469c 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
@@ -694,7 +694,6 @@ vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
stats->q_errors[i] = rxStats->pktsRxError;
stats->ierrors += rxStats->pktsRxError;
- stats->imcasts += rxStats->mcastPktsRxOK;
stats->rx_nombuf += rxStats->pktsRxOutOfBuf;
}
}
diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile
index e810284..0bb5dc9 100644
--- a/lib/librte_ether/Makefile
+++ b/lib/librte_ether/Makefile
@@ -41,7 +41,7 @@ CFLAGS += $(WERROR_FLAGS)
EXPORT_MAP := rte_ether_version.map
-LIBABIVER := 3
+LIBABIVER := 4
SRCS-y += rte_ethdev.c
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 022733e..d749980 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -200,27 +200,9 @@ struct rte_eth_stats {
/**< Total of RX packets dropped by the HW,
* because there are no available mbufs (i.e. RX queues are full).
*/
- uint64_t ibadcrc __rte_deprecated;
- /**< Deprecated; Total of RX packets with CRC error. */
- uint64_t ibadlen __rte_deprecated;
- /**< Deprecated; Total of RX packets with bad length. */
uint64_t ierrors; /**< Total number of erroneous received packets. */
uint64_t oerrors; /**< Total number of failed transmitted packets. */
- uint64_t imcasts;
- /**< Deprecated; Total number of multicast received packets. */
uint64_t rx_nombuf; /**< Total number of RX mbuf allocation failures. */
- uint64_t fdirmatch __rte_deprecated;
- /**< Deprecated; Total number of RX packets matching a filter. */
- uint64_t fdirmiss __rte_deprecated;
- /**< Deprecated; Total number of RX packets not matching any filter. */
- uint64_t tx_pause_xon __rte_deprecated;
- /**< Deprecated; Total nb. of XON pause frame sent. */
- uint64_t rx_pause_xon __rte_deprecated;
- /**< Deprecated; Total nb. of XON pause frame received. */
- uint64_t tx_pause_xoff __rte_deprecated;
- /**< Deprecated; Total nb. of XOFF pause frame sent. */
- uint64_t rx_pause_xoff __rte_deprecated;
- /**< Deprecated; Total nb. of XOFF pause frame received. */
uint64_t q_ipackets[RTE_ETHDEV_QUEUE_STAT_CNTRS];
/**< Total number of queue RX packets. */
uint64_t q_opackets[RTE_ETHDEV_QUEUE_STAT_CNTRS];
--
2.7.0
^ permalink raw reply [relevance 5%]
* [dpdk-dev] [PATCH] mk: do not enforce any specific ARM ABI
@ 2016-04-15 22:33 15% Jan Viktorin
2016-05-02 15:47 4% ` Thomas Monjalon
0 siblings, 1 reply; 200+ results
From: Jan Viktorin @ 2016-04-15 22:33 UTC (permalink / raw)
To: dev; +Cc: Jan Viktorin
The dpdk build system passes -mfloat-abi=softfp, which makes the build fail
when the selected ABI is EABIhf. The dpdk build system should not make
assumptions on the selected ARM ABI.
Signed-off-by: Jan Viktorin <viktorin@rehivetech.com>
Reported-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
mk/machine/armv7a/rte.vars.mk | 2 --
1 file changed, 2 deletions(-)
diff --git a/mk/machine/armv7a/rte.vars.mk b/mk/machine/armv7a/rte.vars.mk
index abdb15e..36fa3de 100644
--- a/mk/machine/armv7a/rte.vars.mk
+++ b/mk/machine/armv7a/rte.vars.mk
@@ -54,8 +54,6 @@
# CPU_LDFLAGS =
# CPU_ASFLAGS =
-CPU_CFLAGS += -mfloat-abi=softfp
-
MACHINE_CFLAGS += -march=armv7-a
ifdef CONFIG_RTE_ARCH_ARM_TUNE
--
2.8.0
^ permalink raw reply [relevance 15%]
* [dpdk-dev] [RFC PATCH v1 0/3] Remove string operations from xstats
@ 2016-04-15 14:44 3% Remy Horton
2016-04-20 16:03 0% ` David Harton (dharton)
2016-04-29 12:52 0% ` David Harton (dharton)
0 siblings, 2 replies; 200+ results
From: Remy Horton @ 2016-04-15 14:44 UTC (permalink / raw)
To: dev
The current extended ethernet statistics fetching involves doing several
string operations, which causes performance issues if there are lots of
statistics and/or network interfaces. This RFC patchset changes the API
for xstats to use integer identifiers instead of strings and implements
this new API for the ixgbe driver. Other drivers to follow.
--
Since this will involve API & ABI breakage as previously advertised,
there are several design assumptions that need consideration:
*) id-name & id-value pairs for both lookup and query
Permits out-of-order and non-contiguous returning of names/ids/values,
even though expected implementations would in practice return items in
sorted order by id. Is this sufficient/desirable future proofing? The idea
is to allow the possibility of drivers returning partial statistics.
*) Bulk name-id mapping lookup only
At the moment individual lookup is not supported, as this would impose
extra overheads on drivers. The assumption is that any end user would
fetch all this data once on startup and then cache the mappings.
*) Replacement or additional API
This patch replaces the current xstats API, but there is no inherent
reason beyond maintainability why this functionality could not be an
addition rather than a replacement. What is the consensus on this?
Comments welcome.
Remy Horton (3):
rte: change xstats to use integer keys
drivers/net/ixgbe: change xstats to use integer keys
examples/ethtool: add xstats display command
drivers/net/ixgbe/ixgbe_ethdev.c | 87 +++++++++++++++++++++++++++++++----
examples/ethtool/ethtool-app/ethapp.c | 57 +++++++++++++++++++++++
lib/librte_ether/rte_ethdev.c | 87 +++++++++++++++++++++++++++++++----
lib/librte_ether/rte_ethdev.h | 38 +++++++++++++++
4 files changed, 252 insertions(+), 17 deletions(-)
--
2.5.5
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [RFC 2/2] librte_ether: add new fields to rte_eth_dev_info struct
2016-04-15 10:36 3% ` Thomas Monjalon
@ 2016-04-15 11:32 0% ` Ananyev, Konstantin
0 siblings, 0 replies; 200+ results
From: Ananyev, Konstantin @ 2016-04-15 11:32 UTC (permalink / raw)
To: Thomas Monjalon, Pattan, Reshma; +Cc: dev
Hi everyone,
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> Sent: Friday, April 15, 2016 11:36 AM
> To: Pattan, Reshma
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC 2/2] librte_ether: add new fields to rte_eth_dev_info struct
>
> 2016-04-14 10:44, Reshma Pattan:
> > --- a/lib/librte_ether/rte_ethdev.h
> > +++ b/lib/librte_ether/rte_ethdev.h
> > @@ -908,6 +908,9 @@ struct rte_eth_dev_info {
> > struct rte_eth_desc_lim rx_desc_lim; /**< RX descriptors limits */
> > struct rte_eth_desc_lim tx_desc_lim; /**< TX descriptors limits */
> > uint32_t speed_capa; /**< Supported speeds bitmap (ETH_LINK_SPEED_). */
> > + /** number of queues configured by software*/
> > + uint16_t nb_rx_queues; /**< Number of RX queues. */
> > + uint16_t nb_tx_queues; /**< Number of TX queues. */
> > };
>
> I think the ethdev design is strange for these structures.
> struct rte_eth_dev is internal to be used inside the ethdev API
> or by the drivers.
> It contains struct rte_eth_dev_data which can be of interest for
> the application, except the dev_private part (which could be
> directly in struct rte_eth_dev).
>
> So the global question is: how to share the device data with the
> application?
> Instead of giving a pointer or a copy of struct rte_eth_dev_data,
> we have some different accessors:
> - rte_eth_dev_info_get() with a specific struct rte_eth_dev_info
> which gathers a lot of info, not only from struct rte_eth_dev_data
> - rte_eth_macaddr_get()
> - rte_eth_dev_socket_id()
> - rte_eth_link_get() which is more than an accessor
>
> I think having some specialized accessors is good.
> But rte_eth_dev_info_get() looks like a big request
> without a precise goal, and it is going to break ABI really often.
> There is some queue information, some (not so precise)
> offload capabilities, some steering (RSS/VMDq) information,
> the default configuration of some Intel NIC thresholds,
> the speed capabilities, etc.
>
> Shouldn't we try to streamline this API?
I think in general it is a good idea to split dev_info into some smaller sub-pieces.
But introducing a new API just for these 2 fields seems like overkill to me.
My vote would be to allow nb_rx/tx_queues into dev_info.
If we decide to split dev_info, I think it needs to be a subject for a separate
patch/discussion.
Konstantin
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [RFC 2/2] librte_ether: add new fields to rte_eth_dev_info struct
@ 2016-04-15 10:36 3% ` Thomas Monjalon
2016-04-15 11:32 0% ` Ananyev, Konstantin
0 siblings, 1 reply; 200+ results
From: Thomas Monjalon @ 2016-04-15 10:36 UTC (permalink / raw)
To: Reshma Pattan; +Cc: dev
2016-04-14 10:44, Reshma Pattan:
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -908,6 +908,9 @@ struct rte_eth_dev_info {
> struct rte_eth_desc_lim rx_desc_lim; /**< RX descriptors limits */
> struct rte_eth_desc_lim tx_desc_lim; /**< TX descriptors limits */
> uint32_t speed_capa; /**< Supported speeds bitmap (ETH_LINK_SPEED_). */
> + /** number of queues configured by software*/
> + uint16_t nb_rx_queues; /**< Number of RX queues. */
> + uint16_t nb_tx_queues; /**< Number of TX queues. */
> };
I think the ethdev design is strange for these structures.
struct rte_eth_dev is internal to be used inside the ethdev API
or by the drivers.
It contains struct rte_eth_dev_data which can be of interest for
the application, except the dev_private part (which could be
directly in struct rte_eth_dev).
So the global question is: how to share the device data with the
application?
Instead of giving a pointer or a copy of struct rte_eth_dev_data,
we have some different accessors:
- rte_eth_dev_info_get() with a specific struct rte_eth_dev_info
which gathers a lot of info, not only from struct rte_eth_dev_data
- rte_eth_macaddr_get()
- rte_eth_dev_socket_id()
- rte_eth_link_get() which is more than an accessor
I think having some specialized accessors is good.
But rte_eth_dev_info_get() looks like a big request
without a precise goal, and it is going to break ABI really often.
There is some queue information, some (not so precise)
offload capabilities, some steering (RSS/VMDq) information,
the default configuration of some Intel NIC thresholds,
the speed capabilities, etc.
Shouldn't we try to streamline this API?
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH] port: bump ABI for pcap file support
2016-04-14 18:33 25% [dpdk-dev] [PATCH] port: bump ABI for pcap file support Thomas Monjalon
@ 2016-04-15 10:32 4% ` Dumitrescu, Cristian
2016-04-20 9:55 4% ` Thomas Monjalon
0 siblings, 1 reply; 200+ results
From: Dumitrescu, Cristian @ 2016-04-15 10:32 UTC (permalink / raw)
To: Thomas Monjalon, Zhang, Roy Fan; +Cc: dev, Singh, Jasvinder
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Thursday, April 14, 2016 7:34 PM
> To: Zhang, Roy Fan <roy.fan.zhang@intel.com>
> Cc: dev@dpdk.org; Dumitrescu, Cristian <cristian.dumitrescu@intel.com>;
> Singh, Jasvinder <jasvinder.singh@intel.com>
> Subject: [PATCH] port: bump ABI for pcap file support
>
> Support of PCAP file has been added to rte_port in release 16.04
> as NEXT_ABI. It is in the standard ABI of the release 16.07.
>
> Signed-off-by: Thomas Monjalon <thomas.monjalon@6wind.com>
> ---
> doc/guides/rel_notes/deprecation.rst | 5 -----
> doc/guides/rel_notes/release_16_07.rst | 5 ++++-
> examples/ip_pipeline/init.c | 4 ----
> lib/librte_port/Makefile | 2 +-
> lib/librte_port/rte_port_source_sink.c | 14 --------------
> lib/librte_port/rte_port_source_sink.h | 3 ---
> 6 files changed, 5 insertions(+), 28 deletions(-)
>
Acked-by: Cristian Dumitrescu <Cristian.Dumitrescu@intel.com>
^ permalink raw reply [relevance 4%]
* Re: [dpdk-dev] [RFC 1/2] doc: announce ABI change for rte_eth_dev_info structure
2016-04-14 9:44 9% ` [dpdk-dev] [RFC 1/2] doc: announce ABI change for " Reshma Pattan
2016-04-15 9:42 4% ` Mcnamara, John
@ 2016-04-15 10:02 8% ` Thomas Monjalon
1 sibling, 0 replies; 200+ results
From: Thomas Monjalon @ 2016-04-15 10:02 UTC (permalink / raw)
To: Reshma Pattan; +Cc: dev
2016-04-14 10:44, Reshma Pattan:
> New fields nb_rx_queues and nb_tx_queues will be added to
> rte_eth_dev_info structure.
> Changes to API rte_eth_dev_info_get() will be done to update
> these new fields to rte_eth_dev_info object.
>
> Signed-off-by:reshma Pattan<reshma.pattan@intel.com>
In general the Signed-off lines are the same as the From: field.
Here it would be:
Signed-off-by: Reshma Pattan <reshma.pattan@intel.com>
(note the spaces and the uppercase)
> --- a/doc/guides/rel_notes/deprecation.rst
> +++ b/doc/guides/rel_notes/deprecation.rst
> @@ -90,3 +90,9 @@ Deprecation Notices
> a handle, like the way kernel exposes an fd to user for locating a
> specific file, and to keep all major structures internally, so that
> we are likely to be free from ABI violations in future.
> +
> +* A librte_ether public structure ``rte_eth_dev_info`` will be changed in 16.07.
> + The proposed change will add new parameters ``nb_rx_queues``, ``nb_tx_queues``
> + to the structure. These are the number of queues configured by software.
> + Modification to definition of ``rte_eth_dev_info_get()`` will be done
> + to update new parameters to ``rte_eth_dev_info`` object.
It is too late for this announcement as it won't appear in the doc downloaded for
version 16.04. So it is obviously rejected.
The question here is: are you allowed to do a small ABI change given that
the ABI will be broken in this version?
I would say there can be some exceptional tolerance.
I have no strong opinion myself but maybe others will have one.
By the way, I have some comments about the patch.
^ permalink raw reply [relevance 8%]
* Re: [dpdk-dev] [RFC 1/2] doc: announce ABI change for rte_eth_dev_info structure
2016-04-14 9:44 9% ` [dpdk-dev] [RFC 1/2] doc: announce ABI change for " Reshma Pattan
@ 2016-04-15 9:42 4% ` Mcnamara, John
2016-04-15 10:02 8% ` Thomas Monjalon
1 sibling, 0 replies; 200+ results
From: Mcnamara, John @ 2016-04-15 9:42 UTC (permalink / raw)
To: Pattan, Reshma, dev
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Reshma Pattan
> Sent: Thursday, April 14, 2016 10:45 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [RFC 1/2] doc: announce ABI change for
> rte_eth_dev_info structure
>
> New fields nb_rx_queues and nb_tx_queues will be added to rte_eth_dev_info
> structure.
> Changes to API rte_eth_dev_info_get() will be done to update these new
> fields to rte_eth_dev_info object.
>
> Signed-off-by:reshma Pattan<reshma.pattan@intel.com>
Acked-by: John McNamara <john.mcnamara@intel.com>
^ permalink raw reply [relevance 4%]
* [dpdk-dev] [PATCH] pci: remove deprecated specific config
@ 2016-04-14 21:33 4% Thomas Monjalon
0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2016-04-14 21:33 UTC (permalink / raw)
To: helin.zhang; +Cc: dev
The driver i40e was using a specific PCI config before the release 16.04.
From 16.04, it is always enabled in i40e (commit 56465cfaf).
The API has been deprecated in the commit 68f77593823cab.
The igb_uio implementation has been deprecated in commit b7cf8e155.
The config helper - through igb_uio sysfs entries - is now removed.
Signed-off-by: Thomas Monjalon <thomas.monjalon@6wind.com>
---
config/common_base | 8 -----
doc/guides/linux_gsg/enable_func.rst | 15 ---------
doc/guides/rel_notes/deprecation.rst | 7 -----
lib/librte_eal/common/include/rte_pci.h | 14 ---------
lib/librte_eal/linuxapp/eal/eal_pci.c | 12 -------
lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 52 -------------------------------
6 files changed, 108 deletions(-)
diff --git a/config/common_base b/config/common_base
index 0124e86..1a54e4c 100644
--- a/config/common_base
+++ b/config/common_base
@@ -101,14 +101,6 @@ CONFIG_RTE_MALLOC_DEBUG=n
CONFIG_RTE_EAL_PMD_PATH=""
#
-# Special configurations in PCI Config Space for high performance
-# They are all deprecated, and will be removed later.
-#
-CONFIG_RTE_PCI_CONFIG=n
-CONFIG_RTE_PCI_EXTENDED_TAG=""
-CONFIG_RTE_PCI_MAX_READ_REQUEST_SIZE=0
-
-#
# Compile Environment Abstraction Layer to support Vmware TSC map
#
CONFIG_RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT=y
diff --git a/doc/guides/linux_gsg/enable_func.rst b/doc/guides/linux_gsg/enable_func.rst
index 076770f..ec0e04d 100644
--- a/doc/guides/linux_gsg/enable_func.rst
+++ b/doc/guides/linux_gsg/enable_func.rst
@@ -186,21 +186,6 @@ Check with the local Intel's Network Division application engineers for firmware
The base driver to support firmware version of FVL3E will be integrated in the next
DPDK release, so currently the validated firmware version is 4.2.6.
-Enabling Extended Tag
-~~~~~~~~~~~~~~~~~~~~~
-
-PCI configuration of ``extended_tag`` has big impact on small packet size
-performance of 40G ports. Enabling ``extended_tag`` can help 40G port to
-achieve the best performance, especially for small packet size.
-
-* Disabling/enabling ``extended_tag`` can be done in some BIOS implementations.
-
-* If BIOS does not enable it, and does not support changing it, tools
- (e.g. ``setpci`` on Linux) can be used to enable or disable ``extended_tag``.
-
-* From release 16.04, ``extended_tag`` is enabled by default during port
- initialization, users don't need to care about that anymore.
-
Use 16 Bytes RX Descriptor Size
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index a3fdbb1..c78cde7 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -20,13 +20,6 @@ Deprecation Notices
do not need to care about the kind of devices that are being used, making it
easier to add new buses later.
-* The EAL function pci_config_space_set is deprecated in release 16.04
- and will be removed from 16.07.
- Macros CONFIG_RTE_PCI_CONFIG, CONFIG_RTE_PCI_EXTENDED_TAG and
- CONFIG_RTE_PCI_MAX_READ_REQUEST_SIZE will be removed.
- The /sys entries extended_tag and max_read_request_size created by igb_uio
- will be removed.
-
* ABI changes are planned for struct rte_pci_id, i.e., add new field ``class``.
This new added ``class`` field can be used to probe pci device by class
related info. This change should impact size of struct rte_pci_id and struct
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index e692094..9f2301d 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -577,20 +577,6 @@ void rte_eal_pci_ioport_read(struct rte_pci_ioport *p,
void rte_eal_pci_ioport_write(struct rte_pci_ioport *p,
const void *data, size_t len, off_t offset);
-#ifdef RTE_PCI_CONFIG
-#include <rte_common.h>
-/**
- * Set special config space registers for performance purpose.
- * It is deprecated, as all configurations have been moved into
- * each PMDs respectively.
- *
- * @param dev
- * A pointer to a rte_pci_device structure describing the device
- * to use
- */
-void pci_config_space_set(struct rte_pci_device *dev) __rte_deprecated;
-#endif /* RTE_PCI_CONFIG */
-
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index dbf12a8..bdc08a0 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -481,18 +481,6 @@ error:
return -1;
}
-#ifdef RTE_PCI_CONFIG
-/*
- * It is deprecated, all its configurations have been moved into
- * each PMD respectively.
- */
-void
-pci_config_space_set(__rte_unused struct rte_pci_device *dev)
-{
- RTE_LOG(DEBUG, EAL, "Nothing here, as it is deprecated\n");
-}
-#endif
-
/* Read PCI config space. */
int rte_eal_pci_read_config(const struct rte_pci_device *device,
void *buf, size_t len, off_t offset)
diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
index 72b2692..45a5720 100644
--- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
+++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
@@ -81,62 +81,10 @@ store_max_vfs(struct device *dev, struct device_attribute *attr,
return err ? err : count;
}
-#ifdef RTE_PCI_CONFIG
-static ssize_t
-show_extended_tag(struct device *dev, struct device_attribute *attr, char *buf)
-{
- dev_info(dev, "Deprecated\n");
-
- return 0;
-}
-
-static ssize_t
-store_extended_tag(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t count)
-{
- dev_info(dev, "Deprecated\n");
-
- return 0;
-}
-
-static ssize_t
-show_max_read_request_size(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- dev_info(dev, "Deprecated\n");
-
- return 0;
-}
-
-static ssize_t
-store_max_read_request_size(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t count)
-{
- dev_info(dev, "Deprecated\n");
-
- return 0;
-}
-#endif
-
static DEVICE_ATTR(max_vfs, S_IRUGO | S_IWUSR, show_max_vfs, store_max_vfs);
-#ifdef RTE_PCI_CONFIG
-static DEVICE_ATTR(extended_tag, S_IRUGO | S_IWUSR, show_extended_tag,
- store_extended_tag);
-static DEVICE_ATTR(max_read_request_size, S_IRUGO | S_IWUSR,
- show_max_read_request_size, store_max_read_request_size);
-#endif
static struct attribute *dev_attrs[] = {
&dev_attr_max_vfs.attr,
-#ifdef RTE_PCI_CONFIG
- &dev_attr_extended_tag.attr,
- &dev_attr_max_read_request_size.attr,
-#endif
NULL,
};
--
2.7.0
^ permalink raw reply [relevance 4%]
* [dpdk-dev] [PATCH] port: bump ABI for pcap file support
@ 2016-04-14 18:33 25% Thomas Monjalon
2016-04-15 10:32 4% ` Dumitrescu, Cristian
0 siblings, 1 reply; 200+ results
From: Thomas Monjalon @ 2016-04-14 18:33 UTC (permalink / raw)
To: roy.fan.zhang; +Cc: dev, cristian.dumitrescu, jasvinder.singh
Support of PCAP file has been added to rte_port in release 16.04
as NEXT_ABI. It is in the standard ABI of the release 16.07.
Signed-off-by: Thomas Monjalon <thomas.monjalon@6wind.com>
---
doc/guides/rel_notes/deprecation.rst | 5 -----
doc/guides/rel_notes/release_16_07.rst | 5 ++++-
examples/ip_pipeline/init.c | 4 ----
lib/librte_port/Makefile | 2 +-
lib/librte_port/rte_port_source_sink.c | 14 --------------
lib/librte_port/rte_port_source_sink.h | 3 ---
6 files changed, 5 insertions(+), 28 deletions(-)
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index 327fc2b..a3fdbb1 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -79,11 +79,6 @@ Deprecation Notices
modification of the API of rte_mempool_obj_iter(), implying a breakage
of the ABI.
-* ABI changes are planned for struct rte_port_source_params in order to
- support PCAP file reading feature. The release 16.04 contains this ABI
- change wrapped by RTE_NEXT_ABI macro. Release 16.07 will contain this
- change, and no backwards compatibility is planned.
-
* A librte_vhost public structures refactor is planned for DPDK 16.07
that requires both ABI and API change.
The proposed refactor would expose DPDK vhost dev to applications as
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 701e827..001888f 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -94,6 +94,9 @@ ABI Changes
the previous releases and made in this release. Use fixed width quotes for
``rte_function_names`` or ``rte_struct_names``. Use the past tense.
+* The ``rte_port_source_params`` structure has new fields to support PCAP file.
+ It was already in release 16.04 with ``RTE_NEXT_ABI`` flag.
+
Shared Library Versions
-----------------------
@@ -123,7 +126,7 @@ The libraries prepended with a plus sign were incremented in this version.
librte_pipeline.so.3
librte_pmd_bond.so.1
librte_pmd_ring.so.2
- librte_port.so.2
+ + librte_port.so.3
librte_power.so.1
librte_reorder.so.1
librte_ring.so.1
diff --git a/examples/ip_pipeline/init.c b/examples/ip_pipeline/init.c
index 83422e8..02351f6 100644
--- a/examples/ip_pipeline/init.c
+++ b/examples/ip_pipeline/init.c
@@ -1221,8 +1221,6 @@ static void app_pipeline_params_get(struct app_params *app,
out->type = PIPELINE_PORT_IN_SOURCE;
out->params.source.mempool = app->mempool[mempool_id];
out->burst_size = app->source_params[in->id].burst;
-
-#ifdef RTE_NEXT_ABI
if (app->source_params[in->id].file_name
!= NULL) {
out->params.source.file_name = strdup(
@@ -1237,8 +1235,6 @@ static void app_pipeline_params_get(struct app_params *app,
app->source_params[in->id].
n_bytes_per_pkt;
}
-#endif
-
break;
default:
break;
diff --git a/lib/librte_port/Makefile b/lib/librte_port/Makefile
index 2c0ccbe..d4de5af 100644
--- a/lib/librte_port/Makefile
+++ b/lib/librte_port/Makefile
@@ -44,7 +44,7 @@ CFLAGS += $(WERROR_FLAGS)
EXPORT_MAP := rte_port_version.map
-LIBABIVER := 2
+LIBABIVER := 3
#
# all source are stored in SRCS-y
diff --git a/lib/librte_port/rte_port_source_sink.c b/lib/librte_port/rte_port_source_sink.c
index 056c975..4cad710 100644
--- a/lib/librte_port/rte_port_source_sink.c
+++ b/lib/librte_port/rte_port_source_sink.c
@@ -38,17 +38,11 @@
#include <rte_malloc.h>
#include <rte_memcpy.h>
-#ifdef RTE_NEXT_ABI
-
#ifdef RTE_PORT_PCAP
#include <rte_ether.h>
#include <pcap.h>
#endif
-#else
-#undef RTE_PORT_PCAP
-#endif
-
#include "rte_port_source_sink.h"
/*
@@ -81,8 +75,6 @@ struct rte_port_source {
uint32_t pkt_index;
};
-#ifdef RTE_NEXT_ABI
-
#ifdef RTE_PORT_PCAP
static int
@@ -232,8 +224,6 @@ error_exit:
#endif /* RTE_PORT_PCAP */
-#endif /* RTE_NEXT_ABI */
-
static void *
rte_port_source_create(void *params, int socket_id)
{
@@ -258,8 +248,6 @@ rte_port_source_create(void *params, int socket_id)
/* Initialization */
port->mempool = (struct rte_mempool *) p->mempool;
-#ifdef RTE_NEXT_ABI
-
if (p->file_name) {
int status = PCAP_SOURCE_LOAD(port, p->file_name,
p->n_bytes_per_pkt, socket_id);
@@ -270,8 +258,6 @@ rte_port_source_create(void *params, int socket_id)
}
}
-#endif
-
return port;
}
diff --git a/lib/librte_port/rte_port_source_sink.h b/lib/librte_port/rte_port_source_sink.h
index 917abe4..4db8a8a 100644
--- a/lib/librte_port/rte_port_source_sink.h
+++ b/lib/librte_port/rte_port_source_sink.h
@@ -53,7 +53,6 @@ extern "C" {
struct rte_port_source_params {
/** Pre-initialized buffer pool */
struct rte_mempool *mempool;
-#ifdef RTE_NEXT_ABI
/** The full path of the pcap file to read packets from */
char *file_name;
@@ -62,8 +61,6 @@ struct rte_port_source_params {
* if it is bigger than packet size, the generated packets
* will contain the whole packet */
uint32_t n_bytes_per_pkt;
-
-#endif
};
/** source port operations */
--
2.7.0
^ permalink raw reply [relevance 25%]
* Re: [dpdk-dev] [PATCH 00/36] mempool: rework memory allocation
2016-04-14 14:01 0% ` Olivier MATZ
@ 2016-04-14 14:03 0% ` Wiles, Keith
0 siblings, 0 replies; 200+ results
From: Wiles, Keith @ 2016-04-14 14:03 UTC (permalink / raw)
To: Olivier MATZ, dev; +Cc: Richardson, Bruce, stephen
>
>
>On 04/14/2016 03:50 PM, Wiles, Keith wrote:
>>> This series is a rework of mempool. For those who don't want to read
>>> all the cover letter, here is a summary:
>>>
>>> - it is not possible to allocate large mempools if there is not enough
>>> contiguous memory, this series solves this issue
>>> - introduce new APIs with less arguments: "create, populate, obj_init"
>>> - allow to free a mempool
>>> - split code in smaller functions, will ease the introduction of ext_handler
>>> - remove test-pmd anonymous mempool creation
>>> - remove most of dom0-specific mempool code
>>> - opens the door for a eal_memory rework: we probably don't need large
>>> contiguous memory area anymore, working with pages would work.
>>>
>>> This breaks the ABI as it was indicated in the deprecation for 16.04.
>>> The API stays almost the same, no modification is needed in examples app
>>> or in test-pmd. Only kni and mellanox drivers are slightly modified.
>>>
>>> This patch applies on top of 16.04 + v5 of Keith's patch:
>>> "mempool: reduce rte_mempool structure size"
>>
>> I have not digested this complete patch yet, but this one popped out at me as the External Memory Manager support is waiting in the wings for the 16.07 release. If this causes the EMM patch to be rewritten or updated that seems like a problem to me. Does this patch add the External Memory Manager support?
>> http://thread.gmane.org/gmane.comp.networking.dpdk.devel/32015/focus=35107
>
>I've reworked the series you are referring to, and rebased it on top
>of this series. Please see:
>http://dpdk.org/ml/archives/dev/2016-April/037509.html
Thanks I just saw that update :-)
>
>Regards,
>Olivier
>
Regards,
Keith
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH 00/36] mempool: rework memory allocation
2016-04-14 13:50 0% ` Wiles, Keith
@ 2016-04-14 14:01 0% ` Olivier MATZ
2016-04-14 14:03 0% ` Wiles, Keith
0 siblings, 1 reply; 200+ results
From: Olivier MATZ @ 2016-04-14 14:01 UTC (permalink / raw)
To: Wiles, Keith, dev; +Cc: Richardson, Bruce, stephen
On 04/14/2016 03:50 PM, Wiles, Keith wrote:
>> This series is a rework of mempool. For those who don't want to read
>> all the cover letter, here is a summary:
>>
>> - it is not possible to allocate large mempools if there is not enough
>> contiguous memory, this series solves this issue
>> - introduce new APIs with less arguments: "create, populate, obj_init"
>> - allow to free a mempool
>> - split code in smaller functions, will ease the introduction of ext_handler
>> - remove test-pmd anonymous mempool creation
>> - remove most of dom0-specific mempool code
>> - opens the door for a eal_memory rework: we probably don't need large
>> contiguous memory area anymore, working with pages would work.
>>
>> This breaks the ABI as it was indicated in the deprecation for 16.04.
>> The API stays almost the same, no modification is needed in examples app
>> or in test-pmd. Only kni and mellanox drivers are slightly modified.
>>
>> This patch applies on top of 16.04 + v5 of Keith's patch:
>> "mempool: reduce rte_mempool structure size"
>
> I have not digested this complete patch yet, but this one popped out at me as the External Memory Manager support is waiting in the wings for the 16.07 release. If this causes the EMM patch to be rewritten or updated that seems like a problem to me. Does this patch add the External Memory Manager support?
> http://thread.gmane.org/gmane.comp.networking.dpdk.devel/32015/focus=35107
I've reworked the series you are referring to, and rebased it on top
of this series. Please see:
http://dpdk.org/ml/archives/dev/2016-April/037509.html
Regards,
Olivier
^ permalink raw reply [relevance 0%]
* [dpdk-dev] [PATCH v4 0/3] external mempool manager
@ 2016-04-14 13:57 2% ` Olivier Matz
2016-05-19 13:44 2% ` [dpdk-dev] mempool: " David Hunt
0 siblings, 1 reply; 200+ results
From: Olivier Matz @ 2016-04-14 13:57 UTC (permalink / raw)
To: dev, david.hunt; +Cc: yuanhan.liu, pmatilai
Here's a reworked version of the patch initially sent by David Hunt.
The main change is that it is rebased on top of the "mempool: rework
memory allocation" series [1], which greatly simplifies the first patch.
[1] http://dpdk.org/ml/archives/dev/2016-April/037464.html
v4 changes:
* remove the rte_mempool_create_ext() function. To change the handler, the
user has to do the following:
- mp = rte_mempool_create_empty()
- rte_mempool_set_handler(mp, "my_handler")
- rte_mempool_populate_default(mp)
This avoids adding another function with more than 10 arguments and duplicating
the doxygen comments
* change the api of rte_mempool_alloc_t: only the mempool pointer is required
as all information is available in it
* change the api of rte_mempool_free_t: remove return value
* move inline wrapper functions from the .c to the .h (else they won't be
inlined). This implies having a single header file (rte_mempool.h); otherwise it
would generate cross-dependency issues.
* remove now unused MEMPOOL_F_INT_HANDLER (note: it was misused anyway due
to the use of && instead of &)
* fix build in debug mode (__MEMPOOL_STAT_ADD(mp, put_pool, n) remaining)
* fix build with shared libraries (global handler has to be declared in
the .map file)
* rationalize #include order
* remove unused function rte_mempool_get_handler_name()
* rename some structures, fields, functions
* remove the static in front of rte_tailq_elem rte_mempool_tailq (comment
from Yuanhan)
* test the ext mempool handler in the same file as the standard mempool tests,
to avoid duplicating the code
* rework the custom handler in mempool_test
* rework a bit the patch selecting default mbuf pool handler
* fix some doxygen comments
Things that should still be discussed:
- Panu pointed out that having a compile-time configuration
option for selecting the default mbuf handler is not a good idea.
I mostly agree, except in one case (and that's why I kept this patch):
if a specific architecture has its own way to provide an efficient
pool handler for mbufs, it could be the proper place to have this
option. But as far as I know, there is no such architecture today
in dpdk.
- The other question I would like to raise is about the use cases.
The cover letter below could be a bit more explicit about what this
feature will be used for.
This is the initial unmodified cover letter from David Hunt:
Hi list.
Here's the v3 version patch for an external mempool manager
v3 changes:
* simplified the file layout, renamed to rte_mempool_handler.[hc]
* moved the default handlers into rte_mempool_default.c
* moved the example handler out into app/test/test_ext_mempool.c
* removed is_mc/is_mp change, slight perf degradation on sp cached operation
* removed stack handler, may re-introduce at a later date
* Changes out of code reviews
v2 changes:
* There was a lot of duplicate code between rte_mempool_xmem_create and
rte_mempool_create_ext. This has now been refactored and is now
hopefully cleaner.
* The RTE_NEXT_ABI define is now used to allow building of the library
in a format that is compatible with binaries built against previous
versions of DPDK.
* Changes out of code reviews. Hopefully I've got most of them included.
The External Mempool Manager is an extension to the mempool API that allows
users to add and use an external mempool manager, which allows external memory
subsystems such as external hardware memory management systems and software
based memory allocators to be used with DPDK.
The existing API to the internal DPDK mempool manager will remain unchanged
and will be backward compatible. However, there will be an ABI breakage, as
the mempool struct is changing. These changes are all contained within
RTE_NEXT_ABI defs, and the current or next code can be changed with
the CONFIG_RTE_NEXT_ABI config setting
There are two aspects to external mempool manager.
1. Adding the code for your new mempool handler. This is achieved by adding a
new mempool handler source file into the librte_mempool library, and
using the REGISTER_MEMPOOL_HANDLER macro.
2. Using the new API to call rte_mempool_create_ext to create a new mempool
using the name parameter to identify which handler to use.
New API calls added
1. A new mempool 'create' function which accepts mempool handler name.
2. A new mempool 'rte_get_mempool_handler' function which accepts mempool
handler name, and returns the index to the relevant set of callbacks for
that mempool handler
Several external mempool managers may be used in the same application. A new
mempool can then be created by using the new 'create' function, providing the
mempool handler name to point the mempool to the relevant mempool manager
callback structure.
The old 'create' function can still be called by legacy programs, and will
internally work out the mempool handle based on the flags provided (single
producer, single consumer, etc). By default handles are created internally to
implement the built-in DPDK mempool manager and mempool types.
The external mempool manager needs to provide the following functions.
1. alloc - allocates the mempool memory, and adds each object onto a ring
2. put - puts an object back into the mempool once an application has
finished with it
3. get - gets an object from the mempool for use by the application
4. get_count - gets the number of available objects in the mempool
5. free - frees the mempool memory
Every time a get/put/get_count is called from the application/PMD, the
callback for that mempool is called. These functions are in the fastpath,
and any unoptimised handlers may limit performance.
The new APIs are as follows:
1. rte_mempool_create_ext
struct rte_mempool *
rte_mempool_create_ext(const char * name, unsigned n,
unsigned cache_size, unsigned private_data_size,
int socket_id, unsigned flags,
const char * handler_name);
2. rte_mempool_get_handler_name
char *
rte_mempool_get_handler_name(struct rte_mempool *mp);
Please see rte_mempool.h for further information on the parameters.
The important thing to note is that the mempool handler is passed by name
to rte_mempool_create_ext, and that in turn calls rte_get_mempool_handler to
get the handler index, which is stored in the rte_mempool structure. This
allows multiple processes to use the same mempool, as the function pointers
are accessed via handler index.
The mempool handler structure contains callbacks to the implementation of
the handler, and is set up for registration as follows:
static struct rte_mempool_handler handler_sp_mc = {
.name = "ring_sp_mc",
.alloc = rte_mempool_common_ring_alloc,
.put = common_ring_sp_put,
.get = common_ring_mc_get,
.get_count = common_ring_get_count,
.free = common_ring_free,
};
And then the following macro will register the handler in the array of handlers
REGISTER_MEMPOOL_HANDLER(handler_sp_mc);
For an example of a simple malloc based mempool manager, see
lib/librte_mempool/custom_mempool.c
For an example of API usage, please see app/test/test_ext_mempool.c, which
implements a rudimentary mempool manager using simple mallocs for each
mempool object. This file also contains the callbacks and self registration
for the new handler.
David Hunt (2):
mempool: support external handler
mbuf: get default mempool handler from configuration
Olivier Matz (1):
app/test: test external mempool handler
app/test/test_mempool.c | 113 +++++++++++++++
app/test/test_mempool_perf.c | 1 -
config/common_base | 1 +
lib/librte_mbuf/rte_mbuf.c | 21 ++-
lib/librte_mempool/Makefile | 2 +
lib/librte_mempool/rte_mempool.c | 72 ++++------
lib/librte_mempool/rte_mempool.h | 212 +++++++++++++++++++++++++----
lib/librte_mempool/rte_mempool_default.c | 147 ++++++++++++++++++++
lib/librte_mempool/rte_mempool_handler.c | 139 +++++++++++++++++++
lib/librte_mempool/rte_mempool_version.map | 4 +
10 files changed, 637 insertions(+), 75 deletions(-)
create mode 100644 lib/librte_mempool/rte_mempool_default.c
create mode 100644 lib/librte_mempool/rte_mempool_handler.c
--
2.1.4
^ permalink raw reply [relevance 2%]
* Re: [dpdk-dev] [PATCH v5] mempool: reduce rte_mempool structure size
2016-04-14 9:42 2% ` [dpdk-dev] [PATCH v5] " Olivier Matz
2016-04-14 13:28 0% ` Wiles, Keith
@ 2016-04-14 13:53 0% ` Wiles, Keith
2016-05-17 5:31 0% ` Thomas Monjalon
2 siblings, 0 replies; 200+ results
From: Wiles, Keith @ 2016-04-14 13:53 UTC (permalink / raw)
To: Olivier Matz, dev; +Cc: thomas.monjalon, pmatilai
>From: Keith Wiles <keith.wiles@intel.com>
>
>The rte_mempool structure is changed, which will cause an ABI change
>for this structure. Providing backward compat is not reasonable
>here as this structure is used in multiple defines/inlines.
>
>Allow mempool cache support to be dynamic depending on if the
>mempool being created needs cache support. Saves about 1.5M of
>memory used by the rte_mempool structure.
>
>Allocating small mempools which do not require cache can consume
>large amounts of memory if you have a number of these mempools.
>
>Change to be effective in release 16.07.
>
>Signed-off-by: Keith Wiles <keith.wiles@intel.com>
>Acked-by: Olivier Matz <olivier.matz@6wind.com>
For the change to this patch:
Acked-by: Keith Wiles <keith.wiles@intel.com>
>---
>
>Changes in v5:
>
>- use RTE_PTR_ADD() instead of cast to (char *) to fix compilation on tilera.
> Error log was:
>
> rte_mempool.c: In function ‘rte_mempool_xmem_create’:
> rte_mempool.c:595: error: cast increases required alignment of target type
>
>
> app/test/test_mempool.c | 4 +--
> lib/librte_mempool/rte_mempool.c | 55 ++++++++++++++++++----------------------
> lib/librte_mempool/rte_mempool.h | 29 ++++++++++-----------
> 3 files changed, 40 insertions(+), 48 deletions(-)
>
>diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
>index f0f823b..10e1fa4 100644
>--- a/app/test/test_mempool.c
>+++ b/app/test/test_mempool.c
>@@ -122,8 +122,8 @@ test_mempool_basic(void)
> return -1;
>
> printf("get private data\n");
>- if (rte_mempool_get_priv(mp) !=
>- (char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
>+ if (rte_mempool_get_priv(mp) != (char *)mp +
>+ MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
> return -1;
>
> printf("get physical address of an object\n");
>diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
>index f8781e1..7a0e07e 100644
>--- a/lib/librte_mempool/rte_mempool.c
>+++ b/lib/librte_mempool/rte_mempool.c
>@@ -452,12 +452,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> /* compilation-time checks */
> RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
> RTE_CACHE_LINE_MASK) != 0);
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
> RTE_CACHE_LINE_MASK) != 0);
>- RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
>- RTE_CACHE_LINE_MASK) != 0);
>-#endif
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
> RTE_CACHE_LINE_MASK) != 0);
>@@ -527,9 +523,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> */
> int head = sizeof(struct rte_mempool);
> int new_size = (private_data_size + head) % page_size;
>- if (new_size) {
>+ if (new_size)
> private_data_size += page_size - new_size;
>- }
> }
>
> /* try to allocate tailq entry */
>@@ -544,7 +539,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> * store mempool objects. Otherwise reserve a memzone that is large
> * enough to hold mempool header and metadata plus mempool objects.
> */
>- mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
>+ mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
>+ mempool_size += private_data_size;
> mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
> if (vaddr == NULL)
> mempool_size += (size_t)objsz.total_size * n;
>@@ -591,8 +587,15 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
> mp->private_data_size = private_data_size;
>
>+ /*
>+ * local_cache pointer is set even if cache_size is zero.
>+ * The local_cache points to just past the elt_pa[] array.
>+ */
>+ mp->local_cache = (struct rte_mempool_cache *)
>+ RTE_PTR_ADD(mp, MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
>+
> /* calculate address of the first element for continuous mempool. */
>- obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
>+ obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
> private_data_size;
> obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
>
>@@ -606,9 +609,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> mp->elt_va_start = (uintptr_t)obj;
> mp->elt_pa[0] = mp->phys_addr +
> (mp->elt_va_start - (uintptr_t)mp);
>-
>- /* mempool elements in a separate chunk of memory. */
> } else {
>+ /* mempool elements in a separate chunk of memory. */
> mp->elt_va_start = (uintptr_t)vaddr;
> memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
> }
>@@ -643,19 +645,15 @@ unsigned
> rte_mempool_count(const struct rte_mempool *mp)
> {
> unsigned count;
>+ unsigned lcore_id;
>
> count = rte_ring_count(mp->ring);
>
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>- {
>- unsigned lcore_id;
>- if (mp->cache_size == 0)
>- return count;
>+ if (mp->cache_size == 0)
>+ return count;
>
>- for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
>- count += mp->local_cache[lcore_id].len;
>- }
>-#endif
>+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
>+ count += mp->local_cache[lcore_id].len;
>
> /*
> * due to race condition (access to len is not locked), the
>@@ -670,13 +668,16 @@ rte_mempool_count(const struct rte_mempool *mp)
> static unsigned
> rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
> {
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> unsigned lcore_id;
> unsigned count = 0;
> unsigned cache_count;
>
> fprintf(f, " cache infos:\n");
> fprintf(f, " cache_size=%"PRIu32"\n", mp->cache_size);
>+
>+ if (mp->cache_size == 0)
>+ return count;
>+
> for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> cache_count = mp->local_cache[lcore_id].len;
> fprintf(f, " cache_count[%u]=%u\n", lcore_id, cache_count);
>@@ -684,11 +685,6 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
> }
> fprintf(f, " total_cache_count=%u\n", count);
> return count;
>-#else
>- RTE_SET_USED(mp);
>- fprintf(f, " cache disabled\n");
>- return 0;
>-#endif
> }
>
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>@@ -753,13 +749,16 @@ mempool_audit_cookies(const struct rte_mempool *mp)
> #define mempool_audit_cookies(mp) do {} while(0)
> #endif
>
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> /* check cookies before and after objects */
> static void
> mempool_audit_cache(const struct rte_mempool *mp)
> {
> /* check cache size consistency */
> unsigned lcore_id;
>+
>+ if (mp->cache_size == 0)
>+ return;
>+
> for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
> RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
>@@ -768,10 +767,6 @@ mempool_audit_cache(const struct rte_mempool *mp)
> }
> }
> }
>-#else
>-#define mempool_audit_cache(mp) do {} while(0)
>-#endif
>-
>
> /* check the consistency of mempool (size, cookies, ...) */
> void
>diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
>index 9745bf0..8595e77 100644
>--- a/lib/librte_mempool/rte_mempool.h
>+++ b/lib/librte_mempool/rte_mempool.h
>@@ -95,7 +95,6 @@ struct rte_mempool_debug_stats {
> } __rte_cache_aligned;
> #endif
>
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> /**
> * A structure that stores a per-core object cache.
> */
>@@ -107,7 +106,6 @@ struct rte_mempool_cache {
> */
> void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
> } __rte_cache_aligned;
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>
> /**
> * A structure that stores the size of mempool elements.
>@@ -194,10 +192,7 @@ struct rte_mempool {
>
> unsigned private_data_size; /**< Size of private data. */
>
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>- /** Per-lcore local cache. */
>- struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
>-#endif
>+ struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
>
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> /** Per-lcore statistics. */
>@@ -247,6 +242,13 @@ struct rte_mempool {
> #endif
>
> /**
>+ * Size of elt_pa array size based on number of pages. (Internal use)
>+ */
>+#define __PA_SIZE(mp, pgn) \
>+ RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
>+ sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
>+
>+/**
> * Calculate the size of the mempool header.
> *
> * @param mp
>@@ -254,9 +256,9 @@ struct rte_mempool {
> * @param pgn
> * Number of pages used to store mempool objects.
> */
>-#define MEMPOOL_HEADER_SIZE(mp, pgn) (sizeof(*(mp)) + \
>- RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
>- sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
>+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
>+ (sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
>+ (sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
>
> /**
> * Return true if the whole mempool is in contiguous memory.
>@@ -755,19 +757,16 @@ static inline void __attribute__((always_inline))
> __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
> unsigned n, int is_mp)
> {
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> struct rte_mempool_cache *cache;
> uint32_t index;
> void **cache_objs;
> unsigned lcore_id = rte_lcore_id();
> uint32_t cache_size = mp->cache_size;
> uint32_t flushthresh = mp->cache_flushthresh;
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>
> /* increment stat now, adding in mempool always success */
> __MEMPOOL_STAT_ADD(mp, put, n);
>
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> /* cache is not enabled or single producer or non-EAL thread */
> if (unlikely(cache_size == 0 || is_mp == 0 ||
> lcore_id >= RTE_MAX_LCORE))
>@@ -802,7 +801,6 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
> return;
>
> ring_enqueue:
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>
> /* push remaining objects in ring */
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>@@ -946,7 +944,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> unsigned n, int is_mc)
> {
> int ret;
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> struct rte_mempool_cache *cache;
> uint32_t index, len;
> void **cache_objs;
>@@ -992,7 +989,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> return 0;
>
> ring_dequeue:
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>
> /* get remaining objects from ring */
> if (is_mc)
>@@ -1293,7 +1289,8 @@ void rte_mempool_audit(const struct rte_mempool *mp);
> */
> static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
> {
>- return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
>+ return (char *)mp +
>+ MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
> }
>
> /**
>--
>2.1.4
>
>
Regards,
Keith
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH 00/36] mempool: rework memory allocation
2016-04-14 10:19 2% ` [dpdk-dev] [PATCH 00/36] " Olivier Matz
@ 2016-04-14 13:50 0% ` Wiles, Keith
2016-04-14 14:01 0% ` Olivier MATZ
2016-05-18 11:04 2% ` [dpdk-dev] [PATCH v3 00/35] " Olivier Matz
1 sibling, 1 reply; 200+ results
From: Wiles, Keith @ 2016-04-14 13:50 UTC (permalink / raw)
To: Olivier Matz, dev; +Cc: Richardson, Bruce, stephen
>This series is a rework of mempool. For those who don't want to read
>all the cover letter, here is a summary:
>
>- it is not possible to allocate large mempools if there is not enough
> contiguous memory, this series solves this issue
>- introduce new APIs with less arguments: "create, populate, obj_init"
>- allow to free a mempool
>- split code in smaller functions, will ease the introduction of ext_handler
>- remove test-pmd anonymous mempool creation
>- remove most of dom0-specific mempool code
>- opens the door for a eal_memory rework: we probably don't need large
> contiguous memory area anymore, working with pages would work.
>
>This breaks the ABI as it was indicated in the deprecation for 16.04.
>The API stays almost the same, no modification is needed in examples app
>or in test-pmd. Only kni and mellanox drivers are slightly modified.
>
>This patch applies on top of 16.04 + v5 of Keith's patch:
>"mempool: reduce rte_mempool structure size"
I have not digested this complete patch yet, but this one popped out at me as the External Memory Manager support is waiting in the wings for the 16.07 release. If this causes the EMM patch to be rewritten or updated, that seems like a problem to me. Does this patch add the External Memory Manager support?
http://thread.gmane.org/gmane.comp.networking.dpdk.devel/32015/focus=35107
>
>Changes RFC -> v1:
>
>- remove the rte_deconst macro, and remove some const qualifier in
> dump/audit functions
>- rework modifications in mellanox drivers to ensure the mempool is
> virtually contiguous
>- fix mempool memory chunk iteration (bad pointer was used)
>- fix compilation on freebsd: replace MAP_LOCKED flag by mlock()
>- fix compilation on tilera (pointer arithmetics)
>- slightly rework and clean the mempool autotest
>- fix mempool autotest on bsd
>- more validation (especially mellanox drivers and kni that were not
> tested in RFC)
>- passed autotests (x86_64-native-linuxapp-gcc and x86_64-native-bsdapp-gcc)
>- rebase on head, reorder the patches a bit and fix minor split issues
>
>
>Description of the initial issue
>--------------------------------
>
>The allocation of mbuf pool can fail even if there is enough memory.
>The problem is related to the way the memory is allocated and used in
>dpdk. It is particularly annoying with mbuf pools, but it can also fail
>in other use cases allocating a large amount of memory.
>
>- rte_malloc() allocates physically contiguous memory, which is needed
> for mempools, but useless most of the time.
>
> Allocating a large physically contiguous zone is often impossible
> because the system provides hugepages which may not be contiguous.
>
>- rte_mempool_create() (and therefore rte_pktmbuf_pool_create())
> requires a physically contiguous zone.
>
>- rte_mempool_xmem_create() does not solve the issue as it still
> needs the memory to be virtually contiguous, and there is no
> way in dpdk to allocate a virtually contiguous memory that is
> not also physically contiguous.
>
>How to reproduce the issue
>--------------------------
>
>- start the dpdk with some 2MB hugepages (it can also occur with 1GB)
>- allocate a large mempool
>- even if there is enough memory, the allocation can fail
>
>Example:
>
> git clone http://dpdk.org/git/dpdk
> cd dpdk
> make config T=x86_64-native-linuxapp-gcc
> make -j32
> mkdir -p /mnt/huge
> mount -t hugetlbfs nodev /mnt/huge
> echo 256 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
>
> # we try to allocate a mempool whose size is ~450MB, it fails
> ./build/app/testpmd -l 2,4 -- --total-num-mbufs=200000 -i
>
>The EAL logs "EAL: Virtual area found at..." shows that there are
>several zones, but all smaller than 450MB.
>
>Workarounds:
>
>- Use 1GB hugepages: it sometimes works, but for very large
> pools (millions of mbufs) there is the same issue. Moreover,
> it would consume 1GB memory at least which can be a lot
> in some cases.
>
>- Reboot the machine or allocate hugepages at boot time: this increases
> the chances to have more contiguous memory, but does not completely
> solve the issue
>
>Solutions
>---------
>
>Below is a list of proposed solutions. I implemented a quick and dirty
>PoC of solution 1, but it's not working in all conditions and it's
>really an ugly hack. This series implements solution 4, which looks
>the best to me, given that it does not prevent further enhancements
>to dpdk memory in the future (solution 3 for instance).
>
>Solution 1: in application
>--------------------------
>
>- allocate several hugepages using rte_malloc() or rte_memzone_reserve()
> (only keeping complete hugepages)
>- parse memsegs and /proc/maps to check which files mmaps these pages
>- mmap the files in a contiguous virtual area
>- use rte_mempool_xmem_create()
>
>Cons:
>
>- 1a. parsing the memsegs of rte config in the application does not
> use a public API, and can be broken if internal dpdk code changes
>- 1b. some memory is lost due to malloc headers. Also, if the memory is
> very fragmented (ex: all 2MB pages are physically separated), it does
> not work at all because we cannot get any complete page. It is not
> possible to use a lower level allocator since commit fafcc11985a.
>- 1c. we cannot use rte_pktmbuf_pool_create(), so we need to use mempool
> api and do a part of the job manually
>- 1d. it breaks secondary processes as the virtual addresses won't be
> mmap'd at the same place in secondary process
>- 1e. it only fixes the issue for the mbuf pool of the application,
> internal pools in dpdk libraries are not modified
>- 1f. this is a pure linux solution (rte_map files)
>- 1g. The application has to be aware of RTE_EAL_SINGLE_SEGMENTS option
> that changes the way hugepages are mapped. By the way, it's strange
> to have such a compile-time option, we should probably have only
> one behavior that works all the time.
>
>Solution 2: in dpdk memory allocator
>------------------------------------
>
>- do the same as in solution 1 in a new function rte_malloc_non_contig():
> allocate several chunks and mmap them in a contiguous virtual memory
>- a flag has to be added in malloc header to do the proper cleanup in
> rte_free() (free all the chunks, munmap the memory)
>- introduce a new rte_mem_get_physmap(*physmap,addr, len) that returns
> the virt2phys mapping of a virtual area in dpdk
>- add a mempool flag MEMPOOL_F_NON_PHYS_CONTIG to use
> rte_malloc_non_contig() to allocate the area storing the objects
>
>Cons:
>
>- 2a. same as 1d: it breaks secondary processes if the mempool flag is
> used.
>- 2b. same as 1b: some memory is lost due to malloc headers, and it
> cannot work if memory is too fragmented.
>- 2c. rte_malloc_virt2phy() cannot be used on these zones. It would
> return the physical address of the first page. It would be better to
> return an error in this case.
>- 2d. need to check how to implement this on bsd (TBD)
>
>Solution 3: in dpdk eal memory
>------------------------------
>
>- Rework the way hugepages are mmap'd in dpdk: instead of having several
> rte_map* files, just mmap one file per node. It may drastically
> simplify EAL memory management in dpdk.
>- An API should be added to retrieve the physical mapping of a virtual
> area (ex: rte_mem_get_physmap(*physmap, addr, len))
>- rte_malloc() and rte_memzone_reserve() won't allocate physically
> contiguous memory anymore (TBD)
>- Update mempool to always use the rte_mempool_xmem_create() version
>
>Cons:
>
>- 3a. lot of rework in eal memory, it will induce some behavior changes
> and maybe api changes
>- 3b. possible conflicts with xen_dom0 mempool
>
>Solution 4: in mempool
>----------------------
>
>- Introduce a new API to fill a mempool with zones that are not
> virtually contiguous. It requires to add new functions to create and
> populate a mempool. Example (TBD):
>
> - rte_mempool_create_empty(name, n, elt_size, cache_size, priv_size)
> - rte_mempool_populate(mp, addr, len): add virtual memory for objects
> - rte_mempool_obj_iter(mp, obj_cb, arg): call a cb for each object
>
>- update rte_mempool_create() to allocate objects in several memory
> chunks by default if there is no large enough physically contiguous
> memory.
>
>Tests done
>----------
>
>Compilation
>~~~~~~~~~~~
>
>The following targets:
>
> x86_64-native-linuxapp-gcc
> i686-native-linuxapp-gcc
> x86_x32-native-linuxapp-gcc
> x86_64-native-linuxapp-clang
> x86_64-native-bsdapp-gcc
> ppc_64-power8-linuxapp-gcc
> tile-tilegx-linuxapp-gcc (only the mempool files, the target does not compile)
>
>Libraries with and without debug, in static and shared mode + examples.
>
>autotests
>~~~~~~~~~
>
>Passed all autotests on x86_64-native-linuxapp-gcc (including kni) and
>mempool-related autotests on x86_64-native-bsdapp-gcc.
>
>test-pmd
>~~~~~~~~
>
># now starts fine, was failing before if mempool was too fragmented
>./x86_64-native-linuxapp-gcc/app/testpmd -l 0,2,4 -n 4 -- -i --port-topology=chained
>
># still ok
>./x86_64-native-linuxapp-gcc/app/testpmd -l 0,2,4 -n 4 -m 256 -- -i --port-topology=chained --mp-anon
>set fwd txonly
>start
>stop
>
># fail, but was failing before too. The problem is because the physical
># addresses are not properly set when using --no-huge. The mempool phys addr
># are now correct, but the zones allocated through memzone_reserve() are
># still wrong. This could be fixed in a future series.
>./x86_64-native-linuxapp-gcc/app/testpmd -l 0,2,4 -n 4 -m 256 --no-huge -- -i --port-topology=chained
>set fwd txonly
>start
>stop
>
>
>Olivier Matz (36):
> mempool: fix comments and style
> mempool: replace elt_size by total_elt_size
> mempool: uninline function to check cookies
> mempool: use sizeof to get the size of header and trailer
> mempool: rename mempool_obj_ctor_t as mempool_obj_cb_t
> mempool: update library version
> mempool: list objects when added in the mempool
> mempool: remove const attribute in mempool_walk
> mempool: remove const qualifier in dump and audit
> mempool: use the list to iterate the mempool elements
> mempool: use the list to audit all elements
> mempool: use the list to initialize mempool objects
> mempool: create the internal ring in a specific function
> mempool: store physaddr in mempool objects
> mempool: remove MEMPOOL_IS_CONTIG()
> mempool: store memory chunks in a list
> mempool: new function to iterate the memory chunks
> mempool: simplify xmem_usage
> mempool: introduce a free callback for memory chunks
> mempool: make page size optional when getting xmem size
> mempool: default allocation in several memory chunks
> eal: lock memory when using no-huge
> mempool: support no-hugepage mode
> mempool: replace mempool physaddr by a memzone pointer
> mempool: introduce a function to free a mempool
> mempool: introduce a function to create an empty mempool
> eal/xen: return machine address without knowing memseg id
> mempool: rework support of xen dom0
> mempool: create the internal ring when populating
> mempool: populate a mempool with anonymous memory
> mempool: make mempool populate and free api public
> test-pmd: remove specific anon mempool code
> mem: avoid memzone/mempool/ring name truncation
> mempool: new flag when phys contig mem is not needed
> app/test: rework mempool test
> mempool: update copyright
>
> app/test-pmd/Makefile | 4 -
> app/test-pmd/mempool_anon.c | 201 -----
> app/test-pmd/mempool_osdep.h | 54 --
> app/test-pmd/testpmd.c | 23 +-
> app/test/test_mempool.c | 243 +++---
> doc/guides/rel_notes/release_16_04.rst | 2 +-
> drivers/net/mlx4/mlx4.c | 140 ++--
> drivers/net/mlx5/mlx5_rxtx.c | 140 ++--
> drivers/net/mlx5/mlx5_rxtx.h | 4 +-
> drivers/net/xenvirt/rte_eth_xenvirt.h | 2 +-
> drivers/net/xenvirt/rte_mempool_gntalloc.c | 4 +-
> lib/librte_eal/common/eal_common_log.c | 2 +-
> lib/librte_eal/common/eal_common_memzone.c | 10 +-
> lib/librte_eal/common/include/rte_memory.h | 11 +-
> lib/librte_eal/linuxapp/eal/eal_memory.c | 2 +-
> lib/librte_eal/linuxapp/eal/eal_xen_memory.c | 17 +-
> lib/librte_kni/rte_kni.c | 12 +-
> lib/librte_mempool/Makefile | 5 +-
> lib/librte_mempool/rte_dom0_mempool.c | 133 ----
> lib/librte_mempool/rte_mempool.c | 1042 +++++++++++++++++---------
> lib/librte_mempool/rte_mempool.h | 594 +++++++--------
> lib/librte_mempool/rte_mempool_version.map | 18 +-
> lib/librte_ring/rte_ring.c | 16 +-
> 23 files changed, 1377 insertions(+), 1302 deletions(-)
> delete mode 100644 app/test-pmd/mempool_anon.c
> delete mode 100644 app/test-pmd/mempool_osdep.h
> delete mode 100644 lib/librte_mempool/rte_dom0_mempool.c
>
>--
>2.1.4
>
>
Regards,
Keith
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH v5] mempool: reduce rte_mempool structure size
2016-04-14 9:42 2% ` [dpdk-dev] [PATCH v5] " Olivier Matz
@ 2016-04-14 13:28 0% ` Wiles, Keith
2016-04-14 13:53 0% ` Wiles, Keith
2016-05-17 5:31 0% ` Thomas Monjalon
2 siblings, 0 replies; 200+ results
From: Wiles, Keith @ 2016-04-14 13:28 UTC (permalink / raw)
To: Olivier Matz, dev; +Cc: thomas.monjalon, pmatilai
>From: Keith Wiles <keith.wiles@intel.com>
>
>The rte_mempool structure is changed, which will cause an ABI change
>for this structure. Providing backward compat is not reasonable
>here as this structure is used in multiple defines/inlines.
>
>Allow mempool cache support to be dynamic depending on if the
>mempool being created needs cache support. Saves about 1.5M of
>memory used by the rte_mempool structure.
>
>Allocating small mempools which do not require cache can consume
>large amounts of memory if you have a number of these mempools.
>
>Change to be effective in release 16.07.
>
>Signed-off-by: Keith Wiles <keith.wiles@intel.com>
>Acked-by: Olivier Matz <olivier.matz@6wind.com>
>---
>
>Changes in v5:
>
>- use RTE_PTR_ADD() instead of cast to (char *) to fix compilation on tilera.
> Error log was:
>
> rte_mempool.c: In function ‘rte_mempool_xmem_create’:
> rte_mempool.c:595: error: cast increases required alignment of target type
>
>
> app/test/test_mempool.c | 4 +--
> lib/librte_mempool/rte_mempool.c | 55 ++++++++++++++++++----------------------
> lib/librte_mempool/rte_mempool.h | 29 ++++++++++-----------
> 3 files changed, 40 insertions(+), 48 deletions(-)
>
>diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
>index f0f823b..10e1fa4 100644
>--- a/app/test/test_mempool.c
>+++ b/app/test/test_mempool.c
>@@ -122,8 +122,8 @@ test_mempool_basic(void)
> return -1;
>
> printf("get private data\n");
>- if (rte_mempool_get_priv(mp) !=
>- (char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
>+ if (rte_mempool_get_priv(mp) != (char *)mp +
>+ MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
Should we not add the RTE_PTR_ADD() here as well?
> return -1;
>
> printf("get physical address of an object\n");
>diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
>index f8781e1..7a0e07e 100644
>--- a/lib/librte_mempool/rte_mempool.c
>+++ b/lib/librte_mempool/rte_mempool.c
>@@ -452,12 +452,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> /* compilation-time checks */
> RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
> RTE_CACHE_LINE_MASK) != 0);
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
> RTE_CACHE_LINE_MASK) != 0);
>- RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
>- RTE_CACHE_LINE_MASK) != 0);
>-#endif
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
> RTE_CACHE_LINE_MASK) != 0);
>@@ -527,9 +523,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> */
> int head = sizeof(struct rte_mempool);
> int new_size = (private_data_size + head) % page_size;
>- if (new_size) {
>+ if (new_size)
> private_data_size += page_size - new_size;
>- }
> }
>
> /* try to allocate tailq entry */
>@@ -544,7 +539,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> * store mempool objects. Otherwise reserve a memzone that is large
> * enough to hold mempool header and metadata plus mempool objects.
> */
>- mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
>+ mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
>+ mempool_size += private_data_size;
> mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
> if (vaddr == NULL)
> mempool_size += (size_t)objsz.total_size * n;
>@@ -591,8 +587,15 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
> mp->private_data_size = private_data_size;
>
>+ /*
>+ * local_cache pointer is set even if cache_size is zero.
>+ * The local_cache points to just past the elt_pa[] array.
>+ */
>+ mp->local_cache = (struct rte_mempool_cache *)
>+ RTE_PTR_ADD(mp, MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
>+
> /* calculate address of the first element for continuous mempool. */
>- obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
>+ obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
> private_data_size;
> obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
>
>@@ -606,9 +609,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> mp->elt_va_start = (uintptr_t)obj;
> mp->elt_pa[0] = mp->phys_addr +
> (mp->elt_va_start - (uintptr_t)mp);
>-
>- /* mempool elements in a separate chunk of memory. */
> } else {
>+ /* mempool elements in a separate chunk of memory. */
> mp->elt_va_start = (uintptr_t)vaddr;
> memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
> }
>@@ -643,19 +645,15 @@ unsigned
> rte_mempool_count(const struct rte_mempool *mp)
> {
> unsigned count;
>+ unsigned lcore_id;
>
> count = rte_ring_count(mp->ring);
>
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>- {
>- unsigned lcore_id;
>- if (mp->cache_size == 0)
>- return count;
>+ if (mp->cache_size == 0)
>+ return count;
>
>- for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
>- count += mp->local_cache[lcore_id].len;
>- }
>-#endif
>+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
>+ count += mp->local_cache[lcore_id].len;
>
> /*
> * due to race condition (access to len is not locked), the
>@@ -670,13 +668,16 @@ rte_mempool_count(const struct rte_mempool *mp)
> static unsigned
> rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
> {
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> unsigned lcore_id;
> unsigned count = 0;
> unsigned cache_count;
>
> fprintf(f, " cache infos:\n");
> fprintf(f, " cache_size=%"PRIu32"\n", mp->cache_size);
>+
>+ if (mp->cache_size == 0)
>+ return count;
>+
> for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> cache_count = mp->local_cache[lcore_id].len;
> fprintf(f, " cache_count[%u]=%u\n", lcore_id, cache_count);
>@@ -684,11 +685,6 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
> }
> fprintf(f, " total_cache_count=%u\n", count);
> return count;
>-#else
>- RTE_SET_USED(mp);
>- fprintf(f, " cache disabled\n");
>- return 0;
>-#endif
> }
>
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>@@ -753,13 +749,16 @@ mempool_audit_cookies(const struct rte_mempool *mp)
> #define mempool_audit_cookies(mp) do {} while(0)
> #endif
>
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> /* check cookies before and after objects */
> static void
> mempool_audit_cache(const struct rte_mempool *mp)
> {
> /* check cache size consistency */
> unsigned lcore_id;
>+
>+ if (mp->cache_size == 0)
>+ return;
>+
> for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
> RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
>@@ -768,10 +767,6 @@ mempool_audit_cache(const struct rte_mempool *mp)
> }
> }
> }
>-#else
>-#define mempool_audit_cache(mp) do {} while(0)
>-#endif
>-
>
> /* check the consistency of mempool (size, cookies, ...) */
> void
>diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
>index 9745bf0..8595e77 100644
>--- a/lib/librte_mempool/rte_mempool.h
>+++ b/lib/librte_mempool/rte_mempool.h
>@@ -95,7 +95,6 @@ struct rte_mempool_debug_stats {
> } __rte_cache_aligned;
> #endif
>
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> /**
> * A structure that stores a per-core object cache.
> */
>@@ -107,7 +106,6 @@ struct rte_mempool_cache {
> */
> void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
> } __rte_cache_aligned;
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>
> /**
> * A structure that stores the size of mempool elements.
>@@ -194,10 +192,7 @@ struct rte_mempool {
>
> unsigned private_data_size; /**< Size of private data. */
>
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>- /** Per-lcore local cache. */
>- struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
>-#endif
>+ struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
>
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> /** Per-lcore statistics. */
>@@ -247,6 +242,13 @@ struct rte_mempool {
> #endif
>
> /**
>+ * Size of elt_pa array size based on number of pages. (Internal use)
>+ */
>+#define __PA_SIZE(mp, pgn) \
>+ RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
>+ sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
>+
>+/**
> * Calculate the size of the mempool header.
> *
> * @param mp
>@@ -254,9 +256,9 @@ struct rte_mempool {
> * @param pgn
> * Number of pages used to store mempool objects.
> */
>-#define MEMPOOL_HEADER_SIZE(mp, pgn) (sizeof(*(mp)) + \
>- RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
>- sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
>+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
>+ (sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
>+ (sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
>
> /**
> * Return true if the whole mempool is in contiguous memory.
>@@ -755,19 +757,16 @@ static inline void __attribute__((always_inline))
> __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
> unsigned n, int is_mp)
> {
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> struct rte_mempool_cache *cache;
> uint32_t index;
> void **cache_objs;
> unsigned lcore_id = rte_lcore_id();
> uint32_t cache_size = mp->cache_size;
> uint32_t flushthresh = mp->cache_flushthresh;
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>
> /* increment stat now, adding in mempool always success */
> __MEMPOOL_STAT_ADD(mp, put, n);
>
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> /* cache is not enabled or single producer or non-EAL thread */
> if (unlikely(cache_size == 0 || is_mp == 0 ||
> lcore_id >= RTE_MAX_LCORE))
>@@ -802,7 +801,6 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
> return;
>
> ring_enqueue:
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>
> /* push remaining objects in ring */
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>@@ -946,7 +944,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> unsigned n, int is_mc)
> {
> int ret;
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> struct rte_mempool_cache *cache;
> uint32_t index, len;
> void **cache_objs;
>@@ -992,7 +989,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> return 0;
>
> ring_dequeue:
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>
> /* get remaining objects from ring */
> if (is_mc)
>@@ -1293,7 +1289,8 @@ void rte_mempool_audit(const struct rte_mempool *mp);
> */
> static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
> {
>- return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
>+ return (char *)mp +
>+ MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
And here?
> }
>
> /**
>--
>2.1.4
>
>
Regards,
Keith
^ permalink raw reply [relevance 0%]
* [dpdk-dev] [PATCH 00/36] mempool: rework memory allocation
@ 2016-04-14 10:19 2% ` Olivier Matz
2016-04-14 13:50 0% ` Wiles, Keith
2016-05-18 11:04 2% ` [dpdk-dev] [PATCH v3 00/35] " Olivier Matz
0 siblings, 2 replies; 200+ results
From: Olivier Matz @ 2016-04-14 10:19 UTC (permalink / raw)
To: dev; +Cc: bruce.richardson, stephen
This series is a rework of mempool. For those who don't want to read
the whole cover letter, here is a summary:
- it is not possible to allocate large mempools if there is not enough
contiguous memory, this series solves this issue
- introduce new APIs with fewer arguments: "create, populate, obj_init"
- allow to free a mempool
- split code in smaller functions, will ease the introduction of ext_handler
- remove test-pmd anonymous mempool creation
- remove most of dom0-specific mempool code
- open the door for an eal_memory rework: we probably don't need large
contiguous memory areas anymore, working with pages would be enough.
This breaks the ABI as it was indicated in the deprecation for 16.04.
The API stays almost the same, no modification is needed in examples app
or in test-pmd. Only kni and mellanox drivers are slightly modified.
This patch applies on top of 16.04 + v5 of Keith's patch:
"mempool: reduce rte_mempool structure size"
Changes RFC -> v1:
- remove the rte_deconst macro, and remove some const qualifier in
dump/audit functions
- rework modifications in mellanox drivers to ensure the mempool is
virtually contiguous
- fix mempool memory chunk iteration (bad pointer was used)
- fix compilation on freebsd: replace MAP_LOCKED flag by mlock()
- fix compilation on tilera (pointer arithmetics)
- slightly rework and clean the mempool autotest
- fix mempool autotest on bsd
- more validation (especially mellanox drivers and kni that were not
tested in RFC)
- passed autotests (x86_64-native-linuxapp-gcc and x86_64-native-bsdapp-gcc)
- rebase on head, reorder the patches a bit and fix minor split issues
Description of the initial issue
--------------------------------
The allocation of mbuf pool can fail even if there is enough memory.
The problem is related to the way the memory is allocated and used in
dpdk. It is particularly annoying with mbuf pools, but it can also fail
in other use cases allocating a large amount of memory.
- rte_malloc() allocates physically contiguous memory, which is needed
for mempools, but useless most of the time.
Allocating a large physically contiguous zone is often impossible
because the system provides hugepages which may not be contiguous.
- rte_mempool_create() (and therefore rte_pktmbuf_pool_create())
requires a physically contiguous zone.
- rte_mempool_xmem_create() does not solve the issue as it still
needs the memory to be virtually contiguous, and there is no
way in dpdk to allocate a virtually contiguous memory that is
not also physically contiguous.
How to reproduce the issue
--------------------------
- start the dpdk with some 2MB hugepages (it can also occur with 1GB)
- allocate a large mempool
- even if there is enough memory, the allocation can fail
Example:
git clone http://dpdk.org/git/dpdk
cd dpdk
make config T=x86_64-native-linuxapp-gcc
make -j32
mkdir -p /mnt/huge
mount -t hugetlbfs nodev /mnt/huge
echo 256 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
# we try to allocate a mempool whose size is ~450MB, it fails
./build/app/testpmd -l 2,4 -- --total-num-mbufs=200000 -i
The EAL logs "EAL: Virtual area found at..." shows that there are
several zones, but all smaller than 450MB.
Workarounds:
- Use 1GB hugepages: it sometimes works, but for very large
pools (millions of mbufs) there is the same issue. Moreover,
it would consume 1GB memory at least which can be a lot
in some cases.
- Reboot the machine or allocate hugepages at boot time: this increases
the chances to have more contiguous memory, but does not completely
solve the issue
Solutions
---------
Below is a list of proposed solutions. I implemented a quick and dirty
PoC of solution 1, but it's not working in all conditions and it's
really an ugly hack. This series implements solution 4, which looks
the best to me, knowing it does not prevent further enhancements
in dpdk memory in the future (solution 3 for instance).
Solution 1: in application
--------------------------
- allocate several hugepages using rte_malloc() or rte_memzone_reserve()
(only keeping complete hugepages)
- parse memsegs and /proc/maps to check which files mmaps these pages
- mmap the files in a contiguous virtual area
- use rte_mempool_xmem_create()
Cons:
- 1a. parsing the memsegs of rte config in the application does not
use a public API, and can be broken if internal dpdk code changes
- 1b. some memory is lost due to malloc headers. Also, if the memory is
very fragmented (ex: all 2MB pages are physically separated), it does
not work at all because we cannot get any complete page. It is not
possible to use a lower level allocator since commit fafcc11985a.
- 1c. we cannot use rte_pktmbuf_pool_create(), so we need to use mempool
api and do a part of the job manually
- 1d. it breaks secondary processes as the virtual addresses won't be
mmap'd at the same place in secondary process
- 1e. it only fixes the issue for the mbuf pool of the application,
internal pools in dpdk libraries are not modified
- 1f. this is a pure linux solution (rte_map files)
- 1g. The application has to be aware of RTE_EAL_SINGLE_SEGMENTS option
that changes the way hugepages are mapped. By the way, it's strange
to have such a compile-time option, we should probably have only
one behavior that works all the time.
Solution 2: in dpdk memory allocator
------------------------------------
- do the same as solution 1 in a new function rte_malloc_non_contig():
allocate several chunks and mmap them in a contiguous virtual memory
- a flag has to be added in malloc header to do the proper cleanup in
rte_free() (free all the chunks, munmap the memory)
- introduce a new rte_mem_get_physmap(*physmap,addr, len) that returns
the virt2phys mapping of a virtual area in dpdk
- add a mempool flag MEMPOOL_F_NON_PHYS_CONTIG to use
rte_malloc_non_contig() to allocate the area storing the objects
Cons:
- 2a. same as 1d: it breaks secondary processes if the mempool flag is
used.
- 2b. same as 1b: some memory is lost due to malloc headers, and it
cannot work if memory is too fragmented.
- 2c. rte_malloc_virt2phy() cannot be used on these zones. It would
return the physical address of the first page. It would be better to
return an error in this case.
- 2d. need to check how to implement this on bsd (TBD)
Solution 3: in dpdk eal memory
------------------------------
- Rework the way hugepages are mmap'd in dpdk: instead of having several
rte_map* files, just mmap one file per node. It may drastically
simplify EAL memory management in dpdk.
- An API should be added to retrieve the physical mapping of a virtual
area (ex: rte_mem_get_physmap(*physmap, addr, len))
- rte_malloc() and rte_memzone_reserve() won't allocate physically
contiguous memory anymore (TBD)
- Update mempool to always use the rte_mempool_xmem_create() version
Cons:
- 3a. lot of rework in eal memory, it will induce some behavior changes
and maybe api changes
- 3b. possible conflicts with xen_dom0 mempool
Solution 4: in mempool
----------------------
- Introduce a new API to fill a mempool with zones that are not
virtually contiguous. It requires to add new functions to create and
populate a mempool. Example (TBD):
- rte_mempool_create_empty(name, n, elt_size, cache_size, priv_size)
- rte_mempool_populate(mp, addr, len): add virtual memory for objects
- rte_mempool_obj_iter(mp, obj_cb, arg): call a cb for each object
- update rte_mempool_create() to allocate objects in several memory
chunks by default if there is no large enough physically contiguous
memory.
Tests done
----------
Compilation
~~~~~~~~~~~
The following targets:
x86_64-native-linuxapp-gcc
i686-native-linuxapp-gcc
x86_x32-native-linuxapp-gcc
x86_64-native-linuxapp-clang
x86_64-native-bsdapp-gcc
ppc_64-power8-linuxapp-gcc
tile-tilegx-linuxapp-gcc (only the mempool files, the target does not compile)
Libraries with and without debug, in static and shared mode + examples.
autotests
~~~~~~~~~
Passed all autotests on x86_64-native-linuxapp-gcc (including kni) and
mempool-related autotests on x86_64-native-bsdapp-gcc.
test-pmd
~~~~~~~~
# now starts fine, was failing before if mempool was too fragmented
./x86_64-native-linuxapp-gcc/app/testpmd -l 0,2,4 -n 4 -- -i --port-topology=chained
# still ok
./x86_64-native-linuxapp-gcc/app/testpmd -l 0,2,4 -n 4 -m 256 -- -i --port-topology=chained --mp-anon
set fwd txonly
start
stop
# fail, but was failing before too. The problem is because the physical
# addresses are not properly set when using --no-huge. The mempool phys addr
# are now correct, but the zones allocated through memzone_reserve() are
# still wrong. This could be fixed in a future series.
./x86_64-native-linuxapp-gcc/app/testpmd -l 0,2,4 -n 4 -m 256 --no-huge -- -i --port-topology=chained
set fwd txonly
start
stop
Olivier Matz (36):
mempool: fix comments and style
mempool: replace elt_size by total_elt_size
mempool: uninline function to check cookies
mempool: use sizeof to get the size of header and trailer
mempool: rename mempool_obj_ctor_t as mempool_obj_cb_t
mempool: update library version
mempool: list objects when added in the mempool
mempool: remove const attribute in mempool_walk
mempool: remove const qualifier in dump and audit
mempool: use the list to iterate the mempool elements
mempool: use the list to audit all elements
mempool: use the list to initialize mempool objects
mempool: create the internal ring in a specific function
mempool: store physaddr in mempool objects
mempool: remove MEMPOOL_IS_CONTIG()
mempool: store memory chunks in a list
mempool: new function to iterate the memory chunks
mempool: simplify xmem_usage
mempool: introduce a free callback for memory chunks
mempool: make page size optional when getting xmem size
mempool: default allocation in several memory chunks
eal: lock memory when using no-huge
mempool: support no-hugepage mode
mempool: replace mempool physaddr by a memzone pointer
mempool: introduce a function to free a mempool
mempool: introduce a function to create an empty mempool
eal/xen: return machine address without knowing memseg id
mempool: rework support of xen dom0
mempool: create the internal ring when populating
mempool: populate a mempool with anonymous memory
mempool: make mempool populate and free api public
test-pmd: remove specific anon mempool code
mem: avoid memzone/mempool/ring name truncation
mempool: new flag when phys contig mem is not needed
app/test: rework mempool test
mempool: update copyright
app/test-pmd/Makefile | 4 -
app/test-pmd/mempool_anon.c | 201 -----
app/test-pmd/mempool_osdep.h | 54 --
app/test-pmd/testpmd.c | 23 +-
app/test/test_mempool.c | 243 +++---
doc/guides/rel_notes/release_16_04.rst | 2 +-
drivers/net/mlx4/mlx4.c | 140 ++--
drivers/net/mlx5/mlx5_rxtx.c | 140 ++--
drivers/net/mlx5/mlx5_rxtx.h | 4 +-
drivers/net/xenvirt/rte_eth_xenvirt.h | 2 +-
drivers/net/xenvirt/rte_mempool_gntalloc.c | 4 +-
lib/librte_eal/common/eal_common_log.c | 2 +-
lib/librte_eal/common/eal_common_memzone.c | 10 +-
lib/librte_eal/common/include/rte_memory.h | 11 +-
lib/librte_eal/linuxapp/eal/eal_memory.c | 2 +-
lib/librte_eal/linuxapp/eal/eal_xen_memory.c | 17 +-
lib/librte_kni/rte_kni.c | 12 +-
lib/librte_mempool/Makefile | 5 +-
lib/librte_mempool/rte_dom0_mempool.c | 133 ----
lib/librte_mempool/rte_mempool.c | 1042 +++++++++++++++++---------
lib/librte_mempool/rte_mempool.h | 594 +++++++--------
lib/librte_mempool/rte_mempool_version.map | 18 +-
lib/librte_ring/rte_ring.c | 16 +-
23 files changed, 1377 insertions(+), 1302 deletions(-)
delete mode 100644 app/test-pmd/mempool_anon.c
delete mode 100644 app/test-pmd/mempool_osdep.h
delete mode 100644 lib/librte_mempool/rte_dom0_mempool.c
--
2.1.4
^ permalink raw reply [relevance 2%]
* [dpdk-dev] [RFC 1/2] doc: announce ABI change for rte_eth_dev_info structure
2016-04-14 9:44 4% [dpdk-dev] [RFC 0/2] add new fields to rte_eth_dev_info structure Reshma Pattan
@ 2016-04-14 9:44 9% ` Reshma Pattan
2016-04-15 9:42 4% ` Mcnamara, John
2016-04-15 10:02 8% ` Thomas Monjalon
1 sibling, 2 replies; 200+ results
From: Reshma Pattan @ 2016-04-14 9:44 UTC (permalink / raw)
To: dev
New fields nb_rx_queues and nb_tx_queues will be added to
rte_eth_dev_info structure.
Changes to API rte_eth_dev_info_get() will be done to update
these new fields to rte_eth_dev_info object.
Signed-off-by: Reshma Pattan <reshma.pattan@intel.com>
---
doc/guides/rel_notes/deprecation.rst | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index 327fc2b..78cedb7 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -90,3 +90,9 @@ Deprecation Notices
a handle, like the way kernel exposes an fd to user for locating a
specific file, and to keep all major structures internally, so that
we are likely to be free from ABI violations in future.
+
+* A librte_ether public structure ``rte_eth_dev_info`` will be changed in 16.07.
+ The proposed change will add new parameters ``nb_rx_queues``, ``nb_tx_queues``
+ to the structure. These are the number of queues configured by software.
+ Modification to definition of ``rte_eth_dev_info_get()`` will be done
+ to update new parameters to ``rte_eth_dev_info`` object.
--
2.5.0
^ permalink raw reply [relevance 9%]
* [dpdk-dev] [RFC 0/2] add new fields to rte_eth_dev_info structure
@ 2016-04-14 9:44 4% Reshma Pattan
2016-04-14 9:44 9% ` [dpdk-dev] [RFC 1/2] doc: announce ABI change for " Reshma Pattan
0 siblings, 2 replies; 200+ results
From: Reshma Pattan @ 2016-04-14 9:44 UTC (permalink / raw)
To: dev
New fields nb_rx_queues and nb_tx_queues are added to rte_eth_dev_info structure.
Changes to API rte_eth_dev_info_get() are done to update these new fields to rte_eth_dev_info object.
These changes are ABI breakage and we are late to announce deprecation notice for 16.07,
however the rte_ether library is already subject to a deprecation notice in 16.07.
Reshma Pattan (2):
doc: announce ABI change for rte_eth_dev_info structure
librte_ether: add new fields to rte_eth_dev_info struct
doc/guides/rel_notes/deprecation.rst | 6 ++++++
lib/librte_ether/rte_ethdev.c | 2 ++
lib/librte_ether/rte_ethdev.h | 3 +++
3 files changed, 11 insertions(+)
--
2.5.0
^ permalink raw reply [relevance 4%]
* [dpdk-dev] [PATCH v5] mempool: reduce rte_mempool structure size
@ 2016-04-14 9:42 2% ` Olivier Matz
2016-04-14 13:28 0% ` Wiles, Keith
` (2 more replies)
0 siblings, 3 replies; 200+ results
From: Olivier Matz @ 2016-04-14 9:42 UTC (permalink / raw)
To: dev, keith.wiles; +Cc: thomas.monjalon, pmatilai
From: Keith Wiles <keith.wiles@intel.com>
The rte_mempool structure is changed, which will cause an ABI change
for this structure. Providing backward compat is not reasonable
here as this structure is used in multiple defines/inlines.
Allow mempool cache support to be dynamic depending on if the
mempool being created needs cache support. Saves about 1.5M of
memory used by the rte_mempool structure.
Allocating small mempools which do not require cache can consume
large amounts of memory if you have a number of these mempools.
Change to be effective in release 16.07.
Signed-off-by: Keith Wiles <keith.wiles@intel.com>
Acked-by: Olivier Matz <olivier.matz@6wind.com>
---
Changes in v5:
- use RTE_PTR_ADD() instead of cast to (char *) to fix compilation on tilera.
Error log was:
rte_mempool.c: In function ‘rte_mempool_xmem_create’:
rte_mempool.c:595: error: cast increases required alignment of target type
app/test/test_mempool.c | 4 +--
lib/librte_mempool/rte_mempool.c | 55 ++++++++++++++++++----------------------
lib/librte_mempool/rte_mempool.h | 29 ++++++++++-----------
3 files changed, 40 insertions(+), 48 deletions(-)
diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index f0f823b..10e1fa4 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -122,8 +122,8 @@ test_mempool_basic(void)
return -1;
printf("get private data\n");
- if (rte_mempool_get_priv(mp) !=
- (char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
+ if (rte_mempool_get_priv(mp) != (char *)mp +
+ MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
return -1;
printf("get physical address of an object\n");
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index f8781e1..7a0e07e 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -452,12 +452,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
/* compilation-time checks */
RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
RTE_CACHE_LINE_MASK) != 0);
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
RTE_CACHE_LINE_MASK) != 0);
- RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
- RTE_CACHE_LINE_MASK) != 0);
-#endif
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
RTE_CACHE_LINE_MASK) != 0);
@@ -527,9 +523,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
*/
int head = sizeof(struct rte_mempool);
int new_size = (private_data_size + head) % page_size;
- if (new_size) {
+ if (new_size)
private_data_size += page_size - new_size;
- }
}
/* try to allocate tailq entry */
@@ -544,7 +539,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
* store mempool objects. Otherwise reserve a memzone that is large
* enough to hold mempool header and metadata plus mempool objects.
*/
- mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
+ mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
+ mempool_size += private_data_size;
mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
if (vaddr == NULL)
mempool_size += (size_t)objsz.total_size * n;
@@ -591,8 +587,15 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
mp->private_data_size = private_data_size;
+ /*
+ * local_cache pointer is set even if cache_size is zero.
+ * The local_cache points to just past the elt_pa[] array.
+ */
+ mp->local_cache = (struct rte_mempool_cache *)
+ RTE_PTR_ADD(mp, MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
+
/* calculate address of the first element for continuous mempool. */
- obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
+ obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
private_data_size;
obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
@@ -606,9 +609,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
mp->elt_va_start = (uintptr_t)obj;
mp->elt_pa[0] = mp->phys_addr +
(mp->elt_va_start - (uintptr_t)mp);
-
- /* mempool elements in a separate chunk of memory. */
} else {
+ /* mempool elements in a separate chunk of memory. */
mp->elt_va_start = (uintptr_t)vaddr;
memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
}
@@ -643,19 +645,15 @@ unsigned
rte_mempool_count(const struct rte_mempool *mp)
{
unsigned count;
+ unsigned lcore_id;
count = rte_ring_count(mp->ring);
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
- {
- unsigned lcore_id;
- if (mp->cache_size == 0)
- return count;
+ if (mp->cache_size == 0)
+ return count;
- for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
- count += mp->local_cache[lcore_id].len;
- }
-#endif
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+ count += mp->local_cache[lcore_id].len;
/*
* due to race condition (access to len is not locked), the
@@ -670,13 +668,16 @@ rte_mempool_count(const struct rte_mempool *mp)
static unsigned
rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
{
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
unsigned lcore_id;
unsigned count = 0;
unsigned cache_count;
fprintf(f, " cache infos:\n");
fprintf(f, " cache_size=%"PRIu32"\n", mp->cache_size);
+
+ if (mp->cache_size == 0)
+ return count;
+
for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
cache_count = mp->local_cache[lcore_id].len;
fprintf(f, " cache_count[%u]=%u\n", lcore_id, cache_count);
@@ -684,11 +685,6 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
}
fprintf(f, " total_cache_count=%u\n", count);
return count;
-#else
- RTE_SET_USED(mp);
- fprintf(f, " cache disabled\n");
- return 0;
-#endif
}
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -753,13 +749,16 @@ mempool_audit_cookies(const struct rte_mempool *mp)
#define mempool_audit_cookies(mp) do {} while(0)
#endif
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
/* check cookies before and after objects */
static void
mempool_audit_cache(const struct rte_mempool *mp)
{
/* check cache size consistency */
unsigned lcore_id;
+
+ if (mp->cache_size == 0)
+ return;
+
for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
@@ -768,10 +767,6 @@ mempool_audit_cache(const struct rte_mempool *mp)
}
}
}
-#else
-#define mempool_audit_cache(mp) do {} while(0)
-#endif
-
/* check the consistency of mempool (size, cookies, ...) */
void
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 9745bf0..8595e77 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -95,7 +95,6 @@ struct rte_mempool_debug_stats {
} __rte_cache_aligned;
#endif
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
/**
* A structure that stores a per-core object cache.
*/
@@ -107,7 +106,6 @@ struct rte_mempool_cache {
*/
void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
} __rte_cache_aligned;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
/**
* A structure that stores the size of mempool elements.
@@ -194,10 +192,7 @@ struct rte_mempool {
unsigned private_data_size; /**< Size of private data. */
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
- /** Per-lcore local cache. */
- struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
-#endif
+ struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
/** Per-lcore statistics. */
@@ -247,6 +242,13 @@ struct rte_mempool {
#endif
/**
+ * Size of elt_pa array size based on number of pages. (Internal use)
+ */
+#define __PA_SIZE(mp, pgn) \
+ RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
+ sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
+
+/**
* Calculate the size of the mempool header.
*
* @param mp
@@ -254,9 +256,9 @@ struct rte_mempool {
* @param pgn
* Number of pages used to store mempool objects.
*/
-#define MEMPOOL_HEADER_SIZE(mp, pgn) (sizeof(*(mp)) + \
- RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
- sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
+ (sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
+ (sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
/**
* Return true if the whole mempool is in contiguous memory.
@@ -755,19 +757,16 @@ static inline void __attribute__((always_inline))
__mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
unsigned n, int is_mp)
{
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
struct rte_mempool_cache *cache;
uint32_t index;
void **cache_objs;
unsigned lcore_id = rte_lcore_id();
uint32_t cache_size = mp->cache_size;
uint32_t flushthresh = mp->cache_flushthresh;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
/* increment stat now, adding in mempool always success */
__MEMPOOL_STAT_ADD(mp, put, n);
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
/* cache is not enabled or single producer or non-EAL thread */
if (unlikely(cache_size == 0 || is_mp == 0 ||
lcore_id >= RTE_MAX_LCORE))
@@ -802,7 +801,6 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
return;
ring_enqueue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
/* push remaining objects in ring */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -946,7 +944,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
unsigned n, int is_mc)
{
int ret;
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
struct rte_mempool_cache *cache;
uint32_t index, len;
void **cache_objs;
@@ -992,7 +989,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
return 0;
ring_dequeue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
/* get remaining objects from ring */
if (is_mc)
@@ -1293,7 +1289,8 @@ void rte_mempool_audit(const struct rte_mempool *mp);
*/
static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
{
- return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
+ return (char *)mp +
+ MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
}
/**
--
2.1.4
^ permalink raw reply [relevance 2%]
* [dpdk-dev] [PATCH v1] doc: add template release notes for 16.07
@ 2016-04-12 12:55 6% John McNamara
0 siblings, 0 replies; 200+ results
From: John McNamara @ 2016-04-12 12:55 UTC (permalink / raw)
To: dev; +Cc: thomas.monjalon, John McNamara
Added template release notes for DPDK 16.07 with inline
explanations of the various sections.
Signed-off-by: John McNamara <john.mcnamara@intel.com>
---
doc/guides/rel_notes/index.rst | 1 +
doc/guides/rel_notes/release_16_07.rst | 160 +++++++++++++++++++++++++++++++++
2 files changed, 161 insertions(+)
create mode 100644 doc/guides/rel_notes/release_16_07.rst
diff --git a/doc/guides/rel_notes/index.rst b/doc/guides/rel_notes/index.rst
index 84317b8..52c63b4 100644
--- a/doc/guides/rel_notes/index.rst
+++ b/doc/guides/rel_notes/index.rst
@@ -36,6 +36,7 @@ Release Notes
:numbered:
rel_description
+ release_16_07
release_16_04
release_2_2
release_2_1
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
new file mode 100644
index 0000000..701e827
--- /dev/null
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -0,0 +1,160 @@
+DPDK Release 16.07
+==================
+
+**Read this first.**
+
+The text below explains how to update the release notes.
+
+Use proper spelling, capitalization and punctuation in all sections.
+
+Variable and config names should be quoted as fixed width text: ``LIKE_THIS``.
+
+Build the docs and view the output file to ensure the changes are correct::
+
+ make doc-guides-html
+
+ firefox build/doc/html/guides/rel_notes/release_16_07.html
+
+
+New Features
+------------
+
+This section should contain new features added in this release. Sample format:
+
+* **Add a title in the past tense with a full stop.**
+
+ Add a short 1-2 sentence description in the past tense. The description
+ should be enough to allow someone scanning the release notes to understand
+ the new feature.
+
+ If the feature adds a lot of sub-features you can use a bullet list like this.
+
+ * Added feature foo to do something.
+ * Enhanced feature bar to do something else.
+
+ Refer to the previous release notes for examples.
+
+
+Resolved Issues
+---------------
+
+This section should contain bug fixes added to the relevant sections. Sample format:
+
+* **code/section Fixed issue in the past tense with a full stop.**
+
+ Add a short 1-2 sentence description of the resolved issue in the past tense.
+ The title should contain the code/lib section like a commit message.
+ Add the entries in alphabetic order in the relevant sections below.
+
+
+EAL
+~~~
+
+
+Drivers
+~~~~~~~
+
+
+Libraries
+~~~~~~~~~
+
+
+Examples
+~~~~~~~~
+
+
+Other
+~~~~~
+
+
+Known Issues
+------------
+
+This section should contain new known issues in this release. Sample format:
+
+* **Add title in present tense with full stop.**
+
+ Add a short 1-2 sentence description of the known issue in the present
+ tense. Add information on any known workarounds.
+
+
+API Changes
+-----------
+
+This section should contain API changes. Sample format:
+
+* Add a short 1-2 sentence description of the API change. Use fixed width
+ quotes for ``rte_function_names`` or ``rte_struct_names``. Use the past tense.
+
+
+ABI Changes
+-----------
+
+* Add a short 1-2 sentence description of the ABI change that was announced in
+ the previous releases and made in this release. Use fixed width quotes for
+ ``rte_function_names`` or ``rte_struct_names``. Use the past tense.
+
+
+Shared Library Versions
+-----------------------
+
+Update any library version updated in this release and prepend with a ``+`` sign.
+
+The libraries prepended with a plus sign were incremented in this version.
+
+.. code-block:: diff
+
+ libethdev.so.3
+ librte_acl.so.2
+ librte_cfgfile.so.2
+ librte_cmdline.so.2
+ librte_distributor.so.1
+ librte_eal.so.2
+ librte_hash.so.2
+ librte_ip_frag.so.1
+ librte_ivshmem.so.1
+ librte_jobstats.so.1
+ librte_kni.so.2
+ librte_kvargs.so.1
+ librte_lpm.so.2
+ librte_mbuf.so.2
+ librte_mempool.so.1
+ librte_meter.so.1
+ librte_pipeline.so.3
+ librte_pmd_bond.so.1
+ librte_pmd_ring.so.2
+ librte_port.so.2
+ librte_power.so.1
+ librte_reorder.so.1
+ librte_ring.so.1
+ librte_sched.so.1
+ librte_table.so.2
+ librte_timer.so.1
+ librte_vhost.so.2
+
+
+Tested Platforms
+----------------
+
+This section should contain a list of platforms that were tested with this
+release.
+
+The format is:
+
+#. Platform name.
+
+ - Platform details.
+ - Platform details.
+
+
+Tested NICs
+-----------
+
+This section should contain a list of NICs that were tested with this release.
+
+The format is:
+
+#. NIC name.
+
+ - NIC details.
+ - NIC details.
--
2.5.0
^ permalink raw reply [relevance 6%]
* Re: [dpdk-dev] [PATCH] doc: announce ABI change for rte_port_source_params structure
2016-04-07 21:24 4% ` Thomas Monjalon
@ 2016-04-12 12:39 4% ` Thomas Monjalon
0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2016-04-12 12:39 UTC (permalink / raw)
To: Zhang, Roy Fan; +Cc: dev, Azarewicz, PiotrX T, Singh, Jasvinder
Hi Fan Zhang,
2016-04-07 23:24, Thomas Monjalon:
> Please send a patch to remove NEXT_ABI early in the 16.07 cycle.
Please could you prepare a patch to remove NEXT_ABI from rte_port?
Thanks
^ permalink raw reply [relevance 4%]
* [dpdk-dev] [PATCH v1] doc: add template release notes for 16.11
@ 2016-04-12 12:01 6% John McNamara
0 siblings, 0 replies; 200+ results
From: John McNamara @ 2016-04-12 12:01 UTC (permalink / raw)
To: dev; +Cc: thomas.monjalon, John McNamara
Added template release notes for DPDK 16.11 with inline
explanations of the various sections.
Signed-off-by: John McNamara <john.mcnamara@intel.com>
---
doc/guides/rel_notes/index.rst | 1 +
doc/guides/rel_notes/release_16_11.rst | 160 +++++++++++++++++++++++++++++++++
2 files changed, 161 insertions(+)
create mode 100644 doc/guides/rel_notes/release_16_11.rst
diff --git a/doc/guides/rel_notes/index.rst b/doc/guides/rel_notes/index.rst
index 84317b8..b38a58c 100644
--- a/doc/guides/rel_notes/index.rst
+++ b/doc/guides/rel_notes/index.rst
@@ -36,6 +36,7 @@ Release Notes
:numbered:
rel_description
+ release_16_11
release_16_04
release_2_2
release_2_1
diff --git a/doc/guides/rel_notes/release_16_11.rst b/doc/guides/rel_notes/release_16_11.rst
new file mode 100644
index 0000000..1b2ca1b
--- /dev/null
+++ b/doc/guides/rel_notes/release_16_11.rst
@@ -0,0 +1,160 @@
+DPDK Release 16.11
+==================
+
+**Read this first.**
+
+The text below explains how to update the release notes.
+
+Use proper spelling, capitalization and punctuation in all sections.
+
+Variable and config names should be quoted as fixed width text: ``LIKE_THIS``.
+
+Build the docs and view the output file to ensure the changes are correct::
+
+ make doc-guides-html
+
+ firefox build/doc/html/guides/rel_notes/release_16_11.html
+
+
+New Features
+------------
+
+This section should contain new features added in this release. Sample format:
+
+* **Add a title in the past tense with a full stop.**
+
+ Add a short 1-2 sentence description in the past tense. The description
+ should be enough to allow someone scanning the release notes to understand
+ the new feature.
+
+ If the feature adds a lot of sub-features you can use a bullet list like this.
+
+ * Added feature foo to do something.
+ * Enhanced feature bar to do something else.
+
+ Refer to the previous release notes for examples.
+
+
+Resolved Issues
+---------------
+
+This section should contain bug fixes added to the relevant sections. Sample format:
+
+* **code/section Fixed issue in the past tense with a full stop.**
+
+ Add a short 1-2 sentence description of the resolved issue in the past tense.
+ The title should contain the code/lib section like a commit message.
+ Add the entries in alphabetic order in the relevant sections below.
+
+
+EAL
+~~~
+
+
+Drivers
+~~~~~~~
+
+
+Libraries
+~~~~~~~~~
+
+
+Examples
+~~~~~~~~
+
+
+Other
+~~~~~
+
+
+Known Issues
+------------
+
+This section should contain new known issues in this release. Sample format:
+
+* **Add title in present tense with full stop.**
+
+ Add a short 1-2 sentence description of the known issue in the present
+ tense. Add information on any known workarounds.
+
+
+API Changes
+-----------
+
+This section should contain API changes. Sample format:
+
+* Add a short 1-2 sentence description of the API change. Use fixed width
+ quotes for ``rte_function_names`` or ``rte_struct_names``. Use the past tense.
+
+
+ABI Changes
+-----------
+
+* Add a short 1-2 sentence description of the ABI change that was announced in
+ the previous releases and made in this release. Use fixed width quotes for
+ ``rte_function_names`` or ``rte_struct_names``. Use the past tense.
+
+
+Shared Library Versions
+-----------------------
+
+Update any library version updated in this release and prepend with a ``+`` sign.
+
+The libraries prepended with a plus sign were incremented in this version.
+
+.. code-block:: diff
+
+ libethdev.so.3
+ librte_acl.so.2
+ librte_cfgfile.so.2
+ librte_cmdline.so.2
+ librte_distributor.so.1
+ librte_eal.so.2
+ librte_hash.so.2
+ librte_ip_frag.so.1
+ librte_ivshmem.so.1
+ librte_jobstats.so.1
+ librte_kni.so.2
+ librte_kvargs.so.1
+ librte_lpm.so.2
+ librte_mbuf.so.2
+ librte_mempool.so.1
+ librte_meter.so.1
+ librte_pipeline.so.3
+ librte_pmd_bond.so.1
+ librte_pmd_ring.so.2
+ librte_port.so.2
+ librte_power.so.1
+ librte_reorder.so.1
+ librte_ring.so.1
+ librte_sched.so.1
+ librte_table.so.2
+ librte_timer.so.1
+ librte_vhost.so.2
+
+
+Tested Platforms
+----------------
+
+This section should contain a list of platforms that were tested with this
+release.
+
+The format is:
+
+#. Platform name.
+
+ - Platform details.
+ - Platform details.
+
+
+Tested NICs
+-----------
+
+This section should contain a list of NICs that were tested with this release.
+
+The format is:
+
+#. NIC name.
+
+ - NIC details.
+ - NIC details.
--
2.5.0
^ permalink raw reply [relevance 6%]
* Re: [dpdk-dev] [PATCH] vhost: ABI/API change announcement due to refactor
2016-04-10 9:58 4% ` Thomas Monjalon
@ 2016-04-10 10:02 4% ` Thomas Monjalon
0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2016-04-10 10:02 UTC (permalink / raw)
To: Yuanhan Liu; +Cc: Panu Matilainen, dev, huawei.xie, Ilya Maximets
> > > +* A librte_vhost public structures refactor is planned for DPDK 16.07
> > > + that requires both ABI and API change.
> > > + The proposed refactor would expose DPDK vhost dev to applications as
> > > + a handle, like the way kernel exposes an fd to user for locating a
> > > + specific file, and to keep all major structures internally, so that
> > > + we are likely to be free from ABI violations in future.
> >
> > Acked-by: Panu Matilainen <pmatilai@redhat.com>
> >
> > I applaud the initiative, public structs are by far the worst offender
> > when trying to maintain a stable ABI because they're so hard to
> > correctly version that hardly anybody besides glibc bothers.
>
> Yes, nice cleanup to do.
>
> Acked-by: Thomas Monjalon <thomas.monjalon@6wind.com>
Applied, thanks
^ permalink raw reply [relevance 4%]
* Re: [dpdk-dev] [PATCH] vhost: ABI/API change announcement due to refactor
2016-04-07 7:12 7% ` Panu Matilainen
@ 2016-04-10 9:58 4% ` Thomas Monjalon
2016-04-10 10:02 4% ` Thomas Monjalon
0 siblings, 1 reply; 200+ results
From: Thomas Monjalon @ 2016-04-10 9:58 UTC (permalink / raw)
To: Yuanhan Liu; +Cc: Panu Matilainen, dev, huawei.xie, Ilya Maximets
2016-04-07 10:12, Panu Matilainen:
> On 04/06/2016 09:53 AM, Yuanhan Liu wrote:
> > +* A librte_vhost public structures refactor is planned for DPDK 16.07
> > + that requires both ABI and API change.
> > + The proposed refactor would expose DPDK vhost dev to applications as
> > + a handle, like the way kernel exposes an fd to user for locating a
> > + specific file, and to keep all major structures internally, so that
> > + we are likely to be free from ABI violations in future.
>
> Acked-by: Panu Matilainen <pmatilai@redhat.com>
>
> I applaud the initiative, public structs are by far the worst offender
> when trying to maintain a stable ABI because they're so hard to
> correctly version that hardly anybody besides glibc bothers.
Yes, nice cleanup to do.
Acked-by: Thomas Monjalon <thomas.monjalon@6wind.com>
^ permalink raw reply [relevance 4%]
* Re: [dpdk-dev] [PATCH] doc: announce ABI changes for user-owned mempool caches
2016-04-08 14:01 4% ` Hunt, David
@ 2016-04-10 9:55 4% ` Thomas Monjalon
0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2016-04-10 9:55 UTC (permalink / raw)
To: Lazaros Koromilas; +Cc: dev, Hunt, David, Olivier Matz
> >> Deprecation notice for 16.04 for changes targeting release 16.07.
> >> The changes affect struct rte_mempool, rte_mempool_cache and the
> >> mempool API.
> >>
> >> Signed-off-by: Lazaros Koromilas <l@nofutznetworks.com>
> > Acked-by: Olivier Matz <olivier.matz@6wind.com>
> Acked-by: David Hunt<david.hunt@intel.com>
It is the fourth change announced for rte_mempool in 16.07.
Applied, thanks
^ permalink raw reply [relevance 4%]
* Re: [dpdk-dev] [PATCH] doc: announce ABI changes for user-owned mempool caches
@ 2016-04-08 14:01 4% ` Hunt, David
2016-04-10 9:55 4% ` Thomas Monjalon
0 siblings, 1 reply; 200+ results
From: Hunt, David @ 2016-04-08 14:01 UTC (permalink / raw)
To: Olivier Matz, Lazaros Koromilas, dev
On 4/5/2016 4:42 PM, Olivier Matz wrote:
> On 04/05/2016 11:23 AM, Lazaros Koromilas wrote:
>> Deprecation notice for 16.04 for changes targeting release 16.07.
>> The changes affect struct rte_mempool, rte_mempool_cache and the
>> mempool API.
>>
>> Signed-off-by: Lazaros Koromilas <l@nofutznetworks.com>
> Acked-by: Olivier Matz <olivier.matz@6wind.com>
>
Acked-by: David Hunt<david.hunt@intel.com>
^ permalink raw reply [relevance 4%]
* Re: [dpdk-dev] On DPDK ABI policy
2016-04-07 11:51 9% ` [dpdk-dev] On DPDK ABI policy Panu Matilainen
2016-04-07 21:52 4% ` Matthew Hall
@ 2016-04-08 8:47 9% ` Marc Sune
1 sibling, 0 replies; 200+ results
From: Marc Sune @ 2016-04-08 8:47 UTC (permalink / raw)
To: Panu Matilainen, Matthew Hall; +Cc: Thomas Monjalon, dev
2016-04-07 13:51 GMT+02:00 Panu Matilainen <pmatilai@redhat.com>:
> [ change of subject since this is about ABI policy, not namespacing ]
>
> On 04/07/2016 01:16 PM, Marc Sune wrote:
>
>>
>>
>> 2016-04-07 11:33 GMT+02:00 Panu Matilainen <pmatilai@redhat.com
>> <mailto:pmatilai@redhat.com>>:
>>
>> On 04/07/2016 12:18 PM, Thomas Monjalon wrote:
>>
>> Thank you everyone for the feedbacks.
>>
>> 2016-04-05 15:56, Thomas Monjalon:
>>
>> The goal of this email is to get some feedback on how
>> important it is
>> to fix the DPDK namespace.
>>
>>
>> Everybody agrees every symbol must be prefixed. Checking and
>> fixing the
>> namespace consistency will be in the roadmap.
>>
>> It seems most of you agree renaming would be a nice improvement
>> but not
>> so important.
>> The main drawback is the induced backporting pain, even if we have
>> some scripts to convert the patches to the old namespace.
>> Note: the backports can be in DPDK itself or in the applications.
>>
>> If there is enough agreement that we should do something, I
>> suggest to
>> introduce the "dpdk_" prefix slowly and live with both
>> "rte_" and "dpdk_"
>> during some time.
>> We could start using the new prefix for the new APIs
>> (example: crypto)
>> or when there is a significant API break (example: mempool).
>>
>>
>> The slow change has been clearly rejected in favor of a complete
>> change
>> in one patch.
>> The timing was also discussed as it could impact the pending
>> patches.
>> So it would be done at the end or the beginning of a release.
>> Marc suggests to do it for 16.04 as the numbering scheme has
>> changed.
>>
>>
>> Just noting that it cannot be done in 16.04 because the ABI policy
>> requires a deprecation cycle of at least one major release for every
>> breakage. And we're discussing a total 100% breakage of everything
>> here, even if it's just a simple rename.
>>
>>
>> I keep not understanding the ABI policy, and particularly why ABI
>> changes have to be announced one cycle before _if_ there is already at
>> least one ABI change proposed. DPDK applications will have to recompile
>> anyway.
>>
>
> The point is to allow API/ABI consumers to assess in advance what sort of
> pains can they expect when moving their applications from one version to
> another. Otherwise all sorts of massive changes could ride the wave of
> whatever "change 16bit struct member to 32bit" trivialities that are
> nevertheless ABI breaks.
>
> There have already been quite a few exceptions to the rule when the ABI is
> already being broken, so it's not entirely rigid. Another point that might
> warrant some tweaking to the policy is the "core" libraries depending on
> each other so an ABI break in any one of them forces recompile of
> everything anyway.
>
In addition to what Matthew said:
I don't understand how an announcement saying "we are going
to change this structure and this other, for those high level reasons, but
we don't know exactly how yet, because it is not fully implemented" can be
of any help to a DPDK user. At least it is not to me. This information
has to be in the release notes and users can read that before deciding to
upgrade to a new release.
On the other side, bug fixes still go to NEXT_VERSION. So in 1 out of 2
cases (so far), next release is breaking ABI, so you will have to anyway
recompile your application.
And about ABI breakages; DPDK is not a standard library/library set. For
performance reasons it has a lot of inline code and other optimizations, so
even small bug fixes can break ABI, or to be precise, some bug fixes
in inline functions may be silently ignored. I don't know how many users
really use dynamic libraries for DPDK and if there is some warning
somewhere in the documentation for that.
> This aspect of the policy only slows down DPDK development and it
>>
>
> One could also think that slowing down development and forcing people to
> think ahead are not entirely unintentional or unwanted side-effects :)
>
> Look at the latest librte_vhost initiative to remove unnecessarily exposed
> structs to avoid having to deal with ABI breakages all the time: the policy
> is effectively encouraging people into better library design.
>
> pollutes the repository with commits announcing ABI changes that are
>> irrelevant after 2 cycles, as (code) diffs show that already (not
>> mentioning NEXT_ABI complexity and extra LOCs).
>>
>
> Fully agreed on NEXT_ABI, I never liked it at all.
>
> Maintaining LTS releases, and enforcing bug fixing in old LTS first,
>> upstreaming bugfixes is to me a much better approach to solve backwards
>> compatibility issues.
>>
>
> LTS releases could help the situation somewhat, but then again people tend
> to still want those new fancy things backported (you know, have the cake
> and eat it too) but that can't be done because of ABI breakage, so they're
> forced to run the latest version anyway.
> But this is probably another discussion.
>>
>
> Yup, changed subject to avoid mixing it up with the namespace discussion
> too much.
Yes, thanks for that
Marc
>
>
> - Panu -
>
>
^ permalink raw reply [relevance 9%]
* Re: [dpdk-dev] On DPDK ABI policy
2016-04-07 21:52 4% ` Matthew Hall
@ 2016-04-08 8:29 4% ` Marc Sune
0 siblings, 0 replies; 200+ results
From: Marc Sune @ 2016-04-08 8:29 UTC (permalink / raw)
To: Matthew Hall; +Cc: Panu Matilainen, Thomas Monjalon, dev
2016-04-07 23:52 GMT+02:00 Matthew Hall <mhall@mhcomputing.net>:
> On Thu, Apr 07, 2016 at 02:51:35PM +0300, Panu Matilainen wrote:
> > LTS releases could help the situation somewhat, but then again
> > people tend to still want those new fancy things backported (you
> > know, have the cake and eat it too) but that can't be done because
> > of ABI breakage, so they're forced to run the latest version anyway.
>
> RH and Debian / Ubuntu don't put features in LTS except extremely rarely.
> Generally only if there's severe functionality breakage or security issues
> and
> the rest is ignored, and for good reason, as this is much more reliable and
> simple and predictable.
>
> If people are so irrational they can't deal with that simple of a policy,
> NEXT_ABI, LTS, etc. is never going to help them.
>
> If people like to have backported stuff, yes of course we can make trees
> and
> branches for this, they are basically free in Git. But at that point
> community
> people in need of LTS forks of features need to step up to the plate to
> help
> out.
>
Completely agree.
Marc
>
> Matthew.
>
^ permalink raw reply [relevance 4%]
* Re: [dpdk-dev] On DPDK ABI policy
2016-04-07 11:51 9% ` [dpdk-dev] On DPDK ABI policy Panu Matilainen
@ 2016-04-07 21:52 4% ` Matthew Hall
2016-04-08 8:29 4% ` Marc Sune
2016-04-08 8:47 9% ` Marc Sune
1 sibling, 1 reply; 200+ results
From: Matthew Hall @ 2016-04-07 21:52 UTC (permalink / raw)
To: Panu Matilainen; +Cc: Marc Sune, Thomas Monjalon, dev
On Thu, Apr 07, 2016 at 02:51:35PM +0300, Panu Matilainen wrote:
> LTS releases could help the situation somewhat, but then again
> people tend to still want those new fancy things backported (you
> know, have the cake and eat it too) but that can't be done because
> of ABI breakage, so they're forced to run the latest version anyway.
RH and Debian / Ubuntu don't put features in LTS except extremely rarely.
Generally only if there's severe functionality breakage or security issues and
the rest is ignored, and for good reason, as this is much more reliable and
simple and predictable.
If people are so irrational they can't deal with that simple of a policy,
NEXT_ABI, LTS, etc. is never going to help them.
If people like to have backported stuff, yes of course we can make trees and
branches for this, they are basically free in Git. But at that point community
people in need of LTS forks of features need to step up to the plate to help
out.
Matthew.
^ permalink raw reply [relevance 4%]
* Re: [dpdk-dev] DPDK namespace
2016-04-07 10:16 5% ` Marc Sune
2016-04-07 11:51 9% ` [dpdk-dev] On DPDK ABI policy Panu Matilainen
@ 2016-04-07 21:48 0% ` Matthew Hall
1 sibling, 0 replies; 200+ results
From: Matthew Hall @ 2016-04-07 21:48 UTC (permalink / raw)
To: Marc Sune; +Cc: Panu Matilainen, Thomas Monjalon, dev, techboard
On Thu, Apr 07, 2016 at 12:16:34PM +0200, Marc Sune wrote:
> I keep not understanding the ABI policy, and particularly why ABI changes
> have to be announced one cycle before _if_ there is already at least one
> ABI change proposed. DPDK applications will have to recompile anyway.
>
> This aspect of the policy only slows down DPDK development and it pollutes
> the repository with commits announcing ABI changes that are irrelevant
> after 2 cycles, as (code) diffs show that already (not mentioning NEXT_ABI
> complexity and extra LOCs).
>
> Maintaining LTS releases, and enforcing bug fixing in old LTS first,
> upstreaming bugfixes is to me a much better approach to solve backwards
> compatibility issues.
>
> But this is probably another discussion.
Yes, separate discussion. But I agree 100,000%. As a community member in my
spare time I get tripped up by NEXT_ABI pollution just trying to submit
trivial patches all the time, then I don't really have any good idea how to
fix it, and I have to annoy Thomas with dumb questions across the time zones.
I would really prefer to dump all the drama about ABIs and make a maintenance
only LTS release which only gets bug fixes people specifically need and not
random fixes or features.
Matthew.
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH] doc: announce ABI change for rte_port_source_params structure
2016-04-06 8:51 4% ` Azarewicz, PiotrX T
@ 2016-04-07 21:24 4% ` Thomas Monjalon
2016-04-12 12:39 4% ` Thomas Monjalon
0 siblings, 1 reply; 200+ results
From: Thomas Monjalon @ 2016-04-07 21:24 UTC (permalink / raw)
To: Zhang, Roy Fan; +Cc: dev, Azarewicz, PiotrX T, Singh, Jasvinder
> > > Several new fields will be added to structure rte_port_source_params
> > > for source port enhancement with pcap file reading support.
> > >
> > > Signed-off-by: Fan Zhang <roy.fan.zhang@intel.com>
> > > Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
> >
> > Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
>
> Acked-by: Piotr Azarewicz <piotrx.t.azarewicz@intel.com>
Applied, thanks
Please send a patch to remove NEXT_ABI early in the 16.07 cycle.
^ permalink raw reply [relevance 4%]
* Re: [dpdk-dev] [PATCH] doc: announce API changes for device objects
2016-04-07 17:09 3% ` Jan Viktorin
@ 2016-04-07 17:24 0% ` David Marchand
0 siblings, 0 replies; 200+ results
From: David Marchand @ 2016-04-07 17:24 UTC (permalink / raw)
To: Jan Viktorin; +Cc: dev, Olivier Matz, Thomas Monjalon
On Thu, Apr 7, 2016 at 7:09 PM, Jan Viktorin <viktorin@rehivetech.com> wrote:
> On Thu, 7 Apr 2016 19:00:43 +0200
> David Marchand <david.marchand@6wind.com> wrote:
>> >> Following discussions with Jan, here is a deprecation notice to prepare for
>> >> hotplug and rte_device changes to come in 16.07.
>> > As a result, the current rte_driver structure will be renamed to
>> > rte_module and probably reworked in some way due to its semantics and
>> > potential name clash with the new rte_driver struct.
>>
>> If we just introduce some macros like RTE_MODULE_INIT() /
>> RTE_MODULE_EXIT(), we don't need a rte_module object at the moment ?
>>
>
> Well, possibly, we don't need it. At least, it might be hidden and not
> being a part of the API/ABI. Do you need an ack for this?
Yes, please, the process requires 3 acks for this kind of changes.
--
David Marchand
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH] doc: announce API changes for device objects
2016-04-07 17:00 0% ` David Marchand
@ 2016-04-07 17:09 3% ` Jan Viktorin
2016-04-07 17:24 0% ` David Marchand
0 siblings, 1 reply; 200+ results
From: Jan Viktorin @ 2016-04-07 17:09 UTC (permalink / raw)
To: David Marchand; +Cc: dev, Olivier Matz, Thomas Monjalon
On Thu, 7 Apr 2016 19:00:43 +0200
David Marchand <david.marchand@6wind.com> wrote:
> On Thu, Apr 7, 2016 at 5:46 PM, Jan Viktorin <viktorin@rehivetech.com> wrote:
> > On Thu, 7 Apr 2016 17:33:17 +0200
> > David Marchand <david.marchand@6wind.com> wrote:
> >
> >> Following discussions with Jan, here is a deprecation notice to prepare for
> >> hotplug and rte_device changes to come in 16.07.
> >>
> >> Signed-off-by: David Marchand <david.marchand@6wind.com>
> >> ---
> >> doc/guides/rel_notes/deprecation.rst | 12 ++++++++++++
> >> 1 file changed, 12 insertions(+)
> >>
> >> diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
> >> index 98d5529..d749e5d 100644
> >> --- a/doc/guides/rel_notes/deprecation.rst
> >> +++ b/doc/guides/rel_notes/deprecation.rst
> >> @@ -8,6 +8,18 @@ API and ABI deprecation notices are to be posted here.
> >> Deprecation Notices
> >> -------------------
> >>
> >> +* The ethdev hotplug API is going to be moved to EAL with a notification
> >> + mechanism added to crypto and ethdev libraries so that hotplug is now
> >> + available to both of them. This API will be stripped of the device arguments
> >> + so that it only cares about hotplugging.
> >> +
> >> +* Structures embodying pci and vdev devices are going to be reworked to
> >> + integrate new common rte_device / rte_driver objects (see
> >> + http://dpdk.org/ml/archives/dev/2016-January/031390.html).
> >> + ethdev and crypto libraries will then only handle those objects so that they
> >> + do not need to care about the kind of devices that are being used, making it
> >> + easier to add new buses later.
> >
> > As a result, the current rte_driver structure will be renamed to
> > rte_module and probably reworked in some way due to its semantics and
> > potential name clash with the new rte_driver struct.
>
> If we just introduce some macros like RTE_MODULE_INIT() /
> RTE_MODULE_EXIT(), we don't need a rte_module object at the moment ?
>
Well, possibly, we don't need it. At least, it might be hidden and not
being a part of the API/ABI. Do you need an ack for this?
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH] doc: announce API changes for device objects
2016-04-07 15:46 0% ` Jan Viktorin
@ 2016-04-07 17:00 0% ` David Marchand
2016-04-07 17:09 3% ` Jan Viktorin
0 siblings, 1 reply; 200+ results
From: David Marchand @ 2016-04-07 17:00 UTC (permalink / raw)
To: Jan Viktorin; +Cc: dev, Olivier Matz, Thomas Monjalon
On Thu, Apr 7, 2016 at 5:46 PM, Jan Viktorin <viktorin@rehivetech.com> wrote:
> On Thu, 7 Apr 2016 17:33:17 +0200
> David Marchand <david.marchand@6wind.com> wrote:
>
>> Following discussions with Jan, here is a deprecation notice to prepare for
>> hotplug and rte_device changes to come in 16.07.
>>
>> Signed-off-by: David Marchand <david.marchand@6wind.com>
>> ---
>> doc/guides/rel_notes/deprecation.rst | 12 ++++++++++++
>> 1 file changed, 12 insertions(+)
>>
>> diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
>> index 98d5529..d749e5d 100644
>> --- a/doc/guides/rel_notes/deprecation.rst
>> +++ b/doc/guides/rel_notes/deprecation.rst
>> @@ -8,6 +8,18 @@ API and ABI deprecation notices are to be posted here.
>> Deprecation Notices
>> -------------------
>>
>> +* The ethdev hotplug API is going to be moved to EAL with a notification
>> + mechanism added to crypto and ethdev libraries so that hotplug is now
>> + available to both of them. This API will be stripped of the device arguments
>> + so that it only cares about hotplugging.
>> +
>> +* Structures embodying pci and vdev devices are going to be reworked to
>> + integrate new common rte_device / rte_driver objects (see
>> + http://dpdk.org/ml/archives/dev/2016-January/031390.html).
>> + ethdev and crypto libraries will then only handle those objects so that they
>> + do not need to care about the kind of devices that are being used, making it
>> + easier to add new buses later.
>
> As a result, the current rte_driver structure will be renamed to
> rte_module and probably reworked in some way due to its semantics and
> potential name clash with the new rte_driver struct.
If we just introduce some macros like RTE_MODULE_INIT() /
RTE_MODULE_EXIT(), we don't need a rte_module object at the moment ?
--
David Marchand
^ permalink raw reply [relevance 0%]
* [dpdk-dev] [PATCH v1] doc: fix release notes for 16.04
@ 2016-04-07 16:02 8% John McNamara
0 siblings, 0 replies; 200+ results
From: John McNamara @ 2016-04-07 16:02 UTC (permalink / raw)
To: dev; +Cc: thomas.monjalon, John McNamara
Fix grammar, spelling and formatting of DPDK 16.04 release notes.
Signed-off-by: John McNamara <john.mcnamara@intel.com>
---
doc/guides/rel_notes/release_16_04.rst | 266 +++++++++++++--------------------
1 file changed, 104 insertions(+), 162 deletions(-)
diff --git a/doc/guides/rel_notes/release_16_04.rst b/doc/guides/rel_notes/release_16_04.rst
index e293960..200053c 100644
--- a/doc/guides/rel_notes/release_16_04.rst
+++ b/doc/guides/rel_notes/release_16_04.rst
@@ -2,39 +2,9 @@ DPDK Release 16.04
==================
-**Read this first**
-
-The text below explains how to update the release notes.
-
-Use proper spelling, capitalization and punctuation in all sections.
-
-Variable and config names should be quoted as fixed width text: ``LIKE_THIS``.
-
-Build the docs and view the output file to ensure the changes are correct::
-
- make doc-guides-html
-
- firefox build/doc/html/guides/rel_notes/release_16_04.html
-
-
New Features
------------
-This section should contain new features added in this release. Sample format:
-
-* **Add a title in the past tense with a full stop.**
-
- Add a short 1-2 sentence description in the past tense. The description
- should be enough to allow someone scanning the release notes to understand
- the new feature.
-
- If the feature adds a lot of sub-features you can use a bullet list like this.
-
- * Added feature foo to do something.
- * Enhanced feature bar to do something else.
-
- Refer to the previous release notes for examples.
-
* **Added function to check primary process state.**
A new function ``rte_eal_primary_proc_alive()`` has been added
@@ -45,141 +15,145 @@ This section should contain new features added in this release. Sample format:
* **Enabled bulk allocation of mbufs.**
A new function ``rte_pktmbuf_alloc_bulk()`` has been added to allow the user
- to allocate a bulk of mbufs.
+ to bulk allocate mbufs.
* **Added device link speed capabilities.**
- The structure ``rte_eth_dev_info`` has now a ``speed_capa`` bitmap, which
- allows the application to know the supported speeds of each device.
+ The structure ``rte_eth_dev_info`` now has a ``speed_capa`` bitmap, which
+ allows the application to determine the supported speeds of each device.
* **Added bitmap of link speeds to advertise.**
- Allow defining a set of advertised speeds for auto-negotiation,
+ Added a feature to allow the definition of a set of advertised speeds for auto-negotiation,
explicitly disabling link auto-negotiation (single speed)
and full auto-negotiation.
* **Added new poll-mode driver for Amazon Elastic Network Adapters (ENA).**
- The driver operates variety of ENA adapters through feature negotiation
+ The driver operates for a variety of ENA adapters through feature negotiation
with the adapter and upgradable commands set.
- ENA driver handles PCI Physical and Virtual ENA functions.
+ The ENA driver handles PCI Physical and Virtual ENA functions.
-* **Restored vmxnet3 Tx data ring.**
+* **Restored vmxnet3 TX data ring.**
- Tx data ring has been shown to improve small pkt forwarding performance
- on vSphere environment.
+ TX data ring has been shown to improve small packet forwarding performance
+ on the vSphere environment.
-* **Added vmxnet3 Tx L4 checksum offload.**
+* **Added vmxnet3 TX L4 checksum offload.**
- Support TCP/UDP checksum offload.
+ Added support for TCP/UDP checksum offload to vmxnet3.
* **Added vmxnet3 TSO support.**
+ Added support for TSO to vmxnet3.
+
* **Added vmxnet3 support for jumbo frames.**
Added support for linking multi-segment buffers together to
handle Jumbo packets.
-* **Virtio 1.0.**
+* **Enabled Virtio 1.0 support.**
- Enabled virtio 1.0 support for virtio pmd driver.
+ Enabled Virtio 1.0 support for Virtio pmd driver.
-* **Supported virtio for ARM.**
+* **Supported Virtio for ARM.**
- Enabled virtio support for armv7/v8. Tested for arm64.
- Virtio for arm support VFIO-noiommu mode only.
- Virtio can work with other non-x86 arch too like powerpc.
+ Enabled Virtio support for ARMv7/v8. Tested for ARM64.
+ Virtio for ARM supports VFIO-noiommu mode only.
+ Virtio can work with other non-x86 architectures as well, like PowerPC.
-* **Supported virtio offload in vhost-user.**
+* **Supported Virtio offload in vhost-user.**
- Add the offload and negotiation of checksum and TSO between vhost-user and
- vanilla Linux virtio guest.
+ Added the offload and negotiation of checksum and TSO between vhost-user and
+ vanilla Linux Virtio guest.
* **Added vhost-user live migration support.**
* **Added vhost driver.**
- Added virtual PMD that wraps librte_vhost.
+ Added a virtual PMD that wraps ``librte_vhost``.
* **Added multicast promiscuous mode support on VF for ixgbe.**
- Added multicast promiscuous mode support on ixgbe VF driver. So all the VFs
+ Added multicast promiscuous mode support for the ixgbe VF driver so all VFs
can receive the multicast packets.
- Please note if we want to use this promiscuous mode, we need both PF and VF
- driver to support it. The reason is this VF feature is configured on PF.
- If use kernel PF driver + dpdk VF driver, make sure kernel PF driver support
- VF multicast promiscuous mode. If use dpdk PF + dpdk VF, better make sure PF
- driver is the same version as VF.
+ Please note if you want to use this promiscuous mode, you need both PF and VF
+ driver to support it. The reason is that this VF feature is configured in the PF.
+ If you use kernel PF driver and the dpdk VF driver, make sure the kernel PF driver supports
+ VF multicast promiscuous mode. If you use dpdk PF and dpdk VF ensure the PF
+ driver is the same version as the VF.
* **Added support for E-tag on X550.**
- E-tag is defined in 802.1br. Please reference
- http://www.ieee802.org/1/pages/802.1br.html.
+ E-tag is defined in `802.1BR - Bridge Port Extension <http://www.ieee802.org/1/pages/802.1br.html>`_.
- This feature is for VF, but the settings are on PF. It means
- the CLIs should be used on PF, but some of their effects will be shown on VF.
- The forwarding of E-tag packets based on GRP and E-CID_base will have effect
- on PF. Theoretically the E-tag packets can be forwarded to any pool/queue.
- But normally we'd like to forward the packets to the pools/queues belonging
- to the VFs. And E-tag insertion and stripping will have effect on VFs. When
- VF receives E-tag packets, it should strip the E-tag. When VF transmits
- packets, it should insert the E-tag. Both can be offloaded.
+ This feature is for the VF, but the settings are on the PF. It means
+ the CLIs should be used on the PF, but some of their effects will be shown on the VF.
+ The forwarding of E-tag packets based on GRP and E-CID_base will have an effect
+ on the PF. Theoretically, the E-tag packets can be forwarded to any pool/queue
+ but normally we'd like to forward the packets to the pools/queues belonging
+ to the VFs. And E-tag insertion and stripping will have an effect on VFs. When
+ a VF receives E-tag packets it should strip the E-tag. When the VF transmits
+ packets, it should insert the E-tag. Both actions can be offloaded.
When we want to use this E-tag support feature, the forwarding should be
- enabled to forward the packets received by PF to indicated VFs. And insertion
- and stripping should be enabled for VFs to offload the effort to HW.
+ enabled to forward the packets received by the PF to the indicated VFs. And insertion
+ and stripping should be enabled for VFs to offload the effort to hardware.
+
+ Features added:
* Support E-tag offloading of insertion and stripping.
* Support Forwarding E-tag packets to pools based on
GRP and E-CID_base.
-* **Added support for VxLAN & NVGRE checksum off-load on X550.**
+* **Added support for VxLAN and NVGRE checksum off-load on X550.**
- * Added support for VxLAN & NVGRE RX/TX checksum off-load on
+ * Added support for VxLAN and NVGRE RX/TX checksum off-load on
X550. RX/TX checksum off-load is provided on both inner and
outer IP header and TCP header.
* Added functions to support VxLAN port configuration. The
default VxLAN port number is 4789 but this can be updated
programmatically.
-* **Added new X550EM_a devices.**
+* **Added support for new X550EM_a devices.**
- Added new X550EM_a devices and their mac types, X550EM_a and X550EM_a_vf.
- Updated the code to use the new devices and mac types.
+ Added support for new X550EM_a devices and their MAC types, X550EM_a and X550EM_a_vf.
+ Updated the relevant PMD to use the new devices and MAC types.
* **Added x550em_x V2 device support.**
- Only x550em_x V1 was supported before. Now V2 is supported.
+ Added support for x550em_x V2 device. Only x550em_x V1 was supported before.
A mask for V1 and V2 is defined and used to support both.
* **Supported link speed auto-negotiation on X550EM_X**
- Normally the auto-negotiation is supported by FW. SW need not care about
- that. But on x550em_x, FW doesn't support auto-neg. As the ports of x550em_x
- are 10G, if we connect the port with a peer which is 1G, the link will always
+ Normally the auto-negotiation is supported by firmware and software doesn't care about
+ it. But on x550em_x, firmware doesn't support auto-negotiation. As the ports of x550em_x
+ are 10GbE, if we connect the port with a peer which is 1GbE, the link will always
be down.
- We added the support of auto-neg by SW to avoid this link down issue.
+ We added the support for auto-negotiation by software to avoid this link down issue.
-* **Added sw-firmware sync on X550EM_a.**
+* **Added software-firmware sync on X550EM_a.**
- Added support for sw-firmware sync for resource sharing.
- Use the PHY token, shared between sw-fw for PHY access on X550EM_a.
+ Added support for software-firmware sync for resource sharing.
+ Use the PHY token, shared between software-firmware for PHY access on X550EM_a.
* **Updated the i40e base driver.**
The i40e base driver was updated with changes including the
following:
- * Use Rx control AQ commands to read/write Rx control registers.
+ * Use RX control AQ commands to read/write RX control registers.
* Add new X722 device IDs, and removed X710 one was never used.
* Expose registers for HASH/FD input set configuring.
* **Enabled PCI extended tag for i40e.**
- It enabled extended tag by checking and writing corresponding PCI config
- space bytes, to boost the performance. In the meanwhile, it deprecated the
- legacy way via reading/writing sysfile supported by kernel module igb_uio.
+ Enabled extended tag for i40e by checking and writing corresponding PCI config
+ space bytes, to boost the performance.
+ The legacy method of reading/writing sysfile supported by kernel module igb_uio
+ is now deprecated.
* **Added i40e support for setting mac addresses.**
@@ -197,22 +171,22 @@ This section should contain new features added in this release. Sample format:
* **Added PF reset event reporting in i40e VF driver.**
-* **Added fm10k Rx interrupt support.**
+* **Added fm10k RX interrupt support.**
-* **Optimized fm10k Tx.**
+* **Optimized fm10k TX.**
- * Free multiple mbufs at a time to reduce freeing mbuf cycles.
+ Optimized fm10k TX by freeing multiple mbufs at a time.
-* **Handled error flags in fm10k vector Rx.**
+* **Handled error flags in fm10k vector RX.**
- Parse err flags in Rx desc and set error bits in mbuf with vector instructions.
+ Parse error flags in RX descriptor and set error bits in mbuf with vector instructions.
* **Added fm10k FTAG based forwarding support.**
* **Added mlx5 flow director support.**
- Added flow director support (RTE_FDIR_MODE_PERFECT and
- RTE_FDIR_MODE_PERFECT_MAC_VLAN).
+ Added flow director support (``RTE_FDIR_MODE_PERFECT`` and
+ ``RTE_FDIR_MODE_PERFECT_MAC_VLAN``).
Only available with Mellanox OFED >= 3.2.
@@ -238,7 +212,7 @@ This section should contain new features added in this release. Sample format:
* **Added mlx5 optional packet padding by HW.**
- Added an option to make PCI bus transactions rounded to multiple of a
+ Added an option to make PCI bus transactions rounded to a multiple of a
cache line size for better alignment.
Only available with Mellanox OFED >= 3.2.
@@ -249,10 +223,10 @@ This section should contain new features added in this release. Sample format:
Only available with Mellanox OFED >= 3.2.
-* **Changed szedata2 type of driver from vdev to pdev.**
+* **Changed szedata2 driver type from vdev to pdev.**
Previously szedata2 device had to be added by ``--vdev`` option.
- Now szedata2 PMD recognises the device automatically during EAL
+ Now szedata2 PMD recognizes the device automatically during EAL
initialization.
* **Added szedata2 functions for setting link up/down.**
@@ -261,17 +235,17 @@ This section should contain new features added in this release. Sample format:
* **Added af_packet dynamic removal function.**
- Af_packet device can now be detached using API, like other PMD devices.
+ An af_packet device can now be detached using the API, like other PMD devices.
* **Increased number of next hops for LPM IPv4 to 2^24.**
- The next_hop field is extended from 8 bits to 24 bits for IPv4.
+ The ``next_hop`` field has been extended from 8 bits to 24 bits for IPv4.
* **Added support of SNOW 3G (UEA2 and UIA2) for Intel Quick Assist devices.**
- Enabled support for SNOW 3G wireless algorithm for Intel Quick Assist devices.
- Support for cipher only, hash only is also provided
- along with alg-chaining operations.
+ Enabled support for the SNOW 3G wireless algorithm for Intel Quick Assist devices.
+ Support for cipher-only and hash-only is also provided
+ along with algorithm-chaining operations.
* **Added SNOW3G SW PMD.**
@@ -281,31 +255,29 @@ This section should contain new features added in this release. Sample format:
* **Added AES GCM PMD.**
Added new Crypto PMD to support AES-GCM authenticated encryption and
- authenticated decryption in SW.
+ authenticated decryption in software.
* **Added NULL Crypto PMD**
- Added new Crypto PMD to support null crypto operations in SW.
+ Added new Crypto PMD to support null crypto operations in software.
* **Improved IP Pipeline Application.**
The following features have been added to ip_pipeline application;
* Added CPU utilization measurement and idle cycle rate computation.
- * Added link idenfication support through existing port-mask option or by
+ * Added link identification support through existing port-mask option or by
specifying PCI device in every LINK section in the configuration file.
* Added load balancing support in passthrough pipeline.
* **Added IPsec security gateway example.**
- New application implementing an IPsec Security Gateway.
+ Added a new application implementing an IPsec Security Gateway.
Resolved Issues
---------------
-This section should contain bug fixes added to the relevant sections. Sample format:
-
* **code/section: Fixed issue in the past tense with a full stop.**
Add a short 1-2 sentence description of the resolved issue in the past tense.
@@ -313,21 +285,17 @@ This section should contain bug fixes added to the relevant sections. Sample for
Add the entries in alphabetic order in the relevant sections below.
-EAL
-~~~
-
-
Drivers
~~~~~~~
* **ethdev: Fixed overflow for 100Gbps.**
- 100Gbps in Mbps (100000) was exceeding 16-bit max value of ``link_speed``
+ 100Gbps in Mbps (100000) was exceeding the 16-bit max value of ``link_speed``
in ``rte_eth_link``.
* **ethdev: Fixed byte order consistency between fdir flow and mask.**
- Fixed issue in ethdev library that the structure for setting
+ Fixed issue in ethdev library where the structure for setting
fdir's mask and flow entry was not consistent in byte ordering.
* **cxgbe: Fixed crash due to incorrect size allocated for RSS table.**
@@ -338,12 +306,12 @@ Drivers
* **cxgbe: Fixed setting wrong device MTU.**
- Fixed an incorrect device MTU being set due to ethernet header and
+ Fixed an incorrect device MTU being set due to the Ethernet header and
CRC lengths being added twice.
* **ixgbe: Fixed zeroed VF mac address.**
- Resolved an issue where VF mac address is zeroed out in cases where the VF
+ Resolved an issue where the VF MAC address is zeroed out in cases where the VF
driver is loaded while the PF interface is down.
The solution is to only set it when we get an ACK from the PF.
@@ -370,7 +338,7 @@ Drivers
It generates a MAC address for each VFs during PF host initialization,
and keeps the VF MAC address the same among different VF launch.
-* **i40e: Fixed failure of reading/writing Rx control registers.**
+* **i40e: Fixed failure of reading/writing RX control registers.**
Fixed i40e issue of failing to read/write rx control registers when
under stress with traffic, which might result in application launch
@@ -378,14 +346,14 @@ Drivers
* **i40e: Enabled vector driver by default.**
- Previously, vector driver is disabled by default as it cannot fill packet type
- info for l3fwd to work well. Now there is an option for l3fwd to analysis
- packet type softly, so enable vector driver by default.
+ Previously, vector driver was disabled by default as it couldn't fill packet type
+ info for l3fwd to work well. Now there is an option for l3fwd to analyze
+ the packet type so the vector driver is enabled by default.
* **i40e: Fixed link info of VF.**
- Previously, the VF's link speed kept as 10G and status always was up.
- It did not change even the physical link's status changed.
+ Previously, the VF's link speed stayed at 10GbE and status always was up.
+ It did not change even when the physical link's status changed.
Now this issue is fixed to make VF's link info consistent with physical link.
* **mlx5: Fixed possible crash during initialization.**
@@ -394,7 +362,7 @@ Drivers
* **mlx5: Added port type check.**
- Done to prevent port initialization on non-Ethernet link layers and
+ Added port type check to prevent port initialization on non-Ethernet link layers and
to report an error.
* **mlx5: Applied VLAN filtering to broadcast and IPv6 multicast flows.**
@@ -407,10 +375,10 @@ Drivers
* **aesni_mb: Fixed wrong return value when creating a device.**
- cryptodev_aesni_mb_init() was returning the device id of the device created,
- instead of 0 (when success), that rte_eal_vdev_init() expects.
- This made impossible the creation of more than one aesni_mb device
- from command line.
+ The ``cryptodev_aesni_mb_init()`` function was returning the device id of the device created,
+ instead of 0 (on success) that ``rte_eal_vdev_init()`` expects.
+ This made it impossible to create more than one aesni_mb device
+ from the command line.
* **qat: Fixed AES GCM decryption.**
@@ -424,7 +392,7 @@ Libraries
* **hash: Fixed CRC32c hash computation for non multiple of 4 bytes sizes.**
Fix crc32c hash functions to return a valid crc32c value for data lengths
- not multiple of 4 bytes.
+ not a multiple of 4 bytes.
* **hash: Fixed hash library to support multi-process mode.**
@@ -440,7 +408,7 @@ Libraries
``rte_errno`` to ``EEXIST`` when the object name already exists. This is
the behavior described in the API documentation in the header file.
The previous behavior was to return a pointer to the existing object in
- that case, preventing the caller to know if the object had to be freed
+ that case, preventing the caller from knowing if the object had to be freed
or not.
* **lpm: Fixed return value when allocating an existing object.**
@@ -449,7 +417,7 @@ Libraries
``rte_errno`` to ``EEXIST`` when the object name already exists. This is
the behavior described in the API documentation in the header file.
The previous behavior was to return a pointer to the existing object in
- that case, preventing the caller to know if the object had to be freed
+ that case, preventing the caller from knowing if the object had to be freed
or not.
* **librte_port: Fixed segmentation fault for ring and ethdev writer nodrop.**
@@ -468,39 +436,19 @@ Examples
* **l3fwd: Fixed using packet type blindly.**
- l3fwd makes use of packet type information without even query if devices or PMDs
- really set it. For those don't set ptypes, add an option to parse it softly.
+ l3fwd makes use of packet type information without querying if devices or PMDs
+ really set it. For those devices that don't set ptypes, add an option to parse it.
* **examples/vhost: Fixed frequent mbuf allocation failure.**
- vhost-switch often fails to allocate mbuf when dequeue from vring because it
+ The vhost-switch often fails to allocate mbuf when dequeue from vring because it
wrongly calculates the number of mbufs needed.
-Other
-~~~~~
-
-
-Known Issues
-------------
-
-This section should contain new known issues in this release. Sample format:
-
-* **Add title in present tense with full stop.**
-
- Add a short 1-2 sentence description of the known issue in the present
- tense. Add information on any known workarounds.
-
-
API Changes
-----------
-This section should contain API changes. Sample format:
-
-* Add a short 1-2 sentence description of the API change. Use fixed width
- quotes for ``rte_function_names`` or ``rte_struct_names``. Use the past tense.
-
-* The ethdev statistics counter imissed is considered to be independent of ierrors.
+* The ethdev statistics counter ``imissed`` is considered to be independent of ``ierrors``.
All drivers are now counting the missed packets only once, i.e. drivers will
not increment ierrors anymore for missed packets.
@@ -524,13 +472,13 @@ This section should contain API changes. Sample format:
* A parameter ``vlan_type`` has been added to the function
``rte_eth_dev_set_vlan_ether_type``.
-* AF_packet device init function is no longer public. Device should be attached
- with API.
+* The af_packet device init function is no longer public. The device should be attached
+ via the API.
* The LPM ``next_hop`` field is extended from 8 bits to 24 bits for IPv4
while keeping ABI compatibility.
-* A new ``rte_lpm_config`` structure is used so LPM library will allocate
+* A new ``rte_lpm_config`` structure is used so the LPM library will allocate
exactly the amount of memory which is necessary to hold application’s rules.
The previous ABI is kept for compatibility.
@@ -542,10 +490,6 @@ This section should contain API changes. Sample format:
ABI Changes
-----------
-* Add a short 1-2 sentence description of the ABI change that was announced in
- the previous releases and made in this release. Use fixed width quotes for
- ``rte_function_names`` or ``rte_struct_names``. Use the past tense.
-
* The RETA entry size in ``rte_eth_rss_reta_entry64`` has been increased
from 8-bit to 16-bit.
@@ -558,8 +502,6 @@ ABI Changes
Shared Library Versions
-----------------------
-Update any library version updated in this release and prepend with a ``+`` sign.
-
The libraries prepended with a plus sign were incremented in this version.
.. code-block:: diff
--
2.5.0
^ permalink raw reply [relevance 8%]
* Re: [dpdk-dev] [PATCH] doc: announce API changes for device objects
2016-04-07 15:33 5% [dpdk-dev] [PATCH] doc: announce API changes for device objects David Marchand
@ 2016-04-07 15:46 0% ` Jan Viktorin
2016-04-07 17:00 0% ` David Marchand
0 siblings, 1 reply; 200+ results
From: Jan Viktorin @ 2016-04-07 15:46 UTC (permalink / raw)
To: David Marchand; +Cc: dev, olivier.matz, thomas.monjalon
On Thu, 7 Apr 2016 17:33:17 +0200
David Marchand <david.marchand@6wind.com> wrote:
> Following discussions with Jan, here is a deprecation notice to prepare for
> hotplug and rte_device changes to come in 16.07.
>
> Signed-off-by: David Marchand <david.marchand@6wind.com>
> ---
> doc/guides/rel_notes/deprecation.rst | 12 ++++++++++++
> 1 file changed, 12 insertions(+)
>
> diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
> index 98d5529..d749e5d 100644
> --- a/doc/guides/rel_notes/deprecation.rst
> +++ b/doc/guides/rel_notes/deprecation.rst
> @@ -8,6 +8,18 @@ API and ABI deprecation notices are to be posted here.
> Deprecation Notices
> -------------------
>
> +* The ethdev hotplug API is going to be moved to EAL with a notification
> + mechanism added to crypto and ethdev libraries so that hotplug is now
> + available to both of them. This API will be stripped of the device arguments
> + so that it only cares about hotplugging.
> +
> +* Structures embodying pci and vdev devices are going to be reworked to
> + integrate new common rte_device / rte_driver objects (see
> + http://dpdk.org/ml/archives/dev/2016-January/031390.html).
> + ethdev and crypto libraries will then only handle those objects so that they
> + do not need to care about the kind of devices that are being used, making it
> + easier to add new buses later.
As a result, the current rte_driver structure will be renamed to
rte_module and probably reworked in some way due to its semantics and
potential name clash with the new rte_driver struct.
Regards
Jan
> +
> * The EAL function pci_config_space_set is deprecated in release 16.04
> and will be removed from 16.07.
> Macros CONFIG_RTE_PCI_CONFIG, CONFIG_RTE_PCI_EXTENDED_TAG and
--
Jan Viktorin E-mail: Viktorin@RehiveTech.com
System Architect Web: www.RehiveTech.com
RehiveTech
Brno, Czech Republic
^ permalink raw reply [relevance 0%]
* [dpdk-dev] [PATCH] doc: announce API changes for device objects
@ 2016-04-07 15:33 5% David Marchand
2016-04-07 15:46 0% ` Jan Viktorin
0 siblings, 1 reply; 200+ results
From: David Marchand @ 2016-04-07 15:33 UTC (permalink / raw)
To: dev; +Cc: viktorin, olivier.matz, thomas.monjalon
Following discussions with Jan, here is a deprecation notice to prepare for
hotplug and rte_device changes to come in 16.07.
Signed-off-by: David Marchand <david.marchand@6wind.com>
---
doc/guides/rel_notes/deprecation.rst | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index 98d5529..d749e5d 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -8,6 +8,18 @@ API and ABI deprecation notices are to be posted here.
Deprecation Notices
-------------------
+* The ethdev hotplug API is going to be moved to EAL with a notification
+ mechanism added to crypto and ethdev libraries so that hotplug is now
+ available to both of them. This API will be stripped of the device arguments
+ so that it only cares about hotplugging.
+
+* Structures embodying pci and vdev devices are going to be reworked to
+ integrate new common rte_device / rte_driver objects (see
+ http://dpdk.org/ml/archives/dev/2016-January/031390.html).
+ ethdev and crypto libraries will then only handle those objects so that they
+ do not need to care about the kind of devices that are being used, making it
+ easier to add new buses later.
+
* The EAL function pci_config_space_set is deprecated in release 16.04
and will be removed from 16.07.
Macros CONFIG_RTE_PCI_CONFIG, CONFIG_RTE_PCI_EXTENDED_TAG and
--
1.9.1
^ permalink raw reply [relevance 5%]
* [dpdk-dev] On DPDK ABI policy
2016-04-07 10:16 5% ` Marc Sune
@ 2016-04-07 11:51 9% ` Panu Matilainen
2016-04-07 21:52 4% ` Matthew Hall
2016-04-08 8:47 9% ` Marc Sune
2016-04-07 21:48 0% ` [dpdk-dev] DPDK namespace Matthew Hall
1 sibling, 2 replies; 200+ results
From: Panu Matilainen @ 2016-04-07 11:51 UTC (permalink / raw)
To: Marc Sune; +Cc: Thomas Monjalon, dev
[ change of subject since this is about ABI policy, not namespacing ]
On 04/07/2016 01:16 PM, Marc Sune wrote:
>
>
> 2016-04-07 11:33 GMT+02:00 Panu Matilainen <pmatilai@redhat.com
> <mailto:pmatilai@redhat.com>>:
>
> On 04/07/2016 12:18 PM, Thomas Monjalon wrote:
>
> Thank you everyone for the feedbacks.
>
> 2016-04-05 15:56, Thomas Monjalon:
>
> The goal of this email is to get some feedback on how
> important it is
> to fix the DPDK namespace.
>
>
> Everybody agree every symbols must be prefixed. Checking and
> fixing the
> namespace consistency will be in the roadmap.
>
> It seems most of you agree renaming would be a nice improvement
> but not
> so important.
> The main drawback is the induced backporting pain, even if we have
> some scripts to convert the patches to the old namespace.
> Note: the backports can be in DPDK itself or in the applications.
>
> If there is enough agreement that we should do something, I
> suggest to
> introduce the "dpdk_" prefix slowly and live with both
> "rte_" and "dpdk_"
> during some time.
> We could start using the new prefix for the new APIs
> (example: crypto)
> or when there is a significant API break (example: mempool).
>
>
> The slow change has been clearly rejected in favor of a complete
> change
> in one patch.
> The timing was also discussed as it could impact the pending
> patches.
> So it would be done at the end or the beginning of a release.
> Marc suggests to do it for 16.04 as the numbering scheme has
> changed.
>
>
> Just noting that it cannot be done in 16.04 because the ABI policy
> requires a deprecation cycle of at least one major release for every
> breakage. And we're discussing a total 100% breakage of everything
> here, even if its just a simple rename.
>
>
> I keep not understanding the ABI policy, and particularly why ABI
> changes have to be announced once cycle before _if_ there is already at
> least one ABI change proposed. DPDK applications will have to recompile
> anyway.
The point is to allow API/ABI consumers to assess in advance what sort
of pain they can expect when moving their applications from one version
to another. Otherwise all sorts of massive changes could ride the wave
of whatever "change 16bit struct member to 32bit" trivialities that are
nevertheless ABI breaks.
There have already been quite a few exceptions to the rule when the ABI
is already being broken, so it's not entirely rigid. Another point that
might warrant some tweaking to the policy is the "core" libraries
depending on each other so an ABI break in any one of them forces
recompile of everything anyway.
> This aspect of the policy only slows down DPDK development and it
One could also think that slowing down development and forcing people to
think ahead are not entirely unintentional or unwanted side-effects :)
Look at the latest librte_vhost initiative to remove unnecessarily
exposed structs to avoid having to deal with ABI breakages all the time:
the policy is effectively encouraging people into better library design.
> pollutes the repository with commits announcing ABI changes that are
> irrelevant after 2 cycles, as (code) diffs show that already (not
> mentioning NEXT_ABI complexity and extra LOCs).
Fully agreed on NEXT_ABI, I never liked it at all.
> Maintaining LTS releases, and enforcing bug fixing in old LTS first,
> upstreaming bugfixes is to me a much better approach to solve backwards
> compatibility issues.
LTS releases could help the situation somewhat, but then again people
tend to still want those new fancy things backported (you know, have the
cake and eat it too) but that can't be done because of ABI breakage, so
they're forced to run the latest version anyway.
> But this is probably another discussion.
Yup, changed subject to avoid mixing it up with the namespace discussion
too much.
- Panu -
^ permalink raw reply [relevance 9%]
* Re: [dpdk-dev] DPDK namespace
2016-04-07 9:33 3% ` Panu Matilainen
@ 2016-04-07 10:16 5% ` Marc Sune
2016-04-07 11:51 9% ` [dpdk-dev] On DPDK ABI policy Panu Matilainen
2016-04-07 21:48 0% ` [dpdk-dev] DPDK namespace Matthew Hall
0 siblings, 2 replies; 200+ results
From: Marc Sune @ 2016-04-07 10:16 UTC (permalink / raw)
To: Panu Matilainen; +Cc: Thomas Monjalon, dev, techboard
2016-04-07 11:33 GMT+02:00 Panu Matilainen <pmatilai@redhat.com>:
> On 04/07/2016 12:18 PM, Thomas Monjalon wrote:
>
>> Thank you everyone for the feedbacks.
>>
>> 2016-04-05 15:56, Thomas Monjalon:
>>
>>> The goal of this email is to get some feedback on how important it is
>>> to fix the DPDK namespace.
>>>
>>
>> Everybody agree every symbols must be prefixed. Checking and fixing the
>> namespace consistency will be in the roadmap.
>>
>> It seems most of you agree renaming would be a nice improvement but not
>> so important.
>> The main drawback is the induced backporting pain, even if we have
>> some scripts to convert the patches to the old namespace.
>> Note: the backports can be in DPDK itself or in the applications.
>>
>> If there is enough agreement that we should do something, I suggest to
>>> introduce the "dpdk_" prefix slowly and live with both "rte_" and "dpdk_"
>>> during some time.
>>> We could start using the new prefix for the new APIs (example: crypto)
>>> or when there is a significant API break (example: mempool).
>>>
>>
>> The slow change has been clearly rejected in favor of a complete change
>> in one patch.
>> The timing was also discussed as it could impact the pending patches.
>> So it would be done at the end or the beginning of a release.
>> Marc suggests to do it for 16.04 as the numbering scheme has changed.
>>
>
> Just noting that it cannot be done in 16.04 because the ABI policy
> requires a deprecation cycle of at least one major release for every
> breakage. And we're discussing a total 100% breakage of everything here,
> even if its just a simple rename.
I still do not understand the ABI policy, and particularly why ABI changes
have to be announced one cycle before _if_ there is already at least one
ABI change proposed. DPDK applications will have to recompile anyway.
This aspect of the policy only slows down DPDK development and it pollutes
the repository with commits announcing ABI changes that are irrelevant
after 2 cycles, as (code) diffs show that already (not mentioning NEXT_ABI
complexity and extra LOCs).
Maintaining LTS releases, and enforcing bug fixing in old LTS first,
upstreaming bugfixes is to me a much better approach to solve backwards
compatibility issues.
But this is probably another discussion.
Marc
>
> - Panu -
>
>
> There is no strong conclusion at this point because we need to decide
>> wether the renaming deserves to be done or never.
>> I suggest to take the inputs from the technical board.
>>
>> Do not hesitate to comment. Thanks
>>
>>
>
^ permalink raw reply [relevance 5%]
* Re: [dpdk-dev] DPDK namespace
@ 2016-04-07 9:33 3% ` Panu Matilainen
2016-04-07 10:16 5% ` Marc Sune
0 siblings, 1 reply; 200+ results
From: Panu Matilainen @ 2016-04-07 9:33 UTC (permalink / raw)
To: Thomas Monjalon, dev; +Cc: techboard
On 04/07/2016 12:18 PM, Thomas Monjalon wrote:
> Thank you everyone for the feedbacks.
>
> 2016-04-05 15:56, Thomas Monjalon:
>> The goal of this email is to get some feedback on how important it is
>> to fix the DPDK namespace.
>
> Everybody agree every symbols must be prefixed. Checking and fixing the
> namespace consistency will be in the roadmap.
>
> It seems most of you agree renaming would be a nice improvement but not
> so important.
> The main drawback is the induced backporting pain, even if we have
> some scripts to convert the patches to the old namespace.
> Note: the backports can be in DPDK itself or in the applications.
>
>> If there is enough agreement that we should do something, I suggest to
>> introduce the "dpdk_" prefix slowly and live with both "rte_" and "dpdk_"
>> during some time.
>> We could start using the new prefix for the new APIs (example: crypto)
>> or when there is a significant API break (example: mempool).
>
> The slow change has been clearly rejected in favor of a complete change
> in one patch.
> The timing was also discussed as it could impact the pending patches.
> So it would be done at the end or the beginning of a release.
> Marc suggests to do it for 16.04 as the numbering scheme has changed.
Just noting that it cannot be done in 16.04 because the ABI policy
requires a deprecation cycle of at least one major release for every
breakage. And we're discussing a total 100% breakage of everything here,
even if it's just a simple rename.
- Panu -
> There is no strong conclusion at this point because we need to decide
> wether the renaming deserves to be done or never.
> I suggest to take the inputs from the technical board.
>
> Do not hesitate to comment. Thanks
>
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] DPDK namespace
@ 2016-04-07 8:22 3% ` Marc
0 siblings, 0 replies; 200+ results
From: Marc @ 2016-04-07 8:22 UTC (permalink / raw)
To: Dave Neary
Cc: Panu Matilainen, Yuanhan Liu, Arnon Warshavsky, Trahe, Fiona,
Thomas Monjalon, dev
On 6 April 2016 at 22:21, Dave Neary <dneary@redhat.com> wrote:
> Hi,
>
> On 04/06/2016 08:07 AM, Panu Matilainen wrote:
> >> +1: it's a bit weird to keep both, especially for a long while, that
> >> every time we turn a rte_ prefix to dpdk_ prefix, we break applications.
> >> Instead of breaking applications many times, I'd prefer to break once.
> >> Therefore, applications could do a simple global rte_ -> dpdk_
> >> substitute:
> >> it doesn't sound that painful then.
>
+1
Either all types and symbols use dpdk_ or rte_. It probably makes more
sense to use dpdk_, but to me it is not that important.
If it has to be changed, it might be a good idea to do it in this release,
now that version numbering format also changes.
>
> > I concur. If (and I think that should be a pretty big IF) the prefix is
> > to be changed then its better done in one fast sweep than gradually.
> >
> > Gratuitious (or nearly so) change is always extremely annoying, and the
> > longer it takes the more painful it is. Application developers wont much
> > care what the prefix is as long as its consistent, but if they're forced
> > to track prefix changes across several releases with different libraries
> > moving at different pace, they WILL be calling for bloody murder :)
>
> How about the idea of creating (at switch over time) an optionally
> installable dpdk_compat package that just has a list of #defines for the
> old symbols pointing them at the new symbols? That would also allow
> people with old applications to update DPDK without having to modify
> their applications.
>
You would also have to add all typedefs for type names.
Why bother? Moving from 2.2 to 16.04 requires recompiling your
application (ABI changes), and is as simple as sed -e 's/rte_/dpdk_/g' in
all the application code base.
Marc
>
> Thanks,
> Dave.
>
> --
> Dave Neary - NFV/SDN Community Strategy
> Open Source and Standards, Red Hat - http://community.redhat.com
> Ph: +1-978-399-2182 / Cell: +1-978-799-3338
>
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH] vhost: ABI/API change announcement due to refactor
2016-04-06 6:53 15% [dpdk-dev] [PATCH] vhost: ABI/API change announcement due to refactor Yuanhan Liu
@ 2016-04-07 7:12 7% ` Panu Matilainen
2016-04-10 9:58 4% ` Thomas Monjalon
0 siblings, 1 reply; 200+ results
From: Panu Matilainen @ 2016-04-07 7:12 UTC (permalink / raw)
To: Yuanhan Liu, dev; +Cc: huawei.xie, Thomas Monjalon, Ilya Maximets
On 04/06/2016 09:53 AM, Yuanhan Liu wrote:
> We currently exposed way too many fields (or even structures) than
> necessary. For example, vhost_virtqueue struct should NOT be exposed
> to user at all: application just need to tell the right queue id to
> locate a specific queue, and that's all. Instead, the structure should
> be defined in an internal header file. With that, we could do any changes
> to it we want, without worrying about that we may offense the painful
> ABI rules.
>
> Similar changes could be done to virtio_net struct as well, just exposing
> very few fields that are necessary and moving all others to an internal
> structure.
>
> Huawei then suggested a more radical yet much cleaner one: just exposing
> a virtio_net handle to application, just like the way kernel exposes an
> fd to user for locating a specific file, and exposing some new functions
> to access those old fields, such as flags, virt_qp_nb.
>
> With this change, we're likely to be free from ABI violations forever
> (well, except when we have to extend the virtio_net_device_ops struct).
> For example, following nice cleanup would not be a blocking one then:
>
> http://dpdk.org/ml/archives/dev/2016-February/033528.html
>
> Suggested-by: Huawei Xie <huawei.xie@intel.com>
> Cc: Ilya Maximets <i.maximets@samsung.com>
> Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> ---
> doc/guides/rel_notes/deprecation.rst | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
> index ad31355..7d16d86 100644
> --- a/doc/guides/rel_notes/deprecation.rst
> +++ b/doc/guides/rel_notes/deprecation.rst
> @@ -40,3 +40,10 @@ Deprecation Notices
> The existing API will be backward compatible, but there will be new API
> functions added to facilitate the creation of mempools using an external
> handler. The 16.07 release will contain these changes.
> +
> +* A librte_vhost public structures refactor is planned for DPDK 16.07
> + that requires both ABI and API change.
> + The proposed refactor would expose DPDK vhost dev to applications as
> + a handle, like the way kernel exposes an fd to user for locating a
> + specific file, and to keep all major structures internally, so that
> + we are likely to be free from ABI violations in future.
>
Acked-by: Panu Matilainen <pmatilai@redhat.com>
I applaud the initiative, public structs are by far the worst offender
when trying to maintain a stable ABI because they're so hard to
correctly version that hardly anybody besides glibc bothers.
- Panu -
^ permalink raw reply [relevance 7%]
* Re: [dpdk-dev] DPDK namespace
2016-04-06 12:07 0% ` Panu Matilainen
2016-04-06 12:34 0% ` Ananyev, Konstantin
@ 2016-04-06 14:36 0% ` Wiles, Keith
2 siblings, 0 replies; 200+ results
From: Wiles, Keith @ 2016-04-06 14:36 UTC (permalink / raw)
To: Panu Matilainen, Yuanhan Liu, Arnon Warshavsky
Cc: Trahe, Fiona, Thomas Monjalon, dev
>On 04/06/2016 08:26 AM, Yuanhan Liu wrote:
>> On Tue, Apr 05, 2016 at 05:31:22PM +0300, Arnon Warshavsky wrote:
>>> On Tue, Apr 5, 2016 at 5:13 PM, Trahe, Fiona <fiona.trahe@intel.com> wrote:
>>>
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
>>>>> Sent: Tuesday, April 05, 2016 2:57 PM
>>>>> To: dev@dpdk.org
>>>>> Subject: [dpdk-dev] DPDK namespace
>>>>>
>>>>> DPDK is going to be more popular in Linux distributions.
>>>>> It means people will have some DPDK files in their /usr/include and some
>>>> DPDK
>>>>> libraries on their system.
>>>>>
>>>>> Let's imagine someone trying to compile an application which needs
>>>>> rte_ethdev.h. He has to figure out that this "rte header" is provided by
>>>> the DPDK.
>>>>> Hopefully it will be explained on StackOverflow that RTE stands for DPDK.
>>>>> Then someone else will try to run a binary without having installed the
>>>> DPDK
>>>>> libraries. The linker will require libethdev.so (no prefix here).
>>>>> StackOverflow will probably have another good answer (among wrong ones):
>>>>> "Hey Sherlock Holmes, have you tried to install the DPDK library?"
>>>>> Followed by an insight: "You know, the DPDK naming is weird..."
>>>>> And we could continue the story with developers having some naming clash
>>>>> because of some identifiers not prefixed at all.
>>>>>
>>>>> The goal of this email is to get some feedback on how important it is to
>>>> fix the
>>>>> DPDK namespace.
>>>>>
>>>>> If there is enough agreement that we should do something, I suggest to
>>>>> introduce the "dpdk_" prefix slowly and live with both "rte_" and "dpdk_"
>>>>> during some time.
>>>>> We could start using the new prefix for the new APIs (example: crypto)
>>>> or when
>>>>> there is a significant API break (example: mempool).
>>>>>
>>>>> Opinions welcome!
>>>> I don't have an opinion on how important it is to fix the namespace,
>>>> though it does seem like a good idea.
>>>> However if it's to be done, in my opinion it should be completed quickly
>>>> or will just cause more confusion.
>>>> So if rte_cryptoxxx becomes dpdk_cryptoxxx all other libraries should
>>>> follow in next release or two, with
>>>> the resulting ABI compatibility handling. Maybe with dual naming handled
>>>> for several releases, but a
>>>> clear end date when all are converted.
>>>> Else there will be many years with a mix of rte_ and dpdk_
>>>>
>>>>
>>>
>>> Googling rte functions or error codes usually takes you to dpdk dev email
>>> archive so I don't think it is that much difficult to figure out where rte
>>> comes from.
>>> Other than that , except for my own refactoring pains when replacing a dpdk
>>> version, I do not see a major reason why not.
>>> If Going for dpdk_ prefix, I agree with the quick death approach.
>>
>> +1: it's a bit weird to keep both, especially for a long while, that
>> every time we turn a rte_ prefix to dpdk_ prefix, we break applications.
>> Instead of breaking applications many times, I'd prefer to break once.
>> Therefore, applications could do a simple global rte_ -> dpdk_ substitute:
>> it doesn't sound that painful then.
>
>I concur. If (and I think that should be a pretty big IF) the prefix is
>to be changed then its better done in one fast sweep than gradually.
>
>Gratuitious (or nearly so) change is always extremely annoying, and the
>longer it takes the more painful it is. Application developers wont much
>care what the prefix is as long as its consistent, but if they're forced
>to track prefix changes across several releases with different libraries
>moving at different pace, they WILL be calling for bloody murder :)
>
>As for rte_ being strange for DPDK - yes it is, but it takes like 5
>minutes to get over it. It would help to have it explained on dpdk.org
>FAQ: "Due to historical reasons, DPDK libraries are prefixed rte_
>instead of dpdk_ because <insert excuse here, probably early project
>name> and changing it is unnecessarily painful."
As I understand it, RTE comes from “Run Time Environment”, which was the primary set of APIs at the time, and the name just kept getting propagated :-)
>
>>
>> And here are few more comments:
>>
>> - we should add rte_/dpdk_ prefix to all public structures as well.
>>
>> I'm thinking we are doing well here. I'm just aware that vhost lib
>> does a bad job, which is something I proposed to fix in next release.
>
>Yup, all public symbols should be prefixed. What the exact prefix is
>isn't that important really.
>
>>
>> - If we do the whole change once, I'd suggest to do it ASAP when this
>> release is over.
>>
>> It should be a HUGE change that touches a lot of code, if we do it
>> later, at a stage that a lot of patches for new features have been
>> made or sent out, all of them need rebase. That'd be painful.
>
>Nod, that's yet another aspect to consider.
>
>So to summarize, I'm not strongly opposed to doing a one-time mass rte_
>-> dpdk_ prefix change, but it needs to be one big sweep all at once, or
>not do it at all. Gradual change is a suicide.
>
>Keeping rte_ is not the end of the world by any means, especially when
>applied consistently and explained someplace.
To me rte_ is just fine; besides, we would also have to change the structure names and define names. I am sure we can figure out a script to convert any app for the developer, but why change? The rte_ prefix is something which can be explained, and dpdk_ adds one character to type :-)
>
> - Panu -
>
Regards,
Keith
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH] doc: announce xstats api change for 16.07
@ 2016-04-06 14:00 0% ` David Harton (dharton)
1 sibling, 0 replies; 200+ results
From: David Harton (dharton) @ 2016-04-06 14:00 UTC (permalink / raw)
To: Harry van Haaren, dev; +Cc: maryam.tahhan
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Harry van Haaren
> Sent: Tuesday, April 05, 2016 1:58 PM
> To: dev@dpdk.org
> Cc: maryam.tahhan@intel.com; Harry van Haaren <harry.van.haaren@intel.com>
> Subject: [dpdk-dev] [PATCH] doc: announce xstats api change for 16.07
>
> This patch adds a notice that the API for the xstats functionality will be
> modified in the 16.07 release, with no backwards compatibility planned as
> it would require code duplication in each PMD that supports xstats.
>
> Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com>
> ---
> doc/guides/rel_notes/deprecation.rst | 5 +++++
> 1 file changed, 5 insertions(+)
>
> diff --git a/doc/guides/rel_notes/deprecation.rst
> b/doc/guides/rel_notes/deprecation.rst
> index 98d5529..13c3a95 100644
> --- a/doc/guides/rel_notes/deprecation.rst
> +++ b/doc/guides/rel_notes/deprecation.rst
> @@ -54,3 +54,8 @@ Deprecation Notices
> induce a modification of the rte_mempool structure, plus a
> modification of the API of rte_mempool_obj_iter(), implying a breakage
> of the ABI.
> +
> +* ABI change is planned for the xstats API and rte_eth_xstats struct, to
> + facilitate updating to an API that allows retrieval of values without any
> + string copies or parsing. No backwards compatibility is planned, as it would
> + require code duplication in every PMD that supports xstats.
> --
> 2.5.0
Acked-by: David Harton <dharton@cisco.com>
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH] doc: announce xstats api change for 16.07
2016-04-06 12:14 0% ` Thomas Monjalon
@ 2016-04-06 13:49 0% ` David Harton (dharton)
0 siblings, 0 replies; 200+ results
From: David Harton (dharton) @ 2016-04-06 13:49 UTC (permalink / raw)
To: Thomas Monjalon, Van Haaren, Harry; +Cc: dev, Tahhan, Maryam, olivier.matz
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Wednesday, April 06, 2016 8:14 AM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>
> Cc: David Harton (dharton) <dharton@cisco.com>; dev@dpdk.org; Tahhan,
> Maryam <maryam.tahhan@intel.com>; olivier.matz@6wind.com
> Subject: Re: [dpdk-dev] [PATCH] doc: announce xstats api change for 16.07
>
> 2016-04-06 11:16, Van Haaren, Harry:
> > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > > The issue we are going to fix is that currently PMDs copy strings
> > > > when retrieving
> > > statistics, which causes unnecessary overhead. The implementation is
> > > not decided yet, but using an int->value mapping seems logical.
> >
> > > I am not sure performance is so much critical when retrieving
> statistics.
> >
> > In the previous discussion David was concerned about performance
> > impact of string copies, are those concerns still present David?
> >
> > > The extended stats can be infinitely extended. So a string
> > > identifier seems a lot more natural.
> >
> > I'm not suggesting that the string identifier is removed totally.
> >
> > > I do not agree to add a new numeric identifier in the API each time
> > > a driver wants to report a specific statistic for debugging purpose.
> >
> > And I agree - the ints are just an index to xstats arrays, no eth-dev
> wide enums here.
Yes, I abandoned the idea of a set of stats ids. I can see where registration will be problematic and cumbersome for driver developers.
> > The proposal is to make the API more flexible, see example:
> > http://thread.gmane.org/gmane.comp.networking.dpdk.devel/31728/focus=3
> > 2795
> >
> > This more flexible API would allow other types of information about
> > statistics be retrieved too.
I have prototyped this. If there is interest/acceptance I can work on making an official patch to share back to the community.
Using this method still gives the flexibility the current API desires while giving the user the control to only obtain the counters. This of course assumes that the counters per device are static but that seems a safe bet.
>
> OK I think I start to understand.
>
> > For now, the sent patch announces that the API/ABI may change, and we
> > can discuss details of API as development starts.
>
> This should not be the normal process.
> It is important to understand what should be the changes to decide of
> announcing or not a deprecation.
> In the case of the mempool reworks, the patch have been sent and discussed
> on the mailing list.
> Given the previous explanations (and knowing you did good job on stats), I
> give my
> Acked-by: Thomas Monjalon <thomas.monjalon@6wind.com>
Thanks for considering this.
Regards,
Dave
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] DPDK namespace
2016-04-06 12:07 0% ` Panu Matilainen
@ 2016-04-06 12:34 0% ` Ananyev, Konstantin
2016-04-06 14:36 0% ` Wiles, Keith
2 siblings, 0 replies; 200+ results
From: Ananyev, Konstantin @ 2016-04-06 12:34 UTC (permalink / raw)
To: Panu Matilainen, Yuanhan Liu, Arnon Warshavsky
Cc: Trahe, Fiona, Thomas Monjalon, dev
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Panu Matilainen
> Sent: Wednesday, April 06, 2016 1:08 PM
> To: Yuanhan Liu; Arnon Warshavsky
> Cc: Trahe, Fiona; Thomas Monjalon; dev@dpdk.org
> Subject: Re: [dpdk-dev] DPDK namespace
>
> On 04/06/2016 08:26 AM, Yuanhan Liu wrote:
> > On Tue, Apr 05, 2016 at 05:31:22PM +0300, Arnon Warshavsky wrote:
> >> On Tue, Apr 5, 2016 at 5:13 PM, Trahe, Fiona <fiona.trahe@intel.com> wrote:
> >>
> >>>
> >>>
> >>>> -----Original Message-----
> >>>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> >>>> Sent: Tuesday, April 05, 2016 2:57 PM
> >>>> To: dev@dpdk.org
> >>>> Subject: [dpdk-dev] DPDK namespace
> >>>>
> >>>> DPDK is going to be more popular in Linux distributions.
> >>>> It means people will have some DPDK files in their /usr/include and some
> >>> DPDK
> >>>> libraries on their system.
> >>>>
> >>>> Let's imagine someone trying to compile an application which needs
> >>>> rte_ethdev.h. He has to figure out that this "rte header" is provided by
> >>> the DPDK.
> >>>> Hopefully it will be explained on StackOverflow that RTE stands for DPDK.
> >>>> Then someone else will try to run a binary without having installed the
> >>> DPDK
> >>>> libraries. The linker will require libethdev.so (no prefix here).
> >>>> StackOverflow will probably have another good answer (among wrong ones):
> >>>> "Hey Sherlock Holmes, have you tried to install the DPDK library?"
> >>>> Followed by an insight: "You know, the DPDK naming is weird..."
> >>>> And we could continue the story with developers having some naming clash
> >>>> because of some identifiers not prefixed at all.
> >>>>
> >>>> The goal of this email is to get some feedback on how important it is to
> >>> fix the
> >>>> DPDK namespace.
> >>>>
> >>>> If there is enough agreement that we should do something, I suggest to
> >>>> introduce the "dpdk_" prefix slowly and live with both "rte_" and "dpdk_"
> >>>> during some time.
> >>>> We could start using the new prefix for the new APIs (example: crypto)
> >>> or when
> >>>> there is a significant API break (example: mempool).
> >>>>
> >>>> Opinions welcome!
> >>> I don't have an opinion on how important it is to fix the namespace,
> >>> though it does seem like a good idea.
> >>> However if it's to be done, in my opinion it should be completed quickly
> >>> or will just cause more confusion.
> >>> So if rte_cryptoxxx becomes dpdk_cryptoxxx all other libraries should
> >>> follow in next release or two, with
> >>> the resulting ABI compatibility handling. Maybe with dual naming handled
> >>> for several releases, but a
> >>> clear end date when all are converted.
> >>> Else there will be many years with a mix of rte_ and dpdk_
> >>>
> >>>
> >>
> >> Googling rte functions or error codes usually takes you to dpdk dev email
> >> archive so I don't think it is that much difficult to figure out where rte
> >> comes from.
> >> Other than that , except for my own refactoring pains when replacing a dpdk
> >> version, I do not see a major reason why not.
> >> If Going for dpdk_ prefix, I agree with the quick death approach.
> >
> > +1: it's a bit weird to keep both, especially for a long while, that
> > every time we turn a rte_ prefix to dpdk_ prefix, we break applications.
> > Instead of breaking applications many times, I'd prefer to break once.
> > Therefore, applications could do a simple global rte_ -> dpdk_ substitute:
> > it doesn't sound that painful then.
>
> I concur. If (and I think that should be a pretty big IF) the prefix is
> to be changed then its better done in one fast sweep than gradually.
>
> Gratuitious (or nearly so) change is always extremely annoying, and the
> longer it takes the more painful it is. Application developers wont much
> care what the prefix is as long as its consistent, but if they're forced
> to track prefix changes across several releases with different libraries
> moving at different pace, they WILL be calling for bloody murder :)
>
> As for rte_ being strange for DPDK - yes it is, but it takes like 5
> minutes to get over it. It would help to have it explained on dpdk.org
> FAQ: "Due to historical reasons, DPDK libraries are prefixed rte_
> instead of dpdk_ because <insert excuse here, probably early project
> name> and changing it is unnecessarily painful."
>
> >
> > And here are few more comments:
> >
> > - we should add rte_/dpdk_ prefix to all public structures as well.
> >
> > I'm thinking we are doing well here. I'm just aware that vhost lib
> > does a bad job, which is something I proposed to fix in next release.
>
> Yup, all public symbols should be prefixed. What the exact prefix is
> isn't that important really.
>
> >
> > - If we do the whole change once, I'd suggest to do it ASAP when this
> > release is over.
> >
> > It should be a HUGE change that touches a lot of code, if we do it
> > later, at a stage that a lot of patches for new features have been
> > made or sent out, all of them need rebase. That'd be painful.
>
> Nod, that's yet another aspect to consider.
>
> So to summarize, I'm not strongly opposed to doing a one-time mass rte_
> -> dpdk_ prefix change, but it needs to be one big sweep all at once, or
> not do it at all. Gradual change is a suicide.
>
> Keeping rte_ is not the end of the world by any means, especially when
> applied consistently and explained someplace.
>
Yep, I have exactly the same thoughts:
1. Yes, the 'dpdk_' prefix is a better naming approach than 'rte_',
but for me it is not so much better as to outweigh all the pain of such a big change.
2. If we still decide to do that change - my preference would be to do it in one go.
I personally don't care that much what the prefix would be, as long as it is consistent
across the whole codebase.
Konstantin
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH] doc: announce xstats api change for 16.07
2016-04-06 11:16 3% ` Van Haaren, Harry
@ 2016-04-06 12:14 0% ` Thomas Monjalon
2016-04-06 13:49 0% ` David Harton (dharton)
0 siblings, 1 reply; 200+ results
From: Thomas Monjalon @ 2016-04-06 12:14 UTC (permalink / raw)
To: Van Haaren, Harry
Cc: 'David Harton (dharton)', dev, Tahhan, Maryam, olivier.matz
2016-04-06 11:16, Van Haaren, Harry:
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > The issue we are going to fix is that currently PMDs copy strings when retrieving
> > statistics, which causes unnecessary overhead. The implementation is not decided yet, but
> > using an int->value mapping seems logical.
>
> > I am not sure performance is so much critical when retrieving statistics.
>
> In the previous discussion David was concerned about performance impact
> of string copies, are those concerns still present David?
>
> > The extended stats can be infinitely extended. So a string identifier seems
> > a lot more natural.
>
> I'm not suggesting that the string identifier is removed totally.
>
> > I do not agree to add a new numeric identifier in the API each time a driver
> > wants to report a specific statistic for debugging purpose.
>
> And I agree - the ints are just an index to xstats arrays, no eth-dev wide enums here.
> The proposal is to make the API more flexible, see example:
> http://thread.gmane.org/gmane.comp.networking.dpdk.devel/31728/focus=32795
>
> This more flexible API would allow other types of information about
> statistics be retrieved too.
OK I think I start to understand.
> For now, the sent patch announces that the API/ABI may change, and we can
> discuss details of API as development starts.
This should not be the normal process.
It is important to understand what the changes will be in order to decide
whether or not to announce a deprecation.
In the case of the mempool rework, the patches have been sent and discussed
on the mailing list.
Given the previous explanations (and knowing you did good job on stats),
I give my
Acked-by: Thomas Monjalon <thomas.monjalon@6wind.com>
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] DPDK namespace
@ 2016-04-06 12:07 0% ` Panu Matilainen
2016-04-06 12:34 0% ` Ananyev, Konstantin
` (2 more replies)
0 siblings, 3 replies; 200+ results
From: Panu Matilainen @ 2016-04-06 12:07 UTC (permalink / raw)
To: Yuanhan Liu, Arnon Warshavsky; +Cc: Trahe, Fiona, Thomas Monjalon, dev
On 04/06/2016 08:26 AM, Yuanhan Liu wrote:
> On Tue, Apr 05, 2016 at 05:31:22PM +0300, Arnon Warshavsky wrote:
>> On Tue, Apr 5, 2016 at 5:13 PM, Trahe, Fiona <fiona.trahe@intel.com> wrote:
>>
>>>
>>>
>>>> -----Original Message-----
>>>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
>>>> Sent: Tuesday, April 05, 2016 2:57 PM
>>>> To: dev@dpdk.org
>>>> Subject: [dpdk-dev] DPDK namespace
>>>>
>>>> DPDK is going to be more popular in Linux distributions.
>>>> It means people will have some DPDK files in their /usr/include and some
>>> DPDK
>>>> libraries on their system.
>>>>
>>>> Let's imagine someone trying to compile an application which needs
>>>> rte_ethdev.h. He has to figure out that this "rte header" is provided by
>>> the DPDK.
>>>> Hopefully it will be explained on StackOverflow that RTE stands for DPDK.
>>>> Then someone else will try to run a binary without having installed the
>>> DPDK
>>>> libraries. The linker will require libethdev.so (no prefix here).
>>>> StackOverflow will probably have another good answer (among wrong ones):
>>>> "Hey Sherlock Holmes, have you tried to install the DPDK library?"
>>>> Followed by an insight: "You know, the DPDK naming is weird..."
>>>> And we could continue the story with developers having some naming clash
>>>> because of some identifiers not prefixed at all.
>>>>
>>>> The goal of this email is to get some feedback on how important it is to
>>> fix the
>>>> DPDK namespace.
>>>>
>>>> If there is enough agreement that we should do something, I suggest to
>>>> introduce the "dpdk_" prefix slowly and live with both "rte_" and "dpdk_"
>>>> during some time.
>>>> We could start using the new prefix for the new APIs (example: crypto)
>>> or when
>>>> there is a significant API break (example: mempool).
>>>>
>>>> Opinions welcome!
>>> I don't have an opinion on how important it is to fix the namespace,
>>> though it does seem like a good idea.
>>> However if it's to be done, in my opinion it should be completed quickly
>>> or will just cause more confusion.
>>> So if rte_cryptoxxx becomes dpdk_cryptoxxx all other libraries should
>>> follow in next release or two, with
>>> the resulting ABI compatibility handling. Maybe with dual naming handled
>>> for several releases, but a
>>> clear end date when all are converted.
>>> Else there will be many years with a mix of rte_ and dpdk_
>>>
>>>
>>
>> Googling rte functions or error codes usually takes you to dpdk dev email
>> archive so I don't think it is that much difficult to figure out where rte
>> comes from.
>> Other than that , except for my own refactoring pains when replacing a dpdk
>> version, I do not see a major reason why not.
>> If Going for dpdk_ prefix, I agree with the quick death approach.
>
> +1: it's a bit weird to keep both, especially for a long while, that
> every time we turn a rte_ prefix to dpdk_ prefix, we break applications.
> Instead of breaking applications many times, I'd prefer to break once.
> Therefore, applications could do a simple global rte_ -> dpdk_ substitute:
> it doesn't sound that painful then.
I concur. If (and I think that should be a pretty big IF) the prefix is
to be changed then it's better done in one fast sweep than gradually.
Gratuitous (or nearly so) change is always extremely annoying, and the
longer it takes the more painful it is. Application developers won't much
care what the prefix is as long as it's consistent, but if they're forced
to track prefix changes across several releases with different libraries
moving at different pace, they WILL be calling for bloody murder :)
As for rte_ being strange for DPDK - yes it is, but it takes like 5
minutes to get over it. It would help to have it explained on dpdk.org
FAQ: "Due to historical reasons, DPDK libraries are prefixed rte_
instead of dpdk_ because <insert excuse here, probably early project
name> and changing it is unnecessarily painful."
>
> And here are few more comments:
>
> - we should add rte_/dpdk_ prefix to all public structures as well.
>
> I'm thinking we are doing well here. I'm just aware that vhost lib
> does a bad job, which is something I proposed to fix in next release.
Yup, all public symbols should be prefixed. What the exact prefix is
isn't that important really.
>
> - If we do the whole change once, I'd suggest to do it ASAP when this
> release is over.
>
> It should be a HUGE change that touches a lot of code, if we do it
> later, at a stage that a lot of patches for new features have been
> made or sent out, all of them need rebase. That'd be painful.
Nod, that's yet another aspect to consider.
So to summarize, I'm not strongly opposed to doing a one-time mass rte_
-> dpdk_ prefix change, but it needs to be one big sweep all at once, or
not done at all. Gradual change is suicide.
Keeping rte_ is not the end of the world by any means, especially when
applied consistently and explained someplace.
- Panu -
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH] doc: announce xstats api change for 16.07
2016-04-06 9:22 0% ` Thomas Monjalon
@ 2016-04-06 11:16 3% ` Van Haaren, Harry
2016-04-06 12:14 0% ` Thomas Monjalon
0 siblings, 1 reply; 200+ results
From: Van Haaren, Harry @ 2016-04-06 11:16 UTC (permalink / raw)
To: Thomas Monjalon
Cc: 'David Harton (dharton)', dev, Tahhan, Maryam, olivier.matz
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Subject: Re: [dpdk-dev] [PATCH] doc: announce xstats api change for 16.07
> > The issue we are going to fix is that currently PMDs copy strings when retrieving
> statistics, which causes unnecessary overhead. The implementation is not decided yet, but
> using an int->value mapping seems logical.
> I am not sure performance is so much critical when retrieving statistics.
In the previous discussion David was concerned about performance impact
of string copies, are those concerns still present David?
> The extended stats can be infinitely extended. So a string identifier seems
> a lot more natural.
I'm not suggesting that the string identifier is removed totally.
> I do not agree to add a new numeric identifier in the API each time a driver
> wants to report a specific statistic for debugging purpose.
And I agree - the ints are just an index to xstats arrays, no eth-dev wide enums here.
The proposal is to make the API more flexible, see example:
http://thread.gmane.org/gmane.comp.networking.dpdk.devel/31728/focus=32795
This more flexible API would allow other types of information about
statistics to be retrieved too.
For now, the sent patch announces that the API/ABI may change, and we can
discuss details of API as development starts.
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] Fw: dpdk-armv7-testing - Build # 43 - Failure!
@ 2016-04-06 10:18 4% ` Jan Viktorin
0 siblings, 0 replies; 200+ results
From: Jan Viktorin @ 2016-04-06 10:18 UTC (permalink / raw)
To: Thomas Monjalon; +Cc: dev, Adrien Mazarguil
On Wed, 06 Apr 2016 12:05:31 +0200
Thomas Monjalon <thomas.monjalon@6wind.com> wrote:
> Hi Jan,
>
> 2016-04-06 11:53, Jan Viktorin:
> > Please, see the attached log file.
>
> The text attachments are filtered out on the mailing list because
> it is not convenient or impossible to read in the archives.
> Please use inline text, next time. Thanks
I am sorry, I didn't notice that. Nobody has pointed this out before.
dpdk-armv7-testing - Build # 44 - Still Failing
See the following log:
[...]
ln -nsf `/var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/scripts/relpath.sh /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/lib/librte_cmdline/cmdline.h /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include` /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include
SYMLINK-FILE include/cmdline_parse.h
ln -nsf `/var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/scripts/relpath.sh /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/lib/librte_cmdline/cmdline_parse.h /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include` /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include
SYMLINK-FILE include/cmdline_parse_num.h
ln -nsf `/var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/scripts/relpath.sh /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/lib/librte_cmdline/cmdline_parse_num.h /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include` /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include
SYMLINK-FILE include/cmdline_parse_ipaddr.h
ln -nsf `/var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/scripts/relpath.sh /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/lib/librte_cmdline/cmdline_parse_ipaddr.h /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include` /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include
SYMLINK-FILE include/cmdline_parse_etheraddr.h
ln -nsf `/var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/scripts/relpath.sh /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/lib/librte_cmdline/cmdline_parse_etheraddr.h /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include` /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include
SYMLINK-FILE include/cmdline_parse_string.h
ln -nsf `/var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/scripts/relpath.sh /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/lib/librte_cmdline/cmdline_parse_string.h /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include` /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include
SYMLINK-FILE include/cmdline_rdline.h
ln -nsf `/var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/scripts/relpath.sh /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/lib/librte_cmdline/cmdline_rdline.h /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include` /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include
SYMLINK-FILE include/cmdline_vt100.h
ln -nsf `/var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/scripts/relpath.sh /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/lib/librte_cmdline/cmdline_vt100.h /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include` /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include
SYMLINK-FILE include/cmdline_socket.h
ln -nsf `/var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/scripts/relpath.sh /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/lib/librte_cmdline/cmdline_socket.h /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include` /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include
SYMLINK-FILE include/cmdline_cirbuf.h
ln -nsf `/var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/scripts/relpath.sh /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/lib/librte_cmdline/cmdline_cirbuf.h /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include` /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include
SYMLINK-FILE include/cmdline_parse_portlist.h
ln -nsf `/var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/scripts/relpath.sh /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/lib/librte_cmdline/cmdline_parse_portlist.h /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include` /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include
INSTALL-LIB librte_cmdline.a
cp -f librte_cmdline.a /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/lib
== Build lib/librte_ether
/opt/gcc/br2-arm32-glibc-4.9.x/usr/bin/arm-buildroot-linux-gnueabi-gcc -Wp,-MD,./.rte_ethdev.o.d.tmp -mfloat-abi=softfp -mfloat-abi=softfp -mfloat-abi=softfp -pthread -march=armv7-a -mtune=cortex-a9 -mfpu=neon -DRTE_MACHINE_CPUFLAG_NEON -I/var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include -include /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/build/include/rte_config.h -O3 -W -Wall -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wold-style-definition -Wpointer-arith -Wcast-align -Wnested-externs -Wcast-qual -Wformat-nonliteral -Wformat-security -Wundef -Wwrite-strings -Werror -Wno-error=cast-align -o rte_ethdev.o -c /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/lib/librte_ether/rte_ethdev.c
In file included from /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/lib/librte_ether/rte_ethdev.h:186:0,
from /var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/lib/librte_ether/rte_ethdev.c:70:
/var/lib/jenkins/jobs/dpdk-armv7-testing/workspace/lib/librte_ether/rte_eth_ctrl.h:39:23: fatal error: rte_ether.h: No such file or directory
#include <rte_ether.h>
^
compilation terminated.
make[3]: *** [rte_ethdev.o] Error 1
make[2]: *** [librte_ether] Error 2
make[1]: *** [lib] Error 2
make: *** [all] Error 2
Build step 'Execute shell' marked build as failure
[WARNINGS] Skipping publisher since build result is FAILURE
Skipped archiving because build is not successful
Email was triggered for: Failure - Any
Sending email for trigger: Failure - Any
--
Jan Viktorin E-mail: Viktorin@RehiveTech.com
System Architect Web: www.RehiveTech.com
RehiveTech
Brno, Czech Republic
^ permalink raw reply [relevance 4%]
* Re: [dpdk-dev] [PATCH v13 4/8] ethdev: rename link speed constants
2016-04-06 9:16 3% ` Weglicki, MichalX
@ 2016-04-06 9:34 3% ` Thomas Monjalon
0 siblings, 0 replies; 200+ results
From: Thomas Monjalon @ 2016-04-06 9:34 UTC (permalink / raw)
To: Weglicki, MichalX
Cc: Marc Sune, Xu, Qian Q, Xing, Beilei, dev, Ananyev, Konstantin,
Lu, Wenzhuo, Richardson, Bruce, Glynn, Michael J, Gray, Mark D
2016-04-06 09:16, Weglicki, MichalX:
> Hello,
>
> Thank you for your answer.
>
> I didn't mention that patch is cosmetic, rather naming.
>
> But all clear, I somehow missed it 9 months ago, and couldn't find it.
>
> Is there any official place where I can find upcoming ABI & API changes, or I have to dig through mailing list?
Yes, the API and ABI changes are described in the release notes:
http://dpdk.org/browse/dpdk/tree/doc/guides/rel_notes/release_16_04.rst#n471
And the next deprecations are announced in another section:
http://dpdk.org/browse/dpdk/tree/doc/guides/rel_notes/deprecation.rst
PS: please answer inline
-----------------
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> 2016-04-06 08:34, Weglicki, MichalX:
> > Hello,
> >
> > I have a question about this patch.
> >
> > As far as I see changing ETH_LINK_SPEED_ to ETH_SPEED_NUM_ is rather cosmetic change, am I right?
>
> No.
> ETH_LINK_SPEED was used for configuration and reported speed.
> Now the configuration is done with a bitmap filled with new ETH_LINK_SPEED
> and the old numerical values are kept for other usages as ETH_SPEED_NUM.
>
> > Disadvantage of that is that it breaks compatibility with OVS (compilation) and thus it also breaks backward compatibility between OVS & DPDK. Is it really necessary?
>
> Yes that's why this change was discussed 9 months ago and announced in the
> previous release notes.
>
> > It would be great to keep ABI & API stable if possible.
>
> We try to keep it stable when possible and continue to bring some improvements.
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH] doc: announce xstats api change for 16.07
2016-04-06 9:02 0% ` Van Haaren, Harry
@ 2016-04-06 9:22 0% ` Thomas Monjalon
2016-04-06 11:16 3% ` Van Haaren, Harry
0 siblings, 1 reply; 200+ results
From: Thomas Monjalon @ 2016-04-06 9:22 UTC (permalink / raw)
To: Van Haaren, Harry
Cc: 'David Harton (dharton)', dev, Tahhan, Maryam, olivier.matz
2016-04-06 09:02, Van Haaren, Harry:
> + David Harton,
>
> > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > Subject: Re: [dpdk-dev] [PATCH] doc: announce xstats api change for 16.07
> > 2016-04-05 18:58, Harry van Haaren:
> > > +* ABI change is planned for the xstats API
>
> > Have you already submitted a RFC patch to let us have an opinion on the change?
> > We need, at least, to see the structure changes.
>
> This API break is to allow changing the API of xstats given the conversation that was on-list, as discussed in this thread:
>
> http://thread.gmane.org/gmane.comp.networking.dpdk.devel/31728/focus=31903
>
> The issue we are going to fix is that currently PMDs copy strings when retrieving statistics, which causes unnecessary overhead. The implementation is not decided yet, but using an int->value mapping seems logical.
I am not sure performance is that critical when retrieving statistics.
The extended stats can be infinitely extended. So a string identifier seems
a lot more natural.
I do not agree to add a new numeric identifier in the API each time a driver
wants to report a specific statistic for debugging purpose.
> The rte_eth_xstats struct size may be modified, and the API to retrieve statistics will be modified.
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH v13 4/8] ethdev: rename link speed constants
2016-04-06 8:52 0% ` Thomas Monjalon
@ 2016-04-06 9:16 3% ` Weglicki, MichalX
2016-04-06 9:34 3% ` Thomas Monjalon
0 siblings, 1 reply; 200+ results
From: Weglicki, MichalX @ 2016-04-06 9:16 UTC (permalink / raw)
To: Thomas Monjalon
Cc: Marc Sune, Xu, Qian Q, Xing, Beilei, dev, Ananyev, Konstantin,
Lu, Wenzhuo, Richardson, Bruce, Glynn, Michael J, Gray, Mark D
Hello,
Thank you for your answer.
I didn't mention that patch is cosmetic, rather naming.
But all clear, I somehow missed it 9 months ago, and couldn't find it.
Is there any official place where I can find upcoming ABI & API changes, or do I have to dig through the mailing list?
Thank you in advance.
Br,
Michal.
-----Original Message-----
From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
Sent: Wednesday, April 6, 2016 9:53 AM
To: Weglicki, MichalX <michalx.weglicki@intel.com>
Cc: Marc Sune <marcdevel@gmail.com>; Xu, Qian Q <qian.q.xu@intel.com>; Xing, Beilei <beilei.xing@intel.com>; dev@dpdk.org; Ananyev, Konstantin <konstantin.ananyev@intel.com>; Lu, Wenzhuo <wenzhuo.lu@intel.com>; Richardson, Bruce <bruce.richardson@intel.com>; Glynn, Michael J <michael.j.glynn@intel.com>; Gray, Mark D <mark.d.gray@intel.com>
Subject: Re: [dpdk-dev] [PATCH v13 4/8] ethdev: rename link speed constants
2016-04-06 08:34, Weglicki, MichalX:
> Hello,
>
> I have a question about this patch.
>
> As far as I see changing ETH_LINK_SPEED_ to ETH_SPEED_NUM_ is rather cosmetic change, am I right?
No.
ETH_LINK_SPEED was used for configuration and reported speed.
Now the configuration is done with a bitmap filled with new ETH_LINK_SPEED
and the old numerical values are kept for other usages as ETH_SPEED_NUM.
> Disadvantage of that is that it breaks compatibility with OVS (compilation) and thus it also breaks backward compatibility between OVS & DPDK. Is it really necessary?
Yes that's why this change was discussed 9 months ago and announced in the
previous release notes.
> It would be great to keep ABI & API stable if possible.
We try to keep it stable when possible and continue to bring some improvements.
--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263
This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.
^ permalink raw reply [relevance 3%]
* Re: [dpdk-dev] [PATCH] doc: announce xstats api change for 16.07
@ 2016-04-06 9:02 0% ` Van Haaren, Harry
2016-04-06 9:22 0% ` Thomas Monjalon
0 siblings, 1 reply; 200+ results
From: Van Haaren, Harry @ 2016-04-06 9:02 UTC (permalink / raw)
To: Thomas Monjalon, 'David Harton (dharton)'; +Cc: dev, Tahhan, Maryam
+ David Harton,
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Subject: Re: [dpdk-dev] [PATCH] doc: announce xstats api change for 16.07
> 2016-04-05 18:58, Harry van Haaren:
> > +* ABI change is planned for the xstats API
> Have you already submitted a RFC patch to let us have an opinion on the change?
> We need, at least, to see the structure changes.
This API break is to allow changing the API of xstats given the conversation that was on-list, as discussed in this thread:
http://thread.gmane.org/gmane.comp.networking.dpdk.devel/31728/focus=31903
The issue we are going to fix is that currently PMDs copy strings when retrieving statistics, which causes unnecessary overhead. The implementation is not decided yet, but using an int->value mapping seems logical.
The rte_eth_xstats struct size may be modified, and the API to retrieve statistics will be modified.
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH v13 4/8] ethdev: rename link speed constants
2016-04-06 8:34 2% ` Weglicki, MichalX
@ 2016-04-06 8:52 0% ` Thomas Monjalon
2016-04-06 9:16 3% ` Weglicki, MichalX
0 siblings, 1 reply; 200+ results
From: Thomas Monjalon @ 2016-04-06 8:52 UTC (permalink / raw)
To: Weglicki, MichalX
Cc: Marc Sune, Xu, Qian Q, Xing, Beilei, dev, Ananyev, Konstantin,
Lu, Wenzhuo, Richardson, Bruce, Glynn, Michael J, Gray, Mark D
2016-04-06 08:34, Weglicki, MichalX:
> Hello,
>
> I have a question about this patch.
>
> As far as I see changing ETH_LINK_SPEED_ to ETH_SPEED_NUM_ is rather cosmetic change, am I right?
No.
ETH_LINK_SPEED was used for configuration and reported speed.
Now the configuration is done with a bitmap filled with new ETH_LINK_SPEED
and the old numerical values are kept for other usages as ETH_SPEED_NUM.
> Disadvantage of that is that it breaks compatibility with OVS (compilation) and thus it also breaks backward compatibility between OVS & DPDK. Is it really necessary?
Yes that's why this change was discussed 9 months ago and announced in the
previous release notes.
> It would be great to keep ABI & API stable if possible.
We try to keep it stable when possible and continue to bring some improvements.
^ permalink raw reply [relevance 0%]
* Re: [dpdk-dev] [PATCH] doc: announce ABI change for rte_port_source_params structure
@ 2016-04-06 8:51 4% ` Azarewicz, PiotrX T
2016-04-07 21:24 4% ` Thomas Monjalon
0 siblings, 1 reply; 200+ results
From: Azarewicz, PiotrX T @ 2016-04-06 8:51 UTC (permalink / raw)
To: Singh, Jasvinder, Zhang, Roy Fan, dev
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Fan Zhang
> > Sent: Thursday, March 31, 2016 2:29 PM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH] doc: announce ABI change for
> > rte_port_source_params structure
> >
> > Several new fields will be added to structure rte_port_source_params
> > for source port enhancement with pcap file reading support.
> >
> > Signed-off-by: Fan Zhang <roy.fan.zhang@intel.com>
> > Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
>
> Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
Acked-by: Piotr Azarewicz <piotrx.t.azarewicz@intel.com>
^ permalink raw reply [relevance 4%]
* Re: [dpdk-dev] [PATCH v13 4/8] ethdev: rename link speed constants
@ 2016-04-06 8:34 2% ` Weglicki, MichalX
2016-04-06 8:52 0% ` Thomas Monjalon
0 siblings, 1 reply; 200+ results
From: Weglicki, MichalX @ 2016-04-06 8:34 UTC (permalink / raw)
To: Marc Sune, Thomas Monjalon, Xu, Qian Q, Xing, Beilei, dev,
Ananyev, Konstantin, Lu, Wenzhuo, Richardson, Bruce, Glynn,
Michael J, Gray, Mark D
Hello,
I have a question about this patch.
As far as I can see, changing ETH_LINK_SPEED_ to ETH_SPEED_NUM_ is a rather cosmetic change, am I right?
The disadvantage is that it breaks compatibility with OVS (compilation) and thus also breaks backward compatibility between OVS & DPDK. Is it really necessary?
It would be great to keep ABI & API stable if possible.
Br,
Michal.
-----Original Message-----
From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Marc Sune
Sent: Saturday, March 26, 2016 1:27 AM
To: Thomas Monjalon <thomas.monjalon@6wind.com>; Xu, Qian Q <qian.q.xu@intel.com>; Xing, Beilei <beilei.xing@intel.com>; dev@dpdk.org; Ananyev, Konstantin <konstantin.ananyev@intel.com>; Lu, Wenzhuo <wenzhuo.lu@intel.com>; Richardson, Bruce <bruce.richardson@intel.com>; Glynn, Michael J <michael.j.glynn@intel.com>
Cc: Marc Sune <marcdevel@gmail.com>
Subject: [dpdk-dev] [PATCH v13 4/8] ethdev: rename link speed constants
The speed numbers ETH_LINK_SPEED_ are renamed ETH_SPEED_NUM_.
The prefix ETH_LINK_SPEED_ is kept for AUTONEG and will be used
for bit flags in next patch.
Signed-off-by: Marc Sune <marcdevel@gmail.com>
---
app/test-pmd/cmdline.c | 10 +++++-----
app/test/virtual_pmd.c | 2 +-
drivers/net/af_packet/rte_eth_af_packet.c | 2 +-
drivers/net/bonding/rte_eth_bond_8023ad.c | 12 ++++++------
drivers/net/cxgbe/base/t4_hw.c | 8 ++++----
drivers/net/e1000/em_ethdev.c | 8 ++++----
drivers/net/e1000/igb_ethdev.c | 8 ++++----
drivers/net/ena/ena_ethdev.c | 2 +-
drivers/net/i40e/i40e_ethdev.c | 30 +++++++++++++++---------------
drivers/net/i40e/i40e_ethdev_vf.c | 2 +-
drivers/net/ixgbe/ixgbe_ethdev.c | 22 +++++++++++-----------
drivers/net/mpipe/mpipe_tilegx.c | 4 ++--
drivers/net/nfp/nfp_net.c | 2 +-
drivers/net/null/rte_eth_null.c | 2 +-
drivers/net/pcap/rte_eth_pcap.c | 2 +-
drivers/net/ring/rte_eth_ring.c | 2 +-
drivers/net/szedata2/rte_eth_szedata2.c | 8 ++++----
drivers/net/vmxnet3/vmxnet3_ethdev.c | 2 +-
drivers/net/xenvirt/rte_eth_xenvirt.c | 2 +-
lib/librte_ether/rte_ethdev.h | 29 ++++++++++++++++++-----------
20 files changed, 83 insertions(+), 76 deletions(-)
diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index eb7bbb4..815b53b 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -1006,20 +1006,20 @@ parse_and_check_speed_duplex(char *speedstr, char *duplexstr, uint16_t *speed)
}
if (!strcmp(speedstr, "10")) {
- *speed = ETH_LINK_SPEED_10;
+ *speed = ETH_SPEED_NUM_10M;
} else if (!strcmp(speedstr, "100")) {
- *speed = ETH_LINK_SPEED_100;
+ *speed = ETH_SPEED_NUM_100M;
} else {
if (duplex != ETH_LINK_FULL_DUPLEX) {
printf("Invalid speed/duplex parameters\n");
return -1;
}
if (!strcmp(speedstr, "1000")) {
- *speed = ETH_LINK_SPEED_1000;
+ *speed = ETH_SPEED_NUM_1G;
} else if (!strcmp(speedstr, "10000")) {
- *speed = ETH_LINK_SPEED_10G;
+ *speed = ETH_SPEED_NUM_10G;
} else if (!strcmp(speedstr, "40000")) {
- *speed = ETH_LINK_SPEED_40G;
+ *speed = ETH_SPEED_NUM_40G;
} else if (!strcmp(speedstr, "auto")) {
*speed = ETH_LINK_SPEED_AUTONEG;
} else {
diff --git a/app/test/virtual_pmd.c b/app/test/virtual_pmd.c
index b1d40d7..b4bd2f2 100644
--- a/app/test/virtual_pmd.c
+++ b/app/test/virtual_pmd.c
@@ -604,7 +604,7 @@ virtual_ethdev_create(const char *name, struct ether_addr *mac_addr,
TAILQ_INIT(&(eth_dev->link_intr_cbs));
eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
- eth_dev->data->dev_link.link_speed = ETH_LINK_SPEED_10000;
+ eth_dev->data->dev_link.link_speed = ETH_SPEED_NUM_10G;
eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
eth_dev->data->mac_addrs = rte_zmalloc(name, ETHER_ADDR_LEN, 0);
diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index dee7b59..641f849 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -116,7 +116,7 @@ static const char *valid_arguments[] = {
static const char *drivername = "AF_PACKET PMD";
static struct rte_eth_link pmd_link = {
- .link_speed = 10000,
+ .link_speed = ETH_SPEED_NUM_10G,
.link_duplex = ETH_LINK_FULL_DUPLEX,
.link_status = ETH_LINK_DOWN,
};
diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c b/drivers/net/bonding/rte_eth_bond_8023ad.c
index 1b7e93a..ac8306f 100644
--- a/drivers/net/bonding/rte_eth_bond_8023ad.c
+++ b/drivers/net/bonding/rte_eth_bond_8023ad.c
@@ -711,22 +711,22 @@ link_speed_key(uint16_t speed) {
case ETH_LINK_SPEED_AUTONEG:
key_speed = 0x00;
break;
- case ETH_LINK_SPEED_10:
+ case ETH_SPEED_NUM_10M:
key_speed = BOND_LINK_SPEED_KEY_10M;
break;
- case ETH_LINK_SPEED_100:
+ case ETH_SPEED_NUM_100M:
key_speed = BOND_LINK_SPEED_KEY_100M;
break;
- case ETH_LINK_SPEED_1000:
+ case ETH_SPEED_NUM_1G:
key_speed = BOND_LINK_SPEED_KEY_1000M;
break;
- case ETH_LINK_SPEED_10G:
+ case ETH_SPEED_NUM_10G:
key_speed = BOND_LINK_SPEED_KEY_10G;
break;
- case ETH_LINK_SPEED_20G:
+ case ETH_SPEED_NUM_20G:
key_speed = BOND_LINK_SPEED_KEY_20G;
break;
- case ETH_LINK_SPEED_40G:
+ case ETH_SPEED_NUM_40G:
key_speed = BOND_LINK_SPEED_KEY_40G;
break;
default:
diff --git a/drivers/net/cxgbe/base/t4_hw.c b/drivers/net/cxgbe/base/t4_hw.c
index 884d2cf..79af806 100644
--- a/drivers/net/cxgbe/base/t4_hw.c
+++ b/drivers/net/cxgbe/base/t4_hw.c
@@ -2159,13 +2159,13 @@ int t4_handle_fw_rpl(struct adapter *adap, const __be64 *rpl)
if (stat & F_FW_PORT_CMD_TXPAUSE)
fc |= PAUSE_TX;
if (stat & V_FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_100M))
- speed = ETH_LINK_SPEED_100;
+ speed = ETH_SPEED_NUM_100M;
else if (stat & V_FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_1G))
- speed = ETH_LINK_SPEED_1000;
+ speed = ETH_SPEED_NUM_1G;
else if (stat & V_FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_10G))
- speed = ETH_LINK_SPEED_10000;
+ speed = ETH_SPEED_NUM_10G;
else if (stat & V_FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_40G))
- speed = ETH_LINK_SPEED_40G;
+ speed = ETH_SPEED_NUM_40G;
for_each_port(adap, i) {
pi = adap2pinfo(adap, i);
diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index fad8f2f..473d77f 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -625,7 +625,7 @@ eth_em_start(struct rte_eth_dev *dev)
else
goto error_invalid_config;
break;
- case ETH_LINK_SPEED_10:
+ case ETH_SPEED_NUM_10M:
if (dev->data->dev_conf.link_duplex == ETH_LINK_AUTONEG_DUPLEX)
hw->phy.autoneg_advertised = E1000_ALL_10_SPEED;
else if (dev->data->dev_conf.link_duplex ==
@@ -637,7 +637,7 @@ eth_em_start(struct rte_eth_dev *dev)
else
goto error_invalid_config;
break;
- case ETH_LINK_SPEED_100:
+ case ETH_SPEED_NUM_100M:
if (dev->data->dev_conf.link_duplex == ETH_LINK_AUTONEG_DUPLEX)
hw->phy.autoneg_advertised = E1000_ALL_100_SPEED;
else if (dev->data->dev_conf.link_duplex ==
@@ -649,7 +649,7 @@ eth_em_start(struct rte_eth_dev *dev)
else
goto error_invalid_config;
break;
- case ETH_LINK_SPEED_1000:
+ case ETH_SPEED_NUM_1G:
if ((dev->data->dev_conf.link_duplex ==
ETH_LINK_AUTONEG_DUPLEX) ||
(dev->data->dev_conf.link_duplex ==
@@ -658,7 +658,7 @@ eth_em_start(struct rte_eth_dev *dev)
else
goto error_invalid_config;
break;
- case ETH_LINK_SPEED_10000:
+ case ETH_SPEED_NUM_10G:
default:
goto error_invalid_config;
}
diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index 4dfa7e3..86f25f6 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -1244,7 +1244,7 @@ eth_igb_start(struct rte_eth_dev *dev)
else
goto error_invalid_config;
break;
- case ETH_LINK_SPEED_10:
+ case ETH_SPEED_NUM_10M:
if (dev->data->dev_conf.link_duplex == ETH_LINK_AUTONEG_DUPLEX)
hw->phy.autoneg_advertised = E1000_ALL_10_SPEED;
else if (dev->data->dev_conf.link_duplex == ETH_LINK_HALF_DUPLEX)
@@ -1254,7 +1254,7 @@ eth_igb_start(struct rte_eth_dev *dev)
else
goto error_invalid_config;
break;
- case ETH_LINK_SPEED_100:
+ case ETH_SPEED_NUM_100M:
if (dev->data->dev_conf.link_duplex == ETH_LINK_AUTONEG_DUPLEX)
hw->phy.autoneg_advertised = E1000_ALL_100_SPEED;
else if (dev->data->dev_conf.link_duplex == ETH_LINK_HALF_DUPLEX)
@@ -1264,14 +1264,14 @@ eth_igb_start(struct rte_eth_dev *dev)
else
goto error_invalid_config;
break;
- case ETH_LINK_SPEED_1000:
+ case ETH_SPEED_NUM_1G:
if ((dev->data->dev_conf.link_duplex == ETH_LINK_AUTONEG_DUPLEX) ||
(dev->data->dev_conf.link_duplex == ETH_LINK_FULL_DUPLEX))
hw->phy.autoneg_advertised = ADVERTISE_1000_FULL;
else
goto error_invalid_config;
break;
- case ETH_LINK_SPEED_10000:
+ case ETH_SPEED_NUM_10G:
default:
goto error_invalid_config;
}
diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 325c513..1046286 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -504,7 +504,7 @@ static int ena_link_update(struct rte_eth_dev *dev,
struct rte_eth_link *link = &dev->data->dev_link;
link->link_status = 1;
- link->link_speed = ETH_LINK_SPEED_10G;
+ link->link_speed = ETH_SPEED_NUM_10G;
link->link_duplex = ETH_LINK_FULL_DUPLEX;
return 0;
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 05126e8..cce9e6f 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -1386,19 +1386,19 @@ i40e_parse_link_speed(uint16_t eth_link_speed)
uint8_t link_speed = I40E_LINK_SPEED_UNKNOWN;
switch (eth_link_speed) {
- case ETH_LINK_SPEED_40G:
+ case ETH_SPEED_NUM_40G:
link_speed = I40E_LINK_SPEED_40GB;
break;
- case ETH_LINK_SPEED_20G:
+ case ETH_SPEED_NUM_20G:
link_speed = I40E_LINK_SPEED_20GB;
break;
- case ETH_LINK_SPEED_10G:
+ case ETH_SPEED_NUM_10G:
link_speed = I40E_LINK_SPEED_10GB;
break;
- case ETH_LINK_SPEED_1000:
+ case ETH_SPEED_NUM_1G:
link_speed = I40E_LINK_SPEED_1GB;
break;
- case ETH_LINK_SPEED_100:
+ case ETH_SPEED_NUM_100M:
link_speed = I40E_LINK_SPEED_100MB;
break;
}
@@ -1768,7 +1768,7 @@ i40e_dev_link_update(struct rte_eth_dev *dev,
/* Get link status information from hardware */
status = i40e_aq_get_link_info(hw, false, &link_status, NULL);
if (status != I40E_SUCCESS) {
- link.link_speed = ETH_LINK_SPEED_100;
+ link.link_speed = ETH_SPEED_NUM_100M;
link.link_duplex = ETH_LINK_FULL_DUPLEX;
PMD_DRV_LOG(ERR, "Failed to get link info");
goto out;
@@ -1790,22 +1790,22 @@ i40e_dev_link_update(struct rte_eth_dev *dev,
/* Parse the link status */
switch (link_status.link_speed) {
case I40E_LINK_SPEED_100MB:
- link.link_speed = ETH_LINK_SPEED_100;
+ link.link_speed = ETH_SPEED_NUM_100M;
break;
case I40E_LINK_SPEED_1GB:
- link.link_speed = ETH_LINK_SPEED_1000;
+ link.link_speed = ETH_SPEED_NUM_1G;
break;
case I40E_LINK_SPEED_10GB:
- link.link_speed = ETH_LINK_SPEED_10G;
+ link.link_speed = ETH_SPEED_NUM_10G;
break;
case I40E_LINK_SPEED_20GB:
- link.link_speed = ETH_LINK_SPEED_20G;
+ link.link_speed = ETH_SPEED_NUM_20G;
break;
case I40E_LINK_SPEED_40GB:
- link.link_speed = ETH_LINK_SPEED_40G;
+ link.link_speed = ETH_SPEED_NUM_40G;
break;
default:
- link.link_speed = ETH_LINK_SPEED_100;
+ link.link_speed = ETH_SPEED_NUM_100M;
break;
}
@@ -8158,15 +8158,15 @@ i40e_start_timecounters(struct rte_eth_dev *dev)
rte_i40e_dev_atomic_read_link_status(dev, &link);
switch (link.link_speed) {
- case ETH_LINK_SPEED_40G:
+ case ETH_SPEED_NUM_40G:
tsync_inc_l = I40E_PTP_40GB_INCVAL & 0xFFFFFFFF;
tsync_inc_h = I40E_PTP_40GB_INCVAL >> 32;
break;
- case ETH_LINK_SPEED_10G:
+ case ETH_SPEED_NUM_10G:
tsync_inc_l = I40E_PTP_10GB_INCVAL & 0xFFFFFFFF;
tsync_inc_h = I40E_PTP_10GB_INCVAL >> 32;
break;
- case ETH_LINK_SPEED_1000:
+ case ETH_SPEED_NUM_1G:
tsync_inc_l = I40E_PTP_1GB_INCVAL & 0xFFFFFFFF;
tsync_inc_h = I40E_PTP_1GB_INCVAL >> 32;
break;
diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c
index 91df13b..295dcd2 100644
--- a/drivers/net/i40e/i40e_ethdev_vf.c
+++ b/drivers/net/i40e/i40e_ethdev_vf.c
@@ -2126,7 +2126,7 @@ i40evf_dev_link_update(struct rte_eth_dev *dev,
else {
/* Always assume it's up, for Linux driver PF host */
new_link.link_duplex = ETH_LINK_AUTONEG_DUPLEX;
- new_link.link_speed = ETH_LINK_SPEED_10000;
+ new_link.link_speed = ETH_SPEED_NUM_10G;
new_link.link_status = ETH_LINK_UP;
}
i40evf_dev_atomic_write_link_status(dev, &new_link);
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 21a3b8c..a0179d2 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -2199,17 +2199,17 @@ ixgbe_dev_start(struct rte_eth_dev *dev)
IXGBE_LINK_SPEED_82599_AUTONEG :
IXGBE_LINK_SPEED_82598_AUTONEG;
break;
- case ETH_LINK_SPEED_100:
+ case ETH_SPEED_NUM_100M:
/*
* Invalid for 82598 but error will be detected by
* ixgbe_setup_link()
*/
speed = IXGBE_LINK_SPEED_100_FULL;
break;
- case ETH_LINK_SPEED_1000:
+ case ETH_SPEED_NUM_1G:
speed = IXGBE_LINK_SPEED_1GB_FULL;
break;
- case ETH_LINK_SPEED_10000:
+ case ETH_SPEED_NUM_10G:
speed = IXGBE_LINK_SPEED_10GB_FULL;
break;
default:
@@ -3074,7 +3074,7 @@ ixgbe_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete)
diag = ixgbe_check_link(hw, &link_speed, &link_up, 1);
if (diag != 0) {
- link.link_speed = ETH_LINK_SPEED_100;
+ link.link_speed = ETH_SPEED_NUM_100M;
link.link_duplex = ETH_LINK_HALF_DUPLEX;
rte_ixgbe_dev_atomic_write_link_status(dev, &link);
if (link.link_status == old.link_status)
@@ -3095,19 +3095,19 @@ ixgbe_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete)
default:
case IXGBE_LINK_SPEED_UNKNOWN:
link.link_duplex = ETH_LINK_HALF_DUPLEX;
- link.link_speed = ETH_LINK_SPEED_100;
+ link.link_speed = ETH_SPEED_NUM_100M;
break;
case IXGBE_LINK_SPEED_100_FULL:
- link.link_speed = ETH_LINK_SPEED_100;
+ link.link_speed = ETH_SPEED_NUM_100M;
break;
case IXGBE_LINK_SPEED_1GB_FULL:
- link.link_speed = ETH_LINK_SPEED_1000;
+ link.link_speed = ETH_SPEED_NUM_1G;
break;
case IXGBE_LINK_SPEED_10GB_FULL:
- link.link_speed = ETH_LINK_SPEED_10000;
+ link.link_speed = ETH_SPEED_NUM_10G;
break;
}
rte_ixgbe_dev_atomic_write_link_status(dev, &link);
@@ -5909,15 +5909,15 @@ ixgbe_start_timecounters(struct rte_eth_dev *dev)
rte_ixgbe_dev_atomic_read_link_status(dev, &link);
switch (link.link_speed) {
- case ETH_LINK_SPEED_100:
+ case ETH_SPEED_NUM_100M:
incval = IXGBE_INCVAL_100;
shift = IXGBE_INCVAL_SHIFT_100;
break;
- case ETH_LINK_SPEED_1000:
+ case ETH_SPEED_NUM_1G:
incval = IXGBE_INCVAL_1GB;
shift = IXGBE_INCVAL_SHIFT_1GB;
break;
- case ETH_LINK_SPEED_10000:
+ case ETH_SPEED_NUM_10G:
default:
incval = IXGBE_INCVAL_10GB;
shift = IXGBE_INCVAL_SHIFT_10GB;
diff --git a/drivers/net/mpipe/mpipe_tilegx.c b/drivers/net/mpipe/mpipe_tilegx.c
index d93ab7e..1a77c7a 100644
--- a/drivers/net/mpipe/mpipe_tilegx.c
+++ b/drivers/net/mpipe/mpipe_tilegx.c
@@ -395,11 +395,11 @@ mpipe_link_update(struct rte_eth_dev *dev, int wait_to_complete)
speed = state & GXIO_MPIPE_LINK_SPEED_MASK;
if (speed == GXIO_MPIPE_LINK_1G) {
- new.link_speed = ETH_LINK_SPEED_1000;
+ new.link_speed = ETH_SPEED_NUM_1G;
new.link_duplex = ETH_LINK_FULL_DUPLEX;
new.link_status = ETH_LINK_UP;
} else if (speed == GXIO_MPIPE_LINK_10G) {
- new.link_speed = ETH_LINK_SPEED_10000;
+ new.link_speed = ETH_SPEED_NUM_10G;
new.link_duplex = ETH_LINK_FULL_DUPLEX;
new.link_status = ETH_LINK_UP;
}
diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
index 80dda85..18ea0f4 100644
--- a/drivers/net/nfp/nfp_net.c
+++ b/drivers/net/nfp/nfp_net.c
@@ -821,7 +821,7 @@ nfp_net_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
link.link_duplex = ETH_LINK_FULL_DUPLEX;
/* Other cards can limit the tx and rx rate per VF */
- link.link_speed = ETH_LINK_SPEED_40G;
+ link.link_speed = ETH_SPEED_NUM_40G;
if (old.link_status != link.link_status) {
nfp_net_dev_atomic_write_link_status(dev, &link);
diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 6adea91..5640585 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -89,7 +89,7 @@ struct pmd_internals {
static struct ether_addr eth_addr = { .addr_bytes = {0} };
static const char *drivername = "Null PMD";
static struct rte_eth_link pmd_link = {
- .link_speed = 10000,
+ .link_speed = ETH_SPEED_NUM_10G,
.link_duplex = ETH_LINK_FULL_DUPLEX,
.link_status = ETH_LINK_DOWN,
};
diff --git a/drivers/net/pcap/rte_eth_pcap.c b/drivers/net/pcap/rte_eth_pcap.c
index b90c725..c657951 100644
--- a/drivers/net/pcap/rte_eth_pcap.c
+++ b/drivers/net/pcap/rte_eth_pcap.c
@@ -123,7 +123,7 @@ static int open_single_iface(const char *iface, pcap_t **pcap);
static struct ether_addr eth_addr = { .addr_bytes = { 0, 0, 0, 0x1, 0x2, 0x3 } };
static const char *drivername = "Pcap PMD";
static struct rte_eth_link pmd_link = {
- .link_speed = 10000,
+ .link_speed = ETH_SPEED_NUM_10G,
.link_duplex = ETH_LINK_FULL_DUPLEX,
.link_status = ETH_LINK_DOWN,
};
diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index 4335c6a..58685e9 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -77,7 +77,7 @@ struct pmd_internals {
static const char *drivername = "Rings PMD";
static struct rte_eth_link pmd_link = {
- .link_speed = 10000,
+ .link_speed = ETH_SPEED_NUM_10G,
.link_duplex = ETH_LINK_FULL_DUPLEX,
.link_status = ETH_LINK_DOWN,
};
diff --git a/drivers/net/szedata2/rte_eth_szedata2.c b/drivers/net/szedata2/rte_eth_szedata2.c
index 47aa7e3..dd1ae9e 100644
--- a/drivers/net/szedata2/rte_eth_szedata2.c
+++ b/drivers/net/szedata2/rte_eth_szedata2.c
@@ -1149,10 +1149,10 @@ eth_link_update(struct rte_eth_dev *dev,
switch (cgmii_link_speed(ibuf)) {
case SZEDATA2_LINK_SPEED_10G:
- link.link_speed = ETH_LINK_SPEED_10G;
+ link.link_speed = ETH_SPEED_NUM_10G;
break;
case SZEDATA2_LINK_SPEED_40G:
- link.link_speed = ETH_LINK_SPEED_40G;
+ link.link_speed = ETH_SPEED_NUM_40G;
break;
case SZEDATA2_LINK_SPEED_100G:
/*
@@ -1161,10 +1161,10 @@ eth_link_update(struct rte_eth_dev *dev,
* will be changed to support 100Gbps speed change
* this value to 100G.
*/
- link.link_speed = ETH_LINK_SPEED_10G;
+ link.link_speed = ETH_SPEED_NUM_10G;
break;
default:
- link.link_speed = ETH_LINK_SPEED_10G;
+ link.link_speed = ETH_SPEED_NUM_10G;
break;
}
diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c
index 3f26217..6afa14e 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
@@ -779,7 +779,7 @@ vmxnet3_dev_link_update(struct rte_eth_dev *dev, __attribute__((unused)) int wai
if (ret & 0x1) {
link.link_status = ETH_LINK_UP;
link.link_duplex = ETH_LINK_FULL_DUPLEX;
- link.link_speed = ETH_LINK_SPEED_10000;
+ link.link_speed = ETH_SPEED_NUM_10G;
}
vmxnet3_dev_atomic_write_link_status(dev, &link);
diff --git a/drivers/net/xenvirt/rte_eth_xenvirt.c b/drivers/net/xenvirt/rte_eth_xenvirt.c
index 9453a06..77d3ba1 100644
--- a/drivers/net/xenvirt/rte_eth_xenvirt.c
+++ b/drivers/net/xenvirt/rte_eth_xenvirt.c
@@ -70,7 +70,7 @@ static int virtio_idx = 0;
static const char *drivername = "xen virtio PMD";
static struct rte_eth_link pmd_link = {
- .link_speed = 10000,
+ .link_speed = ETH_SPEED_NUM_10G,
.link_duplex = ETH_LINK_FULL_DUPLEX,
.link_status = ETH_LINK_DOWN,
};
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 2d13f92..bc7d607 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -242,23 +242,30 @@ struct rte_eth_stats {
};
/**
+ * Ethernet numeric link speeds in Mbps
+ */
+#define ETH_LINK_SPEED_AUTONEG 0 /**< Auto-negotiate link speed. */
+#define ETH_SPEED_NUM_10M 10 /**< 10 Mbps */
+#define ETH_SPEED_NUM_100M 100 /**< 100 Mbps */
+#define ETH_SPEED_NUM_1G 1000 /**< 1 Gbps */
+#define ETH_SPEED_NUM_2_5G 2500 /**< 2.5 Gbps */
+#define ETH_SPEED_NUM_5G 5000 /**< 5 Gbps */
+#define ETH_SPEED_NUM_10G 10000 /**< 10 Gbps */
+#define ETH_SPEED_NUM_20G 20000 /**< 20 Gbps */
+#define ETH_SPEED_NUM_25G 25000 /**< 25 Gbps */
+#define ETH_SPEED_NUM_40G 40000 /**< 40 Gbps */
+#define ETH_SPEED_NUM_50G 50000 /**< 50 Gbps */
+#define ETH_SPEED_NUM_56G 56000 /**< 56 Gbps */
+
+/**
* A structure used to retrieve link-level information of an Ethernet port.
*/
struct rte_eth_link {
- uint16_t link_speed; /**< ETH_LINK_SPEED_[10, 100, 1000, 10000] */
+ uint16_t link_speed; /**< ETH_SPEED_NUM_ */
uint16_t link_duplex; /**< ETH_LINK_[HALF/FULL]_DUPLEX */
uint8_t link_status : 1; /**< ETH_LINK_[DOWN/UP] */
}__attribute__((aligned(8))); /**< aligned for atomic64 read/write */
-#define ETH_LINK_SPEED_AUTONEG 0 /**< Auto-negotiate link speed. */
-#define ETH_LINK_SPEED_10 10 /**< 10 megabits/second. */
-#define ETH_LINK_SPEED_100 100 /**< 100 megabits/second. */
-#define ETH_LINK_SPEED_1000 1000 /**< 1 gigabits/second. */
-#define ETH_LINK_SPEED_10000 10000 /**< 10 gigabits/second. */
-#define ETH_LINK_SPEED_10G 10000 /**< alias of 10 gigabits/second. */
-#define ETH_LINK_SPEED_20G 20000 /**< 20 gigabits/second. */
-#define ETH_LINK_SPEED_40G 40000 /**< 40 gigabits/second. */
-
/* Utility constants */
#define ETH_LINK_AUTONEG_DUPLEX 0 /**< Auto-negotiate duplex. */
#define ETH_LINK_HALF_DUPLEX 1 /**< Half-duplex connection. */
@@ -779,7 +786,7 @@ struct rte_intr_conf {
*/
struct rte_eth_conf {
uint16_t link_speed;
- /**< ETH_LINK_SPEED_10[0|00|000], or 0 for autonegotation */
+ /**< ETH_SPEED_NUM_ or 0 for autonegotiation */
uint16_t link_duplex;
/**< ETH_LINK_[HALF_DUPLEX|FULL_DUPLEX], or 0 for autonegotation */
struct rte_eth_rxmode rxmode; /**< Port RX configuration. */
--
2.1.4
--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263
This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.
^ permalink raw reply [relevance 2%]
* [dpdk-dev] [PATCH] vhost: ABI/API change announcement due to refactor
@ 2016-04-06 6:53 15% Yuanhan Liu
2016-04-07 7:12 7% ` Panu Matilainen
0 siblings, 1 reply; 200+ results
From: Yuanhan Liu @ 2016-04-06 6:53 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Thomas Monjalon, Yuanhan Liu, Ilya Maximets
We currently expose far more fields (or even structures) than
necessary. For example, the vhost_virtqueue struct should NOT be exposed
to the user at all: the application just needs to pass the right queue id
to locate a specific queue, and that's all. Instead, the structure should
be defined in an internal header file. With that, we could make any changes
to it we want, without worrying that we may violate the painful
ABI rules.
Similar changes could be made to the virtio_net struct as well, exposing
only the few fields that are necessary and moving all others to an internal
structure.
Huawei then suggested a more radical yet much cleaner approach: just exposing
a virtio_net handle to the application, just like the way the kernel exposes an
fd to the user for locating a specific file, and exposing some new functions
to access those old fields, such as flags and virt_qp_nb.
With this change, we're likely to be free from ABI violations forever
(well, except when we have to extend the virtio_net_device_ops struct).
For example, the following nice cleanup would then no longer be blocked:
http://dpdk.org/ml/archives/dev/2016-February/033528.html
Suggested-by: Huawei Xie <huawei.xie@intel.com>
Cc: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
doc/guides/rel_notes/deprecation.rst | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index ad31355..7d16d86 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -40,3 +40,10 @@ Deprecation Notices
The existing API will be backward compatible, but there will be new API
functions added to facilitate the creation of mempools using an external
handler. The 16.07 release will contain these changes.
+
+* A librte_vhost public structures refactor is planned for DPDK 16.07
+ that requires both ABI and API change.
+ The proposed refactor would expose DPDK vhost dev to applications as
+ a handle, like the way kernel exposes an fd to user for locating a
+ specific file, and to keep all major structures internally, so that
+ we are likely to be free from ABI violations in future.
--
1.9.0
^ permalink raw reply [relevance 15%]
Results 12001-12200 of ~18000 next (older) | prev (newer) | reverse | sort options + mbox downloads above
-- links below jump to the message on this page --
2016-01-29 14:08 [dpdk-dev] [PATCH 0/9] prepare for rte_device / rte_driver David Marchand
2016-04-20 11:44 ` [dpdk-dev] [PATCH v2 00/17] " David Marchand
2016-04-20 11:44 3% ` [dpdk-dev] [PATCH v2 04/17] eal: remove duplicate function declaration David Marchand
2016-02-12 18:36 [dpdk-dev] [PATCH v4] mempool: reduce rte_mempool structure size Keith Wiles
2016-04-14 9:42 2% ` [dpdk-dev] [PATCH v5] " Olivier Matz
2016-04-14 13:28 0% ` Wiles, Keith
2016-04-14 13:53 0% ` Wiles, Keith
2016-05-17 5:31 0% ` Thomas Monjalon
2016-03-09 9:50 [dpdk-dev] [PATCH v3 0/4] external mempool manager David Hunt
2016-04-14 13:57 2% ` [dpdk-dev] [PATCH v4 0/3] " Olivier Matz
2016-05-19 13:44 2% ` [dpdk-dev] mempool: " David Hunt
2016-05-19 13:44 ` [dpdk-dev] [PATCH v5 1/3] mempool: support external handler David Hunt
2016-05-24 15:35 ` Jerin Jacob
2016-05-27 9:52 ` Hunt, David
2016-05-27 10:33 ` Jerin Jacob
2016-05-27 14:44 ` Hunt, David
2016-05-30 9:41 3% ` Jerin Jacob
2016-05-30 11:27 0% ` Hunt, David
2016-06-01 16:19 2% ` [dpdk-dev] [PATCH v6 0/5] mempool: add external mempool manager David Hunt
2016-06-02 13:27 2% ` [dpdk-dev] [PATCH v7 " David Hunt
2016-06-02 13:27 ` [dpdk-dev] [PATCH v7 1/5] mempool: support external mempool operations David Hunt
2016-06-02 13:38 2% ` [dpdk-dev] [PATCH v7 0/5] mempool: add external mempool manager Hunt, David
2016-06-03 14:58 2% ` [dpdk-dev] [PATCH v8 " David Hunt
2016-03-09 16:19 [dpdk-dev] [RFC 00/35] mempool: rework memory allocation Olivier Matz
2016-04-14 10:19 2% ` [dpdk-dev] [PATCH 00/36] " Olivier Matz
2016-04-14 13:50 0% ` Wiles, Keith
2016-04-14 14:01 0% ` Olivier MATZ
2016-04-14 14:03 0% ` Wiles, Keith
2016-05-18 11:04 2% ` [dpdk-dev] [PATCH v3 00/35] " Olivier Matz
2016-05-18 11:04 10% ` [dpdk-dev] [PATCH v3 35/35] doc: update release notes about mempool allocation Olivier Matz
2016-05-19 12:47 0% ` [dpdk-dev] [PATCH v3 00/35] mempool: rework memory allocation Thomas Monjalon
2016-05-20 8:42 0% ` Panu Matilainen
2016-03-24 7:14 [dpdk-dev] Suggestions for the dpdk stable tree Christian Ehrhardt
2016-05-20 8:07 ` Christian Ehrhardt
2016-05-20 14:49 ` Mcnamara, John
2016-05-23 2:21 3% ` Yuanhan Liu
2016-06-01 19:01 0% ` Mcnamara, John
2016-03-25 19:42 [dpdk-dev] [PATCH v12 0/8] ethdev: 100G and link speed API refactoring Thomas Monjalon
2016-03-26 1:27 ` [dpdk-dev] [PATCH v13 " Marc Sune
2016-03-26 1:27 ` [dpdk-dev] [PATCH v13 4/8] ethdev: rename link speed constants Marc Sune
2016-04-06 8:34 2% ` Weglicki, MichalX
2016-04-06 8:52 0% ` Thomas Monjalon
2016-04-06 9:16 3% ` Weglicki, MichalX
2016-04-06 9:34 3% ` Thomas Monjalon
2016-03-31 13:29 [dpdk-dev] [PATCH] doc: announce ABI change for rte_port_source_params structure Fan Zhang
2016-04-05 21:16 ` Singh, Jasvinder
2016-04-06 8:51 4% ` Azarewicz, PiotrX T
2016-04-07 21:24 4% ` Thomas Monjalon
2016-04-12 12:39 4% ` Thomas Monjalon
2016-04-05 9:23 [dpdk-dev] [PATCH] doc: announce ABI changes for user-owned mempool caches Lazaros Koromilas
2016-04-05 15:42 ` Olivier Matz
2016-04-08 14:01 4% ` Hunt, David
2016-04-10 9:55 4% ` Thomas Monjalon
2016-04-05 13:56 [dpdk-dev] DPDK namespace Thomas Monjalon
2016-04-05 14:13 ` Trahe, Fiona
2016-04-05 14:31 ` Arnon Warshavsky
2016-04-06 5:26 ` Yuanhan Liu
2016-04-06 12:07 0% ` Panu Matilainen
2016-04-06 12:34 0% ` Ananyev, Konstantin
2016-04-06 14:36 0% ` Wiles, Keith
2016-04-06 20:21 ` Dave Neary
2016-04-07 8:22 3% ` Marc
2016-04-07 9:18 ` Thomas Monjalon
2016-04-07 9:33 3% ` Panu Matilainen
2016-04-07 10:16 5% ` Marc Sune
2016-04-07 11:51 9% ` [dpdk-dev] On DPDK ABI policy Panu Matilainen
2016-04-07 21:52 4% ` Matthew Hall
2016-04-08 8:29 4% ` Marc Sune
2016-04-08 8:47 9% ` Marc Sune
2016-04-07 21:48 0% ` [dpdk-dev] DPDK namespace Matthew Hall
2016-04-05 17:58 [dpdk-dev] [PATCH] doc: announce xstats api change for 16.07 Harry van Haaren
2016-04-05 18:45 ` Thomas Monjalon
2016-04-06 9:02 0% ` Van Haaren, Harry
2016-04-06 9:22 0% ` Thomas Monjalon
2016-04-06 11:16 3% ` Van Haaren, Harry
2016-04-06 12:14 0% ` Thomas Monjalon
2016-04-06 13:49 0% ` David Harton (dharton)
2016-04-06 14:00 0% ` David Harton (dharton)
2016-04-06 6:53 15% [dpdk-dev] [PATCH] vhost: ABI/API change announcement due to refactor Yuanhan Liu
2016-04-07 7:12 7% ` Panu Matilainen
2016-04-10 9:58 4% ` Thomas Monjalon
2016-04-10 10:02 4% ` Thomas Monjalon
2016-04-06 9:53 [dpdk-dev] Fw: dpdk-armv7-testing - Build # 43 - Failure! Jan Viktorin
2016-04-06 10:05 ` Thomas Monjalon
2016-04-06 10:18 4% ` Jan Viktorin
2016-04-06 16:58 [dpdk-dev] ovs crash when running traffic from VM to VM over DPDK and vhostuser Yuanhan Liu
2016-05-02 17:40 3% ` Yi Ba
2016-04-07 15:33 5% [dpdk-dev] [PATCH] doc: announce API changes for device objects David Marchand
2016-04-07 15:46 0% ` Jan Viktorin
2016-04-07 17:00 0% ` David Marchand
2016-04-07 17:09 3% ` Jan Viktorin
2016-04-07 17:24 0% ` David Marchand
2016-04-07 16:02 8% [dpdk-dev] [PATCH v1] doc: fix release notes for 16.04 John McNamara
2016-04-12 12:01 6% [dpdk-dev] [PATCH v1] doc: add template release notes for 16.11 John McNamara
2016-04-12 12:55 6% [dpdk-dev] [PATCH v1] doc: add template release notes for 16.07 John McNamara
2016-04-14 9:44 4% [dpdk-dev] [RFC 0/2] add new fields to rte_eth_dev_info structure Reshma Pattan
2016-04-14 9:44 9% ` [dpdk-dev] [RFC 1/2] doc: announce ABI change for " Reshma Pattan
2016-04-15 9:42 4% ` Mcnamara, John
2016-04-15 10:02 8% ` Thomas Monjalon
2016-04-14 9:44 ` [dpdk-dev] [RFC 2/2] librte_ether: add new fields to rte_eth_dev_info struct Reshma Pattan
2016-04-15 10:36 3% ` Thomas Monjalon
2016-04-15 11:32 0% ` Ananyev, Konstantin
2016-04-14 18:33 25% [dpdk-dev] [PATCH] port: bump ABI for pcap file support Thomas Monjalon
2016-04-15 10:32 4% ` Dumitrescu, Cristian
2016-04-20 9:55 4% ` Thomas Monjalon
2016-04-14 21:33 4% [dpdk-dev] [PATCH] pci: remove deprecated specific config Thomas Monjalon
2016-04-15 14:44 3% [dpdk-dev] [RFC PATCH v1 0/3] Remove string operations from xstats Remy Horton
2016-04-20 16:03 0% ` David Harton (dharton)
2016-04-28 14:56 0% ` Tahhan, Maryam
2016-04-28 15:58 0% ` David Harton (dharton)
2016-04-29 12:52 0% ` David Harton (dharton)
2016-04-15 22:33 15% [dpdk-dev] [PATCH] mk: do not enforce any specific ARM ABI Jan Viktorin
2016-05-02 15:47 4% ` Thomas Monjalon
2016-04-19 11:11 [dpdk-dev] perfomance of rte_lpm rule subsystem Александр Киселев
2016-04-19 15:46 3% ` Stephen Hemminger
2016-04-19 20:46 0% ` Vladimir Medvedkin
2016-05-02 19:38 0% ` Александр Киселев
2016-04-19 14:03 5% [dpdk-dev] [PATCH] ethdev: remove deprecated statistics Thomas Monjalon
2016-04-20 9:47 4% ` [dpdk-dev] [PATCH v2] " Thomas Monjalon
2016-04-25 9:18 2% [dpdk-dev] [RFC] eal: provide option to set vhost_user socket owner/permissions Christian Ehrhardt
2016-04-26 4:16 0% ` Yuanhan Liu
2016-04-26 7:24 0% ` Christian Ehrhardt
2016-04-26 4:45 3% [dpdk-dev] [PATCH 0/7] vhost/example cleanup/fix Yuanhan Liu
2016-04-26 4:45 2% ` [dpdk-dev] [PATCH 7/7] examples/vhost: switch_worker cleanup Yuanhan Liu
2016-04-28 5:45 0% ` [dpdk-dev] [PATCH 0/7] vhost/example cleanup/fix Wang, Zhihong
2016-04-28 6:09 0% ` Yuanhan Liu
[not found] ` <1462224230-19460-1-git-send-email-yuanhan.liu@linux.intel.com>
2016-05-02 21:23 2% ` [dpdk-dev] [PATCH v2 7/8] examples/vhost: switch_worker cleanup Yuanhan Liu
2016-05-09 18:06 0% ` [dpdk-dev] [PATCH v2 0/8] vhost/example cleanup/fix Yuanhan Liu
2016-05-02 22:25 9% [dpdk-dev] [PATCH 00/16] vhost ABI/API refactoring Yuanhan Liu
2016-05-02 22:25 2% ` [dpdk-dev] [PATCH 10/16] vhost: export vid as the only interface to applications Yuanhan Liu
2016-05-10 16:17 0% ` Rich Lane
2016-05-10 16:39 0% ` Yuanhan Liu
2016-05-02 22:25 4% ` [dpdk-dev] [PATCH 14/16] vhost: reserve few more space for future extension Yuanhan Liu
2016-05-13 5:24 8% ` [dpdk-dev] [PATCH v2 00/19] vhost ABI/API refactoring Yuanhan Liu
2016-05-13 5:25 7% ` [dpdk-dev] [PATCH v2 11/19] vhost: introduce new API to export queue free entries Yuanhan Liu
2016-05-13 5:25 3% ` [dpdk-dev] [PATCH v2 12/19] vhost: remove dependency on priv field Yuanhan Liu
2016-05-13 5:25 12% ` [dpdk-dev] [PATCH v2 13/19] vhost: export vid as the only interface to applications Yuanhan Liu
2016-05-13 5:25 4% ` [dpdk-dev] [PATCH v2 17/19] vhost: reserve few more space for future extension Yuanhan Liu
2016-05-26 17:04 4% ` [dpdk-dev] [PATCH v2 00/19] vhost ABI/API refactoring Rich Lane
2016-05-27 1:36 4% ` Yuanhan Liu
2016-06-07 3:51 9% ` [dpdk-dev] [PATCH v3 00/20] " Yuanhan Liu
2016-06-07 3:52 7% ` [dpdk-dev] [PATCH v3 11/20] vhost: introduce new API to export queue free entries Yuanhan Liu
2016-06-07 3:52 3% ` [dpdk-dev] [PATCH v3 12/20] vhost: remove dependency on priv field Yuanhan Liu
2016-06-07 3:52 13% ` [dpdk-dev] [PATCH v3 13/20] vhost: export vid as the only interface to applications Yuanhan Liu
2016-06-07 3:52 4% ` [dpdk-dev] [PATCH v3 17/20] vhost: reserve few more space for future extension Yuanhan Liu
2016-06-07 3:52 6% ` [dpdk-dev] [PATCH v3 18/20] examples/tep_term: adapt to new vhost ABI/API changes Yuanhan Liu
2016-05-03 0:46 3% [dpdk-dev] [PATCH 0/3] [RFC] vhost: micro vhost optimization Yuanhan Liu
2016-05-10 21:49 0% ` Rich Lane
2016-05-10 22:08 0% ` Yuanhan Liu
2016-05-03 5:51 [dpdk-dev] [PATCH 0/2] NSH packet type support in i40e Jingjing Wu
2016-05-03 5:51 ` [dpdk-dev] [PATCH 1/2] mbuf: new NSH packet type Jingjing Wu
2016-05-19 12:26 4% ` Olivier Matz
2016-05-06 10:55 [dpdk-dev] [PATCH 0/5] add packet capture framework Reshma Pattan
2016-05-06 10:55 6% ` [dpdk-dev] [PATCH 5/5] doc: update doc for " Reshma Pattan
2016-05-10 9:39 ` [dpdk-dev] [PATCHv2 0/5] add " Reshma Pattan
2016-05-10 9:40 6% ` [dpdk-dev] [PATCHv2 5/5] doc: update doc for " Reshma Pattan
2016-05-17 16:37 ` [dpdk-dev] [PATCH v3 0/8] add " Reshma Pattan
2016-05-17 16:37 6% ` [dpdk-dev] [PATCH v3 8/8] doc: update doc for " Reshma Pattan
2016-05-23 21:38 3% ` [dpdk-dev] [PATCH v4 0/9] add " Reshma Pattan
2016-05-23 21:38 ` [dpdk-dev] [PATCH v4 3/9] librte_ether: add new fields to rte_eth_dev_info struct Reshma Pattan
2016-05-23 22:24 3% ` Stephen Hemminger
2016-05-24 8:09 3% ` Pattan, Reshma
2016-05-23 21:38 6% ` [dpdk-dev] [PATCH v4 8/9] doc: update doc for packet capture framework Reshma Pattan
2016-05-23 21:38 9% ` [dpdk-dev] [PATCH v4 9/9] doc: announce ABI change for rte_eth_dev_info structure Reshma Pattan
2016-05-06 20:05 [dpdk-dev] [PATCH v1] hash: add tsx support for cuckoo hash Shen Wei
2016-05-07 4:56 3% ` Stephen Hemminger
2016-05-09 16:51 4% ` Shen, Wei1
2016-05-07 6:40 3% [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
2016-05-10 3:23 3% ` Xu, Qian Q
2016-05-10 17:41 0% ` Yuanhan Liu
2016-05-13 6:16 3% ` [dpdk-dev] [PATCH v2 " Yuanhan Liu
2016-06-07 4:05 3% ` [dpdk-dev] [PATCH v3 " Yuanhan Liu
2016-05-10 9:13 [dpdk-dev] Ring PMD: why are stats counters atomic? Mauricio Vásquez
2016-05-10 9:36 ` Bruce Richardson
2016-05-16 13:12 ` Mauricio Vásquez
2016-05-16 13:16 3% ` Bruce Richardson
2016-05-10 10:11 [dpdk-dev] [PATCH] sched: fix useless call Daniel Mrzyglod
2016-05-10 17:18 ` Dumitrescu, Cristian
2016-05-11 9:46 ` Ferruh Yigit
2016-05-13 10:12 ` Thomas Monjalon
2016-05-13 11:04 4% ` Dumitrescu, Cristian
2016-05-10 16:24 [dpdk-dev] [RFC] mbuf: new flag when vlan is stripped Olivier Matz
2016-05-23 8:46 2% ` [dpdk-dev] [PATCH] mbuf: new flag when Vlan " Olivier Matz
2016-05-23 8:59 0% ` Ananyev, Konstantin
2016-05-23 9:20 0% ` Ananyev, Konstantin
2016-05-27 14:33 2% ` [dpdk-dev] [PATCH v2] " Olivier Matz
2016-05-11 6:08 [dpdk-dev] [PATCH] pci: Add the class_id support in pci probe Ziye Yang
2016-05-11 15:21 3% ` Stephen Hemminger
2016-05-11 15:34 3% ` Richardson, Bruce
2016-05-19 12:25 7% ` [dpdk-dev] [PATCH v2] ci: " Ziye Yang
2016-05-19 13:17 7% ` [dpdk-dev] [PATCH v3] " Ziye Yang
2016-05-24 12:50 7% ` [dpdk-dev] [PATCH v4] Pci: Add the class_id support Ziye Yang
2016-05-11 10:48 [dpdk-dev] [PATCH] examples/ethtool: include case for 64-bit registers zr
2016-05-25 6:36 ` [dpdk-dev] [PATCH 1/2] ethdev: add callback to get register size in bytes zr
2016-05-27 10:28 4% ` Panu Matilainen
2016-05-27 14:43 3% ` Thomas Monjalon
2016-05-30 9:32 0% ` Zyta Szpak
2016-05-13 8:15 [dpdk-dev] [PATCH v3] i40e: configure MTU Beilei Xing
2016-05-20 15:17 4% ` [dpdk-dev] [PATCH v4] " Beilei Xing
2016-05-13 12:50 [dpdk-dev] [PATCH 0/7] virtio-net support on ppc64 Olivier Matz
2016-05-17 9:59 ` [dpdk-dev] [PATCH v2 " Olivier Matz
2016-05-17 9:59 ` [dpdk-dev] [PATCH v2 5/7] eal/linux: mmap ioports " Olivier Matz
2016-05-17 15:54 ` David Marchand
2016-05-23 13:07 3% ` Yuanhan Liu
2016-05-23 13:40 3% ` Olivier Matz
2016-05-24 5:15 3% ` Yuanhan Liu
2016-05-30 8:45 0% ` Olivier Matz
2016-05-13 13:27 16% [dpdk-dev] [PATCH] doc: move rel_notes instructions as comments Olivier Matz
2016-05-16 13:18 9% [dpdk-dev] [PATCH 0/2] doc: announce ABI change of struct rte_port_source_params Fan Zhang
2016-05-16 13:18 18% ` [dpdk-dev] [PATCH 1/2] " Fan Zhang
2016-05-16 13:18 18% ` [dpdk-dev] [PATCH 2/2] doc: announce ABI change of struct rte_port_sink_params Fan Zhang
2016-05-16 13:57 9% ` Panu Matilainen
2016-05-19 14:18 20% ` [dpdk-dev] [PATCH v2] doc: announce ABI change of struct rte_port_source_params and rte_port_sink_params Fan Zhang
2016-05-16 20:41 [dpdk-dev] [PATCH 0/4] Implement pmd hardware support exports Neil Horman
2016-05-16 20:41 2% ` [dpdk-dev] [PATCH 1/4] pmdinfo: Add buildtools and pmdinfo utility Neil Horman
2016-05-18 13:57 [dpdk-dev] [PATCH] mbuf: make rearm_data address naturally aligned Jerin Jacob
2016-05-18 16:43 ` Bruce Richardson
2016-05-18 18:50 ` Jerin Jacob
2016-05-19 8:50 3% ` Bruce Richardson
2016-05-19 11:54 0% ` Jan Viktorin
2016-05-19 12:18 0% ` Ananyev, Konstantin
2016-05-19 13:35 0% ` Jerin Jacob
2016-05-20 13:51 13% [dpdk-dev] [PATCH v1] doc: fix code section in abi versioning doc John McNamara
2016-05-20 14:08 13% [dpdk-dev] [PATCH v2] " John McNamara
2016-05-26 7:28 4% [dpdk-dev] [PATCH] ethdev: change comments of VLAN type Beilei Xing
2016-05-30 15:26 [dpdk-dev] about rx checksum flags Olivier Matz
2016-05-31 8:09 ` Yuanhan Liu
2016-05-31 19:11 ` Olivier MATZ
2016-05-31 20:28 3% ` Stephen Hemminger
2016-05-31 20:58 0% ` Olivier MATZ
2016-05-31 22:02 0% ` Stephen Hemminger
2016-06-01 9:06 0% ` Ananyev, Konstantin
2016-06-02 7:42 0% ` Chandran, Sugesh
2016-06-01 15:00 [dpdk-dev] [RFC] Yet another option for DPDK options Wiles, Keith
2016-06-02 10:41 ` Neil Horman
2016-06-02 13:19 ` Thomas Monjalon
2016-06-02 13:53 ` Wiles, Keith
2016-06-02 17:11 ` Neil Horman
2016-06-02 19:41 3% ` Wiles, Keith
2016-06-02 20:08 3% ` Neil Horman
2016-06-02 20:53 ` Matthew Hall
2016-06-02 22:34 3% ` Neil Horman
2016-06-03 10:29 0% ` Bruce Richardson
2016-06-03 11:01 0% ` Bruce Richardson
2016-06-03 11:50 0% ` Neil Horman
2016-06-03 12:01 0% ` Arnon Warshavsky
2016-06-03 12:53 0% ` Panu Matilainen
2016-06-03 14:31 0% ` Arnon Warshavsky
2016-06-03 16:04 ` Wiles, Keith
2016-06-03 17:44 3% ` Neil Horman
2016-06-03 18:29 3% ` Wiles, Keith
2016-06-03 18:38 0% ` Neil Horman
2016-06-03 18:52 0% ` Arnon Warshavsky
2016-06-03 19:00 0% ` Wiles, Keith
2016-06-03 19:07 0% ` Wiles, Keith
2016-06-03 19:18 0% ` Neil Horman
2016-06-03 19:23 0% ` Wiles, Keith
2016-06-03 19:28 0% ` Arnon Warshavsky
2016-06-03 12:14 0% ` Panu Matilainen
2016-06-03 15:07 4% [dpdk-dev] RFC: DPDK Long Term Support Mcnamara, John
2016-06-03 16:05 0% ` Thomas Monjalon
2016-06-06 11:49 0% ` Yuanhan Liu
2016-06-03 18:17 3% ` Matthew Hall
2016-06-05 18:15 5% ` Neil Horman
2016-06-06 9:27 5% ` Thomas Monjalon
2016-06-06 13:47 5% ` Neil Horman
2016-06-06 14:21 4% ` Thomas Monjalon
2016-06-06 15:07 5% ` Neil Horman
2016-06-06 5:40 [dpdk-dev] [PATCH 0/8] support reset of VF link Wenzhuo Lu
2016-06-06 5:40 4% ` [dpdk-dev] [PATCH 2/8] lib/librte_ether: defind RX/TX lock mode Wenzhuo Lu
2016-06-07 5:45 [dpdk-dev] [PATCH v2 0/8] support reset of VF link Zhe Tao
2016-06-07 5:45 4% ` [dpdk-dev] [PATCH v2 2/8] lib/librte_ether: defind RX/TX lock mode Zhe Tao
2016-06-07 6:12 [dpdk-dev] [PATCH v3 0/8] support reset of VF link Zhe Tao
2016-06-07 6:12 4% ` [dpdk-dev] [PATCH v3 2/8] lib/librte_ether: defind RX/TX lock mode Zhe Tao
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).