From: Stephen Hemminger <stephen@networkplumber.org>
To: declan.doherty@intel.com
Cc: dev@dpdk.org, Eric Kinzie <ekinzie@brocade.com>
Subject: [dpdk-dev] [PATCH 7/8] bond: per-slave intermediate rx ring
Date: Fri, 4 Dec 2015 09:14:19 -0800 [thread overview]
Message-ID: <1449249260-15165-8-git-send-email-stephen@networkplumber.org> (raw)
In-Reply-To: <1449249260-15165-1-git-send-email-stephen@networkplumber.org>
From: Eric Kinzie <ekinzie@brocade.com>
Need to handle the case when bonding two (or more) ixgbe devices
together. The existing code would break because the ixgbe devices
assumed that, if set up in vector mode, the burst size would always
be large.
To solve this, during bond 802.3ad receive, a burst of packets is
fetched from each slave into a local array and appended to a per-slave
ring buffer. Packets are taken from the head of the ring buffer and
returned to the caller. The number of mbufs provided to each slave is
sufficient to meet the requirements of the ixgbe vector receive.
Signed-off-by: Eric Kinzie <ekinzie@brocade.com>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
drivers/net/bonding/rte_eth_bond_api.c | 29 ++++++++++++
drivers/net/bonding/rte_eth_bond_pmd.c | 71 ++++++++++++++++++++++--------
drivers/net/bonding/rte_eth_bond_private.h | 4 ++
3 files changed, 86 insertions(+), 18 deletions(-)
diff --git a/drivers/net/bonding/rte_eth_bond_api.c b/drivers/net/bonding/rte_eth_bond_api.c
index 64058ff..91b3819 100644
--- a/drivers/net/bonding/rte_eth_bond_api.c
+++ b/drivers/net/bonding/rte_eth_bond_api.c
@@ -37,6 +37,8 @@
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_tcp.h>
+#include <rte_errno.h>
+#include <rte_lcore.h>
#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
@@ -170,6 +172,7 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
{
struct bond_dev_private *internals = NULL;
struct rte_eth_dev *eth_dev = NULL;
+ char mem_name[RTE_ETH_NAME_MAX_LEN];
/* now do all data allocation - for eth_dev structure, dummy pci driver
* and internal (private) data
@@ -254,6 +257,18 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
memset(internals->slaves, 0, sizeof(internals->slaves));
+ snprintf(mem_name, RTE_DIM(mem_name), "%s_rx", name);
+ internals->rx_ring = rte_ring_lookup(mem_name);
+ if (internals->rx_ring == NULL) {
+ internals->rx_ring = rte_ring_create(mem_name,
+ rte_align32pow2(PMD_BOND_RECV_RING_PKTS *
+ rte_lcore_count()),
+ socket_id, 0);
+ if (internals->rx_ring == NULL)
+ rte_panic("%s: Failed to create rx ring '%s': %s\n", name,
+ mem_name, rte_strerror(rte_errno));
+ }
+
/* Set mode 4 default configuration */
bond_mode_8023ad_setup(eth_dev, NULL);
if (bond_ethdev_mode_set(eth_dev, mode)) {
@@ -532,12 +547,26 @@ __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
memset(rte_eth_devices[bonded_port_id].data->mac_addrs, 0,
sizeof(*(rte_eth_devices[bonded_port_id].data->mac_addrs)));
}
+
if (internals->slave_count == 0) {
+ /* Remove any remaining packets in the receive ring */
+ struct rte_mbuf *bufs[PMD_BOND_RECV_PKTS_PER_SLAVE];
+ unsigned j, count;
+
internals->rx_offload_capa = 0;
internals->tx_offload_capa = 0;
internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
internals->reta_size = 0;
+
+ do {
+ count = rte_ring_dequeue_burst(internals->rx_ring,
+ (void **)bufs,
+ PMD_BOND_RECV_PKTS_PER_SLAVE);
+ for (j = 0; j < count; j++)
+ rte_pktmbuf_free(bufs[j]);
+ } while (count > 0);
}
+
return 0;
}
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index 868e66b..043160e 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -180,10 +180,15 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
struct bond_dev_private *internals = bd_rx_q->dev_private;
struct ether_addr bond_mac;
+ unsigned rx_ring_avail = rte_ring_free_count(internals->rx_ring);
+ struct rte_mbuf *mbuf_bounce[PMD_BOND_RECV_PKTS_PER_SLAVE];
+
struct ether_hdr *hdr;
const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
uint16_t num_rx_total = 0; /* Total number of received packets */
+ uint16_t num_rx_slave;
+ uint16_t num_enq_slave;
uint8_t slaves[RTE_MAX_ETHPORTS];
uint8_t slave_count;
@@ -191,6 +196,9 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
const uint8_t promisc = internals->promiscuous_en;
uint8_t i, j, k;
+ if (rx_ring_avail < PMD_BOND_RECV_PKTS_PER_SLAVE)
+ goto dequeue;
+
rte_eth_macaddr_get(internals->port_id, &bond_mac);
/* Copy slave list to protect against slave up/down changes during tx
* bursting */
@@ -198,23 +206,27 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
slave_count = bond_active_slaves_by_rxqid(internals, bd_rx_q->queue_id,
slaves);
- for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
- j = num_rx_total;
+ for (i = 0; i < slave_count && num_rx_total < rx_ring_avail; i++) {
+ j = 0;
collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);
/* Read packets from this slave */
- num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
- &bufs[num_rx_total], nb_pkts - num_rx_total);
-
- for (k = j; k < 2 && k < num_rx_total; k++)
- rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
+ if (unlikely(rx_ring_avail - num_rx_total < PMD_BOND_RECV_PKTS_PER_SLAVE))
+ continue;
+ num_rx_slave = rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
+ mbuf_bounce,
+ PMD_BOND_RECV_PKTS_PER_SLAVE);
+ for (k = j; k < 2 && k < num_rx_slave; k++)
+ rte_prefetch0(rte_pktmbuf_mtod(mbuf_bounce[k], void *));
/* Handle slow protocol packets. */
- while (j < num_rx_total) {
- if (j + 3 < num_rx_total)
- rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
+ while (j < num_rx_slave) {
+ if (j + 3 < num_rx_slave)
+ rte_prefetch0(rte_pktmbuf_mtod(
+ mbuf_bounce[j + 3],
+ void *));
- hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
+ hdr = rte_pktmbuf_mtod(mbuf_bounce[j], struct ether_hdr *);
/* Remove packet from array if it is slow packet or slave is not
* in collecting state or bondign interface is not in promiscus
* mode and packet address does not match. */
@@ -225,22 +237,45 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
if (hdr->ether_type == ether_type_slow_be) {
bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
- bufs[j]);
+ mbuf_bounce[j]);
} else
- rte_pktmbuf_free(bufs[j]);
+ rte_pktmbuf_free(mbuf_bounce[j]);
/* Packet is managed by mode 4 or dropped, shift the array */
- num_rx_total--;
- if (j < num_rx_total) {
- memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
- (num_rx_total - j));
+ num_rx_slave--;
+ if (j < num_rx_slave) {
+ memmove(&mbuf_bounce[j],
+ &mbuf_bounce[j + 1],
+ sizeof(mbuf_bounce[0]) *
+ (num_rx_slave - j));
}
} else
j++;
}
+
+ if (num_rx_slave > 0) {
+ if (mbuf_bounce[0] == NULL)
+ RTE_LOG(ERR, PMD, "%s: Enqueue a NULL??\n",
+ __func__);
+
+ num_enq_slave = rte_ring_enqueue_burst(internals->rx_ring,
+ (void **)mbuf_bounce,
+ num_rx_slave);
+
+ if (num_enq_slave < num_rx_slave) {
+ RTE_LOG(ERR, PMD,
+ "%s: failed to enqueue %u packets",
+ __func__,
+ (num_rx_slave - num_enq_slave));
+ for (j = num_enq_slave; j < num_rx_slave; j++)
+ rte_pktmbuf_free(mbuf_bounce[j]);
+ }
+ num_rx_total += num_enq_slave;
+ }
}
- return num_rx_total;
+dequeue:
+ return rte_ring_dequeue_burst(internals->rx_ring, (void **)bufs, nb_pkts);
}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
diff --git a/drivers/net/bonding/rte_eth_bond_private.h b/drivers/net/bonding/rte_eth_bond_private.h
index 02f6de1..2c43bc3 100644
--- a/drivers/net/bonding/rte_eth_bond_private.h
+++ b/drivers/net/bonding/rte_eth_bond_private.h
@@ -50,6 +50,8 @@
#define PMD_BOND_LSC_POLL_PERIOD_KVARG ("lsc_poll_period_ms")
#define PMD_BOND_LINK_UP_PROP_DELAY_KVARG ("up_delay")
#define PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG ("down_delay")
+#define PMD_BOND_RECV_RING_PKTS 512
+#define PMD_BOND_RECV_PKTS_PER_SLAVE 32
#define PMD_BOND_XMIT_POLICY_LAYER2_KVARG ("l2")
#define PMD_BOND_XMIT_POLICY_LAYER23_KVARG ("l23")
@@ -171,6 +173,8 @@ struct bond_dev_private {
struct rte_kvargs *kvlist;
uint8_t slave_update_idx;
+
+ struct rte_ring *rx_ring;
};
extern struct eth_dev_ops default_dev_ops;
--
2.1.4
next prev parent reply other threads:[~2015-12-04 17:14 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-12-04 17:14 [dpdk-dev] [PATCH 0/8] bonding: fixes and enhancements Stephen Hemminger
2015-12-04 17:14 ` [dpdk-dev] [PATCH 1/8] bond: use existing enslaved device queues Stephen Hemminger
2016-01-05 13:32 ` Declan Doherty
2015-12-04 17:14 ` [dpdk-dev] [PATCH 2/8] bond mode 4: copy entire config structure Stephen Hemminger
2016-01-05 13:32 ` Declan Doherty
2015-12-04 17:14 ` [dpdk-dev] [PATCH 3/8] bond mode 4: do not ignore multicast Stephen Hemminger
2016-01-05 13:32 ` Declan Doherty
2015-12-04 17:14 ` [dpdk-dev] [PATCH 4/8] bond mode 4: allow external state machine Stephen Hemminger
2016-01-05 13:33 ` Declan Doherty
2015-12-04 17:14 ` [dpdk-dev] [PATCH 5/8] bond: active slaves with no primary Stephen Hemminger
2016-01-05 13:34 ` Declan Doherty
2015-12-04 17:14 ` [dpdk-dev] [PATCH 6/8] bond: handle slaves with fewer queues than bonding device Stephen Hemminger
2015-12-04 18:36 ` Andriy Berestovskyy
2015-12-04 19:18 ` Eric Kinzie
2016-01-05 13:46 ` Declan Doherty
2016-01-05 15:31 ` Stephen Hemminger
2016-02-03 11:28 ` Bruce Richardson
2016-02-03 15:17 ` Declan Doherty
2016-02-03 15:21 ` Thomas Monjalon
2016-02-18 10:26 ` Iremonger, Bernard
2016-02-19 19:17 ` [dpdk-dev] [PATCH v2 0/6] bonding: fixes and enhancements Eric Kinzie
2016-02-19 19:17 ` [dpdk-dev] [PATCH v2 1/6] bond: use existing enslaved device queues Eric Kinzie
2016-02-19 19:17 ` [dpdk-dev] [PATCH v2 2/6] bond mode 4: copy entire config structure Eric Kinzie
2016-02-19 19:17 ` [dpdk-dev] [PATCH v2 3/6] bond mode 4: do not ignore multicast Eric Kinzie
2016-02-19 19:17 ` [dpdk-dev] [PATCH v2 4/6] bond mode 4: allow external state machine Eric Kinzie
2016-02-22 13:03 ` Panu Matilainen
2016-02-25 15:22 ` Iremonger, Bernard
2016-03-01 17:31 ` [dpdk-dev] [PATCH V3 0/4] bonding: fixes and enhancements Eric Kinzie
2016-03-01 17:31 ` [dpdk-dev] [PATCH V3 1/4] bond mode 4: copy entire config structure Eric Kinzie
2016-03-01 17:32 ` [dpdk-dev] [PATCH V3 2/4] bond mode 4: do not ignore multicast Eric Kinzie
2016-03-01 17:32 ` [dpdk-dev] [PATCH V3 3/4] bond: active slaves with no primary Eric Kinzie
2016-03-01 17:32 ` [dpdk-dev] [PATCH V3 4/4] bond: do not activate slave twice Eric Kinzie
2016-03-10 15:41 ` [dpdk-dev] [PATCH V3 0/4] bonding: fixes and enhancements Bruce Richardson
2016-03-01 17:40 ` [dpdk-dev] [PATCH v2 4/6] bond mode 4: allow external state machine Eric Kinzie
2016-03-02 9:49 ` Iremonger, Bernard
2016-02-19 19:17 ` [dpdk-dev] [PATCH v2 5/6] bond: active slaves with no primary Eric Kinzie
2016-02-19 19:17 ` [dpdk-dev] [PATCH v2 6/6] bond: do not activate slave twice Eric Kinzie
2015-12-04 17:14 ` Stephen Hemminger [this message]
2015-12-04 17:14 ` [dpdk-dev] [PATCH 8/8] " Stephen Hemminger
2016-01-05 13:47 ` Declan Doherty
2015-12-23 10:51 ` [dpdk-dev] [PATCH 0/8] bonding: fixes and enhancements Iremonger, Bernard
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1449249260-15165-8-git-send-email-stephen@networkplumber.org \
--to=stephen@networkplumber.org \
--cc=declan.doherty@intel.com \
--cc=dev@dpdk.org \
--cc=ekinzie@brocade.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).