From: Matan Azrad <matan@mellanox.com>
To: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Cc: dev@dpdk.org, Ophir Munk <ophirmu@mellanox.com>
Subject: [dpdk-dev] [PATCH v5 6/8] net/mlx4: separate Tx segment cases
Date: Thu, 2 Nov 2017 16:42:49 +0000 [thread overview]
Message-ID: <1509640971-8637-7-git-send-email-matan@mellanox.com> (raw)
In-Reply-To: <1509640971-8637-1-git-send-email-matan@mellanox.com>
Optimize the single-segment case by processing it in a separate block,
which avoids the checks, calculations and barriers that are relevant
only to the multi-segment case.
Call a dedicated function to handle the multi-segment case.
Signed-off-by: Matan Azrad <matan@mellanox.com>
Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
drivers/net/mlx4/mlx4_rxtx.c | 237 +++++++++++++++++++++++++++----------------
1 file changed, 152 insertions(+), 85 deletions(-)
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index abd125f..d13c8d2 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -61,6 +61,9 @@
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"
+#define WQE_ONE_DATA_SEG_SIZE \
+ (sizeof(struct mlx4_wqe_ctrl_seg) + sizeof(struct mlx4_wqe_data_seg))
+
/**
* Pointer-value pair structure used in tx_post_send for saving the first
* DWORD (32 byte) of a TXBB.
@@ -140,21 +143,18 @@ struct pv {
* 0 on success, -1 on failure.
*/
static int
-mlx4_txq_complete(struct txq *txq)
+mlx4_txq_complete(struct txq *txq, const unsigned int elts_n,
+ struct mlx4_sq *sq)
{
unsigned int elts_comp = txq->elts_comp;
unsigned int elts_tail = txq->elts_tail;
- const unsigned int elts_n = txq->elts_n;
struct mlx4_cq *cq = &txq->mcq;
- struct mlx4_sq *sq = &txq->msq;
struct mlx4_cqe *cqe;
uint32_t cons_index = cq->cons_index;
uint16_t new_index;
uint16_t nr_txbbs = 0;
int pkts = 0;
- if (unlikely(elts_comp == 0))
- return 0;
/*
* Traverse over all CQ entries reported and handle each WQ entry
* reported by them.
@@ -237,6 +237,120 @@ struct pv {
return buf->pool;
}
+static int
+mlx4_tx_burst_segs(struct rte_mbuf *buf, struct txq *txq,
+ struct mlx4_wqe_ctrl_seg **pctrl)
+{
+ int wqe_real_size;
+ int nr_txbbs;
+ struct pv *pv = (struct pv *)txq->bounce_buf;
+ struct mlx4_sq *sq = &txq->msq;
+ uint32_t head_idx = sq->head & sq->txbb_cnt_mask;
+ struct mlx4_wqe_ctrl_seg *ctrl;
+ struct mlx4_wqe_data_seg *dseg;
+ struct rte_mbuf *sbuf;
+ uint32_t lkey;
+ uintptr_t addr;
+ uint32_t byte_count;
+ int pv_counter = 0;
+
+ /* Calculate the needed work queue entry size for this packet. */
+ wqe_real_size = sizeof(struct mlx4_wqe_ctrl_seg) +
+ buf->nb_segs * sizeof(struct mlx4_wqe_data_seg);
+ nr_txbbs = MLX4_SIZE_TO_TXBBS(wqe_real_size);
+ /*
+ * Check that there is room for this WQE in the send queue and that
+ * the WQE size is legal.
+ */
+ if (((sq->head - sq->tail) + nr_txbbs +
+ sq->headroom_txbbs) >= sq->txbb_cnt ||
+ nr_txbbs > MLX4_MAX_WQE_TXBBS) {
+ return -1;
+ }
+ /* Get the control and data entries of the WQE. */
+ ctrl = (struct mlx4_wqe_ctrl_seg *)mlx4_get_send_wqe(sq, head_idx);
+ dseg = (struct mlx4_wqe_data_seg *)((uintptr_t)ctrl +
+ sizeof(struct mlx4_wqe_ctrl_seg));
+ *pctrl = ctrl;
+ /* Fill the data segments with buffer information. */
+ for (sbuf = buf; sbuf != NULL; sbuf = sbuf->next, dseg++) {
+ addr = rte_pktmbuf_mtod(sbuf, uintptr_t);
+ rte_prefetch0((volatile void *)addr);
+ /* Handle WQE wraparound. */
+ if (dseg >= (struct mlx4_wqe_data_seg *)sq->eob)
+ dseg = (struct mlx4_wqe_data_seg *)sq->buf;
+ dseg->addr = rte_cpu_to_be_64(addr);
+ /* Memory region key (big endian) for this memory pool. */
+ lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(sbuf));
+ dseg->lkey = rte_cpu_to_be_32(lkey);
+#ifndef NDEBUG
+ /* Detect a failed MP <-> MR lookup (debug builds only). */
+ if (unlikely(dseg->lkey == rte_cpu_to_be_32((uint32_t)-1))) {
+ /* MR does not exist. */
+ DEBUG("%p: unable to get MP <-> MR association",
+ (void *)txq);
+ /*
+ * Restamp entry in case of failure.
+ * Make sure that size is written correctly
+ * Note that we give ownership to the SW, not the HW.
+ */
+ wqe_real_size = sizeof(struct mlx4_wqe_ctrl_seg) +
+ buf->nb_segs * sizeof(struct mlx4_wqe_data_seg);
+ ctrl->fence_size = (wqe_real_size >> 4) & 0x3f;
+ mlx4_txq_stamp_freed_wqe(sq, head_idx,
+ (sq->head & sq->txbb_cnt) ? 0 : 1);
+ return -1;
+ }
+#endif /* NDEBUG */
+ if (likely(sbuf->data_len)) {
+ byte_count = rte_cpu_to_be_32(sbuf->data_len);
+ } else {
+ /*
+ * Zero length segment is treated as inline segment
+ * with zero data.
+ */
+ byte_count = RTE_BE32(0x80000000);
+ }
+ /*
+ * If the data segment is not at the beginning of a
+ * Tx basic block (TXBB) then write the byte count,
+ * else postpone the writing to just before updating the
+ * control segment.
+ */
+ if ((uintptr_t)dseg & (uintptr_t)(MLX4_TXBB_SIZE - 1)) {
+ /*
+ * Need a barrier here before writing the byte_count
+ * fields to make sure that all the data is visible
+ * before the byte_count field is set.
+ * Otherwise, if the segment begins a new cacheline,
+ * the HCA prefetcher could grab the 64-byte chunk and
+ * get a valid (!= 0xffffffff) byte count but stale
+ * data, and end up sending the wrong data.
+ */
+ rte_io_wmb();
+ dseg->byte_count = byte_count;
+ } else {
+ /*
+ * This data segment starts at the beginning of a new
+ * TXBB, so we need to postpone its byte_count writing
+ * for later.
+ */
+ pv[pv_counter].dseg = dseg;
+ pv[pv_counter++].val = byte_count;
+ }
+ }
+ /* Write the first DWORD of each TXBB saved earlier. */
+ if (pv_counter) {
+ /* Need a barrier here before writing the byte_count. */
+ rte_io_wmb();
+ for (--pv_counter; pv_counter >= 0; pv_counter--)
+ pv[pv_counter].dseg->byte_count = pv[pv_counter].val;
+ }
+ /* Fill the control parameters for this packet. */
+ ctrl->fence_size = (wqe_real_size >> 4) & 0x3f;
+ return nr_txbbs;
+}
+
/**
* DPDK callback for Tx.
*
@@ -260,10 +374,11 @@ struct pv {
unsigned int i;
unsigned int max;
struct mlx4_sq *sq = &txq->msq;
- struct pv *pv = (struct pv *)txq->bounce_buf;
+ int nr_txbbs;
assert(txq->elts_comp_cd != 0);
- mlx4_txq_complete(txq);
+ if (likely(txq->elts_comp != 0))
+ mlx4_txq_complete(txq, elts_n, sq);
max = (elts_n - (elts_head - txq->elts_tail));
if (max > elts_n)
max -= elts_n;
@@ -282,7 +397,6 @@ struct pv {
uint32_t owner_opcode = MLX4_OPCODE_SEND;
struct mlx4_wqe_ctrl_seg *ctrl;
struct mlx4_wqe_data_seg *dseg;
- struct rte_mbuf *sbuf;
union {
uint32_t flags;
uint16_t flags16[2];
@@ -290,10 +404,6 @@ struct pv {
uint32_t head_idx = sq->head & sq->txbb_cnt_mask;
uint32_t lkey;
uintptr_t addr;
- uint32_t byte_count;
- int wqe_real_size;
- int nr_txbbs;
- int pv_counter = 0;
/* Clean up old buffer. */
if (likely(elt->buf != NULL)) {
@@ -312,38 +422,29 @@ struct pv {
} while (tmp != NULL);
}
RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);
- /*
- * Calculate the needed work queue entry size
- * for this packet.
- */
- wqe_real_size = sizeof(struct mlx4_wqe_ctrl_seg) +
- buf->nb_segs * sizeof(struct mlx4_wqe_data_seg);
- nr_txbbs = MLX4_SIZE_TO_TXBBS(wqe_real_size);
- /*
- * Check that there is room for this WQE in the send
- * queue and that the WQE size is legal.
- */
- if (((sq->head - sq->tail) + nr_txbbs +
- sq->headroom_txbbs) >= sq->txbb_cnt ||
- nr_txbbs > MLX4_MAX_WQE_TXBBS) {
- elt->buf = NULL;
- break;
- }
- /* Get the control and data entries of the WQE. */
- ctrl = (struct mlx4_wqe_ctrl_seg *)
- mlx4_get_send_wqe(sq, head_idx);
- dseg = (struct mlx4_wqe_data_seg *)((uintptr_t)ctrl +
- sizeof(struct mlx4_wqe_ctrl_seg));
- /* Fill the data segments with buffer information. */
- for (sbuf = buf; sbuf != NULL; sbuf = sbuf->next, dseg++) {
- addr = rte_pktmbuf_mtod(sbuf, uintptr_t);
+ if (buf->nb_segs == 1) {
+ /*
+ * Check that there is room for this WQE in the send
+ * queue and that the WQE size is legal
+ */
+ if (((sq->head - sq->tail) + 1 + sq->headroom_txbbs) >=
+ sq->txbb_cnt || 1 > MLX4_MAX_WQE_TXBBS) {
+ elt->buf = NULL;
+ break;
+ }
+ /* Get the control and data entries of the WQE. */
+ ctrl = (struct mlx4_wqe_ctrl_seg *)
+ mlx4_get_send_wqe(sq, head_idx);
+ dseg = (struct mlx4_wqe_data_seg *)((uintptr_t)ctrl +
+ sizeof(struct mlx4_wqe_ctrl_seg));
+ addr = rte_pktmbuf_mtod(buf, uintptr_t);
rte_prefetch0((volatile void *)addr);
/* Handle WQE wraparound. */
if (dseg >= (struct mlx4_wqe_data_seg *)sq->eob)
dseg = (struct mlx4_wqe_data_seg *)sq->buf;
dseg->addr = rte_cpu_to_be_64(addr);
/* Memory region key (big endian). */
- lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(sbuf));
+ lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(buf));
dseg->lkey = rte_cpu_to_be_32(lkey);
#ifndef NDEBUG
if (unlikely(dseg->lkey ==
@@ -357,61 +458,27 @@ struct pv {
* Note that we give ownership to the SW,
* not the HW.
*/
- ctrl->fence_size = (wqe_real_size >> 4) & 0x3f;
+ ctrl->fence_size =
+ (WQE_ONE_DATA_SEG_SIZE >> 4) & 0x3f;
mlx4_txq_stamp_freed_wqe(sq, head_idx,
(sq->head & sq->txbb_cnt) ? 0 : 1);
elt->buf = NULL;
break;
}
#endif /* NDEBUG */
- if (likely(sbuf->data_len)) {
- byte_count = rte_cpu_to_be_32(sbuf->data_len);
- } else {
- /*
- * Zero length segment is treated as inline
- * segment with zero data.
- */
- byte_count = RTE_BE32(0x80000000);
- }
- /*
- * If the data segment is not at the beginning
- * of a Tx basic block (TXBB) then write the
- * byte count, else postpone the writing to
- * just before updating the control segment.
- */
- if ((uintptr_t)dseg & (uintptr_t)(MLX4_TXBB_SIZE - 1)) {
- /*
- * Need a barrier here before writing the
- * byte_count fields to make sure that all the
- * data is visible before the byte_count field
- * is set. otherwise, if the segment begins a
- * new cacheline, the HCA prefetcher could grab
- * the 64-byte chunk and get a valid
- * (!= 0xffffffff) byte count but stale data,
- * and end up sending the wrong data.
- */
- rte_io_wmb();
- dseg->byte_count = byte_count;
- } else {
- /*
- * This data segment starts at the beginning of
- * a new TXBB, so we need to postpone its
- * byte_count writing for later.
- */
- pv[pv_counter].dseg = dseg;
- pv[pv_counter++].val = byte_count;
- }
- }
- /* Write the first DWORD of each TXBB save earlier. */
- if (pv_counter) {
- /* Need a barrier before writing the byte_count. */
+ /* Need a barrier here before byte count store. */
rte_io_wmb();
- for (--pv_counter; pv_counter >= 0; pv_counter--)
- pv[pv_counter].dseg->byte_count =
- pv[pv_counter].val;
+ dseg->byte_count = rte_cpu_to_be_32(buf->data_len);
+ /* Fill the control parameters for this packet. */
+ ctrl->fence_size = (WQE_ONE_DATA_SEG_SIZE >> 4) & 0x3f;
+ nr_txbbs = 1;
+ } else {
+ nr_txbbs = mlx4_tx_burst_segs(buf, txq, &ctrl);
+ if (nr_txbbs < 0) {
+ elt->buf = NULL;
+ break;
+ }
}
- /* Fill the control parameters for this packet. */
- ctrl->fence_size = (wqe_real_size >> 4) & 0x3f;
/*
* For raw Ethernet, the SOLICIT flag is used to indicate
* that no ICRC should be calculated.
--
1.8.3.1
next prev parent reply other threads:[~2017-11-02 16:43 UTC|newest]
Thread overview: 84+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <1508752838-30408-1-git-send-email-ophirmu@mellanox.com>
2017-10-23 14:21 ` [dpdk-dev] [PATCH v2 0/7] net/mlx4: follow-up on new TX datapath introduced in RC1 Ophir Munk
2017-10-23 14:21 ` [dpdk-dev] [PATCH v2 1/7] net/mlx4: remove error flows from Tx fast path Ophir Munk
2017-10-25 16:49 ` Adrien Mazarguil
2017-10-23 14:21 ` [dpdk-dev] [PATCH v2 2/7] net/mlx4: inline more Tx functions Ophir Munk
2017-10-25 16:49 ` Adrien Mazarguil
2017-10-25 21:42 ` Ophir Munk
2017-10-26 7:48 ` Adrien Mazarguil
2017-10-26 14:27 ` Ophir Munk
2017-10-29 19:30 ` Ophir Munk
2017-10-23 14:21 ` [dpdk-dev] [PATCH v2 3/7] net/mlx4: save lkey in big-endian format Ophir Munk
2017-10-23 15:24 ` Nélio Laranjeiro
2017-10-23 14:21 ` [dpdk-dev] [PATCH v2 4/7] net/mlx4: merge Tx path functions Ophir Munk
2017-10-24 13:51 ` Nélio Laranjeiro
2017-10-24 20:36 ` Ophir Munk
2017-10-25 7:50 ` Nélio Laranjeiro
2017-10-26 10:31 ` Matan Azrad
2017-10-26 12:12 ` Nélio Laranjeiro
2017-10-26 12:30 ` Matan Azrad
2017-10-26 13:44 ` Nélio Laranjeiro
2017-10-26 16:21 ` Matan Azrad
2017-10-23 14:21 ` [dpdk-dev] [PATCH v2 5/7] net/mlx4: remove unnecessary variables in Tx burst Ophir Munk
2017-10-25 16:49 ` Adrien Mazarguil
2017-10-23 14:21 ` [dpdk-dev] [PATCH v2 6/7] net/mlx4: improve performance of one Tx segment Ophir Munk
2017-10-25 16:50 ` Adrien Mazarguil
2017-10-23 14:22 ` [dpdk-dev] [PATCH v2 7/7] net/mlx4: separate Tx for multi-segments Ophir Munk
2017-10-25 16:50 ` Adrien Mazarguil
2017-10-30 8:15 ` Ophir Munk
2017-10-30 10:07 ` [dpdk-dev] [PATCH v3 0/7] Tx path improvements Matan Azrad
2017-10-30 10:07 ` [dpdk-dev] [PATCH v3 1/7] net/mlx4: remove error flows from Tx fast path Matan Azrad
2017-10-30 14:23 ` Adrien Mazarguil
2017-10-30 18:11 ` Matan Azrad
2017-10-31 10:16 ` Adrien Mazarguil
2017-10-30 10:07 ` [dpdk-dev] [PATCH v3 2/7] net/mlx4: associate MR to MP in a short function Matan Azrad
2017-10-30 14:23 ` Adrien Mazarguil
2017-10-31 13:25 ` Ophir Munk
2017-10-30 10:07 ` [dpdk-dev] [PATCH v3 3/7] net/mlx4: merge Tx path functions Matan Azrad
2017-10-30 14:23 ` Adrien Mazarguil
2017-10-30 18:12 ` Matan Azrad
2017-10-30 10:07 ` [dpdk-dev] [PATCH v3 4/7] net/mlx4: remove completion counter in Tx burst Matan Azrad
2017-10-30 14:23 ` Adrien Mazarguil
2017-10-30 10:07 ` [dpdk-dev] [PATCH v3 5/7] net/mlx4: separate Tx segment cases Matan Azrad
2017-10-30 14:23 ` Adrien Mazarguil
2017-10-30 18:23 ` Matan Azrad
2017-10-31 10:17 ` Adrien Mazarguil
2017-10-30 10:07 ` [dpdk-dev] [PATCH v3 6/7] net/mlx4: mitigate Tx path memory barriers Matan Azrad
2017-10-30 14:23 ` Adrien Mazarguil
2017-10-30 19:47 ` Matan Azrad
2017-10-31 10:17 ` Adrien Mazarguil
2017-10-31 11:35 ` Matan Azrad
2017-10-31 13:21 ` Adrien Mazarguil
2017-10-30 10:07 ` [dpdk-dev] [PATCH v3 7/7] net/mlx4: remove empty Tx segment support Matan Azrad
2017-10-30 14:24 ` Adrien Mazarguil
2017-10-31 18:21 ` [dpdk-dev] [PATCH v4 0/8] net/mlx4: Tx path improvements Matan Azrad
2017-10-31 18:21 ` [dpdk-dev] [PATCH v4 1/8] net/mlx4: remove error flows from Tx fast path Matan Azrad
2017-10-31 18:21 ` [dpdk-dev] [PATCH v4 2/8] net/mlx4: associate MR to MP in a short function Matan Azrad
2017-11-02 13:42 ` Adrien Mazarguil
2017-10-31 18:21 ` [dpdk-dev] [PATCH v4 3/8] net/mlx4: fix ring wraparound compiler hint Matan Azrad
2017-11-02 13:42 ` Adrien Mazarguil
2017-10-31 18:21 ` [dpdk-dev] [PATCH v4 4/8] net/mlx4: merge Tx path functions Matan Azrad
2017-11-02 13:42 ` Adrien Mazarguil
2017-10-31 18:21 ` [dpdk-dev] [PATCH v4 5/8] net/mlx4: remove duplicate handling in Tx burst Matan Azrad
2017-11-02 13:42 ` Adrien Mazarguil
2017-10-31 18:21 ` [dpdk-dev] [PATCH v4 6/8] net/mlx4: separate Tx segment cases Matan Azrad
2017-11-02 13:43 ` Adrien Mazarguil
2017-10-31 18:21 ` [dpdk-dev] [PATCH v4 7/8] net/mlx4: fix HW memory optimizations careless Matan Azrad
2017-11-02 13:43 ` Adrien Mazarguil
2017-10-31 18:21 ` [dpdk-dev] [PATCH v4 8/8] net/mlx4: mitigate Tx path memory barriers Matan Azrad
2017-11-02 13:43 ` Adrien Mazarguil
2017-11-02 13:41 ` [dpdk-dev] [PATCH] net/mlx4: fix missing include Adrien Mazarguil
2017-11-02 20:35 ` Ferruh Yigit
2017-11-02 16:42 ` [dpdk-dev] [PATCH v5 0/8] net/mlx4: Tx path improvements Matan Azrad
2017-11-02 16:42 ` [dpdk-dev] [PATCH v5 1/8] net/mlx4: remove error flows from Tx fast path Matan Azrad
2017-11-02 16:42 ` [dpdk-dev] [PATCH v5 2/8] net/mlx4: associate MR to MP in a short function Matan Azrad
2017-11-02 16:42 ` [dpdk-dev] [PATCH v5 3/8] net/mlx4: fix ring wraparound compiler hint Matan Azrad
2017-11-02 16:42 ` [dpdk-dev] [PATCH v5 4/8] net/mlx4: merge Tx path functions Matan Azrad
2017-11-02 16:42 ` [dpdk-dev] [PATCH v5 5/8] net/mlx4: remove duplicate handling in Tx burst Matan Azrad
2017-11-02 16:42 ` Matan Azrad [this message]
2017-11-02 16:42 ` [dpdk-dev] [PATCH v5 7/8] net/mlx4: fix HW memory optimizations careless Matan Azrad
2017-11-02 16:42 ` [dpdk-dev] [PATCH v5 8/8] net/mlx4: mitigate Tx path memory barriers Matan Azrad
2017-11-02 17:07 ` [dpdk-dev] [PATCH v5 0/8] net/mlx4: Tx path improvements Adrien Mazarguil
2017-11-02 20:35 ` Ferruh Yigit
2017-11-02 20:41 ` Ferruh Yigit
2017-11-03 9:48 ` Adrien Mazarguil
2017-11-03 19:25 ` Ferruh Yigit
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1509640971-8637-7-git-send-email-matan@mellanox.com \
--to=matan@mellanox.com \
--cc=adrien.mazarguil@6wind.com \
--cc=dev@dpdk.org \
--cc=ophirmu@mellanox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).