From: Alexander Kozyrev <akozyrev@mellanox.com>
To: dev@dpdk.org
Cc: rasland@mellanox.com, matan@mellanox.com,
viacheslavo@mellanox.com, stable@dpdk.org
Subject: [dpdk-dev] [PATCH v3 2/3] net/mlx5: enable MPRQ multi-stride operations
Date: Thu, 9 Apr 2020 21:24:03 +0000
Message-ID: <1586467444-15197-3-git-send-email-akozyrev@mellanox.com>
In-Reply-To: <1586467444-15197-1-git-send-email-akozyrev@mellanox.com>
The MPRQ feature should be updated to allow a packet to be received
into multiple strides in order to support MTUs exceeding 8KB.
Special care is needed to prevent headroom corruption in
multi-stride mode, since the headroom space is borrowed by the PMD
from the tail of the preceding stride. In this case, copy the whole
packet into a separate mbuf, or copy only the overlapping data if
Rx scatter is supported by the application.
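
For illustration, a minimal standalone C sketch of the overlap arithmetic
and the resulting Rx-path decision described above. All names here
(headroom_overlap, mprq_rx_action, and their parameters) are hypothetical
and are not part of the mlx5 PMD; the actual logic is in the mlx5_rxtx.c
hunk below.

	/*
	 * Illustrative only -- not PMD code. Computes how many bytes of the
	 * next stride's headroom a packet of pkt_len bytes would overwrite,
	 * and which Rx path is taken as a result.
	 */
	#include <stdbool.h>
	#include <stdint.h>

	static inline int32_t
	headroom_overlap(uint32_t pkt_len, uint32_t headroom,
			 uint32_t stride_count, uint32_t stride_size)
	{
		/* Positive result: the packet spills into the next headroom. */
		return (int32_t)(pkt_len + headroom -
				 stride_count * stride_size);
	}

	enum rx_action {
		RX_COPY_WHOLE_PACKET,	/* memcpy all data into a private mbuf */
		RX_ATTACH_EXTBUF,	/* attach the stride memory directly */
		RX_ATTACH_COPY_TAIL,	/* attach, then copy only the overlap */
	};

	static inline enum rx_action
	mprq_rx_action(uint32_t pkt_len, int32_t overlap,
		       uint32_t max_memcpy_len, bool extbuf_avail,
		       bool scatter_en)
	{
		if (pkt_len <= max_memcpy_len || !extbuf_avail ||
		    (overlap > 0 && !scatter_en))
			return RX_COPY_WHOLE_PACKET;
		return overlap > 0 ? RX_ATTACH_COPY_TAIL : RX_ATTACH_EXTBUF;
	}
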
Cc: stable@dpdk.org
Signed-off-by: Alexander Kozyrev <akozyrev@mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
drivers/net/mlx5/mlx5_rxq.c | 28 ++++--------------
drivers/net/mlx5/mlx5_rxtx.c | 68 +++++++++++++++++++-------------------------
drivers/net/mlx5/mlx5_rxtx.h | 2 +-
3 files changed, 36 insertions(+), 62 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 7adeabc..08e4ccb 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1797,7 +1797,6 @@ struct mlx5_rxq_ctrl *
unsigned int mprq_stride_size;
unsigned int mprq_stride_cap;
struct mlx5_dev_config *config = &priv->config;
- unsigned int strd_headroom_en;
/*
* Always allocate extra slots, even if eventually
* the vector Rx will not be used.
@@ -1843,26 +1842,11 @@ struct mlx5_rxq_ctrl *
tmpl->socket = socket;
if (dev->data->dev_conf.intr_conf.rxq)
tmpl->irq = 1;
- /*
- * LRO packet may consume all the stride memory, hence we cannot
- * guaranty head-room near the packet memory in the stride.
- * In this case scatter is, for sure, enabled and an empty mbuf may be
- * added in the start for the head-room.
- */
- if (lro_on_queue && RTE_PKTMBUF_HEADROOM > 0 &&
- non_scatter_min_mbuf_size > mb_len) {
- strd_headroom_en = 0;
- mprq_stride_size = RTE_MIN(max_rx_pkt_len,
- 1u << config->mprq.max_stride_size_n);
- } else {
- strd_headroom_en = 1;
- mprq_stride_size = non_scatter_min_mbuf_size;
- }
mprq_stride_nums = config->mprq.stride_num_n ?
config->mprq.stride_num_n : MLX5_MPRQ_STRIDE_NUM_N;
- mprq_stride_size = (mprq_stride_size <=
- (1U << config->mprq.max_stride_size_n)) ?
- log2above(mprq_stride_size) : MLX5_MPRQ_STRIDE_SIZE_N;
+ mprq_stride_size = non_scatter_min_mbuf_size <=
+ (1U << config->mprq.max_stride_size_n) ?
+ log2above(non_scatter_min_mbuf_size) : MLX5_MPRQ_STRIDE_SIZE_N;
mprq_stride_cap = (config->mprq.stride_num_n ?
(1U << config->mprq.stride_num_n) : (1U << mprq_stride_nums)) *
(config->mprq.stride_size_n ?
@@ -1879,8 +1863,7 @@ struct mlx5_rxq_ctrl *
* Otherwise, enable Rx scatter if necessary.
*/
if (mprq_en && desc > (1U << mprq_stride_nums) &&
- (non_scatter_min_mbuf_size -
- (lro_on_queue ? RTE_PKTMBUF_HEADROOM : 0) <=
+ (non_scatter_min_mbuf_size <=
(1U << config->mprq.max_stride_size_n) ||
(config->mprq.stride_size_n &&
non_scatter_min_mbuf_size <= mprq_stride_cap))) {
@@ -1893,7 +1876,8 @@ struct mlx5_rxq_ctrl *
tmpl->rxq.strd_sz_n = config->mprq.stride_size_n ?
config->mprq.stride_size_n : mprq_stride_size;
tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
- tmpl->rxq.strd_headroom_en = strd_headroom_en;
+ tmpl->rxq.strd_scatter_en =
+ !!(offloads & DEV_RX_OFFLOAD_SCATTER);
tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size,
config->mprq.max_memcpy_len);
max_lro_size = RTE_MIN(max_rx_pkt_len,
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index f3bf763..4c27952 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1658,21 +1658,20 @@ enum mlx5_txcmp_code {
unsigned int i = 0;
uint32_t rq_ci = rxq->rq_ci;
uint16_t consumed_strd = rxq->consumed_strd;
- uint16_t headroom_sz = rxq->strd_headroom_en * RTE_PKTMBUF_HEADROOM;
struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
while (i < pkts_n) {
struct rte_mbuf *pkt;
void *addr;
int ret;
- unsigned int len;
+ uint32_t len;
uint16_t strd_cnt;
uint16_t strd_idx;
uint32_t offset;
uint32_t byte_cnt;
+ int32_t hdrm_overlap;
volatile struct mlx5_mini_cqe8 *mcqe = NULL;
uint32_t rss_hash_res = 0;
- uint8_t lro_num_seg;
if (consumed_strd == strd_n) {
/* Replace WQE only if the buffer is still in use. */
@@ -1719,18 +1718,6 @@ enum mlx5_txcmp_code {
MLX5_ASSERT(strd_idx < strd_n);
MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) &
wq_mask));
- lro_num_seg = cqe->lro_num_seg;
- /*
- * Currently configured to receive a packet per a stride. But if
- * MTU is adjusted through kernel interface, device could
- * consume multiple strides without raising an error. In this
- * case, the packet should be dropped because it is bigger than
- * the max_rx_pkt_len.
- */
- if (unlikely(!lro_num_seg && strd_cnt > 1)) {
- ++rxq->stats.idropped;
- continue;
- }
pkt = rte_pktmbuf_alloc(rxq->mp);
if (unlikely(pkt == NULL)) {
++rxq->stats.rx_nombuf;
@@ -1742,12 +1729,16 @@ enum mlx5_txcmp_code {
len -= RTE_ETHER_CRC_LEN;
offset = strd_idx * strd_sz + strd_shift;
addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset);
+ hdrm_overlap = len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz;
/*
* Memcpy packets to the target mbuf if:
* - The size of packet is smaller than mprq_max_memcpy_len.
* - Out of buffer in the Mempool for Multi-Packet RQ.
+ * - There is no space for a headroom and scatter is disabled.
*/
- if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl == NULL) {
+ if (len <= rxq->mprq_max_memcpy_len ||
+ rxq->mprq_repl == NULL ||
+ (hdrm_overlap > 0 && !rxq->strd_scatter_en)) {
/*
* When memcpy'ing packet due to out-of-buffer, the
* packet must be smaller than the target mbuf.
@@ -1769,7 +1760,7 @@ enum mlx5_txcmp_code {
rte_atomic16_add_return(&buf->refcnt, 1);
MLX5_ASSERT((uint16_t)rte_atomic16_read(&buf->refcnt) <=
strd_n + 1);
- buf_addr = RTE_PTR_SUB(addr, headroom_sz);
+ buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM);
/*
* MLX5 device doesn't use iova but it is necessary in a
* case where the Rx packet is transmitted via a
@@ -1788,43 +1779,42 @@ enum mlx5_txcmp_code {
rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova,
buf_len, shinfo);
/* Set mbuf head-room. */
- pkt->data_off = headroom_sz;
+ SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM);
MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF);
- /*
- * Prevent potential overflow due to MTU change through
- * kernel interface.
- */
- if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) {
- rte_pktmbuf_free_seg(pkt);
- ++rxq->stats.idropped;
- continue;
- }
+ MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) <
+ len - (hdrm_overlap > 0 ? hdrm_overlap : 0));
DATA_LEN(pkt) = len;
/*
- * LRO packet may consume all the stride memory, in this
- * case packet head-room space is not guaranteed so must
- * to add an empty mbuf for the head-room.
+ * Copy the last fragment of a packet (up to headroom
+ * size bytes) in case there is a stride overlap with
+ * a next packet's headroom. Allocate a separate mbuf
+ * to store this fragment and link it. Scatter is on.
*/
- if (!rxq->strd_headroom_en) {
- struct rte_mbuf *headroom_mbuf =
- rte_pktmbuf_alloc(rxq->mp);
+ if (hdrm_overlap > 0) {
+ MLX5_ASSERT(rxq->strd_scatter_en);
+ struct rte_mbuf *seg =
+ rte_pktmbuf_alloc(rxq->mp);
- if (unlikely(headroom_mbuf == NULL)) {
+ if (unlikely(seg == NULL)) {
rte_pktmbuf_free_seg(pkt);
++rxq->stats.rx_nombuf;
break;
}
- PORT(pkt) = rxq->port_id;
- NEXT(headroom_mbuf) = pkt;
- pkt = headroom_mbuf;
+ SET_DATA_OFF(seg, 0);
+ rte_memcpy(rte_pktmbuf_mtod(seg, void *),
+ RTE_PTR_ADD(addr, len - hdrm_overlap),
+ hdrm_overlap);
+ DATA_LEN(seg) = hdrm_overlap;
+ DATA_LEN(pkt) = len - hdrm_overlap;
+ NEXT(pkt) = seg;
NB_SEGS(pkt) = 2;
}
}
rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
- if (lro_num_seg > 1) {
+ if (cqe->lro_num_seg > 1) {
mlx5_lro_update_hdr(addr, cqe, len);
pkt->ol_flags |= PKT_RX_LRO;
- pkt->tso_segsz = strd_sz;
+ pkt->tso_segsz = len / cqe->lro_num_seg;
}
PKT_LEN(pkt) = len;
PORT(pkt) = rxq->port_id;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 939778a..d155c24 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -119,7 +119,7 @@ struct mlx5_rxq_data {
unsigned int strd_sz_n:4; /* Log 2 of stride size. */
unsigned int strd_shift_en:1; /* Enable 2bytes shift on a stride. */
unsigned int err_state:2; /* enum mlx5_rxq_err_state. */
- unsigned int strd_headroom_en:1; /* Enable mbuf headroom in MPRQ. */
+ unsigned int strd_scatter_en:1; /* Scattered packets from a stride. */
unsigned int lro:1; /* Enable LRO. */
unsigned int :1; /* Remaining bits. */
volatile uint32_t *rq_db;
--
1.8.3.1
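
As context for the "Rx scatter is supported by the application" condition
above, here is a hedged sketch of how an application could request the
DEV_RX_OFFLOAD_SCATTER offload so the PMD is allowed to deliver the
overlapping tail as a second mbuf segment. The helper name and the
capability check are illustrative assumptions, not part of this patch;
EAL setup and queue configuration are omitted.

	#include <string.h>
	#include <rte_ethdev.h>

	/* Illustrative helper: request Rx scatter before configuring queues. */
	static int
	enable_rx_scatter(uint16_t port_id, uint16_t nb_rxq, uint16_t nb_txq)
	{
		struct rte_eth_dev_info dev_info;
		struct rte_eth_conf conf;
		int ret;

		memset(&conf, 0, sizeof(conf));
		ret = rte_eth_dev_info_get(port_id, &dev_info);
		if (ret != 0)
			return ret;
		/* Only request the offload if the device reports support. */
		if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SCATTER)
			conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
		return rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
	}

With scatter enabled, the strd_scatter_en bit introduced by this patch lets
the burst routine link the copied tail as a second segment instead of
falling back to a full memcpy of the packet.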