From: Bruce Richardson <bruce.richardson@intel.com>
To: dev@dpdk.org
Cc: Bruce Richardson <bruce.richardson@intel.com>,
Ian Stokes <ian.stokes@intel.com>,
David Christensen <drc@linux.ibm.com>,
Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>,
Wathsala Vithanage <wathsala.vithanage@arm.com>
Subject: [RFC PATCH 19/21] net/i40e: use vector SW ring for all vector paths
Date: Fri, 22 Nov 2024 12:54:12 +0000 [thread overview]
Message-ID: <20241122125418.2857301-20-bruce.richardson@intel.com> (raw)
In-Reply-To: <20241122125418.2857301-1-bruce.richardson@intel.com>
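The AVX-512 code path used a smaller SW ring structure containing only
the mbuf pointer and none of the other fields. Those other fields are
only used in the scalar code path, so update all vector driver code paths
(AVX2, SSE, Neon, Altivec) to use the smaller, faster structure. As part
of this, the queue's vector SW ring flag now follows the vector Tx flag
rather than the AVX-512 flag, and tx_vec_allowed is cleared up front when
the max SIMD bitwidth is below 128 bits.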
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/i40e/i40e_rxtx.c | 8 +++++---
drivers/net/i40e/i40e_rxtx_vec_altivec.c | 12 ++++++------
drivers/net/i40e/i40e_rxtx_vec_avx2.c | 12 ++++++------
drivers/net/i40e/i40e_rxtx_vec_avx512.c | 14 ++------------
drivers/net/i40e/i40e_rxtx_vec_common.h | 6 ------
drivers/net/i40e/i40e_rxtx_vec_neon.c | 12 ++++++------
drivers/net/i40e/i40e_rxtx_vec_sse.c | 12 ++++++------
7 files changed, 31 insertions(+), 45 deletions(-)
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 4878b9b8aa..05f7f380c4 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1892,7 +1892,7 @@ i40e_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
tx_queue_id);
txq->vector_tx = ad->tx_vec_allowed;
- txq->vector_sw_ring = ad->tx_use_avx512;
+ txq->vector_sw_ring = txq->vector_tx;
/*
* tx_queue_id is queue id application refers to, while
@@ -3551,9 +3551,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
}
}
+ if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)
+ ad->tx_vec_allowed = false;
+
if (ad->tx_simple_allowed) {
- if (ad->tx_vec_allowed &&
- rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
+ if (ad->tx_vec_allowed) {
#ifdef RTE_ARCH_X86
if (ad->tx_use_avx512) {
#ifdef CC_AVX512_SUPPORT
diff --git a/drivers/net/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/i40e/i40e_rxtx_vec_altivec.c
index 2ab09eb167..7acf44d3fe 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_altivec.c
@@ -553,14 +553,14 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
{
struct ieth_tx_queue *txq = (struct ieth_tx_queue *)tx_queue;
volatile struct i40e_tx_desc *txdp;
- struct ieth_tx_entry *txep;
+ struct ieth_vec_tx_entry *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = I40E_TD_CMD;
uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
int i;
if (txq->nb_tx_free < txq->tx_free_thresh)
- i40e_tx_free_bufs(txq);
+ ieth_tx_free_bufs_vector(txq, i40e_tx_desc_done, false);
nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
nb_commit = nb_pkts;
@@ -569,13 +569,13 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = txq->tx_tail;
txdp = &txq->i40e_tx_ring[tx_id];
- txep = &txq->sw_ring[tx_id];
+ txep = &txq->sw_ring_v[tx_id];
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
n = (uint16_t)(txq->nb_tx_desc - tx_id);
if (nb_commit >= n) {
- ieth_tx_backlog_entry(txep, tx_pkts, n);
+ ieth_tx_backlog_entry_vec(txep, tx_pkts, n);
for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
vtx1(txdp, *tx_pkts, flags);
@@ -589,10 +589,10 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
/* avoid reach the end of ring */
txdp = &txq->i40e_tx_ring[tx_id];
- txep = &txq->sw_ring[tx_id];
+ txep = &txq->sw_ring_v[tx_id];
}
- ieth_tx_backlog_entry(txep, tx_pkts, nb_commit);
+ ieth_tx_backlog_entry_vec(txep, tx_pkts, nb_commit);
vtx(txdp, tx_pkts, nb_commit, flags);
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
index e32fa160bf..8f593378d3 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
@@ -745,13 +745,13 @@ i40e_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
{
struct ieth_tx_queue *txq = (struct ieth_tx_queue *)tx_queue;
volatile struct i40e_tx_desc *txdp;
- struct ieth_tx_entry *txep;
+ struct ieth_vec_tx_entry *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = I40E_TD_CMD;
uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
if (txq->nb_tx_free < txq->tx_free_thresh)
- i40e_tx_free_bufs(txq);
+ ieth_tx_free_bufs_vector(txq, i40e_tx_desc_done, false);
nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
if (unlikely(nb_pkts == 0))
@@ -759,13 +759,13 @@ i40e_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = txq->tx_tail;
txdp = &txq->i40e_tx_ring[tx_id];
- txep = &txq->sw_ring[tx_id];
+ txep = &txq->sw_ring_v[tx_id];
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
n = (uint16_t)(txq->nb_tx_desc - tx_id);
if (nb_commit >= n) {
- ieth_tx_backlog_entry(txep, tx_pkts, n);
+ ieth_tx_backlog_entry_vec(txep, tx_pkts, n);
vtx(txdp, tx_pkts, n - 1, flags);
tx_pkts += (n - 1);
@@ -780,10 +780,10 @@ i40e_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
/* avoid reach the end of ring */
txdp = &txq->i40e_tx_ring[tx_id];
- txep = &txq->sw_ring[tx_id];
+ txep = &txq->sw_ring_v[tx_id];
}
- ieth_tx_backlog_entry(txep, tx_pkts, nb_commit);
+ ieth_tx_backlog_entry_vec(txep, tx_pkts, nb_commit);
vtx(txdp, tx_pkts, nb_commit, flags);
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index 0ab3a4f02c..e0f1b2bc10 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -807,16 +807,6 @@ vtx(volatile struct i40e_tx_desc *txdp,
}
}
-static __rte_always_inline void
-tx_backlog_entry_avx512(struct ieth_vec_tx_entry *txep,
- struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
-{
- int i;
-
- for (i = 0; i < (int)nb_pkts; ++i)
- txep[i].mbuf = tx_pkts[i];
-}
-
static inline uint16_t
i40e_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
@@ -844,7 +834,7 @@ i40e_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
n = (uint16_t)(txq->nb_tx_desc - tx_id);
if (nb_commit >= n) {
- tx_backlog_entry_avx512(txep, tx_pkts, n);
+ ieth_tx_backlog_entry_vec(txep, tx_pkts, n);
vtx(txdp, tx_pkts, n - 1, flags);
tx_pkts += (n - 1);
@@ -862,7 +852,7 @@ i40e_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
txep = (void *)txq->sw_ring;
}
- tx_backlog_entry_avx512(txep, tx_pkts, nb_commit);
+ ieth_tx_backlog_entry_vec(txep, tx_pkts, nb_commit);
vtx(txdp, tx_pkts, nb_commit, flags);
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
index 60f2130f4d..72b4a44faf 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -24,12 +24,6 @@ i40e_tx_desc_done(struct ieth_tx_queue *txq, uint16_t idx)
rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE);
}
-static __rte_always_inline int
-i40e_tx_free_bufs(struct ieth_tx_queue *txq)
-{
- return ieth_tx_free_bufs(txq, i40e_tx_desc_done);
-}
-
static inline void
_i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
{
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index b30da1a78c..502dcc9407 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -681,14 +681,14 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
{
struct ieth_tx_queue *txq = (struct ieth_tx_queue *)tx_queue;
volatile struct i40e_tx_desc *txdp;
- struct ieth_tx_entry *txep;
+ struct ieth_vec_tx_entry *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = I40E_TD_CMD;
uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
int i;
if (txq->nb_tx_free < txq->tx_free_thresh)
- i40e_tx_free_bufs(txq);
+ ieth_tx_free_bufs_vector(txq, i40e_tx_desc_done, false);
nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
if (unlikely(nb_pkts == 0))
@@ -696,13 +696,13 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
tx_id = txq->tx_tail;
txdp = &txq->i40e_tx_ring[tx_id];
- txep = &txq->sw_ring[tx_id];
+ txep = &txq->sw_ring_v[tx_id];
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
n = (uint16_t)(txq->nb_tx_desc - tx_id);
if (nb_commit >= n) {
- ieth_tx_backlog_entry(txep, tx_pkts, n);
+ ieth_tx_backlog_entry_vec(txep, tx_pkts, n);
for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
vtx1(txdp, *tx_pkts, flags);
@@ -716,10 +716,10 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
/* avoid reach the end of ring */
txdp = &txq->i40e_tx_ring[tx_id];
- txep = &txq->sw_ring[tx_id];
+ txep = &txq->sw_ring_v[tx_id];
}
- ieth_tx_backlog_entry(txep, tx_pkts, nb_commit);
+ ieth_tx_backlog_entry_vec(txep, tx_pkts, nb_commit);
vtx(txdp, tx_pkts, nb_commit, flags);
diff --git a/drivers/net/i40e/i40e_rxtx_vec_sse.c b/drivers/net/i40e/i40e_rxtx_vec_sse.c
index 5107cb9f01..958380815a 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_sse.c
@@ -700,14 +700,14 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
{
struct ieth_tx_queue *txq = (struct ieth_tx_queue *)tx_queue;
volatile struct i40e_tx_desc *txdp;
- struct ieth_tx_entry *txep;
+ struct ieth_vec_tx_entry *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = I40E_TD_CMD;
uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
int i;
if (txq->nb_tx_free < txq->tx_free_thresh)
- i40e_tx_free_bufs(txq);
+ ieth_tx_free_bufs_vector(txq, i40e_tx_desc_done, false);
nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
if (unlikely(nb_pkts == 0))
@@ -715,13 +715,13 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = txq->tx_tail;
txdp = &txq->i40e_tx_ring[tx_id];
- txep = &txq->sw_ring[tx_id];
+ txep = &txq->sw_ring_v[tx_id];
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
n = (uint16_t)(txq->nb_tx_desc - tx_id);
if (nb_commit >= n) {
- ieth_tx_backlog_entry(txep, tx_pkts, n);
+ ieth_tx_backlog_entry_vec(txep, tx_pkts, n);
for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
vtx1(txdp, *tx_pkts, flags);
@@ -735,10 +735,10 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
/* avoid reach the end of ring */
txdp = &txq->i40e_tx_ring[tx_id];
- txep = &txq->sw_ring[tx_id];
+ txep = &txq->sw_ring_v[tx_id];
}
- ieth_tx_backlog_entry(txep, tx_pkts, nb_commit);
+ ieth_tx_backlog_entry_vec(txep, tx_pkts, nb_commit);
vtx(txdp, tx_pkts, nb_commit, flags);
--
2.43.0
next prev parent reply other threads:[~2024-11-22 12:57 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-22 12:53 [RFC PATCH 00/21] Reduce code duplication across Intel NIC drivers Bruce Richardson
2024-11-22 12:53 ` [RFC PATCH 01/21] common/intel_eth: add pkt reassembly fn for intel drivers Bruce Richardson
2024-11-22 12:53 ` [RFC PATCH 02/21] common/intel_eth: provide common Tx entry structures Bruce Richardson
2024-11-22 12:53 ` [RFC PATCH 03/21] common/intel_eth: add Tx mbuf ring replenish fn Bruce Richardson
2024-11-22 12:53 ` [RFC PATCH 04/21] drivers/net: align Tx queue struct field names Bruce Richardson
2024-11-22 12:53 ` [RFC PATCH 05/21] drivers/net: add prefix for driver-specific structs Bruce Richardson
2024-11-22 12:53 ` [RFC PATCH 06/21] common/intel_eth: merge ice and i40e Tx queue struct Bruce Richardson
2024-11-22 12:54 ` [RFC PATCH 07/21] net/iavf: use common Tx queue structure Bruce Richardson
2024-11-22 12:54 ` [RFC PATCH 08/21] net/ixgbe: convert Tx queue context cache field to ptr Bruce Richardson
2024-11-22 12:54 ` [RFC PATCH 09/21] net/ixgbe: use common Tx queue structure Bruce Richardson
2024-11-22 12:54 ` [RFC PATCH 10/21] common/intel_eth: pack " Bruce Richardson
2024-11-22 12:54 ` [RFC PATCH 11/21] common/intel_eth: add post-Tx buffer free function Bruce Richardson
2024-11-22 12:54 ` [RFC PATCH 12/21] common/intel_eth: add Tx buffer free fn for AVX-512 Bruce Richardson
2024-11-22 12:54 ` [RFC PATCH 13/21] net/iavf: use common Tx " Bruce Richardson
2024-11-22 12:54 ` [RFC PATCH 14/21] net/ice: move Tx queue mbuf cleanup fn to common Bruce Richardson
2024-11-22 12:54 ` [RFC PATCH 15/21] net/i40e: use common Tx queue mbuf cleanup fn Bruce Richardson
2024-11-22 12:54 ` [RFC PATCH 16/21] net/ixgbe: " Bruce Richardson
2024-11-22 12:54 ` [RFC PATCH 17/21] net/iavf: " Bruce Richardson
2024-11-22 12:54 ` [RFC PATCH 18/21] net/ice: use vector SW ring for all vector paths Bruce Richardson
2024-11-22 12:54 ` Bruce Richardson [this message]
2024-11-22 12:54 ` [RFC PATCH 20/21] net/iavf: " Bruce Richardson
2024-11-22 12:54 ` [RFC PATCH 21/21] net/ixgbe: use common Tx backlog entry fn Bruce Richardson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241122125418.2857301-20-bruce.richardson@intel.com \
--to=bruce.richardson@intel.com \
--cc=dev@dpdk.org \
--cc=drc@linux.ibm.com \
--cc=ian.stokes@intel.com \
--cc=konstantin.v.ananyev@yandex.ru \
--cc=wathsala.vithanage@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for the URLs of the NNTP newsgroup(s).