patches for DPDK stable branches
 help / color / mirror / Atom feed
* [PATCH 1/4] net/i40e: fix AVX-512 pointer copy on 32-bit
       [not found] <20240906141127.628873-1-bruce.richardson@intel.com>
@ 2024-09-06 14:11 ` Bruce Richardson
  2024-09-30 13:27   ` Stokes, Ian
  2024-09-06 14:11 ` [PATCH 2/4] net/ice: " Bruce Richardson
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 6+ messages in thread
From: Bruce Richardson @ 2024-09-06 14:11 UTC (permalink / raw)
  To: dev; +Cc: Bruce Richardson, stable

On 32-bit builds a pointer is only 4 bytes rather than 8, so copying
32 pointers requires only half the number of AVX-512 load/store
operations: two 64-byte loads and stores instead of four.

Fixes: 5171b4ee6b6b ("net/i40e: optimize Tx by using AVX512")
Cc: stable@dpdk.org

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 drivers/net/i40e/i40e_rxtx_vec_avx512.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index 0238b03f8a..3b2750221b 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -799,6 +799,7 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
 		uint32_t copied = 0;
 		/* n is multiple of 32 */
 		while (copied < n) {
+#ifdef RTE_ARCH_64
 			const __m512i a = _mm512_load_si512(&txep[copied]);
 			const __m512i b = _mm512_load_si512(&txep[copied + 8]);
 			const __m512i c = _mm512_load_si512(&txep[copied + 16]);
@@ -808,6 +809,12 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
 			_mm512_storeu_si512(&cache_objs[copied + 8], b);
 			_mm512_storeu_si512(&cache_objs[copied + 16], c);
 			_mm512_storeu_si512(&cache_objs[copied + 24], d);
+#else
+			const __m512i a = _mm512_load_si512(&txep[copied]);
+			const __m512i b = _mm512_load_si512(&txep[copied + 16]);
+			_mm512_storeu_si512(&cache_objs[copied], a);
+			_mm512_storeu_si512(&cache_objs[copied + 16], b);
+#endif
 			copied += 32;
 		}
 		cache->len += n;
-- 
2.43.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 2/4] net/ice: fix AVX-512 pointer copy on 32-bit
       [not found] <20240906141127.628873-1-bruce.richardson@intel.com>
  2024-09-06 14:11 ` [PATCH 1/4] net/i40e: fix AVX-512 pointer copy on 32-bit Bruce Richardson
@ 2024-09-06 14:11 ` Bruce Richardson
  2024-09-30 13:29   ` Stokes, Ian
  2024-09-06 14:11 ` [PATCH 3/4] net/iavf: " Bruce Richardson
  2024-09-06 14:11 ` [PATCH 4/4] common/idpf: " Bruce Richardson
  3 siblings, 1 reply; 6+ messages in thread
From: Bruce Richardson @ 2024-09-06 14:11 UTC (permalink / raw)
  To: dev; +Cc: Bruce Richardson, stable

On 32-bit builds a pointer is only 4 bytes rather than 8, so copying
32 pointers requires only half the number of AVX-512 load/store
operations: two 64-byte loads and stores instead of four.

Fixes: a4e480de268e ("net/ice: optimize Tx by using AVX512")
Cc: stable@dpdk.org

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 drivers/net/ice/ice_rxtx_vec_avx512.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c b/drivers/net/ice/ice_rxtx_vec_avx512.c
index 04148e8ea2..add095ef06 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx512.c
@@ -907,6 +907,7 @@ ice_tx_free_bufs_avx512(struct ice_tx_queue *txq)
 		uint32_t copied = 0;
 		/* n is multiple of 32 */
 		while (copied < n) {
+#ifdef RTE_ARCH_64
 			const __m512i a = _mm512_loadu_si512(&txep[copied]);
 			const __m512i b = _mm512_loadu_si512(&txep[copied + 8]);
 			const __m512i c = _mm512_loadu_si512(&txep[copied + 16]);
@@ -916,6 +917,12 @@ ice_tx_free_bufs_avx512(struct ice_tx_queue *txq)
 			_mm512_storeu_si512(&cache_objs[copied + 8], b);
 			_mm512_storeu_si512(&cache_objs[copied + 16], c);
 			_mm512_storeu_si512(&cache_objs[copied + 24], d);
+#else
+			const __m512i a = _mm512_loadu_si512(&txep[copied]);
+			const __m512i b = _mm512_loadu_si512(&txep[copied + 16]);
+			_mm512_storeu_si512(&cache_objs[copied], a);
+			_mm512_storeu_si512(&cache_objs[copied + 16], b);
+#endif
 			copied += 32;
 		}
 		cache->len += n;
-- 
2.43.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 3/4] net/iavf: fix AVX-512 pointer copy on 32-bit
       [not found] <20240906141127.628873-1-bruce.richardson@intel.com>
  2024-09-06 14:11 ` [PATCH 1/4] net/i40e: fix AVX-512 pointer copy on 32-bit Bruce Richardson
  2024-09-06 14:11 ` [PATCH 2/4] net/ice: " Bruce Richardson
@ 2024-09-06 14:11 ` Bruce Richardson
  2024-09-06 14:11 ` [PATCH 4/4] common/idpf: " Bruce Richardson
  3 siblings, 0 replies; 6+ messages in thread
From: Bruce Richardson @ 2024-09-06 14:11 UTC (permalink / raw)
  To: dev; +Cc: Bruce Richardson, stable

On 32-bit builds a pointer is only 4 bytes rather than 8, so copying
32 pointers requires only half the number of AVX-512 load/store
operations: two 64-byte loads and stores instead of four.

Fixes: 9ab9514c150e ("net/iavf: enable AVX512 for Tx")
Cc: stable@dpdk.org

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_avx512.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
index 3bb6f305df..d6a861bf80 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
@@ -1892,6 +1892,7 @@ iavf_tx_free_bufs_avx512(struct iavf_tx_queue *txq)
 		uint32_t copied = 0;
 		/* n is multiple of 32 */
 		while (copied < n) {
+#ifdef RTE_ARCH_64
 			const __m512i a = _mm512_loadu_si512(&txep[copied]);
 			const __m512i b = _mm512_loadu_si512(&txep[copied + 8]);
 			const __m512i c = _mm512_loadu_si512(&txep[copied + 16]);
@@ -1901,6 +1902,12 @@ iavf_tx_free_bufs_avx512(struct iavf_tx_queue *txq)
 			_mm512_storeu_si512(&cache_objs[copied + 8], b);
 			_mm512_storeu_si512(&cache_objs[copied + 16], c);
 			_mm512_storeu_si512(&cache_objs[copied + 24], d);
+#else
+			const __m512i a = _mm512_loadu_si512(&txep[copied]);
+			const __m512i b = _mm512_loadu_si512(&txep[copied + 16]);
+			_mm512_storeu_si512(&cache_objs[copied], a);
+			_mm512_storeu_si512(&cache_objs[copied + 16], b);
+#endif
 			copied += 32;
 		}
 		cache->len += n;
-- 
2.43.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 4/4] common/idpf: fix AVX-512 pointer copy on 32-bit
       [not found] <20240906141127.628873-1-bruce.richardson@intel.com>
                   ` (2 preceding siblings ...)
  2024-09-06 14:11 ` [PATCH 3/4] net/iavf: " Bruce Richardson
@ 2024-09-06 14:11 ` Bruce Richardson
  3 siblings, 0 replies; 6+ messages in thread
From: Bruce Richardson @ 2024-09-06 14:11 UTC (permalink / raw)
  To: dev; +Cc: Bruce Richardson, stable

On 32-bit builds a pointer is only 4 bytes rather than 8, so copying
32 pointers requires only half the number of AVX-512 load/store
operations: two 64-byte loads and stores instead of four.

Fixes: 5bf87b45b2c8 ("net/idpf: add AVX512 data path for single queue model")
Cc: stable@dpdk.org

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 drivers/common/idpf/idpf_common_rxtx_avx512.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/common/idpf/idpf_common_rxtx_avx512.c b/drivers/common/idpf/idpf_common_rxtx_avx512.c
index 3b5e124ec8..b8450b03ae 100644
--- a/drivers/common/idpf/idpf_common_rxtx_avx512.c
+++ b/drivers/common/idpf/idpf_common_rxtx_avx512.c
@@ -1043,6 +1043,7 @@ idpf_tx_singleq_free_bufs_avx512(struct idpf_tx_queue *txq)
 		uint32_t copied = 0;
 		/* n is multiple of 32 */
 		while (copied < n) {
+#ifdef RTE_ARCH_64
 			const __m512i a = _mm512_loadu_si512(&txep[copied]);
 			const __m512i b = _mm512_loadu_si512(&txep[copied + 8]);
 			const __m512i c = _mm512_loadu_si512(&txep[copied + 16]);
@@ -1052,6 +1053,12 @@ idpf_tx_singleq_free_bufs_avx512(struct idpf_tx_queue *txq)
 			_mm512_storeu_si512(&cache_objs[copied + 8], b);
 			_mm512_storeu_si512(&cache_objs[copied + 16], c);
 			_mm512_storeu_si512(&cache_objs[copied + 24], d);
+#else
+			const __m512i a = _mm512_loadu_si512(&txep[copied]);
+			const __m512i b = _mm512_loadu_si512(&txep[copied + 16]);
+			_mm512_storeu_si512(&cache_objs[copied], a);
+			_mm512_storeu_si512(&cache_objs[copied + 16], b);
+#endif
 			copied += 32;
 		}
 		cache->len += n;
-- 
2.43.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH 1/4] net/i40e: fix AVX-512 pointer copy on 32-bit
  2024-09-06 14:11 ` [PATCH 1/4] net/i40e: fix AVX-512 pointer copy on 32-bit Bruce Richardson
@ 2024-09-30 13:27   ` Stokes, Ian
  0 siblings, 0 replies; 6+ messages in thread
From: Stokes, Ian @ 2024-09-30 13:27 UTC (permalink / raw)
  To: Richardson, Bruce, dev; +Cc: Richardson, Bruce, stable

> The size of a pointer on 32-bit is only 4 rather than 8 bytes, so
> copying 32 pointers only requires half the number of AVX-512 load store
> operations.
>
> Fixes: 5171b4ee6b6b ("net/i40e: optimize Tx by using AVX512")
> Cc: stable@dpdk.org
>
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
>  drivers/net/i40e/i40e_rxtx_vec_avx512.c | 7 +++++++
>  1 file changed, 7 insertions(+)
>
> diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
> b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
> index 0238b03f8a..3b2750221b 100644
> --- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
> +++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
> @@ -799,6 +799,7 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
>               uint32_t copied = 0;
>               /* n is multiple of 32 */
>               while (copied < n) {
> +#ifdef RTE_ARCH_64
>                       const __m512i a = _mm512_load_si512(&txep[copied]);
>                       const __m512i b = _mm512_load_si512(&txep[copied +
> 8]);
>                       const __m512i c = _mm512_load_si512(&txep[copied +
> 16]);
> @@ -808,6 +809,12 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
>                       _mm512_storeu_si512(&cache_objs[copied + 8], b);
>                       _mm512_storeu_si512(&cache_objs[copied + 16], c);
>                       _mm512_storeu_si512(&cache_objs[copied + 24], d);
> +#else
> +                     const __m512i a = _mm512_load_si512(&txep[copied]);
> +                     const __m512i b = _mm512_load_si512(&txep[copied +
> 16]);
> +                     _mm512_storeu_si512(&cache_objs[copied], a);
> +                     _mm512_storeu_si512(&cache_objs[copied + 16], b);
> +#endif
>                       copied += 32;
>               }
>               cache->len += n;
> --
> 2.43.0

Looks good to me, ACKED.

Thanks
Ian


^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH 2/4] net/ice: fix AVX-512 pointer copy on 32-bit
  2024-09-06 14:11 ` [PATCH 2/4] net/ice: " Bruce Richardson
@ 2024-09-30 13:29   ` Stokes, Ian
  0 siblings, 0 replies; 6+ messages in thread
From: Stokes, Ian @ 2024-09-30 13:29 UTC (permalink / raw)
  To: Richardson, Bruce, dev; +Cc: Richardson, Bruce, stable

> The size of a pointer on 32-bit is only 4 rather than 8 bytes, so
> copying 32 pointers only requires half the number of AVX-512 load store
> operations.
>
> Fixes: a4e480de268e ("net/ice: optimize Tx by using AVX512")
> Cc: stable@dpdk.org
>
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
>  drivers/net/ice/ice_rxtx_vec_avx512.c | 7 +++++++
>  1 file changed, 7 insertions(+)
>
> diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c
> b/drivers/net/ice/ice_rxtx_vec_avx512.c
> index 04148e8ea2..add095ef06 100644
> --- a/drivers/net/ice/ice_rxtx_vec_avx512.c
> +++ b/drivers/net/ice/ice_rxtx_vec_avx512.c
> @@ -907,6 +907,7 @@ ice_tx_free_bufs_avx512(struct ice_tx_queue *txq)
>               uint32_t copied = 0;
>               /* n is multiple of 32 */
>               while (copied < n) {
> +#ifdef RTE_ARCH_64
>                       const __m512i a =
> _mm512_loadu_si512(&txep[copied]);
>                       const __m512i b = _mm512_loadu_si512(&txep[copied
> + 8]);
>                       const __m512i c = _mm512_loadu_si512(&txep[copied +
> 16]);
> @@ -916,6 +917,12 @@ ice_tx_free_bufs_avx512(struct ice_tx_queue *txq)
>                       _mm512_storeu_si512(&cache_objs[copied + 8], b);
>                       _mm512_storeu_si512(&cache_objs[copied + 16], c);
>                       _mm512_storeu_si512(&cache_objs[copied + 24], d);
> +#else
> +                     const __m512i a =
> _mm512_loadu_si512(&txep[copied]);
> +                     const __m512i b = _mm512_loadu_si512(&txep[copied
> + 16]);
> +                     _mm512_storeu_si512(&cache_objs[copied], a);
> +                     _mm512_storeu_si512(&cache_objs[copied + 16], b);
> +#endif
>                       copied += 32;
>               }
>               cache->len += n;
> --
 LGTM, Acked.

Thanks
Ian


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-09-30 13:29 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20240906141127.628873-1-bruce.richardson@intel.com>
2024-09-06 14:11 ` [PATCH 1/4] net/i40e: fix AVX-512 pointer copy on 32-bit Bruce Richardson
2024-09-30 13:27   ` Stokes, Ian
2024-09-06 14:11 ` [PATCH 2/4] net/ice: " Bruce Richardson
2024-09-30 13:29   ` Stokes, Ian
2024-09-06 14:11 ` [PATCH 3/4] net/iavf: " Bruce Richardson
2024-09-06 14:11 ` [PATCH 4/4] common/idpf: " Bruce Richardson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).