DPDK patches and discussions
 help / color / Atom feed
* [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs
@ 2019-10-15  7:49 Joyce Kong
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs Joyce Kong
                   ` (68 more replies)
  0 siblings, 69 replies; 139+ messages in thread
From: Joyce Kong @ 2019-10-15  7:49 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, honnappa.nagarahalli, gavin.hu

There are a lot of bit operation functions scattered across the
PMDs. Consolidate them into a common API family and apply it in
the different PMDs to reduce code duplication.

Joyce Kong (5):
  lib/eal: implement the family of rte bit operation APIs
  net/axgbe: use common rte bit operation APIs instead
  net/bnx2x: use common rte bit operation APIs instead
  net/hinic: use common rte bit operation APIs instead
  net/qede: use common rte bit operation APIs instead

 drivers/net/axgbe/axgbe_common.h           |  29 +----
 drivers/net/axgbe/axgbe_ethdev.c           |  14 +-
 drivers/net/axgbe/axgbe_mdio.c             |  14 +-
 drivers/net/bnx2x/bnx2x.c                  | 202 +++++++++++++----------------
 drivers/net/bnx2x/bnx2x.h                  |   5 +-
 drivers/net/bnx2x/ecore_sp.h               |   8 +-
 drivers/net/hinic/base/hinic_compat.h      |  35 +----
 drivers/net/hinic/hinic_pmd_ethdev.c       |  16 +--
 drivers/net/qede/base/bcm_osal.c           |  20 ---
 drivers/net/qede/base/bcm_osal.h           |  10 +-
 lib/librte_eal/common/Makefile             |   1 +
 lib/librte_eal/common/include/rte_bitops.h |  56 ++++++++
 lib/librte_eal/common/meson.build          |   1 +
 13 files changed, 180 insertions(+), 231 deletions(-)
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
@ 2019-10-15  7:49 ` Joyce Kong
  2019-10-15 16:53   ` Stephen Hemminger
                     ` (3 more replies)
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 2/5] net/axgbe: use common rte bit operation APIs instead Joyce Kong
                   ` (67 subsequent siblings)
  68 siblings, 4 replies; 139+ messages in thread
From: Joyce Kong @ 2019-10-15  7:49 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, honnappa.nagarahalli, gavin.hu

There are a lot of bit operation functions scattered and
duplicated across the PMDs, so consolidating them into a common
API family is necessary. Furthermore, the bit operations are
mostly applied to IO devices, so use __ATOMIC_ACQ_REL to
ensure the ordering.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
---
 lib/librte_eal/common/Makefile             |  1 +
 lib/librte_eal/common/include/rte_bitops.h | 56 ++++++++++++++++++++++++++++++
 lib/librte_eal/common/meson.build          |  1 +
 3 files changed, 58 insertions(+)
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index a00d4fc..8586ca8 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
 INC += rte_service.h rte_service_component.h
 INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
 INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
+INC += rte_bitops.h
 
 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
 GENERIC_INC += rte_memcpy.h rte_cpuflags.h
diff --git a/lib/librte_eal/common/include/rte_bitops.h b/lib/librte_eal/common/include/rte_bitops.h
new file mode 100644
index 0000000..4d7c5a3
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_bitops.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Corporation
+ */
+
+#ifndef _RTE_BITOPS_H_
+#define _RTE_BITOPS_H_
+
+/**
+ * @file
+ * Bit Operations
+ *
+ * This file defines a generic API for bit operations.
+ */
+
+#include <stdint.h>
+#include <rte_atomic.h>
+
+static inline void
+rte_set_bit(unsigned int nr, unsigned long *addr)
+{
+	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL);
+}
+
+static inline void
+rte_clear_bit(int nr, unsigned long *addr)
+{
+	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL);
+}
+
+static inline int
+rte_test_bit(int nr, unsigned long *addr)
+{
+	int res;
+	rte_mb();
+	res = ((*addr) & (1UL << nr)) != 0;
+	rte_mb();
+
+	return res;
+}
+
+static inline int
+rte_test_and_set_bit(int nr, unsigned long *addr)
+{
+	unsigned long mask = (1UL << nr);
+
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+static inline int
+rte_test_and_clear_bit(int nr, unsigned long *addr)
+{
+	unsigned long mask = (1UL << nr);
+
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+#endif /* _RTE_BITOPS_H_ */
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index 386577c..a277cdf 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -52,6 +52,7 @@ common_headers = files(
 	'include/rte_alarm.h',
 	'include/rte_branch_prediction.h',
 	'include/rte_bus.h',
+	'include/rte_bitops.h',
 	'include/rte_bitmap.h',
 	'include/rte_class.h',
 	'include/rte_common.h',
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v1 2/5] net/axgbe: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-10-15  7:49 ` Joyce Kong
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 3/5] net/bnx2x: " Joyce Kong
                   ` (66 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-10-15  7:49 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, honnappa.nagarahalli, gavin.hu

Remove the driver's own bit operation APIs and use the common
ones; this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
---
 drivers/net/axgbe/axgbe_common.h | 29 +----------------------------
 drivers/net/axgbe/axgbe_ethdev.c | 14 +++++++-------
 drivers/net/axgbe/axgbe_mdio.c   | 14 +++++++-------
 3 files changed, 15 insertions(+), 42 deletions(-)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index 34f60f1..9cabda8 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -22,6 +22,7 @@
 #include <pthread.h>
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memory.h>
 #include <rte_malloc.h>
 #include <rte_hexdump.h>
@@ -1674,34 +1675,6 @@ do {									\
 #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)
 #define time_before_eq(a, b)	time_after_eq(b, a)
 
-/*---bitmap support apis---*/
-static inline int axgbe_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
-static inline void axgbe_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void axgbe_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int axgbe_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 static inline unsigned long msecs_to_timer_cycles(unsigned int m)
 {
 	return rte_get_timer_hz() * (m / 1000);
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index d1f160e..cd990f5 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -201,8 +201,8 @@ axgbe_dev_start(struct rte_eth_dev *dev)
 	axgbe_dev_enable_tx(dev);
 	axgbe_dev_enable_rx(dev);
 
-	axgbe_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
-	axgbe_clear_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_clear_bit(AXGBE_DOWN, &pdata->dev_state);
 	return 0;
 }
 
@@ -216,17 +216,17 @@ axgbe_dev_stop(struct rte_eth_dev *dev)
 
 	rte_intr_disable(&pdata->pci_dev->intr_handle);
 
-	if (axgbe_test_bit(AXGBE_STOPPED, &pdata->dev_state))
+	if (rte_test_bit(AXGBE_STOPPED, &pdata->dev_state))
 		return;
 
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit(AXGBE_STOPPED, &pdata->dev_state);
 	axgbe_dev_disable_tx(dev);
 	axgbe_dev_disable_rx(dev);
 
 	pdata->phy_if.phy_stop(pdata);
 	pdata->hw_if.exit(pdata);
 	memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit(AXGBE_DOWN, &pdata->dev_state);
 }
 
 /* Clear all resources like TX/RX queues. */
@@ -598,8 +598,8 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
 
 	pdata = eth_dev->data->dev_private;
 	/* initial state */
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit(AXGBE_STOPPED, &pdata->dev_state);
 	pdata->eth_dev = eth_dev;
 
 	pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
diff --git a/drivers/net/axgbe/axgbe_mdio.c b/drivers/net/axgbe/axgbe_mdio.c
index 2721e5c..6f3b3f2 100644
--- a/drivers/net/axgbe/axgbe_mdio.c
+++ b/drivers/net/axgbe/axgbe_mdio.c
@@ -743,7 +743,7 @@ static int __axgbe_phy_config_aneg(struct axgbe_port *pdata)
 {
 	int ret;
 
-	axgbe_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+	rte_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
 	pdata->link_check = rte_get_timer_cycles();
 
 	ret = pdata->phy_if.phy_impl.an_config(pdata);
@@ -807,9 +807,9 @@ static int axgbe_phy_config_aneg(struct axgbe_port *pdata)
 
 	ret = __axgbe_phy_config_aneg(pdata);
 	if (ret)
-		axgbe_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
 	else
-		axgbe_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
 
 	pthread_mutex_unlock(&pdata->an_mutex);
 
@@ -880,7 +880,7 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 	unsigned int link_aneg;
 	int an_restart;
 
-	if (axgbe_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
+	if (rte_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
 		pdata->phy.link = 0;
 		goto adjust_link;
 	}
@@ -900,10 +900,10 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 			return;
 		}
 		axgbe_phy_status_result(pdata);
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
-			axgbe_clear_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+		if (rte_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
+			rte_clear_bit(AXGBE_LINK_INIT, &pdata->dev_state);
 	} else {
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
+		if (rte_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
 			axgbe_check_link_timeout(pdata);
 
 			if (link_aneg)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v1 3/5] net/bnx2x: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs Joyce Kong
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 2/5] net/axgbe: use common rte bit operation APIs instead Joyce Kong
@ 2019-10-15  7:49 ` " Joyce Kong
  2019-10-15  7:50 ` [dpdk-dev] [PATCH v1 4/5] net/hinic: " Joyce Kong
                   ` (65 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-10-15  7:49 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, honnappa.nagarahalli, gavin.hu

Remove the driver's own bit operation APIs and use the common
ones; this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
---
 drivers/net/bnx2x/bnx2x.c    | 202 +++++++++++++++++++------------------------
 drivers/net/bnx2x/bnx2x.h    |   5 +-
 drivers/net/bnx2x/ecore_sp.h |   8 +-
 3 files changed, 94 insertions(+), 121 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c
index e1dfe60..92c77d1 100644
--- a/drivers/net/bnx2x/bnx2x.c
+++ b/drivers/net/bnx2x/bnx2x.c
@@ -129,32 +129,6 @@ static void bnx2x_ack_sb(struct bnx2x_softc *sc, uint8_t igu_sb_id,
 			 uint8_t storm, uint16_t index, uint8_t op,
 			 uint8_t update);
 
-int bnx2x_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	mb();
-	return res;
-}
-
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-void bnx2x_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-int bnx2x_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 int bnx2x_cmpxchg(volatile int *addr, int old, int new)
 {
 	return __sync_val_compare_and_swap(addr, old, new);
@@ -1427,11 +1401,11 @@ bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj,
 
 	/* wait for completion of requested */
 	if (wait_for_comp) {
-		bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+		rte_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
 	}
 
 	/* Set the mac type of addresses we want to clear */
-	bnx2x_set_bit(mac_type, &vlan_mac_flags);
+	rte_set_bit(mac_type, &vlan_mac_flags);
 
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc < 0)
@@ -1458,26 +1432,26 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		break;
 
 	case BNX2X_RX_MODE_NORMAL:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
 	case BNX2X_RX_MODE_ALLMULTI:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
@@ -1488,19 +1462,19 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		 * should receive matched and unmatched (in resolution of port)
 		 * unicast packets.
 		 */
-		bnx2x_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		if (IS_MF_SI(sc)) {
-			bnx2x_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags);
+			rte_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags);
 		} else {
-			bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+			rte_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
 		}
 
 		break;
@@ -1512,8 +1486,8 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 
 	/* Set ACCEPT_ANY_VLAN as we do not enable filtering by VLAN */
 	if (rx_mode != BNX2X_RX_MODE_NONE) {
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
 	}
 
 	return 0;
@@ -1542,7 +1516,7 @@ bnx2x_set_q_rx_mode(struct bnx2x_softc *sc, uint8_t cl_id,
 	ramrod_param.rdata = BNX2X_SP(sc, rx_mode_rdata);
 	ramrod_param.rdata_mapping =
 	    (rte_iova_t)BNX2X_SP_MAPPING(sc, rx_mode_rdata),
-	    bnx2x_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	    rte_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	ramrod_param.ramrod_flags = ramrod_flags;
 	ramrod_param.rx_mode_flags = rx_mode_flags;
@@ -1571,9 +1545,9 @@ int bnx2x_set_storm_rx_mode(struct bnx2x_softc *sc)
 		return rc;
 	}
 
-	bnx2x_set_bit(RAMROD_RX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_TX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit(RAMROD_RX, &ramrod_flags);
+	rte_set_bit(RAMROD_TX, &ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	return bnx2x_set_q_rx_mode(sc, sc->fp[0].cl_id, rx_mode_flags,
 				 rx_accept_flags, tx_accept_flags,
@@ -1698,7 +1672,7 @@ static int bnx2x_func_wait_started(struct bnx2x_softc *sc)
 			    "Forcing STARTED-->TX_STOPPED-->STARTED");
 
 		func_params.f_obj = &sc->func_obj;
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 
 		/* STARTED-->TX_STOPPED */
 		func_params.cmd = ECORE_F_CMD_TX_STOP;
@@ -1722,7 +1696,7 @@ static int bnx2x_stop_queue(struct bnx2x_softc *sc, int index)
 
 	q_params.q_obj = &sc->sp_objs[fp->index].q_obj;
 	/* We want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* Stop the primary connection: */
 
@@ -1783,7 +1757,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	int rc;
 
 	/* prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_STOP;
 
@@ -1797,7 +1771,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	if (rc) {
 		PMD_DRV_LOG(NOTICE, sc, "FUNC_STOP ramrod failed. "
 			    "Running a dry transaction");
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 		return ecore_func_state_change(sc, &func_params);
 	}
 
@@ -1809,7 +1783,7 @@ static int bnx2x_reset_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	struct ecore_func_state_params func_params = { NULL };
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_RESET;
@@ -1866,8 +1840,8 @@ bnx2x_chip_cleanup(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_li
 	 * a race between the completion code and this code.
 	 */
 
-	if (bnx2x_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) {
-		bnx2x_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
+	if (rte_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) {
+		rte_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
 	} else {
 		bnx2x_set_storm_rx_mode(sc);
 	}
@@ -1960,12 +1934,12 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Cleanup MACs' object first... */
 
 	/* Wait for completion of requested */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
 	/* Perform a dry cleanup */
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
+	rte_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
 
 	/* Clean ETH primary MAC */
-	bnx2x_set_bit(ECORE_ETH_MAC, &vlan_mac_flags);
+	rte_set_bit(ECORE_ETH_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, &sc->sp_objs->mac_obj, &vlan_mac_flags,
 				 &ramrod_flags);
 	if (rc != 0) {
@@ -1974,7 +1948,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 
 	/* Cleanup UC list */
 	vlan_mac_flags = 0;
-	bnx2x_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
+	rte_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc != 0) {
 		PMD_DRV_LOG(NOTICE, sc,
@@ -1984,7 +1958,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Now clean mcast object... */
 
 	rparam.mcast_obj = &sc->mcast_obj;
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
+	rte_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
 
 	/* Add a DEL command... */
 	rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL);
@@ -4288,7 +4262,7 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 	struct ecore_vlan_mac_obj *vlan_mac_obj;
 
 	/* always push next commands out, don't wait here */
-	bnx2x_set_bit(RAMROD_CONT, &ramrod_flags);
+	rte_set_bit(RAMROD_CONT, &ramrod_flags);
 
 	switch (le32toh(elem->message.data.eth_event.echo) >> BNX2X_SWCID_SHIFT) {
 	case ECORE_FILTER_MAC_PENDING:
@@ -4319,10 +4293,10 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 
 static void bnx2x_handle_rx_mode_eqe(struct bnx2x_softc *sc)
 {
-	bnx2x_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	rte_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	/* send rx_mode command again if was requested */
-	if (bnx2x_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) {
+	if (rte_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) {
 		bnx2x_set_storm_rx_mode(sc);
 	}
 }
@@ -4693,7 +4667,7 @@ static int bnx2x_init_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	PMD_INIT_FUNC_TRACE(sc);
 
 	/* prepare the parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_INIT;
@@ -4988,8 +4962,8 @@ static void bnx2x_init_eth_fp(struct bnx2x_softc *sc, int idx)
 	bnx2x_update_fp_sb_idx(fp);
 
 	/* Configure Queue State object */
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_RX, &q_type);
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_TX, &q_type);
+	rte_set_bit(ECORE_Q_TYPE_HAS_RX, &q_type);
+	rte_set_bit(ECORE_Q_TYPE_HAS_TX, &q_type);
 
 	ecore_init_queue_obj(sc,
 			     &sc->sp_objs[idx].q_obj,
@@ -5803,7 +5777,7 @@ static int bnx2x_func_start(struct bnx2x_softc *sc)
 	    &func_params.params.start;
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_START;
@@ -6379,11 +6353,11 @@ bnx2x_pf_q_prep_init(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
 	uint8_t cos;
 	int cxt_index, cxt_offset;
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags);
+	rte_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags);
+	rte_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags);
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
+	rte_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
+	rte_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
 
 	/* HC rate */
 	init_params->rx.hc_rate =
@@ -6417,7 +6391,7 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	unsigned long flags = 0;
 
 	/* PF driver will always initialize the Queue to an ACTIVE state */
-	bnx2x_set_bit(ECORE_Q_FLG_ACTIVE, &flags);
+	rte_set_bit(ECORE_Q_FLG_ACTIVE, &flags);
 
 	/*
 	 * tx only connections collect statistics (on the same index as the
@@ -6425,9 +6399,9 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * connection is initialized.
 	 */
 
-	bnx2x_set_bit(ECORE_Q_FLG_STATS, &flags);
+	rte_set_bit(ECORE_Q_FLG_STATS, &flags);
 	if (zero_stats) {
-		bnx2x_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags);
+		rte_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags);
 	}
 
 	/*
@@ -6435,10 +6409,10 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * CoS-ness doesn't survive the loopback
 	 */
 	if (sc->flags & BNX2X_TX_SWITCHING) {
-		bnx2x_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags);
+		rte_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
+	rte_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
 
 	return flags;
 }
@@ -6448,15 +6422,15 @@ static unsigned long bnx2x_get_q_flags(struct bnx2x_softc *sc, uint8_t leading)
 	unsigned long flags = 0;
 
 	if (IS_MF_SD(sc)) {
-		bnx2x_set_bit(ECORE_Q_FLG_OV, &flags);
+		rte_set_bit(ECORE_Q_FLG_OV, &flags);
 	}
 
 	if (leading) {
-		bnx2x_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags);
-		bnx2x_set_bit(ECORE_Q_FLG_MCAST, &flags);
+		rte_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags);
+		rte_set_bit(ECORE_Q_FLG_MCAST, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_VLAN, &flags);
+	rte_set_bit(ECORE_Q_FLG_VLAN, &flags);
 
 	/* merge with common flags */
 	return flags | bnx2x_get_common_flags(sc, TRUE);
@@ -6577,7 +6551,7 @@ bnx2x_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, uint8_t lea
 	q_params.q_obj = &BNX2X_SP_OBJ(sc, fp).q_obj;
 
 	/* we want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* prepare the INIT parameters */
 	bnx2x_pf_q_prep_init(sc, fp, &q_params.params.init);
@@ -6645,20 +6619,20 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 
 	params.rss_obj = rss_obj;
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
 
-	bnx2x_set_bit(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
+	rte_set_bit(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
 
 	/* RSS configuration */
-	bnx2x_set_bit(ECORE_RSS_IPV4, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV4_TCP, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6_TCP, &params.rss_flags);
+	rte_set_bit(ECORE_RSS_IPV4, &params.rss_flags);
+	rte_set_bit(ECORE_RSS_IPV4_TCP, &params.rss_flags);
+	rte_set_bit(ECORE_RSS_IPV6, &params.rss_flags);
+	rte_set_bit(ECORE_RSS_IPV6_TCP, &params.rss_flags);
 	if (rss_obj->udp_rss_v4) {
-		bnx2x_set_bit(ECORE_RSS_IPV4_UDP, &params.rss_flags);
+		rte_set_bit(ECORE_RSS_IPV4_UDP, &params.rss_flags);
 	}
 	if (rss_obj->udp_rss_v6) {
-		bnx2x_set_bit(ECORE_RSS_IPV6_UDP, &params.rss_flags);
+		rte_set_bit(ECORE_RSS_IPV6_UDP, &params.rss_flags);
 	}
 
 	/* Hash bits */
@@ -6673,7 +6647,7 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 			params.rss_key[i] = (uint32_t) rte_rand();
 		}
 
-		bnx2x_set_bit(ECORE_RSS_SET_SRCH, &params.rss_flags);
+		rte_set_bit(ECORE_RSS_SET_SRCH, &params.rss_flags);
 	}
 
 	if (IS_PF(sc))
@@ -6730,11 +6704,11 @@ bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
 	ramrod_param.ramrod_flags = *ramrod_flags;
 
 	/* fill a user request section if needed */
-	if (!bnx2x_test_bit(RAMROD_CONT, ramrod_flags)) {
+	if (!rte_test_bit(RAMROD_CONT, ramrod_flags)) {
 		rte_memcpy(ramrod_param.user_req.u.mac.mac, mac,
 				 ETH_ALEN);
 
-		bnx2x_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags);
+		rte_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags);
 
 /* Set the command: ADD or DEL */
 		ramrod_param.user_req.cmd = (set) ? ECORE_VLAN_MAC_ADD :
@@ -6761,7 +6735,7 @@ static int bnx2x_set_eth_mac(struct bnx2x_softc *sc, uint8_t set)
 
 	PMD_DRV_LOG(DEBUG, sc, "Adding Ethernet MAC");
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	/* Eth MAC is set on RSS leading client (fp[0]) */
 	return bnx2x_set_mac_one(sc, sc->link_params.mac_addr,
@@ -6893,24 +6867,26 @@ bnx2x_fill_report_data(struct bnx2x_softc *sc, struct bnx2x_link_report_data *da
 
 	/* Link is down */
 	if (!sc->link_vars.link_up || (sc->flags & BNX2X_MF_FUNC_DIS)) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+		rte_set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
 			    &data->link_report_flags);
 	}
 
 	/* Full DUPLEX */
 	if (sc->link_vars.duplex == DUPLEX_FULL) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		rte_set_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
 			    &data->link_report_flags);
 	}
 
 	/* Rx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_RX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
+		rte_set_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+				&data->link_report_flags);
 	}
 
 	/* Tx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
+		rte_set_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				&data->link_report_flags);
 	}
 }
 
@@ -6929,9 +6905,9 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 
 	/* Don't report link down or exactly the same link status twice */
 	if (!memcmp(&cur_data, &sc->last_reported_link, sizeof(cur_data)) ||
-	    (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	    (rte_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &sc->last_reported_link.link_report_flags) &&
-	     bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	     rte_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &cur_data.link_report_flags))) {
 		return;
 	}
@@ -6946,14 +6922,14 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 	/* report new link params and remember the state for the next time */
 	rte_memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data));
 
-	if (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	if (rte_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
 			 &cur_data.link_report_flags)) {
 		ELINK_DEBUG_P0(sc, "NIC Link is Down");
 	} else {
 		__rte_unused const char *duplex;
 		__rte_unused const char *flow;
 
-		if (bnx2x_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		if (rte_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
 					   &cur_data.link_report_flags)) {
 			duplex = "full";
 				ELINK_DEBUG_P0(sc, "link set to full duplex");
@@ -6968,19 +6944,19 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
  * enabled.
  */
 		if (cur_data.link_report_flags) {
-			if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			if (rte_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
 					 &cur_data.link_report_flags) &&
-			    bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+			    rte_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
 					 &cur_data.link_report_flags)) {
 				flow = "ON - receive & transmit";
-			} else if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (rte_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
 						&cur_data.link_report_flags) &&
-				   !bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   !rte_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
 						 &cur_data.link_report_flags)) {
 				flow = "ON - receive";
-			} else if (!bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (!rte_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
 						 &cur_data.link_report_flags) &&
-				   bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   rte_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
 						&cur_data.link_report_flags)) {
 				flow = "ON - transmit";
 			} else {
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 43c6040..aa2d251 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -15,6 +15,7 @@
 #define __BNX2X_H__
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_spinlock.h>
 #include <rte_bus_pci.h>
 #include <rte_io.h>
@@ -1809,10 +1810,6 @@ static const uint32_t dmae_reg_go_c[] = {
 #define PCI_PM_D0    1
 #define PCI_PM_D3hot 2
 
-int  bnx2x_test_bit(int nr, volatile unsigned long * addr);
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long * addr);
-void bnx2x_clear_bit(int nr, volatile unsigned long * addr);
-int  bnx2x_test_and_clear_bit(int nr, volatile unsigned long * addr);
 int  bnx2x_cmpxchg(volatile int *addr, int old, int new);
 
 int bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size,
diff --git a/drivers/net/bnx2x/ecore_sp.h b/drivers/net/bnx2x/ecore_sp.h
index cc1db37..72697c2 100644
--- a/drivers/net/bnx2x/ecore_sp.h
+++ b/drivers/net/bnx2x/ecore_sp.h
@@ -73,10 +73,10 @@ typedef rte_spinlock_t ECORE_MUTEX_SPIN;
 #define ECORE_SET_BIT_NA(bit, var)         (*var |= (1 << bit))
 #define ECORE_CLEAR_BIT_NA(bit, var)       (*var &= ~(1 << bit))
 
-#define ECORE_TEST_BIT(bit, var)           bnx2x_test_bit(bit, var)
-#define ECORE_SET_BIT(bit, var)            bnx2x_set_bit(bit, var)
-#define ECORE_CLEAR_BIT(bit, var)          bnx2x_clear_bit(bit, var)
-#define ECORE_TEST_AND_CLEAR_BIT(bit, var) bnx2x_test_and_clear_bit(bit, var)
+#define ECORE_TEST_BIT(bit, var)           rte_test_bit(bit, var)
+#define ECORE_SET_BIT(bit, var)            rte_set_bit(bit, var)
+#define ECORE_CLEAR_BIT(bit, var)          rte_clear_bit(bit, var)
+#define ECORE_TEST_AND_CLEAR_BIT(bit, var) rte_test_and_clear_bit(bit, var)
 
 #define atomic_load_acq_int                (int)*
 #define atomic_store_rel_int(a, v)         (*a = v)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v1 4/5] net/hinic: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (2 preceding siblings ...)
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 3/5] net/bnx2x: " Joyce Kong
@ 2019-10-15  7:50 ` " Joyce Kong
  2019-10-15  7:50 ` [dpdk-dev] [PATCH v1 5/5] net/qede: " Joyce Kong
                   ` (64 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-10-15  7:50 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, honnappa.nagarahalli, gavin.hu

Remove the driver's own bit operation APIs and use the common ones;
this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
---
 drivers/net/hinic/base/hinic_compat.h | 35 +----------------------------------
 drivers/net/hinic/hinic_pmd_ethdev.c  | 16 ++++++++--------
 2 files changed, 9 insertions(+), 42 deletions(-)

diff --git a/drivers/net/hinic/base/hinic_compat.h b/drivers/net/hinic/base/hinic_compat.h
index f599947..ce1fdc6 100644
--- a/drivers/net/hinic/base/hinic_compat.h
+++ b/drivers/net/hinic/base/hinic_compat.h
@@ -11,6 +11,7 @@
 #include <pthread.h>
 #include <rte_common.h>
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memzone.h>
 #include <rte_memcpy.h>
 #include <rte_malloc.h>
@@ -117,40 +118,6 @@ extern int hinic_logtype;
 
 #define HINIC_PAGE_SIZE_DPDK	6
 
-static inline int hinic_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
-static inline void hinic_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void hinic_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int hinic_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
-static inline int hinic_test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_or(addr, mask) & mask;
-}
-
 void *dma_zalloc_coherent(void *dev, size_t size, dma_addr_t *dma_handle,
 			  gfp_t flag);
 void *dma_zalloc_coherent_aligned(void *dev, size_t size,
diff --git a/drivers/net/hinic/hinic_pmd_ethdev.c b/drivers/net/hinic/hinic_pmd_ethdev.c
index c9a400e..dcdcfb9 100644
--- a/drivers/net/hinic/hinic_pmd_ethdev.c
+++ b/drivers/net/hinic/hinic_pmd_ethdev.c
@@ -227,7 +227,7 @@ static void hinic_dev_interrupt_handler(void *param)
 	struct rte_eth_dev *dev = param;
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (!hinic_test_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
+	if (!rte_test_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device's interrupt is disabled, ignore interrupt event, dev_name: %s, port_id: %d",
 			    nic_dev->proc_dev_name, dev->data->port_id);
 		return;
@@ -907,7 +907,7 @@ static int hinic_dev_start(struct rte_eth_dev *dev)
 	if (dev->data->dev_conf.intr_conf.lsc != 0)
 		(void)hinic_link_update(dev, 0);
 
-	hinic_set_bit(HINIC_DEV_START, &nic_dev->dev_status);
+	rte_set_bit(HINIC_DEV_START, &nic_dev->dev_status);
 
 	return 0;
 
@@ -1030,7 +1030,7 @@ static void hinic_dev_stop(struct rte_eth_dev *dev)
 	name = dev->data->name;
 	port_id = dev->data->port_id;
 
-	if (!hinic_test_and_clear_bit(HINIC_DEV_START, &nic_dev->dev_status)) {
+	if (!rte_test_and_clear_bit(HINIC_DEV_START, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(INFO, "Device %s already stopped", name);
 		return;
 	}
@@ -1073,7 +1073,7 @@ static void hinic_disable_interrupt(struct rte_eth_dev *dev)
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	int ret, retries = 0;
 
-	hinic_clear_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_clear_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* disable msix interrupt in hardware */
 	hinic_set_msix_state(nic_dev->hwdev, 0, HINIC_MSIX_DISABLE);
@@ -2197,9 +2197,9 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 			    eth_dev->data->name);
 		goto enable_intr_fail;
 	}
-	hinic_set_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_set_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
-	hinic_set_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_set_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
 	PMD_DRV_LOG(INFO, "Initialize %s in primary successfully",
 		    eth_dev->data->name);
 
@@ -2236,7 +2236,7 @@ static void hinic_dev_close(struct rte_eth_dev *dev)
 {
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (hinic_test_and_set_bit(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
+	if (rte_test_and_set_bit(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device %s already closed",
 			    dev->data->name);
 		return;
@@ -2316,7 +2316,7 @@ static int hinic_dev_uninit(struct rte_eth_dev *dev)
 	struct hinic_nic_dev *nic_dev;
 
 	nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
-	hinic_clear_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_clear_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v1 5/5] net/qede: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (3 preceding siblings ...)
  2019-10-15  7:50 ` [dpdk-dev] [PATCH v1 4/5] net/hinic: " Joyce Kong
@ 2019-10-15  7:50 ` " Joyce Kong
  2019-10-15 16:51 ` [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Stephen Hemminger
                   ` (63 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-10-15  7:50 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, honnappa.nagarahalli, gavin.hu

Remove the driver's own bit operation APIs and use the common ones;
this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
---
 drivers/net/qede/base/bcm_osal.c | 20 --------------------
 drivers/net/qede/base/bcm_osal.h | 10 ++++------
 2 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 9915df4..665833c 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -45,26 +45,6 @@ u32 qede_osal_log2(u32 val)
 	return log;
 }
 
-inline void qede_set_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-inline void qede_clr_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-inline bool qede_test_bit(u32 nr, unsigned long *addr)
-{
-	bool res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
 static inline u32 qede_ffb(unsigned long word)
 {
 	unsigned long first_bit;
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 51edc41..9f2be0a 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -8,6 +8,7 @@
 #define __BCM_OSAL_H
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_spinlock.h>
 #include <rte_malloc.h>
 #include <rte_atomic.h>
@@ -311,17 +312,14 @@ typedef struct osal_list_t {
 #define OSAL_BITS_PER_UL_MASK		(OSAL_BITS_PER_UL - 1)
 
 /* Bitops */
-void qede_set_bit(u32, unsigned long *);
 #define OSAL_SET_BIT(bit, bitmap) \
-	qede_set_bit(bit, bitmap)
+	rte_set_bit(bit, bitmap)
 
-void qede_clr_bit(u32, unsigned long *);
 #define OSAL_CLEAR_BIT(bit, bitmap) \
-	qede_clr_bit(bit, bitmap)
+	rte_clear_bit(bit, bitmap)
 
-bool qede_test_bit(u32, unsigned long *);
 #define OSAL_TEST_BIT(bit, bitmap) \
-	qede_test_bit(bit, bitmap)
+	rte_test_bit(bit, bitmap)
 
 u32 qede_find_first_bit(unsigned long *, u32);
 #define OSAL_FIND_FIRST_BIT(bitmap, length) \
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (4 preceding siblings ...)
  2019-10-15  7:50 ` [dpdk-dev] [PATCH v1 5/5] net/qede: " Joyce Kong
@ 2019-10-15 16:51 ` Stephen Hemminger
  2019-10-18  9:01   ` Joyce Kong (Arm Technology China)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 0/6] " Joyce Kong
                   ` (62 subsequent siblings)
  68 siblings, 1 reply; 139+ messages in thread
From: Stephen Hemminger @ 2019-10-15 16:51 UTC (permalink / raw)
  To: Joyce Kong
  Cc: dev, nd, thomas, jerinj, ravi1.kumar, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, rmody, shshaikh,
	honnappa.nagarahalli, gavin.hu

On Tue, 15 Oct 2019 15:49:56 +0800
Joyce Kong <joyce.kong@arm.com> wrote:

> There are a lot functions of bit operations scattered in
> PMDs, consolidate them into a common API family and applied
> in different PMDs to reduce code duplication.
> 
> Joyce Kong (5):
>   lib/eal: implement the family of rte bit operation APIs
>   net/axgbe: use common rte bit operation APIs instead
>   net/bnx2x: use common rte bit operation APIs instead
>   net/hinic: use common rte bit operation APIs instead
>   net/qede: use common rte bit operation APIs instead
> 
>  drivers/net/axgbe/axgbe_common.h           |  29 +----
>  drivers/net/axgbe/axgbe_ethdev.c           |  14 +-
>  drivers/net/axgbe/axgbe_mdio.c             |  14 +-
>  drivers/net/bnx2x/bnx2x.c                  | 202 +++++++++++++----------------
>  drivers/net/bnx2x/bnx2x.h                  |   5 +-
>  drivers/net/bnx2x/ecore_sp.h               |   8 +-
>  drivers/net/hinic/base/hinic_compat.h      |  35 +----
>  drivers/net/hinic/hinic_pmd_ethdev.c       |  16 +--
>  drivers/net/qede/base/bcm_osal.c           |  20 ---
>  drivers/net/qede/base/bcm_osal.h           |  10 +-
>  lib/librte_eal/common/Makefile             |   1 +
>  lib/librte_eal/common/include/rte_bitops.h |  56 ++++++++
>  lib/librte_eal/common/meson.build          |   1 +
>  13 files changed, 180 insertions(+), 231 deletions(-)
>  create mode 100644 lib/librte_eal/common/include/rte_bitops.h
> 


This is a really good idea, and should have been done long ago.
Could you add tests for these as well?

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-10-15 16:53   ` Stephen Hemminger
  2019-10-18  9:00     ` Joyce Kong (Arm Technology China)
  2019-10-16  7:54   ` Jerin Jacob
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 139+ messages in thread
From: Stephen Hemminger @ 2019-10-15 16:53 UTC (permalink / raw)
  To: Joyce Kong
  Cc: dev, nd, thomas, jerinj, ravi1.kumar, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, rmody, shshaikh,
	honnappa.nagarahalli, gavin.hu

On Tue, 15 Oct 2019 15:49:57 +0800
Joyce Kong <joyce.kong@arm.com> wrote:

> +static inline void
> +rte_set_bit(unsigned int nr, unsigned long *addr)
> +{
> +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL);
> +}
> +
> +static inline void
> +rte_clear_bit(int nr, unsigned long *addr)
> +{
> +	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL);
> +}
> +
> +static inline int
> +rte_test_bit(int nr, unsigned long *addr)
> +{
> +	int res;
> +	rte_mb();
> +	res = ((*addr) & (1UL << nr)) != 0;
> +	rte_mb();
> +
> +	return res;
> +}
> +
> +static inline int
> +rte_test_and_set_bit(int nr, unsigned long *addr)
> +{
> +	unsigned long mask = (1UL << nr);
> +
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +
> +static inline int
> +rte_test_and_clear_bit(int nr, unsigned long *addr)
> +{
> +	unsigned long mask = (1UL << nr);
> +
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
> +}

These functions need to be part of API, and have doxygen comments?

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs Joyce Kong
  2019-10-15 16:53   ` Stephen Hemminger
@ 2019-10-16  7:54   ` Jerin Jacob
  2019-10-18  9:02     ` Joyce Kong (Arm Technology China)
  2019-10-16 19:05   ` Stephen Hemminger
  2019-10-17 13:32   ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs Morten Brørup
  3 siblings, 1 reply; 139+ messages in thread
From: Jerin Jacob @ 2019-10-16  7:54 UTC (permalink / raw)
  To: Joyce Kong
  Cc: dpdk-dev, nd, Thomas Monjalon, Jerin Jacob, ravi1.kumar,
	Ziyang Xuan, Xiaoyun Wang, Guoyang Zhou, Rasesh Mody,
	Shahed Shaikh, Honnappa Nagarahalli, Gavin Hu

On Tue, Oct 15, 2019 at 1:20 PM Joyce Kong <joyce.kong@arm.com> wrote:
>
> There are a lot functions of bit operations scattered and
> duplicated in PMDs, consolidating them into a common API
> family is necessary. Furthermore, the bit operation is
> mostly applied to the IO devices, so use __ATOMIC_ACQ_REL
> to ensure the ordering.
>
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> ---
>  lib/librte_eal/common/Makefile             |  1 +
>  lib/librte_eal/common/include/rte_bitops.h | 56 ++++++++++++++++++++++++++++++
>  lib/librte_eal/common/meson.build          |  1 +
> +
> +static inline void
> +rte_set_bit(unsigned int nr, unsigned long *addr)
> +{
> +       __atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL);
> +}

If it is specific to IO then, IMO, it makes sense to name the API
rte_io_set_bit(), like rte_io_rmb(),
and change the header file to rte_io_bitops.h.

The barriers are only needed for IO operations. Not conveying that
explicitly in the API name
would invite using it for normal cases.

Another option could be to introduce generic and IO-specific bit
operations
separately.

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs Joyce Kong
  2019-10-15 16:53   ` Stephen Hemminger
  2019-10-16  7:54   ` Jerin Jacob
@ 2019-10-16 19:05   ` Stephen Hemminger
  2019-10-17 13:32   ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs Morten Brørup
  3 siblings, 0 replies; 139+ messages in thread
From: Stephen Hemminger @ 2019-10-16 19:05 UTC (permalink / raw)
  To: Joyce Kong
  Cc: dev, nd, thomas, jerinj, ravi1.kumar, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, rmody, shshaikh,
	honnappa.nagarahalli, gavin.hu

On Tue, 15 Oct 2019 15:49:57 +0800
Joyce Kong <joyce.kong@arm.com> wrote:

> There are a lot functions of bit operations scattered and
> duplicated in PMDs, consolidating them into a common API
> family is necessary. Furthermore, the bit operation is
> mostly applied to the IO devices, so use __ATOMIC_ACQ_REL
> to ensure the ordering.
> 
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
	'include/rte_common.h',

Patchwork reports several build failures for this patch set.

/tmp/UB1604-64_K4.4.0_Clang3.8.0/x86_64-native-linuxapp-clang/62c86b2c1091439598f2f1688566632c/dpdk/x86_64-native-linuxapp-clang/lib/librte_pmd_bnx2x.a(bnx2x.o): In function `bnx2x_set_storm_rx_mode':
/tmp/UB1604-64_K4.4.0_Clang3.8.0/x86_64-native-linuxapp-clang/62c86b2c1091439598f2f1688566632c/dpdk/drivers/net/bnx2x/bnx2x.c:(.text+0x1602): undefined reference to `ret_set_bit'


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs Joyce Kong
                     ` (2 preceding siblings ...)
  2019-10-16 19:05   ` Stephen Hemminger
@ 2019-10-17 13:32   ` Morten Brørup
  2019-10-18  8:58     ` Joyce Kong (Arm Technology China)
  3 siblings, 1 reply; 139+ messages in thread
From: Morten Brørup @ 2019-10-17 13:32 UTC (permalink / raw)
  To: Joyce Kong, dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, honnappa.nagarahalli, gavin.hu,
	Stephen Hemminger

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> Sent: Tuesday, October 15, 2019 9:50 AM
> 
> There are a lot functions of bit operations scattered and
> duplicated in PMDs, consolidating them into a common API
> family is necessary. Furthermore, the bit operation is
> mostly applied to the IO devices, so use __ATOMIC_ACQ_REL
> to ensure the ordering.

Good initiative.

> 
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> ---
>  lib/librte_eal/common/Makefile             |  1 +
>  lib/librte_eal/common/include/rte_bitops.h | 56
> ++++++++++++++++++++++++++++++
>  lib/librte_eal/common/meson.build          |  1 +
>  3 files changed, 58 insertions(+)
>  create mode 100644 lib/librte_eal/common/include/rte_bitops.h
> 
> diff --git a/lib/librte_eal/common/Makefile
> b/lib/librte_eal/common/Makefile
> index a00d4fc..8586ca8 100644
> --- a/lib/librte_eal/common/Makefile
> +++ b/lib/librte_eal/common/Makefile
> @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
>  INC += rte_service.h rte_service_component.h
>  INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
>  INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
> +INC += rte_bitops.h
> 
>  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> rte_prefetch.h
>  GENERIC_INC += rte_memcpy.h rte_cpuflags.h
> diff --git a/lib/librte_eal/common/include/rte_bitops.h
> b/lib/librte_eal/common/include/rte_bitops.h
> new file mode 100644
> index 0000000..4d7c5a3
> --- /dev/null
> +++ b/lib/librte_eal/common/include/rte_bitops.h
> @@ -0,0 +1,56 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Arm Corporation
> + */
> +
> +#ifndef _RTE_BITOPS_H_
> +#define _RTE_BITOPS_H_
> +
> +/**
> + * @file
> + * Bit Operations
> + *
> + * This file defines a generic API for bit operations.
> + */
> +
> +#include <stdint.h>
> +#include <rte_atomic.h>
> +
> +static inline void
> +rte_set_bit(unsigned int nr, unsigned long *addr)
> +{
> +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL);
> +}
> +
> +static inline void
> +rte_clear_bit(int nr, unsigned long *addr)
> +{
> +	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL);
> +}
> +
> +static inline int
> +rte_test_bit(int nr, unsigned long *addr)
> +{
> +	int res;
> +	rte_mb();
> +	res = ((*addr) & (1UL << nr)) != 0;
> +	rte_mb();
> +
> +	return res;
> +}

Why does rte_test_bit() not use any of the __atomic_xx functions instead? E.g.:

/**
 * Test whether bit @p nr is set in the word at @p addr.
 *
 * The word is read with a single atomic load using acquire ordering, then
 * masked so that only the requested bit contributes to the result.
 *
 * @param nr
 *   Bit index within the word; must be in [0, bits in unsigned long).
 * @param addr
 *   Address of the word to inspect.
 * @return
 *   1 if the bit is set, 0 otherwise.
 */
static inline int
rte_test_bit(int nr, unsigned long *addr)
{
	unsigned long mask = 1UL << nr;

	/* Normalize to 0/1: the masked word may not fit in an int. */
	return (__atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask) != 0;
}

> +
> +static inline int
> +rte_test_and_set_bit(int nr, unsigned long *addr)
> +{
> +	unsigned long mask = (1UL << nr);
> +
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +
> +static inline int
> +rte_test_and_clear_bit(int nr, unsigned long *addr)
> +{
> +	unsigned long mask = (1UL << nr);
> +
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +#endif /* _RTE_BITOPS_H_ */
> diff --git a/lib/librte_eal/common/meson.build
> b/lib/librte_eal/common/meson.build
> index 386577c..a277cdf 100644
> --- a/lib/librte_eal/common/meson.build
> +++ b/lib/librte_eal/common/meson.build
> @@ -52,6 +52,7 @@ common_headers = files(
>  	'include/rte_alarm.h',
>  	'include/rte_branch_prediction.h',
>  	'include/rte_bus.h',
> +	'include/rte_bitops.h',
>  	'include/rte_bitmap.h',
>  	'include/rte_class.h',
>  	'include/rte_common.h',
> --
> 2.7.4
> 

These functions use unsigned long as the type of their value, like they do in the PMDs.

However, a generic bit operations library should preferably work with multiple types, like the __atomic_xx functions. Or use a well-defined uintNN_t type. Or have individually named functions for each type size, e.g. rte_set_bit_32() and rte_set_bit_64().


Med venlig hilsen / kind regards
- Morten Brørup


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs
  2019-10-17 13:32   ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs Morten Brørup
@ 2019-10-18  8:58     ` Joyce Kong (Arm Technology China)
  2019-10-23  3:07       ` Joyce Kong (Arm Technology China)
  0 siblings, 1 reply; 139+ messages in thread
From: Joyce Kong (Arm Technology China) @ 2019-10-18  8:58 UTC (permalink / raw)
  To: Morten Brørup, dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, Honnappa Nagarahalli,
	Gavin Hu (Arm Technology China),
	Stephen Hemminger

Hi Morten,

> -----Original Message-----
> From: Morten Brørup <mb@smartsharesystems.com>
> Sent: Thursday, October 17, 2019 9:32 PM
> To: Joyce Kong (Arm Technology China) <Joyce.Kong@arm.com>;
> dev@dpdk.org
> Cc: nd <nd@arm.com>; thomas@monjalon.net; jerinj@marvell.com;
> ravi1.kumar@amd.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
> rmody@marvell.com; shshaikh@marvell.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology China)
> <Gavin.Hu@arm.com>; Stephen Hemminger
> <stephen@networkplumber.org>
> Subject: RE: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte
> bitoperation APIs
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> > Sent: Tuesday, October 15, 2019 9:50 AM
> >
> > There are a lot functions of bit operations scattered and duplicated
> > in PMDs, consolidating them into a common API family is necessary.
> > Furthermore, the bit operation is mostly applied to the IO devices, so
> > use __ATOMIC_ACQ_REL to ensure the ordering.
> 
> Good initiative.
> 
> >
> > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > ---
> >  lib/librte_eal/common/Makefile             |  1 +
> >  lib/librte_eal/common/include/rte_bitops.h | 56
> > ++++++++++++++++++++++++++++++
> >  lib/librte_eal/common/meson.build          |  1 +
> >  3 files changed, 58 insertions(+)
> >  create mode 100644 lib/librte_eal/common/include/rte_bitops.h
> >
> > diff --git a/lib/librte_eal/common/Makefile
> > b/lib/librte_eal/common/Makefile index a00d4fc..8586ca8 100644
> > --- a/lib/librte_eal/common/Makefile
> > +++ b/lib/librte_eal/common/Makefile
> > @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h  INC
> > += rte_service.h rte_service_component.h  INC += rte_bitmap.h
> > rte_vfio.h rte_hypervisor.h rte_test.h  INC += rte_reciprocal.h
> > rte_fbarray.h rte_uuid.h
> > +INC += rte_bitops.h
> >
> >  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> > rte_prefetch.h  GENERIC_INC += rte_memcpy.h rte_cpuflags.h diff --git
> > a/lib/librte_eal/common/include/rte_bitops.h
> > b/lib/librte_eal/common/include/rte_bitops.h
> > new file mode 100644
> > index 0000000..4d7c5a3
> > --- /dev/null
> > +++ b/lib/librte_eal/common/include/rte_bitops.h
> > @@ -0,0 +1,56 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2019 Arm Corporation
> > + */
> > +
> > +#ifndef _RTE_BITOPS_H_
> > +#define _RTE_BITOPS_H_
> > +
> > +/**
> > + * @file
> > + * Bit Operations
> > + *
> > + * This file defines a generic API for bit operations.
> > + */
> > +
> > +#include <stdint.h>
> > +#include <rte_atomic.h>
> > +
> > +static inline void
> > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> > +
> > +static inline void
> > +rte_clear_bit(int nr, unsigned long *addr) {
> > +	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL); }
> > +
> > +static inline int
> > +rte_test_bit(int nr, unsigned long *addr) {
> > +	int res;
> > +	rte_mb();
> > +	res = ((*addr) & (1UL << nr)) != 0;
> > +	rte_mb();
> > +
> > +	return res;
> > +}
> 
> Why does rte_test_bit() not use any of the __atomic_xx functions instead?
> E.g.:
> 
> static inline int
> rte_test_bit(int nr, unsigned long *addr) {
> 	return __atomic_load_n(addr, __ATOMIC_ACQUIRE); }
> 
You're right, it's better to use __atomic_xx here to stay consistent with the other APIs.

> > +
> > +static inline int
> > +rte_test_and_set_bit(int nr, unsigned long *addr) {
> > +	unsigned long mask = (1UL << nr);
> > +
> > +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> mask; }
> > +
> > +static inline int
> > +rte_test_and_clear_bit(int nr, unsigned long *addr) {
> > +	unsigned long mask = (1UL << nr);
> > +
> > +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) &
> mask; }
> > +#endif /* _RTE_BITOPS_H_ */
> > diff --git a/lib/librte_eal/common/meson.build
> > b/lib/librte_eal/common/meson.build
> > index 386577c..a277cdf 100644
> > --- a/lib/librte_eal/common/meson.build
> > +++ b/lib/librte_eal/common/meson.build
> > @@ -52,6 +52,7 @@ common_headers = files(
> >  	'include/rte_alarm.h',
> >  	'include/rte_branch_prediction.h',
> >  	'include/rte_bus.h',
> > +	'include/rte_bitops.h',
> >  	'include/rte_bitmap.h',
> >  	'include/rte_class.h',
> >  	'include/rte_common.h',
> > --
> > 2.7.4
> >
> 
> These functions use unsigned long as the type of their value, like they do in
> the PMDs.
> 
> However, a generic bit operations library should preferably work with
> multiple types, like the __atomic_xx functions. Or use an well defined
> uint_NN_t type. Or have individually named functions for each type size, e.g.
> rte_set_bit_32() and rte_set_bit_64().
> 
Good suggestion! And will do this in next version.

> Med venlig hilsen / kind regards
> - Morten Brørup


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs
  2019-10-15 16:53   ` Stephen Hemminger
@ 2019-10-18  9:00     ` Joyce Kong (Arm Technology China)
  0 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong (Arm Technology China) @ 2019-10-18  9:00 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, nd, thomas, jerinj, ravi1.kumar, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, rmody, shshaikh,
	Honnappa Nagarahalli, Gavin Hu (Arm Technology China)

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Wednesday, October 16, 2019 12:54 AM
> To: Joyce Kong (Arm Technology China) <Joyce.Kong@arm.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; thomas@monjalon.net;
> jerinj@marvell.com; ravi1.kumar@amd.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
> rmody@marvell.com; shshaikh@marvell.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology China)
> <Gavin.Hu@arm.com>
> Subject: Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte
> bit operation APIs
> 
> On Tue, 15 Oct 2019 15:49:57 +0800
> Joyce Kong <joyce.kong@arm.com> wrote:
> 
> > +static inline void
> > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> > +
> > +static inline void
> > +rte_clear_bit(int nr, unsigned long *addr) {
> > +	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL); }
> > +
> > +static inline int
> > +rte_test_bit(int nr, unsigned long *addr) {
> > +	int res;
> > +	rte_mb();
> > +	res = ((*addr) & (1UL << nr)) != 0;
> > +	rte_mb();
> > +
> > +	return res;
> > +}
> > +
> > +static inline int
> > +rte_test_and_set_bit(int nr, unsigned long *addr) {
> > +	unsigned long mask = (1UL << nr);
> > +
> > +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> mask; }
> > +
> > +static inline int
> > +rte_test_and_clear_bit(int nr, unsigned long *addr) {
> > +	unsigned long mask = (1UL << nr);
> > +
> > +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) &
> mask; }
> 
> These functions need to be part of API, and have doxygen comments?

Will add doxygen comments in next version.

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs
  2019-10-15 16:51 ` [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Stephen Hemminger
@ 2019-10-18  9:01   ` Joyce Kong (Arm Technology China)
  0 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong (Arm Technology China) @ 2019-10-18  9:01 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, nd, thomas, jerinj, ravi1.kumar, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, rmody, shshaikh,
	Honnappa Nagarahalli, Gavin Hu (Arm Technology China)

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Wednesday, October 16, 2019 12:51 AM
> To: Joyce Kong (Arm Technology China) <Joyce.Kong@arm.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; thomas@monjalon.net;
> jerinj@marvell.com; ravi1.kumar@amd.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
> rmody@marvell.com; shshaikh@marvell.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology China)
> <Gavin.Hu@arm.com>
> Subject: Re: [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation
> APIs in PMDs
> 
> On Tue, 15 Oct 2019 15:49:56 +0800
> Joyce Kong <joyce.kong@arm.com> wrote:
> 
> > There are a lot functions of bit operations scattered in PMDs,
> > consolidate them into a common API family and applied in different
> > PMDs to reduce code duplication.
> >
> > Joyce Kong (5):
> >   lib/eal: implement the family of rte bit operation APIs
> >   net/axgbe: use common rte bit operation APIs instead
> >   net/bnx2x: use common rte bit operation APIs instead
> >   net/hinic: use common rte bit operation APIs instead
> >   net/qede: use common rte bit operation APIs instead
> >
> >  drivers/net/axgbe/axgbe_common.h           |  29 +----
> >  drivers/net/axgbe/axgbe_ethdev.c           |  14 +-
> >  drivers/net/axgbe/axgbe_mdio.c             |  14 +-
> >  drivers/net/bnx2x/bnx2x.c                  | 202 +++++++++++++----------------
> >  drivers/net/bnx2x/bnx2x.h                  |   5 +-
> >  drivers/net/bnx2x/ecore_sp.h               |   8 +-
> >  drivers/net/hinic/base/hinic_compat.h      |  35 +----
> >  drivers/net/hinic/hinic_pmd_ethdev.c       |  16 +--
> >  drivers/net/qede/base/bcm_osal.c           |  20 ---
> >  drivers/net/qede/base/bcm_osal.h           |  10 +-
> >  lib/librte_eal/common/Makefile             |   1 +
> >  lib/librte_eal/common/include/rte_bitops.h |  56 ++++++++
> >  lib/librte_eal/common/meson.build          |   1 +
> >  13 files changed, 180 insertions(+), 231 deletions(-)  create mode
> > 100644 lib/librte_eal/common/include/rte_bitops.h
> >
> 
> 
> This is a really good idea, and should have been done long ago.
> Could you add tests for these as well?

Yes. Will add some tests for these APIs in next version.

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs
  2019-10-16  7:54   ` Jerin Jacob
@ 2019-10-18  9:02     ` Joyce Kong (Arm Technology China)
  2019-10-23  3:12       ` Joyce Kong (Arm Technology China)
  0 siblings, 1 reply; 139+ messages in thread
From: Joyce Kong (Arm Technology China) @ 2019-10-18  9:02 UTC (permalink / raw)
  To: Jerin Jacob
  Cc: dpdk-dev, nd, thomas, jerinj, ravi1.kumar, Ziyang Xuan,
	Xiaoyun Wang, Guoyang Zhou, Rasesh Mody, Shahed Shaikh,
	Honnappa Nagarahalli, Gavin Hu (Arm Technology China)

> -----Original Message-----
> From: Jerin Jacob <jerinjacobk@gmail.com>
> Sent: Wednesday, October 16, 2019 3:54 PM
> To: Joyce Kong (Arm Technology China) <Joyce.Kong@arm.com>
> Cc: dpdk-dev <dev@dpdk.org>; nd <nd@arm.com>; thomas@monjalon.net;
> jerinj@marvell.com; ravi1.kumar@amd.com; Ziyang Xuan
> <xuanziyang2@huawei.com>; Xiaoyun Wang
> <cloud.wangxiaoyun@huawei.com>; Guoyang Zhou
> <zhouguoyang@huawei.com>; Rasesh Mody <rmody@marvell.com>;
> Shahed Shaikh <shshaikh@marvell.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology China)
> <Gavin.Hu@arm.com>
> Subject: Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte
> bit operation APIs
> 
> On Tue, Oct 15, 2019 at 1:20 PM Joyce Kong <joyce.kong@arm.com> wrote:
> >
> > There are a lot functions of bit operations scattered and duplicated
> > in PMDs, consolidating them into a common API family is necessary.
> > Furthermore, the bit operation is mostly applied to the IO devices, so
> > use __ATOMIC_ACQ_REL to ensure the ordering.
> >
> > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > ---
> >  lib/librte_eal/common/Makefile             |  1 +
> >  lib/librte_eal/common/include/rte_bitops.h | 56
> ++++++++++++++++++++++++++++++
> >  lib/librte_eal/common/meson.build          |  1 +
> > +
> > +static inline void
> > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > +       __atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> 
> If it is specific to IO then, IMO, it makes sense to name the API
> rte_io_set_bit(), like rte_io_rmb(),
> and change the header file to rte_io_bitops.h.
> 
> The barriers are only needed for IO operations. Not conveying that explicitly in
> the API name would invite its use in normal cases.
> 
> Other option could be to introduce generic and IO-specific bit
> operations separately.

Would do some related changes in next version.

^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation APIs in PMDs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (5 preceding siblings ...)
  2019-10-15 16:51 ` [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Stephen Hemminger
@ 2019-10-23  2:54 ` " Joyce Kong
  2019-10-25 13:14   ` David Marchand
  2019-10-29 16:42   ` Thomas Monjalon
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
                   ` (61 subsequent siblings)
  68 siblings, 2 replies; 139+ messages in thread
From: Joyce Kong @ 2019-10-23  2:54 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

There are a lot of bit operation functions scattered across PMDs.
Consolidate them into a common API family and apply it in the
different PMDs to reduce code duplication.

v2:
  1. Add doxygen comments for the rte bit operation API(suggested by Stephen Hemminger).
  2. Add test cases for common rte bit operation API(suggested by Stephen Hemminger).
  3. Change the header file to rte_io_bitops.h and the operations to rte_io_set_bit() etc.,
     as the API uses barriers inside and the barriers are only needed for IO operations
     (suggested by Jerin Jacob).
  4. Use a well-defined uintNN_t type (suggested by Morten Brørup).

Joyce Kong (6):
  lib/eal: implement the family of rte bit operation APIs
  test/iobitops: add io bit operation test case
  net/axgbe: use common rte bit operation APIs instead
  net/bnx2x: use common rte bit operation APIs instead
  net/hinic: use common rte bit operation APIs instead
  net/qede: use common rte bit operation APIs instead

 app/test/Makefile                             |   1 +
 app/test/test_io_bitops.c                     |  86 +++++++++++
 drivers/net/axgbe/axgbe_common.h              |  29 +---
 drivers/net/axgbe/axgbe_ethdev.c              |  14 +-
 drivers/net/axgbe/axgbe_mdio.c                |  14 +-
 drivers/net/bnx2x/bnx2x.c                     | 209 ++++++++++++--------------
 drivers/net/bnx2x/bnx2x.h                     |   4 -
 drivers/net/bnx2x/ecore_sp.h                  |   9 +-
 drivers/net/hinic/base/hinic_compat.h         |  35 +----
 drivers/net/hinic/hinic_pmd_ethdev.c          |  16 +-
 drivers/net/qede/base/bcm_osal.c              |  20 ---
 drivers/net/qede/base/bcm_osal.h              |  10 +-
 lib/librte_eal/common/Makefile                |   1 +
 lib/librte_eal/common/include/rte_io_bitops.h | 112 ++++++++++++++
 lib/librte_eal/common/meson.build             |   1 +
 15 files changed, 327 insertions(+), 234 deletions(-)
 create mode 100644 app/test/test_io_bitops.c
 create mode 100644 lib/librte_eal/common/include/rte_io_bitops.h

-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (6 preceding siblings ...)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 0/6] " Joyce Kong
@ 2019-10-23  2:54 ` Joyce Kong
  2019-10-23  3:09   ` Honnappa Nagarahalli
                     ` (2 more replies)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 2/6] test/iobitops: add io bit operation test case Joyce Kong
                   ` (60 subsequent siblings)
  68 siblings, 3 replies; 139+ messages in thread
From: Joyce Kong @ 2019-10-23  2:54 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

There are a lot of bit operation functions scattered and
duplicated across PMDs, so consolidating them into a common API
family is necessary. Furthermore, the bit operations are
mostly applied to IO devices, so use __ATOMIC_ACQ_REL
to ensure the ordering.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 lib/librte_eal/common/Makefile                |   1 +
 lib/librte_eal/common/include/rte_io_bitops.h | 112 ++++++++++++++++++++++++++
 lib/librte_eal/common/meson.build             |   1 +
 3 files changed, 114 insertions(+)
 create mode 100644 lib/librte_eal/common/include/rte_io_bitops.h

diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index a00d4fc..3831313 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
 INC += rte_service.h rte_service_component.h
 INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
 INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
+INC += rte_io_bitops.h
 
 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
 GENERIC_INC += rte_memcpy.h rte_cpuflags.h
diff --git a/lib/librte_eal/common/include/rte_io_bitops.h b/lib/librte_eal/common/include/rte_io_bitops.h
new file mode 100644
index 0000000..5f778b8
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_io_bitops.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_IO_BITOPS_H_
+#define _RTE_IO_BITOPS_H_
+
+/**
+ * @file
+ * Bit Operations
+ *
+ * This file defines a generic API for bit operations.
+ */
+
+#include <rte_lcore.h>
+
+/**
+ * Get a bit.
+ *
+ * @param nr
+ *   The bit to get.
+ * @param addr
+ *   The address to count from.
+ * @return
+ *   The value of the bit.
+ */
+static inline int32_t
+rte_io_get_bit(uint32_t nr, uint64_t *addr)
+{
+	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & (1UL << nr);
+}
+
+/**
+ * Set a bit to 1.
+ *
+ * @param nr
+ *   The bit to set.
+ * @param addr
+ *   The address to count from.
+ */
+static inline void
+rte_io_set_bit(uint32_t nr, uint64_t *addr)
+{
+	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL);
+}
+
+/**
+ * Set a bit to 0.
+ *
+ * @param nr
+ *   The bit to clear.
+ * @param addr
+ *   The address to count from.
+ */
+static inline void
+rte_io_clear_bit(int32_t nr, uint64_t *addr)
+{
+	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL);
+}
+
+/**
+ * Test if a bit is 1.
+ *
+ * @param nr
+ *   The bit to test.
+ * @param addr
+ *   The address to count from.
+ * @return
+ *   1 if the bit is 1; else 0.
+ */
+static inline int32_t
+rte_io_test_bit(int32_t nr, uint64_t *addr)
+{
+	return (__atomic_load_n(addr, __ATOMIC_ACQUIRE) & (1UL << nr)) != 0;
+}
+
+/**
+ * Set a bit to 1 and return its old value.
+ *
+ * @param nr
+ *   The bit to set.
+ * @param addr
+ *   The address to count from.
+ * @return
+ *   The old value of the bit.
+ */
+static inline int32_t
+rte_io_test_and_set_bit(int32_t nr, uint64_t *addr)
+{
+	unsigned long mask = (1UL << nr);
+
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/**
+ * Set a bit to 0 and return its old value.
+ *
+ * @param nr
+ *   The bit to clear.
+ * @param addr
+ *   The address to count from.
+ * @return
+ *   The old value of the bit.
+ */
+static inline int32_t
+rte_io_test_and_clear_bit(int32_t nr, uint64_t *addr)
+{
+	unsigned long mask = (1UL << nr);
+
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+#endif /* _RTE_IO_BITOPS_H_ */
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index 386577c..0a65d04 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -52,6 +52,7 @@ common_headers = files(
 	'include/rte_alarm.h',
 	'include/rte_branch_prediction.h',
 	'include/rte_bus.h',
+	'include/rte_io_bitops.h',
 	'include/rte_bitmap.h',
 	'include/rte_class.h',
 	'include/rte_common.h',
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v2 2/6] test/iobitops: add io bit operation test case
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (7 preceding siblings ...)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-10-23  2:54 ` Joyce Kong
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
                   ` (59 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-10-23  2:54 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

Add test cases for set bit, clear bit, test and set bit,
test and clear bit operations.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 app/test/Makefile         |  1 +
 app/test/test_io_bitops.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+)
 create mode 100644 app/test/test_io_bitops.c

diff --git a/app/test/Makefile b/app/test/Makefile
index df7f77f..3e47c94 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -70,6 +70,7 @@ SRCS-y += test_ticketlock.c
 SRCS-y += test_memory.c
 SRCS-y += test_memzone.c
 SRCS-y += test_bitmap.c
+SRCS-y += test_io_bitops.c
 SRCS-y += test_reciprocal_division.c
 SRCS-y += test_reciprocal_division_perf.c
 SRCS-y += test_fbarray.c
diff --git a/app/test/test_io_bitops.c b/app/test/test_io_bitops.c
new file mode 100644
index 0000000..c61bec7
--- /dev/null
+++ b/app/test/test_io_bitops.c
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#include <rte_io_bitops.h>
+#include <rte_malloc.h>
+
+#include "test.h"
+
+#define MAX_BITS 32
+
+static int
+test_io_bitops_set(unsigned long *addr)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS; i++)
+		rte_io_set_bit(i, addr);
+
+	for (i = 0; i < MAX_BITS; i++)
+		if (!rte_io_get_bit(i, addr)) {
+			printf("Failed to set bit.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_io_bitops_clear(unsigned long *addr)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS; i++)
+		rte_io_clear_bit(i, addr);
+
+	for (i = 0; i < MAX_BITS; i++)
+		if (rte_io_get_bit(i, addr)) {
+			printf("Failed to clear bit.\n");
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_io_bitops_test_set_clear(unsigned long *addr)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS; i++)
+		rte_io_test_and_set_bit(i, addr);
+
+	for (i = 0; i < MAX_BITS; i++)
+		if (!rte_io_test_and_clear_bit(i, addr)) {
+			printf("Failed to set and test bit.\n");
+			return TEST_FAILED;
+	}
+
+	for (i = 0; i < MAX_BITS; i++)
+		if (rte_io_get_bit(i, addr)) {
+			printf("Failed to test and clear bit.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_io_bitops(void)
+{
+	unsigned long *addr = rte_zmalloc(NULL, MAX_BITS, RTE_CACHE_LINE_SIZE);
+
+	if (test_io_bitops_set(addr) < 0)
+		return TEST_FAILED;
+
+	if (test_io_bitops_clear(addr) < 0)
+		return TEST_FAILED;
+
+	if (test_io_bitops_test_set_clear(addr) < 0)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(io_bitops_autotest, test_io_bitops);
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v2 3/6] net/axgbe: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (8 preceding siblings ...)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 2/6] test/iobitops: add io bit operation test case Joyce Kong
@ 2019-10-23  2:54 ` Joyce Kong
  2019-10-23  3:16   ` Honnappa Nagarahalli
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 4/6] net/bnx2x: " Joyce Kong
                   ` (58 subsequent siblings)
  68 siblings, 1 reply; 139+ messages in thread
From: Joyce Kong @ 2019-10-23  2:54 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/axgbe/axgbe_common.h | 29 +----------------------------
 drivers/net/axgbe/axgbe_ethdev.c | 14 +++++++-------
 drivers/net/axgbe/axgbe_mdio.c   | 14 +++++++-------
 3 files changed, 15 insertions(+), 42 deletions(-)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index 34f60f1..e44ec7d 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -22,6 +22,7 @@
 #include <pthread.h>
 
 #include <rte_byteorder.h>
+#include <rte_io_bitops.h>
 #include <rte_memory.h>
 #include <rte_malloc.h>
 #include <rte_hexdump.h>
@@ -1674,34 +1675,6 @@ do {									\
 #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)
 #define time_before_eq(a, b)	time_after_eq(b, a)
 
-/*---bitmap support apis---*/
-static inline int axgbe_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
-static inline void axgbe_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void axgbe_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int axgbe_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 static inline unsigned long msecs_to_timer_cycles(unsigned int m)
 {
 	return rte_get_timer_hz() * (m / 1000);
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index d1f160e..8c8e5ff 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -201,8 +201,8 @@ axgbe_dev_start(struct rte_eth_dev *dev)
 	axgbe_dev_enable_tx(dev);
 	axgbe_dev_enable_rx(dev);
 
-	axgbe_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
-	axgbe_clear_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_io_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_io_clear_bit(AXGBE_DOWN, &pdata->dev_state);
 	return 0;
 }
 
@@ -216,17 +216,17 @@ axgbe_dev_stop(struct rte_eth_dev *dev)
 
 	rte_intr_disable(&pdata->pci_dev->intr_handle);
 
-	if (axgbe_test_bit(AXGBE_STOPPED, &pdata->dev_state))
+	if (rte_io_test_bit(AXGBE_STOPPED, &pdata->dev_state))
 		return;
 
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_io_set_bit(AXGBE_STOPPED, &pdata->dev_state);
 	axgbe_dev_disable_tx(dev);
 	axgbe_dev_disable_rx(dev);
 
 	pdata->phy_if.phy_stop(pdata);
 	pdata->hw_if.exit(pdata);
 	memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_io_set_bit(AXGBE_DOWN, &pdata->dev_state);
 }
 
 /* Clear all resources like TX/RX queues. */
@@ -598,8 +598,8 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
 
 	pdata = eth_dev->data->dev_private;
 	/* initial state */
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_io_set_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_io_set_bit(AXGBE_STOPPED, &pdata->dev_state);
 	pdata->eth_dev = eth_dev;
 
 	pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
diff --git a/drivers/net/axgbe/axgbe_mdio.c b/drivers/net/axgbe/axgbe_mdio.c
index 2721e5c..4164564 100644
--- a/drivers/net/axgbe/axgbe_mdio.c
+++ b/drivers/net/axgbe/axgbe_mdio.c
@@ -743,7 +743,7 @@ static int __axgbe_phy_config_aneg(struct axgbe_port *pdata)
 {
 	int ret;
 
-	axgbe_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+	rte_io_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
 	pdata->link_check = rte_get_timer_cycles();
 
 	ret = pdata->phy_if.phy_impl.an_config(pdata);
@@ -807,9 +807,9 @@ static int axgbe_phy_config_aneg(struct axgbe_port *pdata)
 
 	ret = __axgbe_phy_config_aneg(pdata);
 	if (ret)
-		axgbe_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_io_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
 	else
-		axgbe_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_io_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
 
 	pthread_mutex_unlock(&pdata->an_mutex);
 
@@ -880,7 +880,7 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 	unsigned int link_aneg;
 	int an_restart;
 
-	if (axgbe_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
+	if (rte_io_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
 		pdata->phy.link = 0;
 		goto adjust_link;
 	}
@@ -900,10 +900,10 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 			return;
 		}
 		axgbe_phy_status_result(pdata);
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
-			axgbe_clear_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+		if (rte_io_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
+			rte_io_clear_bit(AXGBE_LINK_INIT, &pdata->dev_state);
 	} else {
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
+		if (rte_io_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
 			axgbe_check_link_timeout(pdata);
 
 			if (link_aneg)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v2 4/6] net/bnx2x: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (9 preceding siblings ...)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
@ 2019-10-23  2:54 ` " Joyce Kong
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 5/6] net/hinic: " Joyce Kong
                   ` (57 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-10-23  2:54 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/bnx2x/bnx2x.c    | 209 +++++++++++++++++++------------------------
 drivers/net/bnx2x/bnx2x.h    |   4 -
 drivers/net/bnx2x/ecore_sp.h |   9 +-
 3 files changed, 98 insertions(+), 124 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c
index e1dfe60..623b2ed 100644
--- a/drivers/net/bnx2x/bnx2x.c
+++ b/drivers/net/bnx2x/bnx2x.c
@@ -26,6 +26,7 @@
 #include <fcntl.h>
 #include <zlib.h>
 #include <rte_string_fns.h>
+#include <rte_io_bitops.h>
 
 #define BNX2X_PMD_VER_PREFIX "BNX2X PMD"
 #define BNX2X_PMD_VERSION_MAJOR 1
@@ -129,32 +130,6 @@ static void bnx2x_ack_sb(struct bnx2x_softc *sc, uint8_t igu_sb_id,
 			 uint8_t storm, uint16_t index, uint8_t op,
 			 uint8_t update);
 
-int bnx2x_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	mb();
-	return res;
-}
-
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-void bnx2x_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-int bnx2x_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 int bnx2x_cmpxchg(volatile int *addr, int old, int new)
 {
 	return __sync_val_compare_and_swap(addr, old, new);
@@ -1427,11 +1402,11 @@ bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj,
 
 	/* wait for completion of requested */
 	if (wait_for_comp) {
-		bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+		rte_io_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
 	}
 
 	/* Set the mac type of addresses we want to clear */
-	bnx2x_set_bit(mac_type, &vlan_mac_flags);
+	rte_io_set_bit(mac_type, &vlan_mac_flags);
 
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc < 0)
@@ -1458,26 +1433,26 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		break;
 
 	case BNX2X_RX_MODE_NORMAL:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
 	case BNX2X_RX_MODE_ALLMULTI:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
@@ -1488,19 +1463,20 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		 * should receive matched and unmatched (in resolution of port)
 		 * unicast packets.
 		 */
-		bnx2x_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		if (IS_MF_SI(sc)) {
-			bnx2x_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags);
+			rte_io_set_bit(ECORE_ACCEPT_ALL_UNICAST,
+					tx_accept_flags);
 		} else {
-			bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+			rte_io_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
 		}
 
 		break;
@@ -1512,8 +1488,8 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 
 	/* Set ACCEPT_ANY_VLAN as we do not enable filtering by VLAN */
 	if (rx_mode != BNX2X_RX_MODE_NONE) {
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
 	}
 
 	return 0;
@@ -1542,7 +1518,7 @@ bnx2x_set_q_rx_mode(struct bnx2x_softc *sc, uint8_t cl_id,
 	ramrod_param.rdata = BNX2X_SP(sc, rx_mode_rdata);
 	ramrod_param.rdata_mapping =
 	    (rte_iova_t)BNX2X_SP_MAPPING(sc, rx_mode_rdata),
-	    bnx2x_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	    rte_io_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	ramrod_param.ramrod_flags = ramrod_flags;
 	ramrod_param.rx_mode_flags = rx_mode_flags;
@@ -1571,9 +1547,9 @@ int bnx2x_set_storm_rx_mode(struct bnx2x_softc *sc)
 		return rc;
 	}
 
-	bnx2x_set_bit(RAMROD_RX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_TX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_io_set_bit(RAMROD_RX, &ramrod_flags);
+	rte_io_set_bit(RAMROD_TX, &ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	return bnx2x_set_q_rx_mode(sc, sc->fp[0].cl_id, rx_mode_flags,
 				 rx_accept_flags, tx_accept_flags,
@@ -1698,7 +1674,7 @@ static int bnx2x_func_wait_started(struct bnx2x_softc *sc)
 			    "Forcing STARTED-->TX_STOPPED-->STARTED");
 
 		func_params.f_obj = &sc->func_obj;
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_io_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 
 		/* STARTED-->TX_STOPPED */
 		func_params.cmd = ECORE_F_CMD_TX_STOP;
@@ -1722,7 +1698,7 @@ static int bnx2x_stop_queue(struct bnx2x_softc *sc, int index)
 
 	q_params.q_obj = &sc->sp_objs[fp->index].q_obj;
 	/* We want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* Stop the primary connection: */
 
@@ -1783,7 +1759,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	int rc;
 
 	/* prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_STOP;
 
@@ -1797,7 +1773,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	if (rc) {
 		PMD_DRV_LOG(NOTICE, sc, "FUNC_STOP ramrod failed. "
 			    "Running a dry transaction");
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_io_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 		return ecore_func_state_change(sc, &func_params);
 	}
 
@@ -1809,7 +1785,7 @@ static int bnx2x_reset_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	struct ecore_func_state_params func_params = { NULL };
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_RESET;
@@ -1866,11 +1842,10 @@ bnx2x_chip_cleanup(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_li
 	 * a race between the completion code and this code.
 	 */
 
-	if (bnx2x_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) {
-		bnx2x_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
-	} else {
+	if (rte_io_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state))
+		rte_io_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
+	else
 		bnx2x_set_storm_rx_mode(sc);
-	}
 
 	/* Clean up multicast configuration */
 	rparam.mcast_obj = &sc->mcast_obj;
@@ -1960,12 +1935,12 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Cleanup MACs' object first... */
 
 	/* Wait for completion of requested */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
 	/* Perform a dry cleanup */
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
+	rte_io_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
 
 	/* Clean ETH primary MAC */
-	bnx2x_set_bit(ECORE_ETH_MAC, &vlan_mac_flags);
+	rte_io_set_bit(ECORE_ETH_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, &sc->sp_objs->mac_obj, &vlan_mac_flags,
 				 &ramrod_flags);
 	if (rc != 0) {
@@ -1974,7 +1949,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 
 	/* Cleanup UC list */
 	vlan_mac_flags = 0;
-	bnx2x_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
+	rte_io_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc != 0) {
 		PMD_DRV_LOG(NOTICE, sc,
@@ -1984,7 +1959,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Now clean mcast object... */
 
 	rparam.mcast_obj = &sc->mcast_obj;
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
+	rte_io_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
 
 	/* Add a DEL command... */
 	rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL);
@@ -4288,7 +4263,7 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 	struct ecore_vlan_mac_obj *vlan_mac_obj;
 
 	/* always push next commands out, don't wait here */
-	bnx2x_set_bit(RAMROD_CONT, &ramrod_flags);
+	rte_io_set_bit(RAMROD_CONT, &ramrod_flags);
 
 	switch (le32toh(elem->message.data.eth_event.echo) >> BNX2X_SWCID_SHIFT) {
 	case ECORE_FILTER_MAC_PENDING:
@@ -4319,12 +4294,12 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 
 static void bnx2x_handle_rx_mode_eqe(struct bnx2x_softc *sc)
 {
-	bnx2x_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	rte_io_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	/* send rx_mode command again if was requested */
-	if (bnx2x_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) {
+	if (rte_io_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED,
+						&sc->sp_state))
 		bnx2x_set_storm_rx_mode(sc);
-	}
 }
 
 static void bnx2x_update_eq_prod(struct bnx2x_softc *sc, uint16_t prod)
@@ -4693,7 +4668,7 @@ static int bnx2x_init_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	PMD_INIT_FUNC_TRACE(sc);
 
 	/* prepare the parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_INIT;
@@ -4988,8 +4963,8 @@ static void bnx2x_init_eth_fp(struct bnx2x_softc *sc, int idx)
 	bnx2x_update_fp_sb_idx(fp);
 
 	/* Configure Queue State object */
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_RX, &q_type);
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_TX, &q_type);
+	rte_io_set_bit(ECORE_Q_TYPE_HAS_RX, &q_type);
+	rte_io_set_bit(ECORE_Q_TYPE_HAS_TX, &q_type);
 
 	ecore_init_queue_obj(sc,
 			     &sc->sp_objs[idx].q_obj,
@@ -5803,7 +5778,7 @@ static int bnx2x_func_start(struct bnx2x_softc *sc)
 	    &func_params.params.start;
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_START;
@@ -6379,11 +6354,11 @@ bnx2x_pf_q_prep_init(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
 	uint8_t cos;
 	int cxt_index, cxt_offset;
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags);
+	rte_io_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags);
+	rte_io_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags);
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
+	rte_io_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
+	rte_io_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
 
 	/* HC rate */
 	init_params->rx.hc_rate =
@@ -6417,7 +6392,7 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	unsigned long flags = 0;
 
 	/* PF driver will always initialize the Queue to an ACTIVE state */
-	bnx2x_set_bit(ECORE_Q_FLG_ACTIVE, &flags);
+	rte_io_set_bit(ECORE_Q_FLG_ACTIVE, &flags);
 
 	/*
 	 * tx only connections collect statistics (on the same index as the
@@ -6425,9 +6400,9 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * connection is initialized.
 	 */
 
-	bnx2x_set_bit(ECORE_Q_FLG_STATS, &flags);
+	rte_io_set_bit(ECORE_Q_FLG_STATS, &flags);
 	if (zero_stats) {
-		bnx2x_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags);
+		rte_io_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags);
 	}
 
 	/*
@@ -6435,10 +6410,10 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * CoS-ness doesn't survive the loopback
 	 */
 	if (sc->flags & BNX2X_TX_SWITCHING) {
-		bnx2x_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags);
+		rte_io_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
+	rte_io_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
 
 	return flags;
 }
@@ -6448,15 +6423,15 @@ static unsigned long bnx2x_get_q_flags(struct bnx2x_softc *sc, uint8_t leading)
 	unsigned long flags = 0;
 
 	if (IS_MF_SD(sc)) {
-		bnx2x_set_bit(ECORE_Q_FLG_OV, &flags);
+		rte_io_set_bit(ECORE_Q_FLG_OV, &flags);
 	}
 
 	if (leading) {
-		bnx2x_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags);
-		bnx2x_set_bit(ECORE_Q_FLG_MCAST, &flags);
+		rte_io_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags);
+		rte_io_set_bit(ECORE_Q_FLG_MCAST, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_VLAN, &flags);
+	rte_io_set_bit(ECORE_Q_FLG_VLAN, &flags);
 
 	/* merge with common flags */
 	return flags | bnx2x_get_common_flags(sc, TRUE);
@@ -6577,7 +6552,7 @@ bnx2x_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, uint8_t lea
 	q_params.q_obj = &BNX2X_SP_OBJ(sc, fp).q_obj;
 
 	/* we want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* prepare the INIT parameters */
 	bnx2x_pf_q_prep_init(sc, fp, &q_params.params.init);
@@ -6645,20 +6620,20 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 
 	params.rss_obj = rss_obj;
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
 
-	bnx2x_set_bit(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
+	rte_io_set_bit(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
 
 	/* RSS configuration */
-	bnx2x_set_bit(ECORE_RSS_IPV4, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV4_TCP, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6_TCP, &params.rss_flags);
+	rte_io_set_bit(ECORE_RSS_IPV4, &params.rss_flags);
+	rte_io_set_bit(ECORE_RSS_IPV4_TCP, &params.rss_flags);
+	rte_io_set_bit(ECORE_RSS_IPV6, &params.rss_flags);
+	rte_io_set_bit(ECORE_RSS_IPV6_TCP, &params.rss_flags);
 	if (rss_obj->udp_rss_v4) {
-		bnx2x_set_bit(ECORE_RSS_IPV4_UDP, &params.rss_flags);
+		rte_io_set_bit(ECORE_RSS_IPV4_UDP, &params.rss_flags);
 	}
 	if (rss_obj->udp_rss_v6) {
-		bnx2x_set_bit(ECORE_RSS_IPV6_UDP, &params.rss_flags);
+		rte_io_set_bit(ECORE_RSS_IPV6_UDP, &params.rss_flags);
 	}
 
 	/* Hash bits */
@@ -6673,7 +6648,7 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 			params.rss_key[i] = (uint32_t) rte_rand();
 		}
 
-		bnx2x_set_bit(ECORE_RSS_SET_SRCH, &params.rss_flags);
+		rte_io_set_bit(ECORE_RSS_SET_SRCH, &params.rss_flags);
 	}
 
 	if (IS_PF(sc))
@@ -6730,11 +6705,11 @@ bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
 	ramrod_param.ramrod_flags = *ramrod_flags;
 
 	/* fill a user request section if needed */
-	if (!bnx2x_test_bit(RAMROD_CONT, ramrod_flags)) {
+	if (!rte_io_test_bit(RAMROD_CONT, ramrod_flags)) {
 		rte_memcpy(ramrod_param.user_req.u.mac.mac, mac,
 				 ETH_ALEN);
 
-		bnx2x_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags);
+		rte_io_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags);
 
 /* Set the command: ADD or DEL */
 		ramrod_param.user_req.cmd = (set) ? ECORE_VLAN_MAC_ADD :
@@ -6761,7 +6736,7 @@ static int bnx2x_set_eth_mac(struct bnx2x_softc *sc, uint8_t set)
 
 	PMD_DRV_LOG(DEBUG, sc, "Adding Ethernet MAC");
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	/* Eth MAC is set on RSS leading client (fp[0]) */
 	return bnx2x_set_mac_one(sc, sc->link_params.mac_addr,
@@ -6893,24 +6868,26 @@ bnx2x_fill_report_data(struct bnx2x_softc *sc, struct bnx2x_link_report_data *da
 
 	/* Link is down */
 	if (!sc->link_vars.link_up || (sc->flags & BNX2X_MF_FUNC_DIS)) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+		rte_io_set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
 			    &data->link_report_flags);
 	}
 
 	/* Full DUPLEX */
 	if (sc->link_vars.duplex == DUPLEX_FULL) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		rte_io_set_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
 			    &data->link_report_flags);
 	}
 
 	/* Rx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_RX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
+		rte_io_set_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+				&data->link_report_flags);
 	}
 
 	/* Tx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
+		rte_io_set_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				&data->link_report_flags);
 	}
 }
 
@@ -6929,9 +6906,9 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 
 	/* Don't report link down or exactly the same link status twice */
 	if (!memcmp(&cur_data, &sc->last_reported_link, sizeof(cur_data)) ||
-	    (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	    (rte_io_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &sc->last_reported_link.link_report_flags) &&
-	     bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	     rte_io_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &cur_data.link_report_flags))) {
 		return;
 	}
@@ -6946,14 +6923,14 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 	/* report new link params and remember the state for the next time */
 	rte_memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data));
 
-	if (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	if (rte_io_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
 			 &cur_data.link_report_flags)) {
 		ELINK_DEBUG_P0(sc, "NIC Link is Down");
 	} else {
 		__rte_unused const char *duplex;
 		__rte_unused const char *flow;
 
-		if (bnx2x_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		if (rte_io_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
 					   &cur_data.link_report_flags)) {
 			duplex = "full";
 				ELINK_DEBUG_P0(sc, "link set to full duplex");
@@ -6968,19 +6945,19 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
  * enabled.
  */
 		if (cur_data.link_report_flags) {
-			if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			if (rte_io_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
 					 &cur_data.link_report_flags) &&
-			    bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+			    rte_io_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
 					 &cur_data.link_report_flags)) {
 				flow = "ON - receive & transmit";
-			} else if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (rte_io_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
 						&cur_data.link_report_flags) &&
-				   !bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   !rte_io_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
 						 &cur_data.link_report_flags)) {
 				flow = "ON - receive";
-			} else if (!bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (!rte_io_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
 						 &cur_data.link_report_flags) &&
-				   bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   rte_io_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
 						&cur_data.link_report_flags)) {
 				flow = "ON - transmit";
 			} else {
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 43c6040..010699a 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -1809,10 +1809,6 @@ static const uint32_t dmae_reg_go_c[] = {
 #define PCI_PM_D0    1
 #define PCI_PM_D3hot 2
 
-int  bnx2x_test_bit(int nr, volatile unsigned long * addr);
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long * addr);
-void bnx2x_clear_bit(int nr, volatile unsigned long * addr);
-int  bnx2x_test_and_clear_bit(int nr, volatile unsigned long * addr);
 int  bnx2x_cmpxchg(volatile int *addr, int old, int new);
 
 int bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size,
diff --git a/drivers/net/bnx2x/ecore_sp.h b/drivers/net/bnx2x/ecore_sp.h
index cc1db37..5f1e74f 100644
--- a/drivers/net/bnx2x/ecore_sp.h
+++ b/drivers/net/bnx2x/ecore_sp.h
@@ -15,6 +15,7 @@
 #define ECORE_SP_H
 
 #include <rte_byteorder.h>
+#include <rte_io_bitops.h>
 
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 #ifndef __LITTLE_ENDIAN
@@ -73,10 +74,10 @@ typedef rte_spinlock_t ECORE_MUTEX_SPIN;
 #define ECORE_SET_BIT_NA(bit, var)         (*var |= (1 << bit))
 #define ECORE_CLEAR_BIT_NA(bit, var)       (*var &= ~(1 << bit))
 
-#define ECORE_TEST_BIT(bit, var)           bnx2x_test_bit(bit, var)
-#define ECORE_SET_BIT(bit, var)            bnx2x_set_bit(bit, var)
-#define ECORE_CLEAR_BIT(bit, var)          bnx2x_clear_bit(bit, var)
-#define ECORE_TEST_AND_CLEAR_BIT(bit, var) bnx2x_test_and_clear_bit(bit, var)
+#define ECORE_TEST_BIT(bit, var)           rte_io_test_bit(bit, var)
+#define ECORE_SET_BIT(bit, var)            rte_io_set_bit(bit, var)
+#define ECORE_CLEAR_BIT(bit, var)          rte_io_clear_bit(bit, var)
+#define ECORE_TEST_AND_CLEAR_BIT(bit, var) rte_io_test_and_clear_bit(bit, var)
 
 #define atomic_load_acq_int                (int)*
 #define atomic_store_rel_int(a, v)         (*a = v)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v2 5/6] net/hinic: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (10 preceding siblings ...)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 4/6] net/bnx2x: " Joyce Kong
@ 2019-10-23  2:54 ` " Joyce Kong
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 6/6] net/qede: " Joyce Kong
                   ` (56 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-10-23  2:54 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/hinic/base/hinic_compat.h | 35 +----------------------------------
 drivers/net/hinic/hinic_pmd_ethdev.c  | 16 ++++++++--------
 2 files changed, 9 insertions(+), 42 deletions(-)

diff --git a/drivers/net/hinic/base/hinic_compat.h b/drivers/net/hinic/base/hinic_compat.h
index f599947..d3957c8 100644
--- a/drivers/net/hinic/base/hinic_compat.h
+++ b/drivers/net/hinic/base/hinic_compat.h
@@ -11,6 +11,7 @@
 #include <pthread.h>
 #include <rte_common.h>
 #include <rte_byteorder.h>
+#include <rte_io_bitops.h>
 #include <rte_memzone.h>
 #include <rte_memcpy.h>
 #include <rte_malloc.h>
@@ -117,40 +118,6 @@ extern int hinic_logtype;
 
 #define HINIC_PAGE_SIZE_DPDK	6
 
-static inline int hinic_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
-static inline void hinic_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void hinic_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int hinic_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
-static inline int hinic_test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_or(addr, mask) & mask;
-}
-
 void *dma_zalloc_coherent(void *dev, size_t size, dma_addr_t *dma_handle,
 			  gfp_t flag);
 void *dma_zalloc_coherent_aligned(void *dev, size_t size,
diff --git a/drivers/net/hinic/hinic_pmd_ethdev.c b/drivers/net/hinic/hinic_pmd_ethdev.c
index c9a400e..c6dcfae 100644
--- a/drivers/net/hinic/hinic_pmd_ethdev.c
+++ b/drivers/net/hinic/hinic_pmd_ethdev.c
@@ -227,7 +227,7 @@ static void hinic_dev_interrupt_handler(void *param)
 	struct rte_eth_dev *dev = param;
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (!hinic_test_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
+	if (!rte_io_test_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device's interrupt is disabled, ignore interrupt event, dev_name: %s, port_id: %d",
 			    nic_dev->proc_dev_name, dev->data->port_id);
 		return;
@@ -907,7 +907,7 @@ static int hinic_dev_start(struct rte_eth_dev *dev)
 	if (dev->data->dev_conf.intr_conf.lsc != 0)
 		(void)hinic_link_update(dev, 0);
 
-	hinic_set_bit(HINIC_DEV_START, &nic_dev->dev_status);
+	rte_io_set_bit(HINIC_DEV_START, &nic_dev->dev_status);
 
 	return 0;
 
@@ -1030,7 +1030,7 @@ static void hinic_dev_stop(struct rte_eth_dev *dev)
 	name = dev->data->name;
 	port_id = dev->data->port_id;
 
-	if (!hinic_test_and_clear_bit(HINIC_DEV_START, &nic_dev->dev_status)) {
+	if (!rte_io_test_and_clear_bit(HINIC_DEV_START, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(INFO, "Device %s already stopped", name);
 		return;
 	}
@@ -1073,7 +1073,7 @@ static void hinic_disable_interrupt(struct rte_eth_dev *dev)
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	int ret, retries = 0;
 
-	hinic_clear_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_io_clear_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* disable msix interrupt in hardware */
 	hinic_set_msix_state(nic_dev->hwdev, 0, HINIC_MSIX_DISABLE);
@@ -2197,9 +2197,9 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 			    eth_dev->data->name);
 		goto enable_intr_fail;
 	}
-	hinic_set_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_io_set_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
-	hinic_set_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_io_set_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
 	PMD_DRV_LOG(INFO, "Initialize %s in primary successfully",
 		    eth_dev->data->name);
 
@@ -2236,7 +2236,7 @@ static void hinic_dev_close(struct rte_eth_dev *dev)
 {
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (hinic_test_and_set_bit(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
+	if (rte_io_test_and_set_bit(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device %s already closed",
 			    dev->data->name);
 		return;
@@ -2316,7 +2316,7 @@ static int hinic_dev_uninit(struct rte_eth_dev *dev)
 	struct hinic_nic_dev *nic_dev;
 
 	nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
-	hinic_clear_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_io_clear_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v2 6/6] net/qede: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (11 preceding siblings ...)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 5/6] net/hinic: " Joyce Kong
@ 2019-10-23  2:54 ` " Joyce Kong
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (55 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-10-23  2:54 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/qede/base/bcm_osal.c | 20 --------------------
 drivers/net/qede/base/bcm_osal.h | 10 ++++------
 2 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 9915df4..665833c 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -45,26 +45,6 @@ u32 qede_osal_log2(u32 val)
 	return log;
 }
 
-inline void qede_set_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-inline void qede_clr_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-inline bool qede_test_bit(u32 nr, unsigned long *addr)
-{
-	bool res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
 static inline u32 qede_ffb(unsigned long word)
 {
 	unsigned long first_bit;
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 51edc41..d6107c3 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -8,6 +8,7 @@
 #define __BCM_OSAL_H
 
 #include <rte_byteorder.h>
+#include <rte_io_bitops.h>
 #include <rte_spinlock.h>
 #include <rte_malloc.h>
 #include <rte_atomic.h>
@@ -311,17 +312,14 @@ typedef struct osal_list_t {
 #define OSAL_BITS_PER_UL_MASK		(OSAL_BITS_PER_UL - 1)
 
 /* Bitops */
-void qede_set_bit(u32, unsigned long *);
 #define OSAL_SET_BIT(bit, bitmap) \
-	qede_set_bit(bit, bitmap)
+	rte_io_set_bit(bit, bitmap)
 
-void qede_clr_bit(u32, unsigned long *);
 #define OSAL_CLEAR_BIT(bit, bitmap) \
-	qede_clr_bit(bit, bitmap)
+	rte_io_clear_bit(bit, bitmap)
 
-bool qede_test_bit(u32, unsigned long *);
 #define OSAL_TEST_BIT(bit, bitmap) \
-	qede_test_bit(bit, bitmap)
+	rte_io_test_bit(bit, bitmap)
 
 u32 qede_find_first_bit(unsigned long *, u32);
 #define OSAL_FIND_FIRST_BIT(bitmap, length) \
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs
  2019-10-18  8:58     ` Joyce Kong (Arm Technology China)
@ 2019-10-23  3:07       ` Joyce Kong (Arm Technology China)
  2019-10-23  7:45         ` Morten Brørup
  0 siblings, 1 reply; 139+ messages in thread
From: Joyce Kong (Arm Technology China) @ 2019-10-23  3:07 UTC (permalink / raw)
  To: Morten Brørup, dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, Honnappa Nagarahalli,
	Gavin Hu (Arm Technology China),
	Stephen Hemminger

> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> > > Sent: Tuesday, October 15, 2019 9:50 AM
> > >
> > > There are a lot functions of bit operations scattered and duplicated
> > > in PMDs, consolidating them into a common API family is necessary.
> > > Furthermore, the bit operation is mostly applied to the IO devices,
> > > so use __ATOMIC_ACQ_REL to ensure the ordering.
> >
> > Good initiative.
> >
> > >
> > > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > > ---
> > >  lib/librte_eal/common/Makefile             |  1 +
> > >  lib/librte_eal/common/include/rte_bitops.h | 56
> > > ++++++++++++++++++++++++++++++
> > >  lib/librte_eal/common/meson.build          |  1 +
> > >  3 files changed, 58 insertions(+)
> > >  create mode 100644 lib/librte_eal/common/include/rte_bitops.h
> > >
> > > diff --git a/lib/librte_eal/common/Makefile
> > > b/lib/librte_eal/common/Makefile index a00d4fc..8586ca8 100644
> > > --- a/lib/librte_eal/common/Makefile
> > > +++ b/lib/librte_eal/common/Makefile
> > > @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
> > > INC
> > > += rte_service.h rte_service_component.h  INC += rte_bitmap.h
> > > rte_vfio.h rte_hypervisor.h rte_test.h  INC += rte_reciprocal.h
> > > rte_fbarray.h rte_uuid.h
> > > +INC += rte_bitops.h
> > >
> > >  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> > > rte_prefetch.h  GENERIC_INC += rte_memcpy.h rte_cpuflags.h diff
> > > --git a/lib/librte_eal/common/include/rte_bitops.h
> > > b/lib/librte_eal/common/include/rte_bitops.h
> > > new file mode 100644
> > > index 0000000..4d7c5a3
> > > --- /dev/null
> > > +++ b/lib/librte_eal/common/include/rte_bitops.h
> > > @@ -0,0 +1,56 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > + * Copyright(c) 2019 Arm Corporation  */
> > > +
> > > +#ifndef _RTE_BITOPS_H_
> > > +#define _RTE_BITOPS_H_
> > > +
> > > +/**
> > > + * @file
> > > + * Bit Operations
> > > + *
> > > + * This file defines a generic API for bit operations.
> > > + */
> > > +
> > > +#include <stdint.h>
> > > +#include <rte_atomic.h>
> > > +
> > > +static inline void
> > > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > > +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> > > +
> > > +static inline void
> > > +rte_clear_bit(int nr, unsigned long *addr) {
> > > +	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL); }
> > > +
> > > +static inline int
> > > +rte_test_bit(int nr, unsigned long *addr) {
> > > +	int res;
> > > +	rte_mb();
> > > +	res = ((*addr) & (1UL << nr)) != 0;
> > > +	rte_mb();
> > > +
> > > +	return res;
> > > +}
> >
> > Why does rte_test_bit() not use any of the __atomic_xx functions instead?
> > E.g.:
> >
> > static inline int
> > rte_test_bit(int nr, unsigned long *addr) {
> > 	return __atomic_load_n(addr, __ATOMIC_ACQUIRE); }
> >
> You're right, it's better to use __atomic_xx here to keep it consistent with
> the other APIs.
> 
> > > +
> > > +static inline int
> > > +rte_test_and_set_bit(int nr, unsigned long *addr) {
> > > +	unsigned long mask = (1UL << nr);
> > > +
> > > +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> > mask; }
> > > +
> > > +static inline int
> > > +rte_test_and_clear_bit(int nr, unsigned long *addr) {
> > > +	unsigned long mask = (1UL << nr);
> > > +
> > > +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) &
> > mask; }
> > > +#endif /* _RTE_BITOPS_H_ */
> > > diff --git a/lib/librte_eal/common/meson.build
> > > b/lib/librte_eal/common/meson.build
> > > index 386577c..a277cdf 100644
> > > --- a/lib/librte_eal/common/meson.build
> > > +++ b/lib/librte_eal/common/meson.build
> > > @@ -52,6 +52,7 @@ common_headers = files(
> > >  	'include/rte_alarm.h',
> > >  	'include/rte_branch_prediction.h',
> > >  	'include/rte_bus.h',
> > > +	'include/rte_bitops.h',
> > >  	'include/rte_bitmap.h',
> > >  	'include/rte_class.h',
> > >  	'include/rte_common.h',
> > > --
> > > 2.7.4
> > >
> >
> > These functions use unsigned long as the type of their value, like
> > they do in the PMDs.
> >
> > However, a generic bit operations library should preferably work with
> > multiple types, like the __atomic_xx functions. Or use an well defined
> > uint_NN_t type. Or have individually named functions for each type size,
> e.g.
> > rte_set_bit_32() and rte_set_bit_64().
> >
> Good suggestion! And will do this in next version.

The PMDs that use the common API currently all perform 32-bit operations, so
the definition is changed to the uint32_t type instead of providing individually
named functions for each type size.


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-10-23  3:09   ` Honnappa Nagarahalli
  2019-10-23  4:56   ` Jerin Jacob
  2019-10-23  7:46   ` Morten Brørup
  2 siblings, 0 replies; 139+ messages in thread
From: Honnappa Nagarahalli @ 2019-10-23  3:09 UTC (permalink / raw)
  To: Joyce Kong (Arm Technology China), dev
  Cc: nd, thomas, jerinj, stephen, mb, Gavin Hu (Arm Technology China),
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, Honnappa Nagarahalli, nd

Hi Joyce,
	Thanks for the patch, few comments.

<snip>

> 
> There are a lot of bit operation functions scattered and duplicated in PMDs;
> consolidating them into a common API family is necessary. Furthermore, the
> bit operations are mostly applied to IO devices, so use __ATOMIC_ACQ_REL
> to ensure the ordering.
The APIs are not taking memory ordering as a parameter. This presents the same problem as the rte_atomic_xxx APIs. IMO, the APIs should take memory ordering as a parameter.

> 
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> ---
>  lib/librte_eal/common/Makefile                |   1 +
>  lib/librte_eal/common/include/rte_io_bitops.h | 112
> ++++++++++++++++++++++++++
>  lib/librte_eal/common/meson.build             |   1 +
>  3 files changed, 114 insertions(+)
>  create mode 100644 lib/librte_eal/common/include/rte_io_bitops.h
> 
> diff --git a/lib/librte_eal/common/Makefile
> b/lib/librte_eal/common/Makefile index a00d4fc..3831313 100644
> --- a/lib/librte_eal/common/Makefile
> +++ b/lib/librte_eal/common/Makefile
> @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h  INC +=
> rte_service.h rte_service_component.h  INC += rte_bitmap.h rte_vfio.h
> rte_hypervisor.h rte_test.h  INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
> +INC += rte_io_bitops.h
> 
>  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
> GENERIC_INC += rte_memcpy.h rte_cpuflags.h diff --git
> a/lib/librte_eal/common/include/rte_io_bitops.h
> b/lib/librte_eal/common/include/rte_io_bitops.h
> new file mode 100644
> index 0000000..5f778b8
> --- /dev/null
> +++ b/lib/librte_eal/common/include/rte_io_bitops.h
> @@ -0,0 +1,112 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Arm Limited
> + */
> +
> +#ifndef _RTE_IO_BITOPS_H_
> +#define _RTE_IO_BITOPS_H_
> +
> +/**
> + * @file
> + * Bit Operations
> + *
> + * This file defines a generic API for bit operations.
> + */
> +
> +#include <rte_lcore.h>
> +
> +/**
> + * Get a bit.
> + *
> + * @param nr
> + *   The bit to get.
> + * @param addr
> + *   The address to count from.
> + * @return
> + *   The value of the bit.
> + */
> +static inline int32_t
> +rte_io_get_bit(uint32_t nr, uint64_t *addr) {
> +	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & (1UL << nr); }
Some use cases might need 'relaxed' memory order for this API. So, the user of this API should be able to provide the memory order.

> +
> +/**
> + * Set a bit to 1.
> + *
> + * @param nr
> + *   The bit to set.
> + * @param addr
> + *   The address to count from.
> + */
> +static inline void
> +rte_io_set_bit(uint32_t nr, uint64_t *addr) {
> +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
Some use cases might need 'release' or 'relaxed' memory order.
Similar requirements apply to other APIs too.

> +
> +/**
> + * Set a bit to 0.
> + *
> + * @param nr
> + *   The bit to set.
> + * @param addr
> + *   The address to count from.
> + */
> +static inline void
> +rte_io_clear_bit(int32_t nr, uint64_t *addr) {
> +	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL); }
> +
> +/**
> + * Test if a bit is 1.
> + *
> + * @param nr
> + *   The bit to test.
> + * @param addr
> + *   The address to count from.
> + * @return
> + *   1 if the bit is 1; else 0.
> + */
> +static inline int32_t
> +rte_io_test_bit(int32_t nr, uint64_t *addr) {
> +	return (__atomic_load_n(addr, __ATOMIC_ACQUIRE) & (1UL <<
> nr)) != 0; }
> +
> +/**
> + * Set a bit to 1 and return its old value.
> + *
> + * @param nr
> + *   The bit to set.
> + * @param addr
> + *   The address to count from.
> + * @return
> + *   The old value of the bit.
> + */
> +static inline int32_t
> +rte_io_test_and_set_bit(int32_t nr, uint64_t *addr) {
> +	unsigned long mask = (1UL << nr);
> +
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> mask; }
> +
> +/**
> + * Set a bit to 0 and return its old value.
> + *
> + * @param nr
> + *   The bit to set.
> + * @param addr
> + *   The address to count from.
> + * @return
> + *   The old value of the bit.
> + */
> +static inline int32_t
> +rte_io_test_and_clear_bit(int32_t nr, uint64_t *addr) {
> +	unsigned long mask = (1UL << nr);
> +
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) &
> mask; }
> +#endif /* _RTE_IO_BITOPS_H_ */
> diff --git a/lib/librte_eal/common/meson.build
> b/lib/librte_eal/common/meson.build
> index 386577c..0a65d04 100644
> --- a/lib/librte_eal/common/meson.build
> +++ b/lib/librte_eal/common/meson.build
> @@ -52,6 +52,7 @@ common_headers = files(
>  	'include/rte_alarm.h',
>  	'include/rte_branch_prediction.h',
>  	'include/rte_bus.h',
> +	'include/rte_io_bitops.h',
>  	'include/rte_bitmap.h',
>  	'include/rte_class.h',
>  	'include/rte_common.h',
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs
  2019-10-18  9:02     ` Joyce Kong (Arm Technology China)
@ 2019-10-23  3:12       ` Joyce Kong (Arm Technology China)
  0 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong (Arm Technology China) @ 2019-10-23  3:12 UTC (permalink / raw)
  To: Jerin Jacob, dpdk-dev
  Cc: nd, thomas, jerinj, ravi1.kumar, Ziyang Xuan, Xiaoyun Wang,
	Guoyang Zhou, Rasesh Mody, Shahed Shaikh, Honnappa Nagarahalli,
	Gavin Hu (Arm Technology China)

> > On Tue, Oct 15, 2019 at 1:20 PM Joyce Kong <joyce.kong@arm.com> wrote:
> > >
> > > There are a lot functions of bit operations scattered and duplicated
> > > in PMDs, consolidating them into a common API family is necessary.
> > > Furthermore, the bit operation is mostly applied to the IO devices,
> > > so use __ATOMIC_ACQ_REL to ensure the ordering.
> > >
> > > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > > ---
> > >  lib/librte_eal/common/Makefile             |  1 +
> > >  lib/librte_eal/common/include/rte_bitops.h | 56
> > ++++++++++++++++++++++++++++++
> > >  lib/librte_eal/common/meson.build          |  1 +
> > > +
> > > +static inline void
> > > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > > +       __atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> >
> > If it is specific for IO the IMO, it makes sense call the API to
> > rte_io_set_bit() like rte_io_rmb
> > and change the header file to rte_io_bitops.h.
> >
> > The barries are only needed for IO operations. Explicitly is not
> > conveying it in API name would call for using it for normal cases.
> >
> > Other option could be to introduce, generic and IO specific bit
> > operations operations separately.
> 
> Would do some related changes in next version.

As bit operations are mostly applied to IO devices, I have renamed the header
file to rte_io_bitops.h so that it introduces IO-specific bit operations for
now. This change is included in v2.


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v2 3/6] net/axgbe: use common rte bit operation APIs instead
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
@ 2019-10-23  3:16   ` Honnappa Nagarahalli
  0 siblings, 0 replies; 139+ messages in thread
From: Honnappa Nagarahalli @ 2019-10-23  3:16 UTC (permalink / raw)
  To: Joyce Kong (Arm Technology China), dev
  Cc: nd, thomas, jerinj, stephen, mb, Gavin Hu (Arm Technology China),
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, Honnappa Nagarahalli, nd

<snip>

> 
> Remove its own bit operation APIs and use the common one, this can reduce
> the code duplication largely.
> 
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> ---
>  drivers/net/axgbe/axgbe_common.h | 29 +----------------------------
> drivers/net/axgbe/axgbe_ethdev.c | 14 +++++++-------
>  drivers/net/axgbe/axgbe_mdio.c   | 14 +++++++-------
>  3 files changed, 15 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/net/axgbe/axgbe_common.h
> b/drivers/net/axgbe/axgbe_common.h
> index 34f60f1..e44ec7d 100644
> --- a/drivers/net/axgbe/axgbe_common.h
> +++ b/drivers/net/axgbe/axgbe_common.h
> @@ -22,6 +22,7 @@
>  #include <pthread.h>
> 
>  #include <rte_byteorder.h>
> +#include <rte_io_bitops.h>
>  #include <rte_memory.h>
>  #include <rte_malloc.h>
>  #include <rte_hexdump.h>
> @@ -1674,34 +1675,6 @@ do {
> 				\
>  #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)
>  #define time_before_eq(a, b)	time_after_eq(b, a)
> 
> -/*---bitmap support apis---*/
> -static inline int axgbe_test_bit(int nr, volatile unsigned long *addr)
> -{
> -	int res;
> -
> -	rte_mb();
> -	res = ((*addr) & (1UL << nr)) != 0;
> -	rte_mb();
> -	return res;
> -}
This function uses rte_mb() before and after the load, whereas the new API uses just 'acquire'. Please ensure 'acquire' is enough.

> -
> -static inline void axgbe_set_bit(unsigned int nr, volatile unsigned long *addr)
> -{
> -	__sync_fetch_and_or(addr, (1UL << nr));
> -}
> -
> -static inline void axgbe_clear_bit(int nr, volatile unsigned long *addr)
> -{
> -	__sync_fetch_and_and(addr, ~(1UL << nr));
> -}
> -
> -static inline int axgbe_test_and_clear_bit(int nr, volatile unsigned long *addr)
> -{
> -	unsigned long mask = (1UL << nr);
> -
> -	return __sync_fetch_and_and(addr, ~mask) & mask;
> -}
> -
>  static inline unsigned long msecs_to_timer_cycles(unsigned int m)  {
>  	return rte_get_timer_hz() * (m / 1000); diff --git
> a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
> index d1f160e..8c8e5ff 100644
> --- a/drivers/net/axgbe/axgbe_ethdev.c
> +++ b/drivers/net/axgbe/axgbe_ethdev.c
> @@ -201,8 +201,8 @@ axgbe_dev_start(struct rte_eth_dev *dev)
>  	axgbe_dev_enable_tx(dev);
>  	axgbe_dev_enable_rx(dev);
> 
> -	axgbe_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
> -	axgbe_clear_bit(AXGBE_DOWN, &pdata->dev_state);
> +	rte_io_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
> +	rte_io_clear_bit(AXGBE_DOWN, &pdata->dev_state);
>  	return 0;
>  }
> 
> @@ -216,17 +216,17 @@ axgbe_dev_stop(struct rte_eth_dev *dev)
> 
>  	rte_intr_disable(&pdata->pci_dev->intr_handle);
> 
> -	if (axgbe_test_bit(AXGBE_STOPPED, &pdata->dev_state))
> +	if (rte_io_test_bit(AXGBE_STOPPED, &pdata->dev_state))
>  		return;
> 
> -	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
> +	rte_io_set_bit(AXGBE_STOPPED, &pdata->dev_state);
>  	axgbe_dev_disable_tx(dev);
>  	axgbe_dev_disable_rx(dev);
> 
>  	pdata->phy_if.phy_stop(pdata);
>  	pdata->hw_if.exit(pdata);
>  	memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
> -	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
> +	rte_io_set_bit(AXGBE_DOWN, &pdata->dev_state);
>  }
> 
>  /* Clear all resources like TX/RX queues. */ @@ -598,8 +598,8 @@
> eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
> 
>  	pdata = eth_dev->data->dev_private;
>  	/* initial state */
> -	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
> -	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
> +	rte_io_set_bit(AXGBE_DOWN, &pdata->dev_state);
> +	rte_io_set_bit(AXGBE_STOPPED, &pdata->dev_state);
>  	pdata->eth_dev = eth_dev;
> 
>  	pci_dev = RTE_DEV_TO_PCI(eth_dev->device); diff --git
> a/drivers/net/axgbe/axgbe_mdio.c b/drivers/net/axgbe/axgbe_mdio.c index
> 2721e5c..4164564 100644
> --- a/drivers/net/axgbe/axgbe_mdio.c
> +++ b/drivers/net/axgbe/axgbe_mdio.c
> @@ -743,7 +743,7 @@ static int __axgbe_phy_config_aneg(struct
> axgbe_port *pdata)  {
>  	int ret;
> 
> -	axgbe_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
> +	rte_io_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
>  	pdata->link_check = rte_get_timer_cycles();
> 
>  	ret = pdata->phy_if.phy_impl.an_config(pdata);
> @@ -807,9 +807,9 @@ static int axgbe_phy_config_aneg(struct axgbe_port
> *pdata)
> 
>  	ret = __axgbe_phy_config_aneg(pdata);
>  	if (ret)
> -		axgbe_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
> +		rte_io_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
>  	else
> -		axgbe_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
> +		rte_io_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
> 
>  	pthread_mutex_unlock(&pdata->an_mutex);
> 
> @@ -880,7 +880,7 @@ static void axgbe_phy_status(struct axgbe_port
> *pdata)
>  	unsigned int link_aneg;
>  	int an_restart;
> 
> -	if (axgbe_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
> +	if (rte_io_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
>  		pdata->phy.link = 0;
>  		goto adjust_link;
>  	}
> @@ -900,10 +900,10 @@ static void axgbe_phy_status(struct axgbe_port
> *pdata)
>  			return;
>  		}
>  		axgbe_phy_status_result(pdata);
> -		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
> -			axgbe_clear_bit(AXGBE_LINK_INIT, &pdata-
> >dev_state);
> +		if (rte_io_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
> +			rte_io_clear_bit(AXGBE_LINK_INIT, &pdata-
> >dev_state);
>  	} else {
> -		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
> +		if (rte_io_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
>  			axgbe_check_link_timeout(pdata);
> 
>  			if (link_aneg)
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
  2019-10-23  3:09   ` Honnappa Nagarahalli
@ 2019-10-23  4:56   ` Jerin Jacob
  2019-10-23  7:46   ` Morten Brørup
  2 siblings, 0 replies; 139+ messages in thread
From: Jerin Jacob @ 2019-10-23  4:56 UTC (permalink / raw)
  To: Joyce Kong
  Cc: dpdk-dev, nd, Thomas Monjalon, Jerin Jacob, Stephen Hemminger,
	mb, Honnappa Nagarahalli, Gavin Hu, ravi1.kumar, Rasesh Mody,
	Shahed Shaikh, Ziyang Xuan, Xiaoyun Wang, Guoyang Zhou

On Wed, Oct 23, 2019 at 8:25 AM Joyce Kong <joyce.kong@arm.com> wrote:
>
> There are a lot functions of bit operations scattered and
> duplicated in PMDs, consolidating them into a common API
> family is necessary. Furthermore, the bit operation is
> mostly applied to the IO devices, so use __ATOMIC_ACQ_REL
> to ensure the ordering.
>
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> ---
>  lib/librte_eal/common/Makefile                |   1 +
>  lib/librte_eal/common/include/rte_io_bitops.h | 112 ++++++++++++++++++++++++++
>  lib/librte_eal/common/meson.build             |   1 +


Missing doc/api/doxy-api-index.md update

>  3 files changed, 114 insertions(+)
>  create mode 100644 lib/librte_eal/common/include/rte_io_bitops.h
>

> +
> +/**
> + * Get a bit.
> + *
> + * @param nr
> + *   The bit to get.
> + * @param addr
> + *   The address to count from.
> + * @return
> + *   The value of the bit.
> + */
> +static inline int32_t

Missing __rte_experimental

> +rte_io_get_bit(uint32_t nr, uint64_t *addr)
> +{
> +       return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & (1UL << nr);
> +}
> +

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs
  2019-10-23  3:07       ` Joyce Kong (Arm Technology China)
@ 2019-10-23  7:45         ` Morten Brørup
  2019-10-23 17:30           ` Honnappa Nagarahalli
  0 siblings, 1 reply; 139+ messages in thread
From: Morten Brørup @ 2019-10-23  7:45 UTC (permalink / raw)
  To: Joyce Kong (Arm Technology China), dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, Honnappa Nagarahalli,
	Gavin Hu (Arm Technology China),
	Stephen Hemminger

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong (Arm
> Technology China)
> Sent: Wednesday, October 23, 2019 5:08 AM
> 
> > > > -----Original Message-----
> > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> > > > Sent: Tuesday, October 15, 2019 9:50 AM
> > > >
> > > > There are a lot functions of bit operations scattered and
> duplicated
> > > > in PMDs, consolidating them into a common API family is
> necessary.
> > > > Furthermore, the bit operation is mostly applied to the IO
> devices,
> > > > so use __ATOMIC_ACQ_REL to ensure the ordering.
> > >
> > > Good initiative.
> > >
> > > >
> > > > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > > > ---
> > > >  lib/librte_eal/common/Makefile             |  1 +
> > > >  lib/librte_eal/common/include/rte_bitops.h | 56
> > > > ++++++++++++++++++++++++++++++
> > > >  lib/librte_eal/common/meson.build          |  1 +
> > > >  3 files changed, 58 insertions(+)
> > > >  create mode 100644 lib/librte_eal/common/include/rte_bitops.h
> > > >
> > > > diff --git a/lib/librte_eal/common/Makefile
> > > > b/lib/librte_eal/common/Makefile index a00d4fc..8586ca8 100644
> > > > --- a/lib/librte_eal/common/Makefile
> > > > +++ b/lib/librte_eal/common/Makefile
> > > > @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
> > > > INC
> > > > += rte_service.h rte_service_component.h  INC += rte_bitmap.h
> > > > rte_vfio.h rte_hypervisor.h rte_test.h  INC += rte_reciprocal.h
> > > > rte_fbarray.h rte_uuid.h
> > > > +INC += rte_bitops.h
> > > >
> > > >  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> > > > rte_prefetch.h  GENERIC_INC += rte_memcpy.h rte_cpuflags.h diff
> > > > --git a/lib/librte_eal/common/include/rte_bitops.h
> > > > b/lib/librte_eal/common/include/rte_bitops.h
> > > > new file mode 100644
> > > > index 0000000..4d7c5a3
> > > > --- /dev/null
> > > > +++ b/lib/librte_eal/common/include/rte_bitops.h
> > > > @@ -0,0 +1,56 @@
> > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > + * Copyright(c) 2019 Arm Corporation  */
> > > > +
> > > > +#ifndef _RTE_BITOPS_H_
> > > > +#define _RTE_BITOPS_H_
> > > > +
> > > > +/**
> > > > + * @file
> > > > + * Bit Operations
> > > > + *
> > > > + * This file defines a generic API for bit operations.
> > > > + */
> > > > +
> > > > +#include <stdint.h>
> > > > +#include <rte_atomic.h>
> > > > +
> > > > +static inline void
> > > > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > > > +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> > > > +
> > > > +static inline void
> > > > +rte_clear_bit(int nr, unsigned long *addr) {
> > > > +	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL); }
> > > > +
> > > > +static inline int
> > > > +rte_test_bit(int nr, unsigned long *addr) {
> > > > +	int res;
> > > > +	rte_mb();
> > > > +	res = ((*addr) & (1UL << nr)) != 0;
> > > > +	rte_mb();
> > > > +
> > > > +	return res;
> > > > +}
> > >
> > > Why does rte_test_bit() not use any of the __atomic_xx functions
> instead?
> > > E.g.:
> > >
> > > static inline int
> > > rte_test_bit(int nr, unsigned long *addr) {
> > > 	return __atomic_load_n(addr, __ATOMIC_ACQUIRE); }
> > >
> > You re right, it's better to use __atomic_xx here to keep the
> consistent with
> > other APIs.
> >
> > > > +
> > > > +static inline int
> > > > +rte_test_and_set_bit(int nr, unsigned long *addr) {
> > > > +	unsigned long mask = (1UL << nr);
> > > > +
> > > > +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> > > mask; }
> > > > +
> > > > +static inline int
> > > > +rte_test_and_clear_bit(int nr, unsigned long *addr) {
> > > > +	unsigned long mask = (1UL << nr);
> > > > +
> > > > +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) &
> > > mask; }
> > > > +#endif /* _RTE_BITOPS_H_ */
> > > > diff --git a/lib/librte_eal/common/meson.build
> > > > b/lib/librte_eal/common/meson.build
> > > > index 386577c..a277cdf 100644
> > > > --- a/lib/librte_eal/common/meson.build
> > > > +++ b/lib/librte_eal/common/meson.build
> > > > @@ -52,6 +52,7 @@ common_headers = files(
> > > >  	'include/rte_alarm.h',
> > > >  	'include/rte_branch_prediction.h',
> > > >  	'include/rte_bus.h',
> > > > +	'include/rte_bitops.h',
> > > >  	'include/rte_bitmap.h',
> > > >  	'include/rte_class.h',
> > > >  	'include/rte_common.h',
> > > > --
> > > > 2.7.4
> > > >
> > >
> > > These functions use unsigned long as the type of their value, like
> > > they do in the PMDs.
> > >
> > > However, a generic bit operations library should preferably work
> with
> > > multiple types, like the __atomic_xx functions. Or use an well
> defined
> > > uint_NN_t type. Or have individually named functions for each type
> size,
> > e.g.
> > > rte_set_bit_32() and rte_set_bit_64().
> > >
> > Good suggestion! And will do this in next version.
> 
> The PMDs which use the common API now are all 32bit operation, so
> change
> the definition to uint_32_t type instead of individually naming
> functions for
> each type size.

Unless you are certain that all current and future I/O devices only need 32-bit operations, the library should provide variants for different types, like the rte_atomic_xxx API.

There might also be a need to support both big-endian and little-endian byte ordering: perhaps the CPU uses a different byte ordering than the I/O device being accessed through this API. I don't know; I'm only providing half-baked feedback on this point.


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
  2019-10-23  3:09   ` Honnappa Nagarahalli
  2019-10-23  4:56   ` Jerin Jacob
@ 2019-10-23  7:46   ` Morten Brørup
  2 siblings, 0 replies; 139+ messages in thread
From: Morten Brørup @ 2019-10-23  7:46 UTC (permalink / raw)
  To: Joyce Kong, dev
  Cc: nd, thomas, jerinj, stephen, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

> -----Original Message-----
> From: Joyce Kong [mailto:joyce.kong@arm.com]
> Sent: Wednesday, October 23, 2019 4:55 AM
> 
> There are a lot functions of bit operations scattered and
> duplicated in PMDs, consolidating them into a common API
> family is necessary. Furthermore, the bit operation is
> mostly applied to the IO devices, so use __ATOMIC_ACQ_REL
> to ensure the ordering.
> 
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> ---
>  lib/librte_eal/common/Makefile                |   1 +
>  lib/librte_eal/common/include/rte_io_bitops.h | 112
> ++++++++++++++++++++++++++
>  lib/librte_eal/common/meson.build             |   1 +
>  3 files changed, 114 insertions(+)
>  create mode 100644 lib/librte_eal/common/include/rte_io_bitops.h
> 
> diff --git a/lib/librte_eal/common/Makefile
> b/lib/librte_eal/common/Makefile
> index a00d4fc..3831313 100644
> --- a/lib/librte_eal/common/Makefile
> +++ b/lib/librte_eal/common/Makefile
> @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
>  INC += rte_service.h rte_service_component.h
>  INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
>  INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
> +INC += rte_io_bitops.h
> 
>  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> rte_prefetch.h
>  GENERIC_INC += rte_memcpy.h rte_cpuflags.h
> diff --git a/lib/librte_eal/common/include/rte_io_bitops.h
> b/lib/librte_eal/common/include/rte_io_bitops.h
> new file mode 100644
> index 0000000..5f778b8
> --- /dev/null
> +++ b/lib/librte_eal/common/include/rte_io_bitops.h
> @@ -0,0 +1,112 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Arm Limited
> + */
> +
> +#ifndef _RTE_IO_BITOPS_H_
> +#define _RTE_IO_BITOPS_H_
> +
> +/**
> + * @file
> + * Bit Operations
> + *
> + * This file defines a generic API for bit operations.
> + */

-> This file defines a generic API for I/O device bit operations.

> +
> +#include <rte_lcore.h>

This library doesn't do any lcore operations. Please use the appropriate headers.

> +
> +/**
> + * Get a bit.
> + *
> + * @param nr
> + *   The bit to get.
> + * @param addr
> + *   The address to count from.

The address to count from. -> The address holding the bit. (Applies to all functions.)

> + * @return
> + *   The value of the bit.

The description of the return value can be misinterpreted. The return value is not the value of the bit, which is 0 or 1, but the value of the word holding the bit, masked with the bit position. (Applies to all functions returning a value.)

> + */
> +static inline int32_t
> +rte_io_get_bit(uint32_t nr, uint64_t *addr)

The return type should be an unsigned type. (Applies to all functions returning a value.)

The addr type for 32-bit operations should not be uint64_t *, but uint32_t *. The __atomic_xxx functions derive the access width from the pointed-to type, and I think a 64-bit access would touch the wrong 32 bits on a big-endian CPU.

In some of the functions below, nr is signed (int32_t); it should be unsigned (uint32_t).

And a suggestion: Consider changing the type of nr from uint32_t to unsigned int.

> +{
> +	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & (1UL << nr);
> +}
> +
> +/**
> + * Set a bit to 1.
> + *
> + * @param nr
> + *   The bit to set.
> + * @param addr
> + *   The address to count from.
> + */
> +static inline void
> +rte_io_set_bit(uint32_t nr, uint64_t *addr)
> +{
> +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL);
> +}
> +
> +/**
> + * Set a bit to 0.
> + *
> + * @param nr
> + *   The bit to set.
> + * @param addr
> + *   The address to count from.
> + */
> +static inline void
> +rte_io_clear_bit(int32_t nr, uint64_t *addr)
> +{
> +	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL);
> +}
> +
> +/**
> + * Test if a bit is 1.
> + *
> + * @param nr
> + *   The bit to test.
> + * @param addr
> + *   The address to count from.
> + * @return
> + *   1 if the bit is 1; else 0.
> + */
> +static inline int32_t
> +rte_io_test_bit(int32_t nr, uint64_t *addr)
> +{
> +	return (__atomic_load_n(addr, __ATOMIC_ACQUIRE) & (1UL << nr)) !=
> 0;
> +}

All the other functions in this library return a word with the bit masked. This function returns 0 or 1. I think it should return a word value, similar to the other functions.

> +
> +/**
> + * Set a bit to 1 and return its old value.
> + *
> + * @param nr
> + *   The bit to set.
> + * @param addr
> + *   The address to count from.
> + * @return
> + *   The old value of the bit.
> + */
> +static inline int32_t
> +rte_io_test_and_set_bit(int32_t nr, uint64_t *addr)
> +{
> +	unsigned long mask = (1UL << nr);

unsigned long mask -> uint32_t mask. (Also applies to other functions.)

> +
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +
> +/**
> + * Set a bit to 0 and return its old value.
> + *
> + * @param nr
> + *   The bit to set.
> + * @param addr
> + *   The address to count from.
> + * @return
> + *   The old value of the bit.
> + */
> +static inline int32_t
> +rte_io_test_and_clear_bit(int32_t nr, uint64_t *addr)
> +{
> +	unsigned long mask = (1UL << nr);
> +
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +#endif /* _RTE_IO_BITOPS_H_ */
> diff --git a/lib/librte_eal/common/meson.build
> b/lib/librte_eal/common/meson.build
> index 386577c..0a65d04 100644
> --- a/lib/librte_eal/common/meson.build
> +++ b/lib/librte_eal/common/meson.build
> @@ -52,6 +52,7 @@ common_headers = files(
>  	'include/rte_alarm.h',
>  	'include/rte_branch_prediction.h',
>  	'include/rte_bus.h',
> +	'include/rte_io_bitops.h',
>  	'include/rte_bitmap.h',
>  	'include/rte_class.h',
>  	'include/rte_common.h',
> --
> 2.7.4
> 


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs
  2019-10-23  7:45         ` Morten Brørup
@ 2019-10-23 17:30           ` Honnappa Nagarahalli
  2019-10-24  3:38             ` Gavin Hu (Arm Technology China)
  0 siblings, 1 reply; 139+ messages in thread
From: Honnappa Nagarahalli @ 2019-10-23 17:30 UTC (permalink / raw)
  To: Morten Brørup, Joyce Kong (Arm Technology China), dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, Gavin Hu (Arm Technology China),
	Stephen Hemminger, Honnappa Nagarahalli, nd

> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong (Arm
> > Technology China)
> > Sent: Wednesday, October 23, 2019 5:08 AM
> >
> > > > > -----Original Message-----
> > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> > > > > Sent: Tuesday, October 15, 2019 9:50 AM
> > > > >
> > > > > There are a lot functions of bit operations scattered and
> > duplicated
> > > > > in PMDs, consolidating them into a common API family is
> > necessary.
> > > > > Furthermore, the bit operation is mostly applied to the IO
> > devices,
> > > > > so use __ATOMIC_ACQ_REL to ensure the ordering.
> > > >
> > > > Good initiative.
> > > >
> > > > >
> > > > > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > > > > ---
> > > > >  lib/librte_eal/common/Makefile             |  1 +
> > > > >  lib/librte_eal/common/include/rte_bitops.h | 56
> > > > > ++++++++++++++++++++++++++++++
> > > > >  lib/librte_eal/common/meson.build          |  1 +
> > > > >  3 files changed, 58 insertions(+)  create mode 100644
> > > > > lib/librte_eal/common/include/rte_bitops.h
> > > > >
> > > > > diff --git a/lib/librte_eal/common/Makefile
> > > > > b/lib/librte_eal/common/Makefile index a00d4fc..8586ca8 100644
> > > > > --- a/lib/librte_eal/common/Makefile
> > > > > +++ b/lib/librte_eal/common/Makefile
> > > > > @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
> > > > > INC
> > > > > += rte_service.h rte_service_component.h  INC += rte_bitmap.h
> > > > > rte_vfio.h rte_hypervisor.h rte_test.h  INC += rte_reciprocal.h
> > > > > rte_fbarray.h rte_uuid.h
> > > > > +INC += rte_bitops.h
> > > > >
> > > > >  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> > > > > rte_prefetch.h  GENERIC_INC += rte_memcpy.h rte_cpuflags.h diff
> > > > > --git a/lib/librte_eal/common/include/rte_bitops.h
> > > > > b/lib/librte_eal/common/include/rte_bitops.h
> > > > > new file mode 100644
> > > > > index 0000000..4d7c5a3
> > > > > --- /dev/null
> > > > > +++ b/lib/librte_eal/common/include/rte_bitops.h
> > > > > @@ -0,0 +1,56 @@
> > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > + * Copyright(c) 2019 Arm Corporation  */
> > > > > +
> > > > > +#ifndef _RTE_BITOPS_H_
> > > > > +#define _RTE_BITOPS_H_
> > > > > +
> > > > > +/**
> > > > > + * @file
> > > > > + * Bit Operations
> > > > > + *
> > > > > + * This file defines a generic API for bit operations.
> > > > > + */
> > > > > +
> > > > > +#include <stdint.h>
> > > > > +#include <rte_atomic.h>
> > > > > +
> > > > > +static inline void
> > > > > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > > > > +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> > > > > +
> > > > > +static inline void
> > > > > +rte_clear_bit(int nr, unsigned long *addr) {
> > > > > +	__atomic_fetch_and(addr, ~(1UL << nr),
> __ATOMIC_ACQ_REL); }
> > > > > +
> > > > > +static inline int
> > > > > +rte_test_bit(int nr, unsigned long *addr) {
> > > > > +	int res;
> > > > > +	rte_mb();
> > > > > +	res = ((*addr) & (1UL << nr)) != 0;
> > > > > +	rte_mb();
> > > > > +
> > > > > +	return res;
> > > > > +}
> > > >
> > > > Why does rte_test_bit() not use any of the __atomic_xx functions
> > instead?
> > > > E.g.:
> > > >
> > > > static inline int
> > > > rte_test_bit(int nr, unsigned long *addr) {
> > > > 	return __atomic_load_n(addr, __ATOMIC_ACQUIRE); }
> > > >
> > > You re right, it's better to use __atomic_xx here to keep the
> > consistent with
> > > other APIs.
> > >
> > > > > +
> > > > > +static inline int
> > > > > +rte_test_and_set_bit(int nr, unsigned long *addr) {
> > > > > +	unsigned long mask = (1UL << nr);
> > > > > +
> > > > > +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> > > > mask; }
> > > > > +
> > > > > +static inline int
> > > > > +rte_test_and_clear_bit(int nr, unsigned long *addr) {
> > > > > +	unsigned long mask = (1UL << nr);
> > > > > +
> > > > > +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL)
> &
> > > > mask; }
> > > > > +#endif /* _RTE_BITOPS_H_ */
> > > > > diff --git a/lib/librte_eal/common/meson.build
> > > > > b/lib/librte_eal/common/meson.build
> > > > > index 386577c..a277cdf 100644
> > > > > --- a/lib/librte_eal/common/meson.build
> > > > > +++ b/lib/librte_eal/common/meson.build
> > > > > @@ -52,6 +52,7 @@ common_headers = files(
> > > > >  	'include/rte_alarm.h',
> > > > >  	'include/rte_branch_prediction.h',
> > > > >  	'include/rte_bus.h',
> > > > > +	'include/rte_bitops.h',
> > > > >  	'include/rte_bitmap.h',
> > > > >  	'include/rte_class.h',
> > > > >  	'include/rte_common.h',
> > > > > --
> > > > > 2.7.4
> > > > >
> > > >
> > > > These functions use unsigned long as the type of their value, like
> > > > they do in the PMDs.
> > > >
> > > > However, a generic bit operations library should preferably work
> > with
> > > > multiple types, like the __atomic_xx functions. Or use an well
> > defined
> > > > uint_NN_t type. Or have individually named functions for each type
> > size,
> > > e.g.
> > > > rte_set_bit_32() and rte_set_bit_64().
> > > >
> > > Good suggestion! And will do this in next version.
> >
> > The PMDs which use the common API now are all 32bit operation, so
> > change the definition to uint_32_t type instead of individually naming
> > functions for each type size.
> 
> Unless you are certain that all current and future I/O devices only need 32 bit,
> it should provide variants for different types, like the rte_atomic_xxx API.
Why not implement these as macros? The __atomic_xxx built-ins work with multiple types anyway, so we would not have to provide variants for each size.

> 
> There might also be a need to support both big and little endian byte ordering?
> Perhaps the CPU uses a different byte ordering than the I/O device being
> accessed through this API. I don't know; I'm only providing half baked feedback
> on this point.


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs
  2019-10-23 17:30           ` Honnappa Nagarahalli
@ 2019-10-24  3:38             ` Gavin Hu (Arm Technology China)
  2019-11-01 13:48               ` Honnappa Nagarahalli
  0 siblings, 1 reply; 139+ messages in thread
From: Gavin Hu (Arm Technology China) @ 2019-10-24  3:38 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Morten Brørup,
	Joyce Kong (Arm Technology China),
	dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, Stephen Hemminger, nd, nd



> -----Original Message-----
> From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Sent: Thursday, October 24, 2019 1:30 AM
> To: Morten Brørup <mb@smartsharesystems.com>; Joyce Kong (Arm
> Technology China) <Joyce.Kong@arm.com>; dev@dpdk.org
> Cc: nd <nd@arm.com>; thomas@monjalon.net; jerinj@marvell.com;
> ravi1.kumar@amd.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
> rmody@marvell.com; shshaikh@marvell.com; Gavin Hu (Arm Technology
> China) <Gavin.Hu@arm.com>; Stephen Hemminger
> <stephen@networkplumber.org>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte
> bitoperation APIs
> 
> >
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> (Arm
> > > Technology China)
> > > Sent: Wednesday, October 23, 2019 5:08 AM
> > >
> > > > > > -----Original Message-----
> > > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce
> Kong
> > > > > > Sent: Tuesday, October 15, 2019 9:50 AM
> > > > > >
> > > > > > There are a lot functions of bit operations scattered and
> > > duplicated
> > > > > > in PMDs, consolidating them into a common API family is
> > > necessary.
> > > > > > Furthermore, the bit operation is mostly applied to the IO
> > > devices,
> > > > > > so use __ATOMIC_ACQ_REL to ensure the ordering.
> > > > >
> > > > > Good initiative.
> > > > >
> > > > > >
> > > > > > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > > > > > ---
> > > > > >  lib/librte_eal/common/Makefile             |  1 +
> > > > > >  lib/librte_eal/common/include/rte_bitops.h | 56
> > > > > > ++++++++++++++++++++++++++++++
> > > > > >  lib/librte_eal/common/meson.build          |  1 +
> > > > > >  3 files changed, 58 insertions(+)  create mode 100644
> > > > > > lib/librte_eal/common/include/rte_bitops.h
> > > > > >
> > > > > > diff --git a/lib/librte_eal/common/Makefile
> > > > > > b/lib/librte_eal/common/Makefile index a00d4fc..8586ca8 100644
> > > > > > --- a/lib/librte_eal/common/Makefile
> > > > > > +++ b/lib/librte_eal/common/Makefile
> > > > > > @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h
> rte_time.h
> > > > > > INC
> > > > > > += rte_service.h rte_service_component.h  INC += rte_bitmap.h
> > > > > > rte_vfio.h rte_hypervisor.h rte_test.h  INC += rte_reciprocal.h
> > > > > > rte_fbarray.h rte_uuid.h
> > > > > > +INC += rte_bitops.h
> > > > > >
> > > > > >  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> > > > > > rte_prefetch.h  GENERIC_INC += rte_memcpy.h rte_cpuflags.h diff
> > > > > > --git a/lib/librte_eal/common/include/rte_bitops.h
> > > > > > b/lib/librte_eal/common/include/rte_bitops.h
> > > > > > new file mode 100644
> > > > > > index 0000000..4d7c5a3
> > > > > > --- /dev/null
> > > > > > +++ b/lib/librte_eal/common/include/rte_bitops.h
> > > > > > @@ -0,0 +1,56 @@
> > > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > > + * Copyright(c) 2019 Arm Corporation  */
> > > > > > +
> > > > > > +#ifndef _RTE_BITOPS_H_
> > > > > > +#define _RTE_BITOPS_H_
> > > > > > +
> > > > > > +/**
> > > > > > + * @file
> > > > > > + * Bit Operations
> > > > > > + *
> > > > > > + * This file defines a generic API for bit operations.
> > > > > > + */
> > > > > > +
> > > > > > +#include <stdint.h>
> > > > > > +#include <rte_atomic.h>
> > > > > > +
> > > > > > +static inline void
> > > > > > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > > > > > +__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> > > > > > +
> > > > > > +static inline void
> > > > > > +rte_clear_bit(int nr, unsigned long *addr) {
> > > > > > +__atomic_fetch_and(addr, ~(1UL << nr),
> > __ATOMIC_ACQ_REL); }
> > > > > > +
> > > > > > +static inline int
> > > > > > +rte_test_bit(int nr, unsigned long *addr) {
> > > > > > +int res;
> > > > > > +rte_mb();
> > > > > > +res = ((*addr) & (1UL << nr)) != 0;
> > > > > > +rte_mb();
> > > > > > +
> > > > > > +return res;
> > > > > > +}
> > > > >
> > > > > Why does rte_test_bit() not use any of the __atomic_xx functions
> > > instead?
> > > > > E.g.:
> > > > >
> > > > > static inline int
> > > > > rte_test_bit(int nr, unsigned long *addr) {
> > > > > return __atomic_load_n(addr, __ATOMIC_ACQUIRE); }
> > > > >
> > > > You re right, it's better to use __atomic_xx here to keep the
> > > consistent with
> > > > other APIs.
> > > >
> > > > > > +
> > > > > > +static inline int
> > > > > > +rte_test_and_set_bit(int nr, unsigned long *addr) {
> > > > > > +unsigned long mask = (1UL << nr);
> > > > > > +
> > > > > > +return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> > > > > mask; }
> > > > > > +
> > > > > > +static inline int
> > > > > > +rte_test_and_clear_bit(int nr, unsigned long *addr) {
> > > > > > +unsigned long mask = (1UL << nr);
> > > > > > +
> > > > > > +return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL)
> > &
> > > > > mask; }
> > > > > > +#endif /* _RTE_BITOPS_H_ */
> > > > > > diff --git a/lib/librte_eal/common/meson.build
> > > > > > b/lib/librte_eal/common/meson.build
> > > > > > index 386577c..a277cdf 100644
> > > > > > --- a/lib/librte_eal/common/meson.build
> > > > > > +++ b/lib/librte_eal/common/meson.build
> > > > > > @@ -52,6 +52,7 @@ common_headers = files(
> > > > > >  'include/rte_alarm.h',
> > > > > >  'include/rte_branch_prediction.h',
> > > > > >  'include/rte_bus.h',
> > > > > > +'include/rte_bitops.h',
> > > > > >  'include/rte_bitmap.h',
> > > > > >  'include/rte_class.h',
> > > > > >  'include/rte_common.h',
> > > > > > --
> > > > > > 2.7.4
> > > > > >
> > > > >
> > > > > These functions use unsigned long as the type of their value, like
> > > > > they do in the PMDs.
> > > > >
> > > > > However, a generic bit operations library should preferably work
> > > with
> > > > > multiple types, like the __atomic_xx functions. Or use an well
> > > defined
> > > > > uint_NN_t type. Or have individually named functions for each type
> > > size,
> > > > e.g.
> > > > > rte_set_bit_32() and rte_set_bit_64().
> > > > >
> > > > Good suggestion! And will do this in next version.
> > >
> > > The PMDs which use the common API now are all 32bit operation, so
> > > change the definition to uint_32_t type instead of individually naming
> > > functions for each type size.
> >
> > Unless you are certain that all current and future I/O devices only need 32
> bit,
> > it should provide variants for different types, like the rte_atomic_xxx API.
> Why not do these using macros? The __atomic_xxx APIs anyway work with
> multiple types. Then we do not have to provide variants for all sizes.

We have really come to the point where the community needs to give a guideline on how to generalize APIs to support arguments of multiple sizes.
It looks like macros are disliked by the community, for readability and debuggability reasons.
Besides macros, there is an alternative: _Generic https://gcc.gnu.org/onlinedocs/gccint/GENERIC.html, but it is not supported by older gcc (<4.9), so using it would make a newer gcc/clang a hard requirement.

We have to compromise over all these: code duplication, readability and debuggability.
/Gavin
> >
> > There might also be a need to support both big and little endian byte
> ordering?
> > Perhaps the CPU uses a different byte ordering than the I/O device being
> > accessed through this API. I don't know; I'm only providing half baked
> feedback
> > on this point.
> 


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation APIs in PMDs
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 0/6] " Joyce Kong
@ 2019-10-25 13:14   ` David Marchand
  2019-10-29 16:42   ` Thomas Monjalon
  1 sibling, 0 replies; 139+ messages in thread
From: David Marchand @ 2019-10-25 13:14 UTC (permalink / raw)
  To: Joyce Kong
  Cc: dev, nd, Thomas Monjalon, Jerin Jacob Kollanukkaran,
	Stephen Hemminger, mb, Honnappa Nagarahalli, Gavin Hu,
	Ravi Kumar, Rasesh Mody, Shahed Shaikh, Ziyang Xuan,
	Xiaoyun Wang, Guoyang Zhou

On Wed, Oct 23, 2019 at 4:55 AM Joyce Kong <joyce.kong@arm.com> wrote:
>
> There are a lot functions of bit operations scattered in PMDs,
> consolidate them into a common API family and applied in different
> PMDs to reduce code duplication.
>
> v2:
>   1. Add doxygen comments for the rte bit operation API(suggested by Stephen Hemminger).
>   2. Add test cases for common rte bit operation API(suggested by Stephen Hemminger).
>   3. Change the header file to rte_io_bitops.h and the operation to rte_io_set_bit()etc.,
>      as the API uses barriers inside and the barriers are only needed for IO operations
>      (suggested by Jerin Jacob).
>   4. Use an well defined uint_NN_t type(suggested by Morten Brørup).

Thanks for working on this.
This series is a cleanup and worth looking at, yet it came rather late.

Discussion and enhancement can still continue, but it will be deferred to 20.02.


--
David Marchand


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation APIs in PMDs
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 0/6] " Joyce Kong
  2019-10-25 13:14   ` David Marchand
@ 2019-10-29 16:42   ` Thomas Monjalon
  2019-10-30  9:55     ` Gavin Hu (Arm Technology China)
  1 sibling, 1 reply; 139+ messages in thread
From: Thomas Monjalon @ 2019-10-29 16:42 UTC (permalink / raw)
  To: Joyce Kong
  Cc: dev, nd, jerinj, stephen, mb, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, adrien.mazarguil

23/10/2019 04:54, Joyce Kong:
> There are a lot functions of bit operations scattered in PMDs,
> consolidate them into a common API family and applied in different
> PMDs to reduce code duplication.

Please, could you look at what Adrien did in the Mellanox PMD?

http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5_utils.h#L28




^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation APIs in PMDs
  2019-10-29 16:42   ` Thomas Monjalon
@ 2019-10-30  9:55     ` Gavin Hu (Arm Technology China)
  2019-10-30 10:17       ` Thomas Monjalon
  2019-10-30 12:32       ` Jerin Jacob
  0 siblings, 2 replies; 139+ messages in thread
From: Gavin Hu (Arm Technology China) @ 2019-10-30  9:55 UTC (permalink / raw)
  To: thomas, Joyce Kong (Arm Technology China)
  Cc: dev, nd, jerinj, stephen, mb, Honnappa Nagarahalli, ravi1.kumar,
	rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun, zhouguoyang,
	adrien.mazarguil, nd

Hi Thomas,

> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Wednesday, October 30, 2019 12:43 AM
> To: Joyce Kong (Arm Technology China) <Joyce.Kong@arm.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; jerinj@marvell.com;
> stephen@networkplumber.org; mb@smartsharesystems.com; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology
> China) <Gavin.Hu@arm.com>; ravi1.kumar@amd.com; rmody@marvell.com;
> shshaikh@marvell.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
> adrien.mazarguil@6wind.com
> Subject: Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation
> APIs in PMDs
> 
> 23/10/2019 04:54, Joyce Kong:
> > There are a lot functions of bit operations scattered in PMDs,
> > consolidate them into a common API family and applied in different
> > PMDs to reduce code duplication.
> 
> Please, could you look at what Adrien did in the Mellanox PMD?
> 
> http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5_utils.h#L28
The code has less duplication, but it requires a less natural declaration of variables
http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5.h#L607 
Should we take this way?
/Gavin
> 
> 


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation APIs in PMDs
  2019-10-30  9:55     ` Gavin Hu (Arm Technology China)
@ 2019-10-30 10:17       ` Thomas Monjalon
  2019-10-30 12:32       ` Jerin Jacob
  1 sibling, 0 replies; 139+ messages in thread
From: Thomas Monjalon @ 2019-10-30 10:17 UTC (permalink / raw)
  To: Gavin Hu (Arm Technology China)
  Cc: Joyce Kong (Arm Technology China),
	dev, nd, jerinj, stephen, mb, Honnappa Nagarahalli, ravi1.kumar,
	rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun, zhouguoyang,
	adrien.mazarguil

30/10/2019 10:55, Gavin Hu (Arm Technology China):
> Hi Thomas,
> 
> From: Thomas Monjalon <thomas@monjalon.net>
> > 23/10/2019 04:54, Joyce Kong:
> > > There are a lot functions of bit operations scattered in PMDs,
> > > consolidate them into a common API family and applied in different
> > > PMDs to reduce code duplication.
> > 
> > Please, could you look at what Adrien did in the Mellanox PMD?
> > 
> > http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5_utils.h#L28
> The code has less duplication, but it requires a less natural declaration of variables
> http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5.h#L607 
> Should we take this way?

I don't know which way is best.
I suggested to read this code for 2 reasons:
1. we can be inspired
2. it may be replaced by the new common API as you did for other drivers



^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation APIs in PMDs
  2019-10-30  9:55     ` Gavin Hu (Arm Technology China)
  2019-10-30 10:17       ` Thomas Monjalon
@ 2019-10-30 12:32       ` Jerin Jacob
  2019-10-30 13:02         ` Morten Brørup
  1 sibling, 1 reply; 139+ messages in thread
From: Jerin Jacob @ 2019-10-30 12:32 UTC (permalink / raw)
  To: Gavin Hu (Arm Technology China)
  Cc: thomas, Joyce Kong (Arm Technology China),
	dev, nd, jerinj, stephen, mb, Honnappa Nagarahalli, ravi1.kumar,
	rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun, zhouguoyang,
	adrien.mazarguil

On Wed, Oct 30, 2019 at 3:25 PM Gavin Hu (Arm Technology China)
<Gavin.Hu@arm.com> wrote:
>
> Hi Thomas,
>
> > -----Original Message-----
> > From: Thomas Monjalon <thomas@monjalon.net>
> > Sent: Wednesday, October 30, 2019 12:43 AM
> > To: Joyce Kong (Arm Technology China) <Joyce.Kong@arm.com>
> > Cc: dev@dpdk.org; nd <nd@arm.com>; jerinj@marvell.com;
> > stephen@networkplumber.org; mb@smartsharesystems.com; Honnappa
> > Nagarahalli <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology
> > China) <Gavin.Hu@arm.com>; ravi1.kumar@amd.com; rmody@marvell.com;
> > shshaikh@marvell.com; xuanziyang2@huawei.com;
> > cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
> > adrien.mazarguil@6wind.com
> > Subject: Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation
> > APIs in PMDs
> >
> > 23/10/2019 04:54, Joyce Kong:
> > > There are a lot functions of bit operations scattered in PMDs,
> > > consolidate them into a common API family and applied in different
> > > PMDs to reduce code duplication.
> >
> > Please, could you look at what Adrien did in the Mellanox PMD?
> >
> > http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5_utils.h#L28
> The code has less duplication, but it requires a less natural declaration of variables
> http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5.h#L607
> Should we take this way?


IMO, we need to consider the macro-based scheme only as a last resort.


> /Gavin
> >
> >
>

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation APIs in PMDs
  2019-10-30 12:32       ` Jerin Jacob
@ 2019-10-30 13:02         ` Morten Brørup
  2019-10-31 10:39           ` Gavin Hu (Arm Technology China)
  0 siblings, 1 reply; 139+ messages in thread
From: Morten Brørup @ 2019-10-30 13:02 UTC (permalink / raw)
  To: Jerin Jacob, Gavin Hu (Arm Technology China)
  Cc: thomas, Joyce Kong (Arm Technology China),
	dev, nd, jerinj, stephen, Honnappa Nagarahalli, ravi1.kumar,
	rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun, zhouguoyang,
	adrien.mazarguil

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Jerin Jacob
> Sent: Wednesday, October 30, 2019 1:33 PM
> 
> On Wed, Oct 30, 2019 at 3:25 PM Gavin Hu (Arm Technology China)
> <Gavin.Hu@arm.com> wrote:
> >
> > Hi Thomas,
> >
> > > -----Original Message-----
> > > From: Thomas Monjalon <thomas@monjalon.net>
> > > Sent: Wednesday, October 30, 2019 12:43 AM
> > >
> > > 23/10/2019 04:54, Joyce Kong:
> > > > There are a lot functions of bit operations scattered in PMDs,
> > > > consolidate them into a common API family and applied in different
> > > > PMDs to reduce code duplication.
> > >
> > > Please, could you look at what Adrien did in the Mellanox PMD?
> > >
> > >
> http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5_utils.h#L28
> > The code has less duplication, but it requires a less natural declaration
> of variables
> > http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5.h#L607
> > Should we take this way?
> 
> 
> IMO, We need to consider the MACRO based scheme only as of the last resort.
> 

I agree.

The EAL library already has an I/O device memory access API, i.e. with functionality closely related to the proposed I/O device bit operation API:
http://code.dpdk.org/dpdk/latest/source/lib/librte_eal/common/include/generic/rte_io.h

I would prefer a similar approach, and API familiarity would be my strongest argument.


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation APIs in PMDs
  2019-10-30 13:02         ` Morten Brørup
@ 2019-10-31 10:39           ` Gavin Hu (Arm Technology China)
  0 siblings, 0 replies; 139+ messages in thread
From: Gavin Hu (Arm Technology China) @ 2019-10-31 10:39 UTC (permalink / raw)
  To: Morten Brørup, Jerin Jacob
  Cc: thomas, Joyce Kong (Arm Technology China),
	dev, nd, jerinj, stephen, Honnappa Nagarahalli, ravi1.kumar,
	rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun, zhouguoyang,
	adrien.mazarguil, nd

> -----Original Message-----
> From: Morten Brørup <mb@smartsharesystems.com>
> Sent: Wednesday, October 30, 2019 9:02 PM
> To: Jerin Jacob <jerinjacobk@gmail.com>; Gavin Hu (Arm Technology China)
> <Gavin.Hu@arm.com>
> Cc: thomas@monjalon.net; Joyce Kong (Arm Technology China)
> <Joyce.Kong@arm.com>; dev@dpdk.org; nd <nd@arm.com>;
> jerinj@marvell.com; stephen@networkplumber.org; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; ravi1.kumar@amd.com;
> rmody@marvell.com; shshaikh@marvell.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
> adrien.mazarguil@6wind.com
> Subject: RE: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation
> APIs in PMDs
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Jerin Jacob
> > Sent: Wednesday, October 30, 2019 1:33 PM
> >
> > On Wed, Oct 30, 2019 at 3:25 PM Gavin Hu (Arm Technology China)
> > <Gavin.Hu@arm.com> wrote:
> > >
> > > Hi Thomas,
> > >
> > > > -----Original Message-----
> > > > From: Thomas Monjalon <thomas@monjalon.net>
> > > > Sent: Wednesday, October 30, 2019 12:43 AM
> > > >
> > > > 23/10/2019 04:54, Joyce Kong:
> > > > > There are a lot functions of bit operations scattered in PMDs,
> > > > > consolidate them into a common API family and applied in different
> > > > > PMDs to reduce code duplication.
> > > >
> > > > Please, could you look at what Adrien did in the Mellanox PMD?
> > > >
> > > >
> > http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5_utils.h#L28
> > > The code has less duplication, but it requires a less natural declaration
> > of variables
> > > http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5.h#L607
> > > Should we take this way?
> >
> >
> > IMO, We need to consider the MACRO based scheme only as of the last resort.
> >
> 
> I agree.
> 
> The EAL library already has an I/O device memory access API, i.e. with
> functionality closely related to the proposed I/O device bit operation API:
> http://code.dpdk.org/dpdk/latest/source/lib/librte_eal/common/include/gene
> ric/rte_io.h
> 
> I would prefer a similar approach, and API familiarity would be my strongest
> argument.
Yes, this is a more natural way, and engineers are more familiar with these APIs.
We will go this way, as more people have voted for it.
Thanks also to Thomas for your comment; we were inspired by this code, and we will also add assert() to guarantee that the 'bit' argument is in the valid range.
We have used this common API in some PMDs, but not extensively; the reason is that we want to finalize the API first (with your comments coming in) and then propagate it later.
/Gavin   



^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs
  2019-10-24  3:38             ` Gavin Hu (Arm Technology China)
@ 2019-11-01 13:48               ` Honnappa Nagarahalli
  2019-11-03 15:45                 ` Gavin Hu (Arm Technology China)
  0 siblings, 1 reply; 139+ messages in thread
From: Honnappa Nagarahalli @ 2019-11-01 13:48 UTC (permalink / raw)
  To: Gavin Hu (Arm Technology China),
	Morten Brørup, Joyce Kong (Arm Technology China),
	dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, Stephen Hemminger,
	Honnappa Nagarahalli, nd

> >
> > >
> > > > -----Original Message-----
> > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> > (Arm
> > > > Technology China)
> > > > Sent: Wednesday, October 23, 2019 5:08 AM
> > > >
> > > > > > > -----Original Message-----
> > > > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce
> > Kong
> > > > > > > Sent: Tuesday, October 15, 2019 9:50 AM
> > > > > > >
> > > > > > > There are a lot functions of bit operations scattered and
> > > > duplicated
> > > > > > > in PMDs, consolidating them into a common API family is
> > > > necessary.
> > > > > > > Furthermore, the bit operation is mostly applied to the IO
> > > > devices,
> > > > > > > so use __ATOMIC_ACQ_REL to ensure the ordering.
> > > > > >
> > > > > > Good initiative.
> > > > > >
> > > > > > >
> > > > > > > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > > > > > > ---
> > > > > > >  lib/librte_eal/common/Makefile             |  1 +
> > > > > > >  lib/librte_eal/common/include/rte_bitops.h | 56
> > > > > > > ++++++++++++++++++++++++++++++
> > > > > > >  lib/librte_eal/common/meson.build          |  1 +
> > > > > > >  3 files changed, 58 insertions(+)  create mode 100644
> > > > > > > lib/librte_eal/common/include/rte_bitops.h
> > > > > > >
> > > > > > > diff --git a/lib/librte_eal/common/Makefile
> > > > > > > b/lib/librte_eal/common/Makefile index a00d4fc..8586ca8
> > > > > > > 100644
> > > > > > > --- a/lib/librte_eal/common/Makefile
> > > > > > > +++ b/lib/librte_eal/common/Makefile
> > > > > > > @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h
> > rte_time.h
> > > > > > > INC
> > > > > > > += rte_service.h rte_service_component.h  INC +=
> > > > > > > +rte_bitmap.h
> > > > > > > rte_vfio.h rte_hypervisor.h rte_test.h  INC +=
> > > > > > > rte_reciprocal.h rte_fbarray.h rte_uuid.h
> > > > > > > +INC += rte_bitops.h
> > > > > > >
> > > > > > >  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> > > > > > > rte_prefetch.h  GENERIC_INC += rte_memcpy.h rte_cpuflags.h
> > > > > > > diff --git a/lib/librte_eal/common/include/rte_bitops.h
> > > > > > > b/lib/librte_eal/common/include/rte_bitops.h
> > > > > > > new file mode 100644
> > > > > > > index 0000000..4d7c5a3
> > > > > > > --- /dev/null
> > > > > > > +++ b/lib/librte_eal/common/include/rte_bitops.h
> > > > > > > @@ -0,0 +1,56 @@
> > > > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > > > + * Copyright(c) 2019 Arm Corporation  */
> > > > > > > +
> > > > > > > +#ifndef _RTE_BITOPS_H_
> > > > > > > +#define _RTE_BITOPS_H_
> > > > > > > +
> > > > > > > +/**
> > > > > > > + * @file
> > > > > > > + * Bit Operations
> > > > > > > + *
> > > > > > > + * This file defines a generic API for bit operations.
> > > > > > > + */
> > > > > > > +
> > > > > > > +#include <stdint.h>
> > > > > > > +#include <rte_atomic.h>
> > > > > > > +
> > > > > > > +static inline void
> > > > > > > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > > > > > > +__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> > > > > > > +
> > > > > > > +static inline void
> > > > > > > +rte_clear_bit(int nr, unsigned long *addr) {
> > > > > > > +__atomic_fetch_and(addr, ~(1UL << nr),
> > > __ATOMIC_ACQ_REL); }
> > > > > > > +
> > > > > > > +static inline int
> > > > > > > +rte_test_bit(int nr, unsigned long *addr) { int res;
> > > > > > > +rte_mb(); res = ((*addr) & (1UL << nr)) != 0; rte_mb();
> > > > > > > +
> > > > > > > +return res;
> > > > > > > +}
> > > > > >
> > > > > > Why does rte_test_bit() not use any of the __atomic_xx
> > > > > > functions
> > > > instead?
> > > > > > E.g.:
> > > > > >
> > > > > > static inline int
> > > > > > rte_test_bit(int nr, unsigned long *addr) { return
> > > > > > __atomic_load_n(addr, __ATOMIC_ACQUIRE); }
> > > > > >
> > > > > You re right, it's better to use __atomic_xx here to keep the
> > > > consistent with
> > > > > other APIs.
> > > > >
> > > > > > > +
> > > > > > > +static inline int
> > > > > > > +rte_test_and_set_bit(int nr, unsigned long *addr) {
> > > > > > > +unsigned long mask = (1UL << nr);
> > > > > > > +
> > > > > > > +return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> > > > > > mask; }
> > > > > > > +
> > > > > > > +static inline int
> > > > > > > +rte_test_and_clear_bit(int nr, unsigned long *addr) {
> > > > > > > +unsigned long mask = (1UL << nr);
> > > > > > > +
> > > > > > > +return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL)
> > > &
> > > > > > mask; }
> > > > > > > +#endif /* _RTE_BITOPS_H_ */
> > > > > > > diff --git a/lib/librte_eal/common/meson.build
> > > > > > > b/lib/librte_eal/common/meson.build
> > > > > > > index 386577c..a277cdf 100644
> > > > > > > --- a/lib/librte_eal/common/meson.build
> > > > > > > +++ b/lib/librte_eal/common/meson.build
> > > > > > > @@ -52,6 +52,7 @@ common_headers = files(
> > > > > > > 'include/rte_alarm.h',  'include/rte_branch_prediction.h',
> > > > > > >  'include/rte_bus.h',
> > > > > > > +'include/rte_bitops.h',
> > > > > > >  'include/rte_bitmap.h',
> > > > > > >  'include/rte_class.h',
> > > > > > >  'include/rte_common.h',
> > > > > > > --
> > > > > > > 2.7.4
> > > > > > >
> > > > > >
> > > > > > These functions use unsigned long as the type of their value,
> > > > > > like they do in the PMDs.
> > > > > >
> > > > > > However, a generic bit operations library should preferably
> > > > > > work
> > > > with
> > > > > > multiple types, like the __atomic_xx functions. Or use an well
> > > > defined
> > > > > > uint_NN_t type. Or have individually named functions for each
> > > > > > type
> > > > size,
> > > > > e.g.
> > > > > > rte_set_bit_32() and rte_set_bit_64().
> > > > > >
> > > > > Good suggestion! And will do this in next version.
> > > >
> > > > The PMDs which use the common API now are all 32bit operation, so
> > > > change the definition to uint_32_t type instead of individually
> > > > naming functions for each type size.
> > >
> > > Unless you are certain that all current and future I/O devices only
> > > need 32
> > bit,
> > > it should provide variants for different types, like the rte_atomic_xxx API.
> > Why not do these using macros? The __atomic_xxx APIs anyway work with
> > multiple types. Then we do not have to provide variants for all sizes.
> 
> We really come to the point for the community to give a guideline: how to
> generalize APIs to support multiple-sized arguments.
> Looks like macros was disliked by the community, for readability and
> debuggability reasons.
IMO, it should not be considered a blanket ban on using macros. It should be considered on a case-by-case basis. For example, I do not see the point in writing the same API for 32b/64b/128b, especially when the APIs are one-liners.

> Besides macros, there are an alternative: _Generic
> https://gcc.gnu.org/onlinedocs/gccint/GENERIC.html, but it is not supported
> by older gcc(<4.9), this made a hard requirement for gcc/clang.
> 
> We have to compromise over all these: code duplication, readability and
> debuggability.
> /Gavin
> > >
> > > There might also be a need to support both big and little endian
> > > byte
> > ordering?
> > > Perhaps the CPU uses a different byte ordering than the I/O device
> > > being accessed through this API. I don't know; I'm only providing
> > > half baked
> > feedback
> > > on this point.
> >
> 


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs
  2019-11-01 13:48               ` Honnappa Nagarahalli
@ 2019-11-03 15:45                 ` Gavin Hu (Arm Technology China)
  0 siblings, 0 replies; 139+ messages in thread
From: Gavin Hu (Arm Technology China) @ 2019-11-03 15:45 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Morten Brørup,
	Joyce Kong (Arm Technology China),
	dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, Stephen Hemminger, nd, nd

Hi Honnappa,
> -----Original Message-----
> From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Sent: Friday, November 1, 2019 9:48 PM
> To: Gavin Hu (Arm Technology China) <Gavin.Hu@arm.com>; Morten
> Brørup <mb@smartsharesystems.com>; Joyce Kong (Arm Technology China)
> <Joyce.Kong@arm.com>; dev@dpdk.org
> Cc: nd <nd@arm.com>; thomas@monjalon.net; jerinj@marvell.com;
> ravi1.kumar@amd.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
> rmody@marvell.com; shshaikh@marvell.com; Stephen Hemminger
> <stephen@networkplumber.org>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte
> bitoperation APIs
> 
> > >
> > > >
> > > > > -----Original Message-----
> > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> > > (Arm
> > > > > Technology China)
> > > > > Sent: Wednesday, October 23, 2019 5:08 AM
> > > > >
> > > > > > > > -----Original Message-----
> > > > > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce
> > > Kong
> > > > > > > > Sent: Tuesday, October 15, 2019 9:50 AM
> > > > > > > >
> > > > > > > > There are a lot functions of bit operations scattered and
> > > > > duplicated
> > > > > > > > in PMDs, consolidating them into a common API family is
> > > > > necessary.
> > > > > > > > Furthermore, the bit operation is mostly applied to the IO
> > > > > devices,
> > > > > > > > so use __ATOMIC_ACQ_REL to ensure the ordering.
> > > > > > >
> > > > > > > Good initiative.
> > > > > > >
> > > > > > > >
> > > > > > > > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > > > > > > > ---
> > > > > > > >  lib/librte_eal/common/Makefile             |  1 +
> > > > > > > >  lib/librte_eal/common/include/rte_bitops.h | 56
> > > > > > > > ++++++++++++++++++++++++++++++
> > > > > > > >  lib/librte_eal/common/meson.build          |  1 +
> > > > > > > >  3 files changed, 58 insertions(+)  create mode 100644
> > > > > > > > lib/librte_eal/common/include/rte_bitops.h
> > > > > > > >
> > > > > > > > diff --git a/lib/librte_eal/common/Makefile
> > > > > > > > b/lib/librte_eal/common/Makefile index a00d4fc..8586ca8
> > > > > > > > 100644
> > > > > > > > --- a/lib/librte_eal/common/Makefile
> > > > > > > > +++ b/lib/librte_eal/common/Makefile
> > > > > > > > @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h
> > > rte_time.h
> > > > > > > > INC
> > > > > > > > += rte_service.h rte_service_component.h  INC +=
> > > > > > > > +rte_bitmap.h
> > > > > > > > rte_vfio.h rte_hypervisor.h rte_test.h  INC +=
> > > > > > > > rte_reciprocal.h rte_fbarray.h rte_uuid.h
> > > > > > > > +INC += rte_bitops.h
> > > > > > > >
> > > > > > > >  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> > > > > > > > rte_prefetch.h  GENERIC_INC += rte_memcpy.h rte_cpuflags.h
> > > > > > > > diff --git a/lib/librte_eal/common/include/rte_bitops.h
> > > > > > > > b/lib/librte_eal/common/include/rte_bitops.h
> > > > > > > > new file mode 100644
> > > > > > > > index 0000000..4d7c5a3
> > > > > > > > --- /dev/null
> > > > > > > > +++ b/lib/librte_eal/common/include/rte_bitops.h
> > > > > > > > @@ -0,0 +1,56 @@
> > > > > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > > > > + * Copyright(c) 2019 Arm Corporation  */
> > > > > > > > +
> > > > > > > > +#ifndef _RTE_BITOPS_H_
> > > > > > > > +#define _RTE_BITOPS_H_
> > > > > > > > +
> > > > > > > > +/**
> > > > > > > > + * @file
> > > > > > > > + * Bit Operations
> > > > > > > > + *
> > > > > > > > + * This file defines a generic API for bit operations.
> > > > > > > > + */
> > > > > > > > +
> > > > > > > > +#include <stdint.h>
> > > > > > > > +#include <rte_atomic.h>
> > > > > > > > +
> > > > > > > > +static inline void
> > > > > > > > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > > > > > > > +__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> > > > > > > > +
> > > > > > > > +static inline void
> > > > > > > > +rte_clear_bit(int nr, unsigned long *addr) {
> > > > > > > > +__atomic_fetch_and(addr, ~(1UL << nr),
> > > > __ATOMIC_ACQ_REL); }
> > > > > > > > +
> > > > > > > > +static inline int
> > > > > > > > +rte_test_bit(int nr, unsigned long *addr) { int res;
> > > > > > > > +rte_mb(); res = ((*addr) & (1UL << nr)) != 0; rte_mb();
> > > > > > > > +
> > > > > > > > +return res;
> > > > > > > > +}
> > > > > > >
> > > > > > > Why does rte_test_bit() not use any of the __atomic_xx
> > > > > > > functions
> > > > > instead?
> > > > > > > E.g.:
> > > > > > >
> > > > > > > static inline int
> > > > > > > rte_test_bit(int nr, unsigned long *addr) { return
> > > > > > > __atomic_load_n(addr, __ATOMIC_ACQUIRE); }
> > > > > > >
> > > > > > You re right, it's better to use __atomic_xx here to keep the
> > > > > consistent with
> > > > > > other APIs.
> > > > > >
> > > > > > > > +
> > > > > > > > +static inline int
> > > > > > > > +rte_test_and_set_bit(int nr, unsigned long *addr) {
> > > > > > > > +unsigned long mask = (1UL << nr);
> > > > > > > > +
> > > > > > > > +return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> > > > > > > mask; }
> > > > > > > > +
> > > > > > > > +static inline int
> > > > > > > > +rte_test_and_clear_bit(int nr, unsigned long *addr) {
> > > > > > > > +unsigned long mask = (1UL << nr);
> > > > > > > > +
> > > > > > > > +return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL)
> > > > &
> > > > > > > mask; }
> > > > > > > > +#endif /* _RTE_BITOPS_H_ */
> > > > > > > > diff --git a/lib/librte_eal/common/meson.build
> > > > > > > > b/lib/librte_eal/common/meson.build
> > > > > > > > index 386577c..a277cdf 100644
> > > > > > > > --- a/lib/librte_eal/common/meson.build
> > > > > > > > +++ b/lib/librte_eal/common/meson.build
> > > > > > > > @@ -52,6 +52,7 @@ common_headers = files(
> > > > > > > > 'include/rte_alarm.h',  'include/rte_branch_prediction.h',
> > > > > > > >  'include/rte_bus.h',
> > > > > > > > +'include/rte_bitops.h',
> > > > > > > >  'include/rte_bitmap.h',
> > > > > > > >  'include/rte_class.h',
> > > > > > > >  'include/rte_common.h',
> > > > > > > > --
> > > > > > > > 2.7.4
> > > > > > > >
> > > > > > >
> > > > > > > These functions use unsigned long as the type of their value,
> > > > > > > like they do in the PMDs.
> > > > > > >
> > > > > > > However, a generic bit operations library should preferably
> > > > > > > work
> > > > > with
> > > > > > > multiple types, like the __atomic_xx functions. Or use an well
> > > > > defined
> > > > > > > uint_NN_t type. Or have individually named functions for each
> > > > > > > type
> > > > > size,
> > > > > > e.g.
> > > > > > > rte_set_bit_32() and rte_set_bit_64().
> > > > > > >
> > > > > > Good suggestion! And will do this in next version.
> > > > >
> > > > > The PMDs which use the common API now are all 32bit operation, so
> > > > > change the definition to uint_32_t type instead of individually
> > > > > naming functions for each type size.
> > > >
> > > > Unless you are certain that all current and future I/O devices only
> > > > need 32
> > > bit,
> > > > it should provide variants for different types, like the rte_atomic_xxx
> API.
> > > Why not do these using macros? The __atomic_xxx APIs anyway work
> with
> > > multiple types. Then we do not have to provide variants for all sizes.
> >
> > We really come to the point for the community to give a guideline: how to
> > generalize APIs to support multiple-sized arguments.
> > Looks like macros was disliked by the community, for readability and
> > debuggability reasons.
> IMO, it should not be considered as a blanket ban on using macros. It should
> be considered case by case basis. For ex: I do not see a point in writing the
> same API for 32b/64b/128b especially when the APIs are one liners.
Jerin and Morten have a different opinion: they consider a macro-based scheme only as a last resort.
Another argument is API familiarity (similar to the rte io read APIs).
Joyce made a new version; let's see how the community balances the duplication against other considerations.
/Gavin
> 
> > Besides macros, there are an alternative: _Generic
> > https://gcc.gnu.org/onlinedocs/gccint/GENERIC.html, but it is not
> supported
> > by older gcc(<4.9), this made a hard requirement for gcc/clang.
> >
> > We have to compromise over all these: code duplication, readability and
> > debuggability.
> > /Gavin
> > > >
> > > > There might also be a need to support both big and little endian
> > > > byte
> > > ordering?
> > > > Perhaps the CPU uses a different byte ordering than the I/O device
> > > > being accessed through this API. I don't know; I'm only providing
> > > > half baked
> > > feedback
> > > > on this point.
> > >
> >
> 


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v3 0/6] implement common rte bit operation APIs in PMDs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (12 preceding siblings ...)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 6/6] net/qede: " Joyce Kong
@ 2019-11-18 10:06 ` Joyce Kong
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
                   ` (54 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-18 10:06 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

There are a lot of bit operation functions scattered across PMDs;
consolidate them into a common API family and apply it in the different
PMDs to reduce code duplication.

v3:
  1. Change the API's header file back to rte_bitops.h, then implement both 32-bit and
     64-bit operations with and without C11 atomic memory ordering.
  2. Add a multi-core test case for the bit operations that are implemented with memory ordering.
  3. Update the documentation of both the APIs and the test cases.

v2:
  1. Add doxygen comments for the rte bit operation API (suggested by Stephen Hemminger).
  2. Add test cases for the common rte bit operation API (suggested by Stephen Hemminger).
  3. Change the header file to rte_io_bitops.h and the operations to rte_io_set_bit() etc.,
     as the API uses barriers inside and the barriers are only needed for IO operations
     (suggested by Jerin Jacob).
  4. Use a well-defined uint_NN_t type (suggested by Morten Brørup).

Joyce Kong (6):
  lib/eal: implement the family of rte bit operation APIs
  test/bitops: add bit operation test case
  net/axgbe: use common rte bit operation APIs instead
  net/bnx2x: use common rte bit operation APIs instead
  net/qede: use common rte bit operation APIs instead
  net/hinic: use common rte bit operation APIs instead

 app/test/Makefile                          |   1 +
 app/test/autotest_data.py                  |   6 +
 app/test/meson.build                       |   2 +
 app/test/test_bitops.c                     | 303 +++++++++++++
 doc/api/doxy-api-index.md                  |   3 +-
 drivers/net/axgbe/axgbe_common.h           |  29 +-
 drivers/net/axgbe/axgbe_ethdev.c           |  14 +-
 drivers/net/axgbe/axgbe_mdio.c             |  14 +-
 drivers/net/bnx2x/bnx2x.c                  | 209 ++++-----
 drivers/net/bnx2x/bnx2x.h                  |   4 -
 drivers/net/bnx2x/ecore_sp.h               |   9 +-
 drivers/net/hinic/Makefile                 |   1 +
 drivers/net/hinic/base/hinic_compat.h      |  33 +-
 drivers/net/hinic/hinic_pmd_ethdev.c       |  16 +-
 drivers/net/hinic/meson.build              |   2 +
 drivers/net/qede/base/bcm_osal.c           |  20 -
 drivers/net/qede/base/bcm_osal.h           |  10 +-
 lib/librte_eal/common/Makefile             |   1 +
 lib/librte_eal/common/include/rte_bitops.h | 474 +++++++++++++++++++++
 lib/librte_eal/common/meson.build          |   1 +
 20 files changed, 919 insertions(+), 233 deletions(-)
 create mode 100644 app/test/test_bitops.c
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

-- 
2.17.1


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (13 preceding siblings ...)
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
@ 2019-11-18 10:06 ` Joyce Kong
  2019-11-18 10:52   ` [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bitoperation APIs Morten Brørup
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 2/6] test/bitops: add bit operation test case Joyce Kong
                   ` (53 subsequent siblings)
  68 siblings, 1 reply; 139+ messages in thread
From: Joyce Kong @ 2019-11-18 10:06 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

There are a lot of bit operation functions scattered and
duplicated across PMDs, so consolidating them into a common API
family is necessary. Furthermore, when a bit operation
is applied to IO devices, use __ATOMIC_ACQ_REL to
ensure the ordering of the IO bit operation.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 doc/api/doxy-api-index.md                  |   3 +-
 lib/librte_eal/common/Makefile             |   1 +
 lib/librte_eal/common/include/rte_bitops.h | 474 +++++++++++++++++++++
 lib/librte_eal/common/meson.build          |   1 +
 4 files changed, 478 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index dff496be0..1aed266d3 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -181,4 +181,5 @@ The public API headers are grouped by topics:
   [common]             (@ref rte_common.h),
   [experimental APIs]  (@ref rte_compat.h),
   [ABI versioning]     (@ref rte_function_versioning.h),
-  [version]            (@ref rte_version.h)
+  [version]            (@ref rte_version.h),
+  [bitops]             (@ref rte_bitops.h)
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index c2c6d92cd..dd025c130 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -19,6 +19,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
 INC += rte_service.h rte_service_component.h
 INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
 INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
+INC += rte_bitops.h
 
 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
 GENERIC_INC += rte_memcpy.h rte_cpuflags.h
diff --git a/lib/librte_eal/common/include/rte_bitops.h b/lib/librte_eal/common/include/rte_bitops.h
new file mode 100644
index 000000000..16c0a23f7
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_bitops.h
@@ -0,0 +1,474 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_BITOPS_H_
+#define _RTE_BITOPS_H_
+
+/**
+ * @file
+ * Bit Operations
+ *
+ * This file defines a API for bit operations without/with memory ordering.
+ */
+
+#include <stdint.h>
+#include <assert.h>
+#include <rte_compat.h>
+
+/*---------------------------- 32 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 32-bit value without memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_get_bit32_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 32-bit value to 1 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit32_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 32-bit value to 0 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit32_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then set it to 1 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_set_bit32_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then clear it to 0 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_clear_bit32_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 32-bit value with memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_get_bit32(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 32-bit value to 1 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit32(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 32-bit value to 0 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit32(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then set it to 1 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_set_bit32(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then clear it to 0 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_clear_bit32(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/*---------------------------- 64 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 64-bit value without memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_get_bit64_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 64-bit value to 1 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit64_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 64-bit value to 0 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit64_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then set it to 1 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_set_bit64_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then clear it to 0 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_clear_bit64_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 64-bit value with memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_get_bit64(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 64-bit value to 1 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit64(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 64-bit value to 0 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit64(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then set it to 1 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_set_bit64(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then clear it to 0 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_clear_bit64(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+#endif /* _RTE_BITOPS_H_ */
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index d6a149bec..e2f9c163c 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -52,6 +52,7 @@ common_headers = files(
 	'include/rte_alarm.h',
 	'include/rte_branch_prediction.h',
 	'include/rte_bus.h',
+	'include/rte_bitops.h',
 	'include/rte_bitmap.h',
 	'include/rte_class.h',
 	'include/rte_common.h',
-- 
2.17.1


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v3 2/6] test/bitops: add bit operation test case
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (14 preceding siblings ...)
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-11-18 10:06 ` Joyce Kong
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
                   ` (52 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-18 10:06 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Add test cases for set bit, clear bit, test and set bit,
test and clear bit operations.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 app/test/Makefile         |   1 +
 app/test/autotest_data.py |   6 +
 app/test/meson.build      |   2 +
 app/test/test_bitops.c    | 303 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 312 insertions(+)
 create mode 100644 app/test/test_bitops.c

diff --git a/app/test/Makefile b/app/test/Makefile
index 57930c00b..4f3327492 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -70,6 +70,7 @@ SRCS-y += test_ticketlock.c
 SRCS-y += test_memory.c
 SRCS-y += test_memzone.c
 SRCS-y += test_bitmap.c
+SRCS-y += test_bitops.c
 SRCS-y += test_reciprocal_division.c
 SRCS-y += test_reciprocal_division_perf.c
 SRCS-y += test_fbarray.c
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 6deb97bcc..e06344087 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -404,6 +404,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Bitops autotest",
+        "Command": "bitops_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Hash multiwriter autotest",
         "Command": "hash_multiwriter_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index ff59c3131..33b41353c 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -14,6 +14,7 @@ test_sources = files('commands.c',
 	'test_atomic.c',
 	'test_barrier.c',
 	'test_bitratestats.c',
+	'test_bitops.c',
 	'test_bpf.c',
 	'test_byteorder.c',
 	'test_cmdline.c',
@@ -167,6 +168,7 @@ fast_test_names = [
         'alarm_autotest',
         'atomic_autotest',
         'byteorder_autotest',
+        'bitops_autotest',
         'cmdline_autotest',
         'common_autotest',
         'cpuflags_autotest',
diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
new file mode 100644
index 000000000..9d0ac0299
--- /dev/null
+++ b/app/test/test_bitops.c
@@ -0,0 +1,303 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#include <rte_bitops.h>
+#include <rte_launch.h>
+#include "test.h"
+
+unsigned long val32 = 1UL << 10;
+unsigned long val64 = 1UL << 33;
+unsigned int synchro;
+unsigned int count32;
+unsigned int count64;
+
+#define MAX_BITS_32 32
+#define MAX_BITS_64 64
+/*
+ * Bitops functions
+ * ================
+ *
+ * - The main test function performs several subtests.
+ * - For relaxed version, check bit operations on one core.
+ *   - Initialize valXX to specified values, then set each bit of valXX
+ *     to 1 one by one in "test_bitops_set_relaxed".
+ *
+ *   - Clear each bit of valXX to 0 one by one in "test_bitops_clear_relaxed".
+ *
+ *   - Function "test_bitops_test_set_clear_relaxed" checks whether each bit
+ *     of valXX can do "test and set" and "test and clear" correctly.
+ *
+ * - For C11 atomic barrier version, check bit operations on multiple cores.
+ *   - Each bit of valXX is set to 1, then cleared to 0, on every core in
+ *     "test_bitops_set_clear". The main test function checks that once all
+ *     lcores finish their set_clear, the value of valXX is still zero.
+ *
+ *   - The cores wait for "synchro", which is triggered by the main test
+ *     function. Then all cores call "rte_test_and_set_bitXX" or
+ *     "rte_test_and_clear_bitXX" at the same time; "countXX", which is checked
+ *     as the result later, is incremented by one or not according to the
+ *     original bit value.
+ *
+ */
+
+static int
+test_bitops_set_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_set_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_set_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_clear_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_clear_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_clear_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_test_set_clear_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_test_and_set_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_test_and_clear_bit32_relaxed(i, &val32)) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+	}
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_test_and_set_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_test_and_clear_bit64_relaxed(i, &val64)) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_set_clear(__attribute__((unused)) void *arg)
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		;
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_set_bit32(i, &val32);
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_clear_bit32(i, &val32);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_set_bit64(i, &val64);
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_clear_bit64(i, &val64);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * rte_test_and_set_bitXX() returns the original bit value, then sets it to 1.
+ * This function checks the target bit: if it is equal to 0, it is set to 1 and
+ * "countXX" is increased by one. If it is equal to 1, "countXX" is left
+ * unchanged. The value of "countXX" is checked as the result later.
+ */
+static int
+test_bitops_test_set(__attribute__((unused)) void *arg)
+
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		;
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_test_and_set_bit32(i, &val32))
+			__atomic_fetch_add(&count32, 1, __ATOMIC_ACQ_REL);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_test_and_set_bit64(i, &val64))
+			__atomic_fetch_add(&count64, 1, __ATOMIC_ACQ_REL);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * rte_test_and_clear_bitXX() returns the original bit value, then clears it to
+ * 0. This function checks the target bit: if it is equal to 1, it is cleared
+ * to 0 and "countXX" is increased by one. If it is equal to 0, "countXX" is
+ * left unchanged. The value of "countXX" is checked as the result later.
+ */
+static int
+test_bitops_test_clear(__attribute__((unused)) void *arg)
+
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		;
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_test_and_clear_bit32(i, &val32))
+			__atomic_fetch_add(&count32, 1, __ATOMIC_ACQ_REL);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_test_and_clear_bit64(i, &val64))
+			__atomic_fetch_add(&count64, 1, __ATOMIC_ACQ_REL);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops(void)
+{
+	 __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+	 __atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	 __atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	if (test_bitops_set_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_test_set_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+
+	rte_eal_mp_remote_launch(test_bitops_set_clear, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32(i, &val32)) {
+			printf("Failed to set and clear bit on multi cores.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64(i, &val64)) {
+			printf("Failed to set and clear bit on multi cores.\n");
+			return TEST_FAILED;
+		}
+
+	/*
+	 * Launch all slave lcores to call "rte_test_and_set_bitXX"
+	 * concurrently.
+	 * Each lcore gets MAX_BITS_XX chances to check a target bit.
+	 * If the bit is 0, it is set to 1 and "countXX" (which is initialized
+	 * to 0) is increased by one. If the target bit is 1, it stays 1 and
+	 * "countXX" is left unchanged. For each bit, only one lcore should
+	 * find it equal to 0.
+	 * If the final value of "countXX" is equal to MAX_BITS_XX, all slave
+	 * lcores performed "rte_test_and_set_bitXX" correctly.
+	 */
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	rte_eal_mp_remote_launch(test_bitops_test_set, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	if (__atomic_load_n(&count32, __ATOMIC_RELAXED) != MAX_BITS_32) {
+		printf("Failed to test and set on multi cores.\n");
+		return TEST_FAILED;
+	}
+	if (__atomic_load_n(&count64, __ATOMIC_RELAXED) != MAX_BITS_64) {
+		printf("Failed to test and set on multi cores.\n");
+		return TEST_FAILED;
+	}
+
+	/*
+	 * Launch all slave lcores to call "rte_test_and_clear_bitXX"
+	 * concurrently.
+	 * Each lcore gets MAX_BITS_XX chances to check a target bit.
+	 * If the bit is 1, it is cleared to 0 and "countXX" (which is
+	 * initialized to 0) is increased by one. If the target bit is 0, it
+	 * stays 0 and "countXX" is left unchanged. For each bit, only one
+	 * lcore should find it equal to 1.
+	 * If the final value of "countXX" is equal to MAX_BITS_XX, all slave
+	 * lcores performed "rte_test_and_clear_bitXX" correctly.
+	 */
+
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	rte_eal_mp_remote_launch(test_bitops_test_clear, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	if (__atomic_load_n(&count32, __ATOMIC_RELAXED) != MAX_BITS_32) {
+		printf("Failed to test and clear on multi cores.\n");
+		return TEST_FAILED;
+	}
+	if (__atomic_load_n(&count64, __ATOMIC_RELAXED) != MAX_BITS_64) {
+		printf("Failed to test and clear on multi cores.\n");
+		return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(bitops_autotest, test_bitops);
-- 
2.17.1


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v3 3/6] net/axgbe: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (15 preceding siblings ...)
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 2/6] test/bitops: add bit operation test case Joyce Kong
@ 2019-11-18 10:06 ` Joyce Kong
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 4/6] net/bnx2x: " Joyce Kong
                   ` (51 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-18 10:06 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's own bit operation APIs and use the common ones
instead; this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/axgbe/axgbe_common.h | 29 +----------------------------
 drivers/net/axgbe/axgbe_ethdev.c | 14 +++++++-------
 drivers/net/axgbe/axgbe_mdio.c   | 14 +++++++-------
 3 files changed, 15 insertions(+), 42 deletions(-)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index 34f60f156..9cabda875 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -22,6 +22,7 @@
 #include <pthread.h>
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memory.h>
 #include <rte_malloc.h>
 #include <rte_hexdump.h>
@@ -1674,34 +1675,6 @@ do {									\
 #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)
 #define time_before_eq(a, b)	time_after_eq(b, a)
 
-/*---bitmap support apis---*/
-static inline int axgbe_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
-static inline void axgbe_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void axgbe_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int axgbe_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 static inline unsigned long msecs_to_timer_cycles(unsigned int m)
 {
 	return rte_get_timer_hz() * (m / 1000);
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index d1f160e79..c3744bbf9 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -201,8 +201,8 @@ axgbe_dev_start(struct rte_eth_dev *dev)
 	axgbe_dev_enable_tx(dev);
 	axgbe_dev_enable_rx(dev);
 
-	axgbe_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
-	axgbe_clear_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_clear_bit32(AXGBE_STOPPED, &pdata->dev_state);
+	rte_clear_bit32(AXGBE_DOWN, &pdata->dev_state);
 	return 0;
 }
 
@@ -216,17 +216,17 @@ axgbe_dev_stop(struct rte_eth_dev *dev)
 
 	rte_intr_disable(&pdata->pci_dev->intr_handle);
 
-	if (axgbe_test_bit(AXGBE_STOPPED, &pdata->dev_state))
+	if (rte_get_bit32(AXGBE_STOPPED, &pdata->dev_state))
 		return;
 
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit32(AXGBE_STOPPED, &pdata->dev_state);
 	axgbe_dev_disable_tx(dev);
 	axgbe_dev_disable_rx(dev);
 
 	pdata->phy_if.phy_stop(pdata);
 	pdata->hw_if.exit(pdata);
 	memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit32(AXGBE_DOWN, &pdata->dev_state);
 }
 
 /* Clear all resources like TX/RX queues. */
@@ -598,8 +598,8 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
 
 	pdata = eth_dev->data->dev_private;
 	/* initial state */
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit32(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit32(AXGBE_STOPPED, &pdata->dev_state);
 	pdata->eth_dev = eth_dev;
 
 	pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
diff --git a/drivers/net/axgbe/axgbe_mdio.c b/drivers/net/axgbe/axgbe_mdio.c
index 2721e5cc9..af7a1ecb7 100644
--- a/drivers/net/axgbe/axgbe_mdio.c
+++ b/drivers/net/axgbe/axgbe_mdio.c
@@ -743,7 +743,7 @@ static int __axgbe_phy_config_aneg(struct axgbe_port *pdata)
 {
 	int ret;
 
-	axgbe_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+	rte_set_bit32(AXGBE_LINK_INIT, &pdata->dev_state);
 	pdata->link_check = rte_get_timer_cycles();
 
 	ret = pdata->phy_if.phy_impl.an_config(pdata);
@@ -807,9 +807,9 @@ static int axgbe_phy_config_aneg(struct axgbe_port *pdata)
 
 	ret = __axgbe_phy_config_aneg(pdata);
 	if (ret)
-		axgbe_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_set_bit32(AXGBE_LINK_ERR, &pdata->dev_state);
 	else
-		axgbe_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_clear_bit32(AXGBE_LINK_ERR, &pdata->dev_state);
 
 	pthread_mutex_unlock(&pdata->an_mutex);
 
@@ -880,7 +880,7 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 	unsigned int link_aneg;
 	int an_restart;
 
-	if (axgbe_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
+	if (rte_get_bit32(AXGBE_LINK_ERR, &pdata->dev_state)) {
 		pdata->phy.link = 0;
 		goto adjust_link;
 	}
@@ -900,10 +900,10 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 			return;
 		}
 		axgbe_phy_status_result(pdata);
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
-			axgbe_clear_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+		if (rte_get_bit32(AXGBE_LINK_INIT, &pdata->dev_state))
+			rte_clear_bit32(AXGBE_LINK_INIT, &pdata->dev_state);
 	} else {
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
+		if (rte_get_bit32(AXGBE_LINK_INIT, &pdata->dev_state)) {
 			axgbe_check_link_timeout(pdata);
 
 			if (link_aneg)
-- 
2.17.1


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v3 4/6] net/bnx2x: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (16 preceding siblings ...)
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
@ 2019-11-18 10:06 ` " Joyce Kong
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 5/6] net/qede: " Joyce Kong
                   ` (50 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-18 10:06 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's own bit operation APIs and use the common ones
instead; this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/bnx2x/bnx2x.c    | 209 ++++++++++++++++-------------------
 drivers/net/bnx2x/bnx2x.h    |   4 -
 drivers/net/bnx2x/ecore_sp.h |   9 +-
 3 files changed, 98 insertions(+), 124 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c
index ed31335ac..979eed988 100644
--- a/drivers/net/bnx2x/bnx2x.c
+++ b/drivers/net/bnx2x/bnx2x.c
@@ -26,6 +26,7 @@
 #include <fcntl.h>
 #include <zlib.h>
 #include <rte_string_fns.h>
+#include <rte_bitops.h>
 
 #define BNX2X_PMD_VER_PREFIX "BNX2X PMD"
 #define BNX2X_PMD_VERSION_MAJOR 1
@@ -129,32 +130,6 @@ static void bnx2x_ack_sb(struct bnx2x_softc *sc, uint8_t igu_sb_id,
 			 uint8_t storm, uint16_t index, uint8_t op,
 			 uint8_t update);
 
-int bnx2x_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	mb();
-	return res;
-}
-
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-void bnx2x_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-int bnx2x_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 int bnx2x_cmpxchg(volatile int *addr, int old, int new)
 {
 	return __sync_val_compare_and_swap(addr, old, new);
@@ -1427,11 +1402,11 @@ bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj,
 
 	/* wait for completion of requested */
 	if (wait_for_comp) {
-		bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+		rte_set_bit32(RAMROD_COMP_WAIT, &ramrod_flags);
 	}
 
 	/* Set the mac type of addresses we want to clear */
-	bnx2x_set_bit(mac_type, &vlan_mac_flags);
+	rte_set_bit32(mac_type, &vlan_mac_flags);
 
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc < 0)
@@ -1458,26 +1433,26 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		break;
 
 	case BNX2X_RX_MODE_NORMAL:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
 	case BNX2X_RX_MODE_ALLMULTI:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
@@ -1488,19 +1463,20 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		 * should receive matched and unmatched (in resolution of port)
 		 * unicast packets.
 		 */
-		bnx2x_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		if (IS_MF_SI(sc)) {
-			bnx2x_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags);
+			rte_set_bit32(ECORE_ACCEPT_ALL_UNICAST,
+					tx_accept_flags);
 		} else {
-			bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+			rte_set_bit32(ECORE_ACCEPT_UNICAST, tx_accept_flags);
 		}
 
 		break;
@@ -1512,8 +1488,8 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 
 	/* Set ACCEPT_ANY_VLAN as we do not enable filtering by VLAN */
 	if (rx_mode != BNX2X_RX_MODE_NONE) {
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
 	}
 
 	return 0;
@@ -1542,7 +1518,7 @@ bnx2x_set_q_rx_mode(struct bnx2x_softc *sc, uint8_t cl_id,
 	ramrod_param.rdata = BNX2X_SP(sc, rx_mode_rdata);
 	ramrod_param.rdata_mapping =
 	    (rte_iova_t)BNX2X_SP_MAPPING(sc, rx_mode_rdata),
-	    bnx2x_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	    rte_set_bit32(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	ramrod_param.ramrod_flags = ramrod_flags;
 	ramrod_param.rx_mode_flags = rx_mode_flags;
@@ -1571,9 +1547,9 @@ int bnx2x_set_storm_rx_mode(struct bnx2x_softc *sc)
 		return rc;
 	}
 
-	bnx2x_set_bit(RAMROD_RX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_TX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit32(RAMROD_RX, &ramrod_flags);
+	rte_set_bit32(RAMROD_TX, &ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	return bnx2x_set_q_rx_mode(sc, sc->fp[0].cl_id, rx_mode_flags,
 				 rx_accept_flags, tx_accept_flags,
@@ -1698,7 +1674,7 @@ static int bnx2x_func_wait_started(struct bnx2x_softc *sc)
 			    "Forcing STARTED-->TX_STOPPED-->STARTED");
 
 		func_params.f_obj = &sc->func_obj;
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit32(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 
 		/* STARTED-->TX_STOPPED */
 		func_params.cmd = ECORE_F_CMD_TX_STOP;
@@ -1722,7 +1698,7 @@ static int bnx2x_stop_queue(struct bnx2x_softc *sc, int index)
 
 	q_params.q_obj = &sc->sp_objs[fp->index].q_obj;
 	/* We want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* Stop the primary connection: */
 
@@ -1783,7 +1759,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	int rc;
 
 	/* prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_STOP;
 
@@ -1797,7 +1773,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	if (rc) {
 		PMD_DRV_LOG(NOTICE, sc, "FUNC_STOP ramrod failed. "
 			    "Running a dry transaction");
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit32(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 		return ecore_func_state_change(sc, &func_params);
 	}
 
@@ -1809,7 +1785,7 @@ static int bnx2x_reset_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	struct ecore_func_state_params func_params = { NULL };
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_RESET;
@@ -1866,11 +1842,10 @@ bnx2x_chip_cleanup(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_li
 	 * a race between the completion code and this code.
 	 */
 
-	if (bnx2x_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) {
-		bnx2x_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
-	} else {
+	if (rte_get_bit32(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state))
+		rte_set_bit32(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
+	else
 		bnx2x_set_storm_rx_mode(sc);
-	}
 
 	/* Clean up multicast configuration */
 	rparam.mcast_obj = &sc->mcast_obj;
@@ -1960,12 +1935,12 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Cleanup MACs' object first... */
 
 	/* Wait for completion of requested */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &ramrod_flags);
 	/* Perform a dry cleanup */
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
+	rte_set_bit32(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
 
 	/* Clean ETH primary MAC */
-	bnx2x_set_bit(ECORE_ETH_MAC, &vlan_mac_flags);
+	rte_set_bit32(ECORE_ETH_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, &sc->sp_objs->mac_obj, &vlan_mac_flags,
 				 &ramrod_flags);
 	if (rc != 0) {
@@ -1974,7 +1949,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 
 	/* Cleanup UC list */
 	vlan_mac_flags = 0;
-	bnx2x_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
+	rte_set_bit32(ECORE_UC_LIST_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc != 0) {
 		PMD_DRV_LOG(NOTICE, sc,
@@ -1984,7 +1959,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Now clean mcast object... */
 
 	rparam.mcast_obj = &sc->mcast_obj;
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
+	rte_set_bit32(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
 
 	/* Add a DEL command... */
 	rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL);
@@ -4288,7 +4263,7 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 	struct ecore_vlan_mac_obj *vlan_mac_obj;
 
 	/* always push next commands out, don't wait here */
-	bnx2x_set_bit(RAMROD_CONT, &ramrod_flags);
+	rte_set_bit32(RAMROD_CONT, &ramrod_flags);
 
 	switch (le32toh(elem->message.data.eth_event.echo) >> BNX2X_SWCID_SHIFT) {
 	case ECORE_FILTER_MAC_PENDING:
@@ -4319,12 +4294,12 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 
 static void bnx2x_handle_rx_mode_eqe(struct bnx2x_softc *sc)
 {
-	bnx2x_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	rte_clear_bit32(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	/* send rx_mode command again if was requested */
-	if (bnx2x_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) {
+	if (rte_test_and_clear_bit32(ECORE_FILTER_RX_MODE_SCHED,
+						&sc->sp_state))
 		bnx2x_set_storm_rx_mode(sc);
-	}
 }
 
 static void bnx2x_update_eq_prod(struct bnx2x_softc *sc, uint16_t prod)
@@ -4693,7 +4668,7 @@ static int bnx2x_init_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	PMD_INIT_FUNC_TRACE(sc);
 
 	/* prepare the parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_INIT;
@@ -4988,8 +4963,8 @@ static void bnx2x_init_eth_fp(struct bnx2x_softc *sc, int idx)
 	bnx2x_update_fp_sb_idx(fp);
 
 	/* Configure Queue State object */
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_RX, &q_type);
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_TX, &q_type);
+	rte_set_bit32(ECORE_Q_TYPE_HAS_RX, &q_type);
+	rte_set_bit32(ECORE_Q_TYPE_HAS_TX, &q_type);
 
 	ecore_init_queue_obj(sc,
 			     &sc->sp_objs[idx].q_obj,
@@ -5803,7 +5778,7 @@ static int bnx2x_func_start(struct bnx2x_softc *sc)
 	    &func_params.params.start;
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_START;
@@ -6379,11 +6354,11 @@ bnx2x_pf_q_prep_init(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
 	uint8_t cos;
 	int cxt_index, cxt_offset;
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags);
+	rte_set_bit32(ECORE_Q_FLG_HC, &init_params->rx.flags);
+	rte_set_bit32(ECORE_Q_FLG_HC, &init_params->tx.flags);
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
+	rte_set_bit32(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
+	rte_set_bit32(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
 
 	/* HC rate */
 	init_params->rx.hc_rate =
@@ -6417,7 +6392,7 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	unsigned long flags = 0;
 
 	/* PF driver will always initialize the Queue to an ACTIVE state */
-	bnx2x_set_bit(ECORE_Q_FLG_ACTIVE, &flags);
+	rte_set_bit32(ECORE_Q_FLG_ACTIVE, &flags);
 
 	/*
 	 * tx only connections collect statistics (on the same index as the
@@ -6425,9 +6400,9 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * connection is initialized.
 	 */
 
-	bnx2x_set_bit(ECORE_Q_FLG_STATS, &flags);
+	rte_set_bit32(ECORE_Q_FLG_STATS, &flags);
 	if (zero_stats) {
-		bnx2x_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags);
+		rte_set_bit32(ECORE_Q_FLG_ZERO_STATS, &flags);
 	}
 
 	/*
@@ -6435,10 +6410,10 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * CoS-ness doesn't survive the loopback
 	 */
 	if (sc->flags & BNX2X_TX_SWITCHING) {
-		bnx2x_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags);
+		rte_set_bit32(ECORE_Q_FLG_TX_SWITCH, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
+	rte_set_bit32(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
 
 	return flags;
 }
@@ -6448,15 +6423,15 @@ static unsigned long bnx2x_get_q_flags(struct bnx2x_softc *sc, uint8_t leading)
 	unsigned long flags = 0;
 
 	if (IS_MF_SD(sc)) {
-		bnx2x_set_bit(ECORE_Q_FLG_OV, &flags);
+		rte_set_bit32(ECORE_Q_FLG_OV, &flags);
 	}
 
 	if (leading) {
-		bnx2x_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags);
-		bnx2x_set_bit(ECORE_Q_FLG_MCAST, &flags);
+		rte_set_bit32(ECORE_Q_FLG_LEADING_RSS, &flags);
+		rte_set_bit32(ECORE_Q_FLG_MCAST, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_VLAN, &flags);
+	rte_set_bit32(ECORE_Q_FLG_VLAN, &flags);
 
 	/* merge with common flags */
 	return flags | bnx2x_get_common_flags(sc, TRUE);
@@ -6577,7 +6552,7 @@ bnx2x_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, uint8_t lea
 	q_params.q_obj = &BNX2X_SP_OBJ(sc, fp).q_obj;
 
 	/* we want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* prepare the INIT parameters */
 	bnx2x_pf_q_prep_init(sc, fp, &q_params.params.init);
@@ -6645,20 +6620,20 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 
 	params.rss_obj = rss_obj;
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &params.ramrod_flags);
 
-	bnx2x_set_bit(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
+	rte_set_bit32(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
 
 	/* RSS configuration */
-	bnx2x_set_bit(ECORE_RSS_IPV4, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV4_TCP, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6_TCP, &params.rss_flags);
+	rte_set_bit32(ECORE_RSS_IPV4, &params.rss_flags);
+	rte_set_bit32(ECORE_RSS_IPV4_TCP, &params.rss_flags);
+	rte_set_bit32(ECORE_RSS_IPV6, &params.rss_flags);
+	rte_set_bit32(ECORE_RSS_IPV6_TCP, &params.rss_flags);
 	if (rss_obj->udp_rss_v4) {
-		bnx2x_set_bit(ECORE_RSS_IPV4_UDP, &params.rss_flags);
+		rte_set_bit32(ECORE_RSS_IPV4_UDP, &params.rss_flags);
 	}
 	if (rss_obj->udp_rss_v6) {
-		bnx2x_set_bit(ECORE_RSS_IPV6_UDP, &params.rss_flags);
+		rte_set_bit32(ECORE_RSS_IPV6_UDP, &params.rss_flags);
 	}
 
 	/* Hash bits */
@@ -6673,7 +6648,7 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 			params.rss_key[i] = (uint32_t) rte_rand();
 		}
 
-		bnx2x_set_bit(ECORE_RSS_SET_SRCH, &params.rss_flags);
+		rte_set_bit32(ECORE_RSS_SET_SRCH, &params.rss_flags);
 	}
 
 	if (IS_PF(sc))
@@ -6730,11 +6705,11 @@ bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
 	ramrod_param.ramrod_flags = *ramrod_flags;
 
 	/* fill a user request section if needed */
-	if (!bnx2x_test_bit(RAMROD_CONT, ramrod_flags)) {
+	if (!rte_get_bit32(RAMROD_CONT, ramrod_flags)) {
 		rte_memcpy(ramrod_param.user_req.u.mac.mac, mac,
 				 ETH_ALEN);
 
-		bnx2x_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags);
+		rte_set_bit32(mac_type, &ramrod_param.user_req.vlan_mac_flags);
 
 /* Set the command: ADD or DEL */
 		ramrod_param.user_req.cmd = (set) ? ECORE_VLAN_MAC_ADD :
@@ -6761,7 +6736,7 @@ static int bnx2x_set_eth_mac(struct bnx2x_softc *sc, uint8_t set)
 
 	PMD_DRV_LOG(DEBUG, sc, "Adding Ethernet MAC");
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	/* Eth MAC is set on RSS leading client (fp[0]) */
 	return bnx2x_set_mac_one(sc, sc->link_params.mac_addr,
@@ -6893,24 +6868,26 @@ bnx2x_fill_report_data(struct bnx2x_softc *sc, struct bnx2x_link_report_data *da
 
 	/* Link is down */
 	if (!sc->link_vars.link_up || (sc->flags & BNX2X_MF_FUNC_DIS)) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+		rte_set_bit32(BNX2X_LINK_REPORT_LINK_DOWN,
 			    &data->link_report_flags);
 	}
 
 	/* Full DUPLEX */
 	if (sc->link_vars.duplex == DUPLEX_FULL) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		rte_set_bit32(BNX2X_LINK_REPORT_FULL_DUPLEX,
 			    &data->link_report_flags);
 	}
 
 	/* Rx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_RX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
+		rte_set_bit32(BNX2X_LINK_REPORT_RX_FC_ON,
+				&data->link_report_flags);
 	}
 
 	/* Tx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
+		rte_set_bit32(BNX2X_LINK_REPORT_TX_FC_ON,
+				&data->link_report_flags);
 	}
 }
 
@@ -6929,9 +6906,9 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 
 	/* Don't report link down or exactly the same link status twice */
 	if (!memcmp(&cur_data, &sc->last_reported_link, sizeof(cur_data)) ||
-	    (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	    (rte_get_bit32(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &sc->last_reported_link.link_report_flags) &&
-	     bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	     rte_get_bit32(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &cur_data.link_report_flags))) {
 		return;
 	}
@@ -6946,14 +6923,14 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 	/* report new link params and remember the state for the next time */
 	rte_memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data));
 
-	if (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	if (rte_get_bit32(BNX2X_LINK_REPORT_LINK_DOWN,
 			 &cur_data.link_report_flags)) {
 		ELINK_DEBUG_P0(sc, "NIC Link is Down");
 	} else {
 		__rte_unused const char *duplex;
 		__rte_unused const char *flow;
 
-		if (bnx2x_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		if (rte_test_and_clear_bit32(BNX2X_LINK_REPORT_FULL_DUPLEX,
 					   &cur_data.link_report_flags)) {
 			duplex = "full";
 				ELINK_DEBUG_P0(sc, "link set to full duplex");
@@ -6968,19 +6945,19 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
  * enabled.
  */
 		if (cur_data.link_report_flags) {
-			if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			if (rte_get_bit32(BNX2X_LINK_REPORT_RX_FC_ON,
 					 &cur_data.link_report_flags) &&
-			    bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+			    rte_get_bit32(BNX2X_LINK_REPORT_TX_FC_ON,
 					 &cur_data.link_report_flags)) {
 				flow = "ON - receive & transmit";
-			} else if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (rte_get_bit32(BNX2X_LINK_REPORT_RX_FC_ON,
 						&cur_data.link_report_flags) &&
-				   !bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   !rte_get_bit32(BNX2X_LINK_REPORT_TX_FC_ON,
 						 &cur_data.link_report_flags)) {
 				flow = "ON - receive";
-			} else if (!bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (!rte_get_bit32(BNX2X_LINK_REPORT_RX_FC_ON,
 						 &cur_data.link_report_flags) &&
-				   bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   rte_get_bit32(BNX2X_LINK_REPORT_TX_FC_ON,
 						&cur_data.link_report_flags)) {
 				flow = "ON - transmit";
 			} else {
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 3383c7675..e6e66e870 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -1809,10 +1809,6 @@ static const uint32_t dmae_reg_go_c[] = {
 #define PCI_PM_D0    1
 #define PCI_PM_D3hot 2
 
-int  bnx2x_test_bit(int nr, volatile unsigned long * addr);
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long * addr);
-void bnx2x_clear_bit(int nr, volatile unsigned long * addr);
-int  bnx2x_test_and_clear_bit(int nr, volatile unsigned long * addr);
 int  bnx2x_cmpxchg(volatile int *addr, int old, int new);
 
 int bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size,
diff --git a/drivers/net/bnx2x/ecore_sp.h b/drivers/net/bnx2x/ecore_sp.h
index cc1db377a..59a2e8e39 100644
--- a/drivers/net/bnx2x/ecore_sp.h
+++ b/drivers/net/bnx2x/ecore_sp.h
@@ -15,6 +15,7 @@
 #define ECORE_SP_H
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 #ifndef __LITTLE_ENDIAN
@@ -73,10 +74,10 @@ typedef rte_spinlock_t ECORE_MUTEX_SPIN;
 #define ECORE_SET_BIT_NA(bit, var)         (*var |= (1 << bit))
 #define ECORE_CLEAR_BIT_NA(bit, var)       (*var &= ~(1 << bit))
 
-#define ECORE_TEST_BIT(bit, var)           bnx2x_test_bit(bit, var)
-#define ECORE_SET_BIT(bit, var)            bnx2x_set_bit(bit, var)
-#define ECORE_CLEAR_BIT(bit, var)          bnx2x_clear_bit(bit, var)
-#define ECORE_TEST_AND_CLEAR_BIT(bit, var) bnx2x_test_and_clear_bit(bit, var)
+#define ECORE_TEST_BIT(bit, var)           rte_get_bit32(bit, var)
+#define ECORE_SET_BIT(bit, var)            rte_set_bit32(bit, var)
+#define ECORE_CLEAR_BIT(bit, var)          rte_clear_bit32(bit, var)
+#define ECORE_TEST_AND_CLEAR_BIT(bit, var) rte_test_and_clear_bit32(bit, var)
 
 #define atomic_load_acq_int                (int)*
 #define atomic_store_rel_int(a, v)         (*a = v)
-- 
2.17.1


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v3 5/6] net/qede: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (17 preceding siblings ...)
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 4/6] net/bnx2x: " Joyce Kong
@ 2019-11-18 10:06 ` " Joyce Kong
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 6/6] net/hinic: " Joyce Kong
                   ` (49 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-18 10:06 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/qede/base/bcm_osal.c | 20 --------------------
 drivers/net/qede/base/bcm_osal.h | 10 ++++------
 2 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 48d016e24..19457d7c0 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -46,26 +46,6 @@ u32 qede_osal_log2(u32 val)
 	return log;
 }
 
-inline void qede_set_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-inline void qede_clr_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-inline bool qede_test_bit(u32 nr, unsigned long *addr)
-{
-	bool res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
 static inline u32 qede_ffb(unsigned long word)
 {
 	unsigned long first_bit;
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 0f09557cf..d2975c8cd 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -8,6 +8,7 @@
 #define __BCM_OSAL_H
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_spinlock.h>
 #include <rte_malloc.h>
 #include <rte_atomic.h>
@@ -311,17 +312,14 @@ typedef struct osal_list_t {
 #define OSAL_BITS_PER_UL_MASK		(OSAL_BITS_PER_UL - 1)
 
 /* Bitops */
-void qede_set_bit(u32, unsigned long *);
 #define OSAL_SET_BIT(bit, bitmap) \
-	qede_set_bit(bit, bitmap)
+	rte_set_bit32(bit, bitmap)
 
-void qede_clr_bit(u32, unsigned long *);
 #define OSAL_CLEAR_BIT(bit, bitmap) \
-	qede_clr_bit(bit, bitmap)
+	rte_clear_bit32(bit, bitmap)
 
-bool qede_test_bit(u32, unsigned long *);
 #define OSAL_TEST_BIT(bit, bitmap) \
-	qede_test_bit(bit, bitmap)
+	rte_get_bit32(bit, bitmap)
 
 u32 qede_find_first_bit(unsigned long *, u32);
 #define OSAL_FIND_FIRST_BIT(bitmap, length) \
-- 
2.17.1


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v3 6/6] net/hinic: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (18 preceding siblings ...)
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 5/6] net/qede: " Joyce Kong
@ 2019-11-18 10:06 ` " Joyce Kong
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (48 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-18 10:06 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/hinic/Makefile            |  1 +
 drivers/net/hinic/base/hinic_compat.h | 33 +--------------------------
 drivers/net/hinic/hinic_pmd_ethdev.c  | 16 ++++++-------
 drivers/net/hinic/meson.build         |  2 ++
 4 files changed, 12 insertions(+), 40 deletions(-)

diff --git a/drivers/net/hinic/Makefile b/drivers/net/hinic/Makefile
index b78fd8d53..97f429804 100644
--- a/drivers/net/hinic/Makefile
+++ b/drivers/net/hinic/Makefile
@@ -9,6 +9,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_pmd_hinic.a
 
 CFLAGS += -O3
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += $(WERROR_FLAGS)
 
 ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
diff --git a/drivers/net/hinic/base/hinic_compat.h b/drivers/net/hinic/base/hinic_compat.h
index e4a7f12d1..c0a0b3ea2 100644
--- a/drivers/net/hinic/base/hinic_compat.h
+++ b/drivers/net/hinic/base/hinic_compat.h
@@ -11,6 +11,7 @@
 #include <pthread.h>
 #include <rte_common.h>
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memzone.h>
 #include <rte_memcpy.h>
 #include <rte_malloc.h>
@@ -117,38 +118,6 @@ extern int hinic_logtype;
 
 #define HINIC_PAGE_SIZE_DPDK	6
 
-static inline int hinic_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	res = ((*addr) & (1UL << nr)) != 0;
-	return res;
-}
-
-static inline void hinic_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void hinic_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int hinic_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
-static inline int hinic_test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_or(addr, mask) & mask;
-}
-
 void *dma_zalloc_coherent(void *dev, size_t size, dma_addr_t *dma_handle,
 			  gfp_t flag);
 void *dma_zalloc_coherent_aligned(void *dev, size_t size,
diff --git a/drivers/net/hinic/hinic_pmd_ethdev.c b/drivers/net/hinic/hinic_pmd_ethdev.c
index 072fec339..313d8ff2d 100644
--- a/drivers/net/hinic/hinic_pmd_ethdev.c
+++ b/drivers/net/hinic/hinic_pmd_ethdev.c
@@ -269,7 +269,7 @@ static void hinic_dev_interrupt_handler(void *param)
 	struct rte_eth_dev *dev = param;
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (!hinic_test_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
+	if (!rte_get_bit32(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device's interrupt is disabled, ignore interrupt event, dev_name: %s, port_id: %d",
 			    nic_dev->proc_dev_name, dev->data->port_id);
 		return;
@@ -1075,7 +1075,7 @@ static int hinic_dev_start(struct rte_eth_dev *dev)
 	if (dev->data->dev_conf.intr_conf.lsc != 0)
 		(void)hinic_link_update(dev, 0);
 
-	hinic_set_bit(HINIC_DEV_START, &nic_dev->dev_status);
+	rte_set_bit32(HINIC_DEV_START, &nic_dev->dev_status);
 
 	return 0;
 
@@ -1200,7 +1200,7 @@ static void hinic_dev_stop(struct rte_eth_dev *dev)
 	name = dev->data->name;
 	port_id = dev->data->port_id;
 
-	if (!hinic_test_and_clear_bit(HINIC_DEV_START, &nic_dev->dev_status)) {
+	if (!rte_test_and_clear_bit32(HINIC_DEV_START, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(INFO, "Device %s already stopped", name);
 		return;
 	}
@@ -1245,7 +1245,7 @@ static void hinic_disable_interrupt(struct rte_eth_dev *dev)
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	int ret, retries = 0;
 
-	hinic_clear_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_clear_bit32(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* disable msix interrupt in hardware */
 	hinic_set_msix_state(nic_dev->hwdev, 0, HINIC_MSIX_DISABLE);
@@ -2844,7 +2844,7 @@ static void hinic_dev_close(struct rte_eth_dev *dev)
 {
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (hinic_test_and_set_bit(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
+	if (rte_test_and_set_bit32(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device %s already closed",
 			    dev->data->name);
 		return;
@@ -3045,7 +3045,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 			    eth_dev->data->name);
 		goto enable_intr_fail;
 	}
-	hinic_set_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_set_bit32(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* initialize filter info */
 	filter_info = &nic_dev->filter;
@@ -3057,7 +3057,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 	TAILQ_INIT(&nic_dev->filter_fdir_rule_list);
 	TAILQ_INIT(&nic_dev->hinic_flow_list);
 
-	hinic_set_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_set_bit32(HINIC_DEV_INIT, &nic_dev->dev_status);
 	PMD_DRV_LOG(INFO, "Initialize %s in primary successfully",
 		    eth_dev->data->name);
 
@@ -3113,7 +3113,7 @@ static int hinic_dev_uninit(struct rte_eth_dev *dev)
 	struct hinic_nic_dev *nic_dev;
 
 	nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
-	hinic_clear_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_clear_bit32(HINIC_DEV_INIT, &nic_dev->dev_status);
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
diff --git a/drivers/net/hinic/meson.build b/drivers/net/hinic/meson.build
index bc7e24639..8c7ee9dfc 100644
--- a/drivers/net/hinic/meson.build
+++ b/drivers/net/hinic/meson.build
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Huawei Technologies Co., Ltd
 
+allow_experimental_apis = true
+
 subdir('base')
 objs = [base_objs]
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bitoperation APIs
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-11-18 10:52   ` Morten Brørup
  2019-11-19  9:22     ` Joyce Kong (Arm Technology China)
  0 siblings, 1 reply; 139+ messages in thread
From: Morten Brørup @ 2019-11-18 10:52 UTC (permalink / raw)
  To: Joyce Kong, dev
  Cc: nd, thomas, jerinj, stephen, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> Sent: Monday, November 18, 2019 11:07 AM
> 

[snip]

> +++ b/lib/librte_eal/common/include/rte_bitops.h
> @@ -0,0 +1,474 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Arm Limited
> + */
> +
> +#ifndef _RTE_BITOPS_H_
> +#define _RTE_BITOPS_H_
> +
> +/**
> + * @file
> + * Bit Operations
> + *
> + * This file defines a API for bit operations without/with memory
> ordering.
> + */
> +
> +#include <stdint.h>
> +#include <assert.h>
> +#include <rte_compat.h>
> +
> +/*---------------------------- 32 bit operations ---------------------
> -------*/
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Get the target bit from a 32-bit value without memory ordering.
> + *
> + * @param nr
> + *   The target bit to get.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The target bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_get_bit32_relaxed(unsigned int nr, unsigned long *addr)
> +{
> +	assert(nr < 32);
> +
> +	uint32_t mask = 1UL << nr;
> +	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
> +}

Address pointer should be: uint32_t *addr.
Likewise in the other 32 bit functions.

Use RTE_ASSERT() instead of assert().
Likewise in all other functions.

When setting the mask, consider using UINT32_C(1) from <stdint.h> instead of 1UL.

[snip]

> +
> +/*---------------------------- 64 bit operations ---------------------
> -------*/
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Get the target bit from a 64-bit value without memory ordering.
> + *
> + * @param nr
> + *   The target bit to get.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The target bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_get_bit64_relaxed(unsigned int nr, unsigned long *addr)
> +{
> +	assert(nr < 64);
> +
> +	uint64_t mask = 1UL << nr;
> +	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
> +}

Address pointer should be: uint64_t *addr.
Likewise in the other 64 bit functions.

Mask should be 1ULL, not 1UL. Or use UINT64_C(1) from <stdint.h> instead.
Likewise in the other 64 bit functions.

[snip]


Med venlig hilsen / kind regards
- Morten Brørup




^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bitoperation APIs
  2019-11-18 10:52   ` [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bitoperation APIs Morten Brørup
@ 2019-11-19  9:22     ` Joyce Kong (Arm Technology China)
  0 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong (Arm Technology China) @ 2019-11-19  9:22 UTC (permalink / raw)
  To: Morten Brørup, dev
  Cc: nd, thomas, jerinj, stephen, david.marchand,
	Honnappa Nagarahalli, Gavin Hu (Arm Technology China),
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

Hi Morten,

> -----Original Message-----
> From: Morten Brørup <mb@smartsharesystems.com>
> Sent: Monday, November 18, 2019 6:52 PM
> To: Joyce Kong (Arm Technology China) <Joyce.Kong@arm.com>;
> dev@dpdk.org
> Cc: nd <nd@arm.com>; thomas@monjalon.net; jerinj@marvell.com;
> stephen@networkplumber.org; david.marchand@redhat.com; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology
> China) <Gavin.Hu@arm.com>; ravi1.kumar@amd.com; rmody@marvell.com;
> shshaikh@marvell.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com
> Subject: RE: [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte
> bitoperation APIs
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> > Sent: Monday, November 18, 2019 11:07 AM
> >
> 
> [snip]
> 
> > +++ b/lib/librte_eal/common/include/rte_bitops.h
> > @@ -0,0 +1,474 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2019 Arm Limited
> > + */
> > +
> > +#ifndef _RTE_BITOPS_H_
> > +#define _RTE_BITOPS_H_
> > +
> > +/**
> > + * @file
> > + * Bit Operations
> > + *
> > + * This file defines a API for bit operations without/with memory
> > ordering.
> > + */
> > +
> > +#include <stdint.h>
> > +#include <assert.h>
> > +#include <rte_compat.h>
> > +
> > +/*---------------------------- 32 bit operations
> > +---------------------
> > -------*/
> > +
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> > notice
> > + *
> > + * Get the target bit from a 32-bit value without memory ordering.
> > + *
> > + * @param nr
> > + *   The target bit to get.
> > + * @param addr
> > + *   The address holding the bit.
> > + * @return
> > + *   The target bit.
> > + */
> > +__rte_experimental
> > +static inline uint32_t
> > +rte_get_bit32_relaxed(unsigned int nr, unsigned long *addr) {
> > +	assert(nr < 32);
> > +
> > +	uint32_t mask = 1UL << nr;
> > +	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask; }
> 
> Address pointer should be: uint32_t *addr.
> Likewise in the other 32 bit functions.
> 
> Use RTE_ASSERT() instead of assert().
> Likewise in all other functions.
> 
> When setting the mask, consider using UINT32_C(1) from <stdint.h> instead
> of 1UL.
> 
> [snip]
> 
> > +
> > +/*---------------------------- 64 bit operations
> > +---------------------
> > -------*/
> > +
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> > notice
> > + *
> > + * Get the target bit from a 64-bit value without memory ordering.
> > + *
> > + * @param nr
> > + *   The target bit to get.
> > + * @param addr
> > + *   The address holding the bit.
> > + * @return
> > + *   The target bit.
> > + */
> > +__rte_experimental
> > +static inline uint64_t
> > +rte_get_bit64_relaxed(unsigned int nr, unsigned long *addr) {
> > +	assert(nr < 64);
> > +
> > +	uint64_t mask = 1UL << nr;
> > +	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask; }
> 
> Address pointer should be: uint64_t *addr.
> Likewise in the other 64 bit functions.
> 
> Mask should be 1ULL, not 1UL. Or use UINT64_C(1) from <stdint.h> instead.
> Likewise in the other 64 bit functions.
> 
> [snip]
> 
> 
> Med venlig hilsen / kind regards
> - Morten Brørup
> 
> 
Thanks! I shall address the above comments in patch v4 for both the 32-bit and 64-bit functions.


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v4 0/6] implement common rte bit operation APIs in PMDs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (19 preceding siblings ...)
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 6/6] net/hinic: " Joyce Kong
@ 2019-11-20 10:12 ` Joyce Kong
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
                   ` (47 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-20 10:12 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

There are a lot of bit operation functions scattered across PMDs.
Consolidate them into a common API family and apply it in the different
PMDs to reduce code duplication.

v4:
  Introduce uint32_t/uint64_t *addr when defining bit operation APIs (suggested by
  Morten Brørup).

v3:
  1. Change the API's header file back to rte_bitops.h, then implement both 32-bit and
     64-bit operations with and without C11 atomic memory ordering.
  2. Add a multi-core test case for the bit operations that are implemented with memory ordering.
  3. Modify the documentation of both the APIs and the test cases.

v2:
  1. Add doxygen comments for the rte bit operation API (suggested by Stephen Hemminger).
  2. Add test cases for the common rte bit operation API (suggested by Stephen Hemminger).
  3. Change the header file to rte_io_bitops.h and the operations to rte_io_set_bit() etc.,
     as the API uses barriers inside and the barriers are only needed for IO operations
     (suggested by Jerin Jacob).
  4. Use a well-defined uint_NN_t type (suggested by Morten Brørup).

Joyce Kong (6):
  lib/eal: implement the family of rte bit operation APIs
  test/bitops: add bit operation test case
  net/axgbe: use common rte bit operation APIs instead
  net/bnx2x: use common rte bit operation APIs instead
  net/qede: use common rte bit operation APIs instead
  net/hinic: use common rte bit operation APIs instead

 MAINTAINERS                                |   5 +
 app/test/Makefile                          |   1 +
 app/test/autotest_data.py                  |   6 +
 app/test/meson.build                       |   2 +
 app/test/test_bitops.c                     | 305 +++++++++++++++++++
 doc/api/doxy-api-index.md                  |   5 +-
 drivers/net/axgbe/axgbe_common.h           |  29 +-
 drivers/net/axgbe/axgbe_ethdev.c           |  14 +-
 drivers/net/axgbe/axgbe_mdio.c             |  14 +-
 drivers/net/bnx2x/bnx2x.c                  | 209 ++++++-------
 drivers/net/bnx2x/bnx2x.h                  |   4 -
 drivers/net/bnx2x/ecore_sp.h               |   9 +-
 drivers/net/hinic/Makefile                 |   1 +
 drivers/net/hinic/base/hinic_compat.h      |  33 +-
 drivers/net/hinic/hinic_pmd_ethdev.c       |  16 +-
 drivers/net/hinic/meson.build              |   2 +
 drivers/net/qede/base/bcm_osal.c           |  20 --
 drivers/net/qede/base/bcm_osal.h           |  10 +-
 lib/librte_eal/common/Makefile             |   1 +
 lib/librte_eal/common/include/rte_bitops.h | 474 +++++++++++++++++++++++++++++
 lib/librte_eal/common/meson.build          |   3 +-
 21 files changed, 928 insertions(+), 235 deletions(-)
 create mode 100644 app/test/test_bitops.c
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v4 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (20 preceding siblings ...)
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
@ 2019-11-20 10:12 ` Joyce Kong
  2019-11-20 13:40   ` [dpdk-dev] [PATCH v4 1/6] lib/eal: implement the family of rte bitoperation APIs Morten Brørup
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 2/6] test/bitops: add bit operation test case Joyce Kong
                   ` (46 subsequent siblings)
  68 siblings, 1 reply; 139+ messages in thread
From: Joyce Kong @ 2019-11-20 10:12 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

There are a lot of bit operation functions scattered and
duplicated across PMDs, so consolidating them into a common API
family is necessary. Furthermore, when a bit operation
is applied to IO devices, __ATOMIC_ACQ_REL is used to
ensure the ordering of the IO bit operation.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 MAINTAINERS                                |   5 +
 doc/api/doxy-api-index.md                  |   5 +-
 lib/librte_eal/common/Makefile             |   1 +
 lib/librte_eal/common/include/rte_bitops.h | 474 +++++++++++++++++++++++++++++
 lib/librte_eal/common/meson.build          |   3 +-
 5 files changed, 485 insertions(+), 3 deletions(-)
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

diff --git a/MAINTAINERS b/MAINTAINERS
index f2fdb93..4ee2712 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -233,6 +233,11 @@ M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
 F: lib/librte_eal/common/include/rte_bitmap.h
 F: app/test/test_bitmap.c
 
+Bitops
+M: Joyce Kong <joyce.kong@arm.com>
+F: lib/librte_eal/common/include/rte_bitops.h
+F: app/test/test_bitops.c
+
 MCSlock - EXPERIMENTAL
 M: Phil Yang <phil.yang@arm.com>
 F: lib/librte_eal/common/include/generic/rte_mcslock.h
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index dff496b..ade7c01 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -133,12 +133,13 @@ The public API headers are grouped by topics:
   [BPF]                (@ref rte_bpf.h)
 
 - **containers**:
+  [bitmap]             (@ref rte_bitmap.h),
+  [bitops]             (@ref rte_bitops.h),
   [mbuf]               (@ref rte_mbuf.h),
   [mbuf pool ops]      (@ref rte_mbuf_pool_ops.h),
   [ring]               (@ref rte_ring.h),
   [stack]              (@ref rte_stack.h),
-  [tailq]              (@ref rte_tailq.h),
-  [bitmap]             (@ref rte_bitmap.h)
+  [tailq]              (@ref rte_tailq.h)
 
 - **packet framework**:
   * [port]             (@ref rte_port.h):
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index c2c6d92..dd025c1 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -19,6 +19,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
 INC += rte_service.h rte_service_component.h
 INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
 INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
+INC += rte_bitops.h
 
 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
 GENERIC_INC += rte_memcpy.h rte_cpuflags.h
diff --git a/lib/librte_eal/common/include/rte_bitops.h b/lib/librte_eal/common/include/rte_bitops.h
new file mode 100644
index 0000000..34158d1
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_bitops.h
@@ -0,0 +1,474 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_BITOPS_H_
+#define _RTE_BITOPS_H_
+
+/**
+ * @file
+ * Bit Operations
+ *
+ * This file defines an API for bit operations without/with memory ordering.
+ */
+
+#include <stdint.h>
+#include <rte_debug.h>
+#include <rte_compat.h>
+
+/*---------------------------- 32 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically read the target bit of a 32-bit value with relaxed ordering.
+ *
+ * @param nr
+ *   The index of the bit to get; must be less than 32.
+ * @param addr
+ *   The address of the 32-bit value holding the bit.
+ * @return
+ *   0 if the bit is clear, otherwise non-zero (only bit nr set).
+ */
+__rte_experimental
+static inline uint32_t
+rte_get_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically set the target bit of a 32-bit value with relaxed ordering.
+ *
+ * @param nr
+ *   The index of the bit to set; must be less than 32.
+ * @param addr
+ *   The address of the 32-bit value holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically clear the target bit of a 32-bit value with relaxed ordering.
+ *
+ * @param nr
+ *   The index of the bit to clear; must be less than 32.
+ * @param addr
+ *   The address of the 32-bit value holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically set the target bit of a 32-bit value with relaxed ordering and
+ * report whether the bit was already set.
+ *
+ * @param nr
+ *   The index of the bit to get and set; must be less than 32.
+ * @param addr
+ *   The address of the 32-bit value holding the bit.
+ * @return
+ *   0 if the bit was previously clear, otherwise non-zero (only bit nr set).
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_set_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically clear the target bit of a 32-bit value with relaxed ordering and
+ * report whether the bit was set beforehand.
+ *
+ * @param nr
+ *   The index of the bit to get and clear; must be less than 32.
+ * @param addr
+ *   The address of the 32-bit value holding the bit.
+ * @return
+ *   0 if the bit was previously clear, otherwise non-zero (only bit nr set).
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_clear_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically read the target bit of a 32-bit value with acquire ordering.
+ *
+ * @param nr
+ *   The index of the bit to get; must be less than 32.
+ * @param addr
+ *   The address of the 32-bit value holding the bit.
+ * @return
+ *   0 if the bit is clear, otherwise non-zero (only bit nr set).
+ */
+__rte_experimental
+static inline uint32_t
+rte_get_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically set the target bit of a 32-bit value (acquire-release ordering).
+ *
+ * @param nr
+ *   The index of the bit to set; must be less than 32.
+ * @param addr
+ *   The address of the 32-bit value holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically clear the target bit of a 32-bit value (acquire-release ordering).
+ *
+ * @param nr
+ *   The index of the bit to clear; must be less than 32.
+ * @param addr
+ *   The address of the 32-bit value holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically set the target bit of a 32-bit value with acquire-release
+ * ordering and report whether the bit was already set.
+ *
+ * @param nr
+ *   The index of the bit to get and set; must be less than 32.
+ * @param addr
+ *   The address of the 32-bit value holding the bit.
+ * @return
+ *   0 if the bit was previously clear, otherwise non-zero (only bit nr set).
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_set_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically clear the target bit of a 32-bit value with acquire-release
+ * ordering and report whether the bit was set beforehand.
+ *
+ * @param nr
+ *   The index of the bit to get and clear; must be less than 32.
+ * @param addr
+ *   The address of the 32-bit value holding the bit.
+ * @return
+ *   0 if the bit was previously clear, otherwise non-zero (only bit nr set).
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_clear_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/*---------------------------- 64 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically read the target bit of a 64-bit value with relaxed ordering.
+ *
+ * @param nr
+ *   The index of the bit to get; must be less than 64.
+ * @param addr
+ *   The address of the 64-bit value holding the bit.
+ * @return
+ *   0 if the bit is clear, otherwise non-zero (only bit nr set).
+ */
+__rte_experimental
+static inline uint64_t
+rte_get_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically set the target bit of a 64-bit value with relaxed ordering.
+ *
+ * @param nr
+ *   The index of the bit to set; must be less than 64.
+ * @param addr
+ *   The address of the 64-bit value holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically clear the target bit of a 64-bit value with relaxed ordering.
+ *
+ * @param nr
+ *   The index of the bit to clear; must be less than 64.
+ * @param addr
+ *   The address of the 64-bit value holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically set the target bit of a 64-bit value with relaxed ordering and
+ * report whether the bit was already set.
+ *
+ * @param nr
+ *   The index of the bit to get and set; must be less than 64.
+ * @param addr
+ *   The address of the 64-bit value holding the bit.
+ * @return
+ *   0 if the bit was previously clear, otherwise non-zero (only bit nr set).
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_set_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically clear the target bit of a 64-bit value with relaxed ordering and
+ * report whether the bit was set beforehand.
+ *
+ * @param nr
+ *   The index of the bit to get and clear; must be less than 64.
+ * @param addr
+ *   The address of the 64-bit value holding the bit.
+ * @return
+ *   0 if the bit was previously clear, otherwise non-zero (only bit nr set).
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_clear_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically read the target bit of a 64-bit value with acquire ordering.
+ *
+ * @param nr
+ *   The index of the bit to get; must be less than 64.
+ * @param addr
+ *   The address of the 64-bit value holding the bit.
+ * @return
+ *   0 if the bit is clear, otherwise non-zero (only bit nr set).
+ */
+__rte_experimental
+static inline uint64_t
+rte_get_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically set the target bit of a 64-bit value (acquire-release ordering).
+ *
+ * @param nr
+ *   The index of the bit to set; must be less than 64.
+ * @param addr
+ *   The address of the 64-bit value holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically clear the target bit of a 64-bit value (acquire-release ordering).
+ *
+ * @param nr
+ *   The index of the bit to clear; must be less than 64.
+ * @param addr
+ *   The address of the 64-bit value holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically set the target bit of a 64-bit value with acquire-release
+ * ordering and report whether the bit was already set.
+ *
+ * @param nr
+ *   The index of the bit to get and set; must be less than 64.
+ * @param addr
+ *   The address of the 64-bit value holding the bit.
+ * @return
+ *   0 if the bit was previously clear, otherwise non-zero (only bit nr set).
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_set_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Atomically clear the target bit of a 64-bit value with acquire-release
+ * ordering and report whether the bit was set beforehand.
+ *
+ * @param nr
+ *   The index of the bit to get and clear; must be less than 64.
+ * @param addr
+ *   The address of the 64-bit value holding the bit.
+ * @return
+ *   0 if the bit was previously clear, otherwise non-zero (only bit nr set).
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_clear_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+#endif /* _RTE_BITOPS_H_ */
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index d6a149b..8a5197b 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -50,9 +50,10 @@ common_objs += eal_common_arch_objs
 
 common_headers = files(
 	'include/rte_alarm.h',
+	'include/rte_bitmap.h',
+	'include/rte_bitops.h',
 	'include/rte_branch_prediction.h',
 	'include/rte_bus.h',
-	'include/rte_bitmap.h',
 	'include/rte_class.h',
 	'include/rte_common.h',
 	'include/rte_compat.h',
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v4 2/6] test/bitops: add bit operation test case
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (21 preceding siblings ...)
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-11-20 10:12 ` Joyce Kong
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
                   ` (45 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-20 10:12 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Add test cases for set bit, clear bit, test and set bit,
test and clear bit operations.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 app/test/Makefile         |   1 +
 app/test/autotest_data.py |   6 +
 app/test/meson.build      |   2 +
 app/test/test_bitops.c    | 305 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 314 insertions(+)
 create mode 100644 app/test/test_bitops.c

diff --git a/app/test/Makefile b/app/test/Makefile
index 57930c0..4f33274 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -70,6 +70,7 @@ SRCS-y += test_ticketlock.c
 SRCS-y += test_memory.c
 SRCS-y += test_memzone.c
 SRCS-y += test_bitmap.c
+SRCS-y += test_bitops.c
 SRCS-y += test_reciprocal_division.c
 SRCS-y += test_reciprocal_division_perf.c
 SRCS-y += test_fbarray.c
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 6deb97b..7db2df1 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -405,6 +405,12 @@
         "Report":  None,
     },
     {
+        "Name":    "Bitops test",
+        "Command": "bitops_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
+    {
         "Name":    "Hash multiwriter autotest",
         "Command": "hash_multiwriter_autotest",
         "Func":    default_autotest,
diff --git a/app/test/meson.build b/app/test/meson.build
index ff59c31..33b4135 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -14,6 +14,7 @@ test_sources = files('commands.c',
 	'test_atomic.c',
 	'test_barrier.c',
 	'test_bitratestats.c',
+	'test_bitops.c',
 	'test_bpf.c',
 	'test_byteorder.c',
 	'test_cmdline.c',
@@ -167,6 +168,7 @@ fast_test_names = [
         'alarm_autotest',
         'atomic_autotest',
         'byteorder_autotest',
+        'bitops_autotest',
         'cmdline_autotest',
         'common_autotest',
         'cpuflags_autotest',
diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
new file mode 100644
index 0000000..3859ca8
--- /dev/null
+++ b/app/test/test_bitops.c
@@ -0,0 +1,305 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#include <rte_bitops.h>
+#include <rte_launch.h>
+#include "test.h"
+
+uint32_t val32;
+uint64_t val64;
+unsigned int synchro;
+unsigned int count32;
+unsigned int count64;
+
+#define MAX_BITS_32 32
+#define MAX_BITS_64 64
+/*
+ * Bitops functions
+ * ================
+ *
+ * - The main test function performs several subtests.
+ * - For relaxed version, check bit operations on one core.
+ *   - Initialize valXX to specified values, then set each bit of valXX
+ *     to 1 one by one in "test_bitops_set_relaxed".
+ *
+ *   - Clear each bit of valXX to 0 one by one in "test_bitops_clear_relaxed".
+ *
+ *   - Function "test_bitops_test_set_clear_relaxed" checks whether each bit
+ *     of valXX can do "test and set" and "test and clear" correctly.
+ *
+ * - For C11 atomic barrier version, check bit operations on multi cores.
+ *   - Each bit of valXX is set to 1, then cleared to 0 on each core in
+ *     "test_bitops_set_clear". The main test function checks that once all
+ *     lcores finish their set_clear, the value of valXX is still zero.
+ *
+ *   - The cores are waiting for a synchro which is triggered by the main test
+ *     function. Then all cores would do "rte_test_and_set_bitXX" or
+ *     "rte_test_and_clear_bitXX" at the same time, "countXX" which is checked
+ *     as the result later would inc by one or not according to the original
+ *     bit value.
+ *
+ */
+
+static int
+test_bitops_set_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_set_bit32_relaxed(i, &val32);
+	/* Every bit of val32 must now read back as set. */
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+	/* Repeat the same set-then-verify sequence on the 64-bit value. */
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_set_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_clear_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_clear_bit32_relaxed(i, &val32);
+	/* Every bit of val32 must now read back as clear. */
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+	/* Repeat the same clear-then-verify sequence on the 64-bit value. */
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_clear_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_test_set_clear_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_test_and_set_bit32_relaxed(i, &val32);
+	/* Each bit was just set, so test-and-clear must report it as set. */
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_test_and_clear_bit32_relaxed(i, &val32)) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+	/* Repeat the same sequence on the 64-bit value. */
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_test_and_set_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_test_and_clear_bit64_relaxed(i, &val64)) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_set_clear(__attribute__((unused)) void *arg)
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		; /* busy-wait until synchro becomes non-zero */
+	/* Set, then clear, every bit of val32 and val64 with the ordered API. */
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_set_bit32(i, &val32);
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_clear_bit32(i, &val32);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_set_bit64(i, &val64);
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_clear_bit64(i, &val64);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * rte_test_and_set_bitXX() sets the bit to 1 and returns its original value.
+ * For every bit this worker finds clear (return value 0), "countXX" is
+ * incremented by one; bits that were already set leave "countXX" untouched.
+ * The final value of "countXX" is checked by the main test function.
+ */
+static int
+test_bitops_test_set(__attribute__((unused)) void *arg)
+
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		; /* busy-wait until synchro becomes non-zero */
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_test_and_set_bit32(i, &val32))
+			__atomic_fetch_add(&count32, 1, __ATOMIC_ACQ_REL);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_test_and_set_bit64(i, &val64))
+			__atomic_fetch_add(&count64, 1, __ATOMIC_ACQ_REL);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * rte_test_and_clear_bitXX() clears the bit to 0 and returns its original
+ * value. For every bit this worker finds set (non-zero return value),
+ * "countXX" is incremented by one; bits that were already clear leave
+ * "countXX" untouched. The final value is checked by the main test function.
+ */
+static int
+test_bitops_test_clear(__attribute__((unused)) void *arg)
+
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		; /* busy-wait until synchro becomes non-zero */
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_test_and_clear_bit32(i, &val32))
+			__atomic_fetch_add(&count32, 1, __ATOMIC_ACQ_REL);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_test_and_clear_bit64(i, &val64))
+			__atomic_fetch_add(&count64, 1, __ATOMIC_ACQ_REL);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops(void)
+{
+	__atomic_store_n(&val32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&val64, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&synchro, 0,  __ATOMIC_RELAXED);
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	if (test_bitops_set_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_test_set_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+	/* Ordered API: every slave lcore sets then clears all bits at once. */
+	rte_eal_mp_remote_launch(test_bitops_set_clear, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32(i, &val32)) {
+			printf("Failed to set and clear bit on multi cores.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64(i, &val64)) {
+			printf("Failed to set and clear bit on multi cores.\n");
+			return TEST_FAILED;
+		}
+
+	/*
+	 * Launch all slave lcores to do "rte_test_and_set_bitXX"
+	 * respectively.
+	 * Each lcore should have MAX_BITS_XX chances to check the target bit.
+	 * If it's equal to 0, set it to 1 and "countXX (which is initialized
+	 * to 0)" would be increased by one. If the target bit is 1, still set
+	 * it to 1 and do nothing for "countXX". There would be only one lcore
+	 * that finds the target bit is 0.
+	 * If the final value of "countXX" is equal to MAX_BITS_XX, all slave
+	 * lcores performed "rte_test_and_set_bitXX" correctly.
+	 */
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	rte_eal_mp_remote_launch(test_bitops_test_set, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	if (__atomic_load_n(&count32, __ATOMIC_RELAXED) != MAX_BITS_32) {
+		printf("Failed to test and set on multi cores.\n");
+		return TEST_FAILED;
+	}
+	if (__atomic_load_n(&count64, __ATOMIC_RELAXED) != MAX_BITS_64) {
+		printf("Failed to test and set on multi cores.\n");
+		return TEST_FAILED;
+	}
+
+	/*
+	 * Launch all slave lcores to do "rte_test_and_clear_bitXX"
+	 * respectively.
+	 * Each lcore should have MAX_BITS_XX chances to check the target bit.
+	 * If it's equal to 1, clear it to 0 and "countXX (which is initialized
+	 * to 0)" would be increased by one. If the target bit is 0, still clear
+	 * it to 0 and do nothing for "countXX". There would be only one lcore
+	 * that finds the target bit is 1.
+	 * If the final value of "countXX" is equal to MAX_BITS_XX, all slave
+	 * lcores performed "rte_test_and_clear_bitXX" correctly.
+	 */
+
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	rte_eal_mp_remote_launch(test_bitops_test_clear, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	if (__atomic_load_n(&count32, __ATOMIC_RELAXED) != MAX_BITS_32) {
+		printf("Failed to test and clear on multi cores.\n");
+		return TEST_FAILED;
+	}
+	if (__atomic_load_n(&count64, __ATOMIC_RELAXED) != MAX_BITS_64) {
+		printf("Failed to test and clear on multi cores.\n");
+		return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(bitops_autotest, test_bitops);
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v4 3/6] net/axgbe: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (22 preceding siblings ...)
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 2/6] test/bitops: add bit operation test case Joyce Kong
@ 2019-11-20 10:12 ` Joyce Kong
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 4/6] net/bnx2x: " Joyce Kong
                   ` (44 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-20 10:12 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/axgbe/axgbe_common.h | 29 +----------------------------
 drivers/net/axgbe/axgbe_ethdev.c | 14 +++++++-------
 drivers/net/axgbe/axgbe_mdio.c   | 14 +++++++-------
 3 files changed, 15 insertions(+), 42 deletions(-)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index 34f60f1..9cabda8 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -22,6 +22,7 @@
 #include <pthread.h>
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memory.h>
 #include <rte_malloc.h>
 #include <rte_hexdump.h>
@@ -1674,34 +1675,6 @@ do {									\
 #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)
 #define time_before_eq(a, b)	time_after_eq(b, a)
 
-/*---bitmap support apis---*/
-static inline int axgbe_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
-static inline void axgbe_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void axgbe_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int axgbe_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 static inline unsigned long msecs_to_timer_cycles(unsigned int m)
 {
 	return rte_get_timer_hz() * (m / 1000);
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index d1f160e..fa597f3 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -201,8 +201,8 @@ axgbe_dev_start(struct rte_eth_dev *dev)
 	axgbe_dev_enable_tx(dev);
 	axgbe_dev_enable_rx(dev);
 
-	axgbe_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
-	axgbe_clear_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_clear_bit64(AXGBE_STOPPED, &pdata->dev_state);
+	rte_clear_bit64(AXGBE_DOWN, &pdata->dev_state);
 	return 0;
 }
 
@@ -216,17 +216,17 @@ axgbe_dev_stop(struct rte_eth_dev *dev)
 
 	rte_intr_disable(&pdata->pci_dev->intr_handle);
 
-	if (axgbe_test_bit(AXGBE_STOPPED, &pdata->dev_state))
+	if (rte_get_bit64(AXGBE_STOPPED, &pdata->dev_state))
 		return;
 
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit64(AXGBE_STOPPED, &pdata->dev_state);
 	axgbe_dev_disable_tx(dev);
 	axgbe_dev_disable_rx(dev);
 
 	pdata->phy_if.phy_stop(pdata);
 	pdata->hw_if.exit(pdata);
 	memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit64(AXGBE_DOWN, &pdata->dev_state);
 }
 
 /* Clear all resources like TX/RX queues. */
@@ -598,8 +598,8 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
 
 	pdata = eth_dev->data->dev_private;
 	/* initial state */
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit64(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit64(AXGBE_STOPPED, &pdata->dev_state);
 	pdata->eth_dev = eth_dev;
 
 	pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
diff --git a/drivers/net/axgbe/axgbe_mdio.c b/drivers/net/axgbe/axgbe_mdio.c
index 2721e5c..00394a7 100644
--- a/drivers/net/axgbe/axgbe_mdio.c
+++ b/drivers/net/axgbe/axgbe_mdio.c
@@ -743,7 +743,7 @@ static int __axgbe_phy_config_aneg(struct axgbe_port *pdata)
 {
 	int ret;
 
-	axgbe_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+	rte_set_bit64(AXGBE_LINK_INIT, &pdata->dev_state);
 	pdata->link_check = rte_get_timer_cycles();
 
 	ret = pdata->phy_if.phy_impl.an_config(pdata);
@@ -807,9 +807,9 @@ static int axgbe_phy_config_aneg(struct axgbe_port *pdata)
 
 	ret = __axgbe_phy_config_aneg(pdata);
 	if (ret)
-		axgbe_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_set_bit64(AXGBE_LINK_ERR, &pdata->dev_state);
 	else
-		axgbe_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_clear_bit64(AXGBE_LINK_ERR, &pdata->dev_state);
 
 	pthread_mutex_unlock(&pdata->an_mutex);
 
@@ -880,7 +880,7 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 	unsigned int link_aneg;
 	int an_restart;
 
-	if (axgbe_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
+	if (rte_get_bit64(AXGBE_LINK_ERR, &pdata->dev_state)) {
 		pdata->phy.link = 0;
 		goto adjust_link;
 	}
@@ -900,10 +900,10 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 			return;
 		}
 		axgbe_phy_status_result(pdata);
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
-			axgbe_clear_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+		if (rte_get_bit64(AXGBE_LINK_INIT, &pdata->dev_state))
+			rte_clear_bit64(AXGBE_LINK_INIT, &pdata->dev_state);
 	} else {
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
+		if (rte_get_bit64(AXGBE_LINK_INIT, &pdata->dev_state)) {
 			axgbe_check_link_timeout(pdata);
 
 			if (link_aneg)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v4 4/6] net/bnx2x: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (23 preceding siblings ...)
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
@ 2019-11-20 10:12 ` " Joyce Kong
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 5/6] net/qede: " Joyce Kong
                   ` (43 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-20 10:12 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/bnx2x/bnx2x.c    | 209 +++++++++++++++++++------------------------
 drivers/net/bnx2x/bnx2x.h    |   4 -
 drivers/net/bnx2x/ecore_sp.h |   9 +-
 3 files changed, 98 insertions(+), 124 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c
index ed31335..1c00a67 100644
--- a/drivers/net/bnx2x/bnx2x.c
+++ b/drivers/net/bnx2x/bnx2x.c
@@ -26,6 +26,7 @@
 #include <fcntl.h>
 #include <zlib.h>
 #include <rte_string_fns.h>
+#include <rte_bitops.h>
 
 #define BNX2X_PMD_VER_PREFIX "BNX2X PMD"
 #define BNX2X_PMD_VERSION_MAJOR 1
@@ -129,32 +130,6 @@ static void bnx2x_ack_sb(struct bnx2x_softc *sc, uint8_t igu_sb_id,
 			 uint8_t storm, uint16_t index, uint8_t op,
 			 uint8_t update);
 
-int bnx2x_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	mb();
-	return res;
-}
-
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-void bnx2x_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-int bnx2x_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 int bnx2x_cmpxchg(volatile int *addr, int old, int new)
 {
 	return __sync_val_compare_and_swap(addr, old, new);
@@ -1427,11 +1402,11 @@ bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj,
 
 	/* wait for completion of requested */
 	if (wait_for_comp) {
-		bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+		rte_set_bit64(RAMROD_COMP_WAIT, &ramrod_flags);
 	}
 
 	/* Set the mac type of addresses we want to clear */
-	bnx2x_set_bit(mac_type, &vlan_mac_flags);
+	rte_set_bit64(mac_type, &vlan_mac_flags);
 
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc < 0)
@@ -1458,26 +1433,26 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		break;
 
 	case BNX2X_RX_MODE_NORMAL:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
 	case BNX2X_RX_MODE_ALLMULTI:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
@@ -1488,19 +1463,20 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		 * should receive matched and unmatched (in resolution of port)
 		 * unicast packets.
 		 */
-		bnx2x_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		if (IS_MF_SI(sc)) {
-			bnx2x_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags);
+			rte_set_bit64(ECORE_ACCEPT_ALL_UNICAST,
+					tx_accept_flags);
 		} else {
-			bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+			rte_set_bit64(ECORE_ACCEPT_UNICAST, tx_accept_flags);
 		}
 
 		break;
@@ -1512,8 +1488,8 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 
 	/* Set ACCEPT_ANY_VLAN as we do not enable filtering by VLAN */
 	if (rx_mode != BNX2X_RX_MODE_NONE) {
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
 	}
 
 	return 0;
@@ -1542,7 +1518,7 @@ bnx2x_set_q_rx_mode(struct bnx2x_softc *sc, uint8_t cl_id,
 	ramrod_param.rdata = BNX2X_SP(sc, rx_mode_rdata);
 	ramrod_param.rdata_mapping =
 	    (rte_iova_t)BNX2X_SP_MAPPING(sc, rx_mode_rdata),
-	    bnx2x_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	    rte_set_bit64(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	ramrod_param.ramrod_flags = ramrod_flags;
 	ramrod_param.rx_mode_flags = rx_mode_flags;
@@ -1571,9 +1547,9 @@ int bnx2x_set_storm_rx_mode(struct bnx2x_softc *sc)
 		return rc;
 	}
 
-	bnx2x_set_bit(RAMROD_RX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_TX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit64(RAMROD_RX, &ramrod_flags);
+	rte_set_bit64(RAMROD_TX, &ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	return bnx2x_set_q_rx_mode(sc, sc->fp[0].cl_id, rx_mode_flags,
 				 rx_accept_flags, tx_accept_flags,
@@ -1698,7 +1674,7 @@ static int bnx2x_func_wait_started(struct bnx2x_softc *sc)
 			    "Forcing STARTED-->TX_STOPPED-->STARTED");
 
 		func_params.f_obj = &sc->func_obj;
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit64(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 
 		/* STARTED-->TX_STOPPED */
 		func_params.cmd = ECORE_F_CMD_TX_STOP;
@@ -1722,7 +1698,7 @@ static int bnx2x_stop_queue(struct bnx2x_softc *sc, int index)
 
 	q_params.q_obj = &sc->sp_objs[fp->index].q_obj;
 	/* We want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* Stop the primary connection: */
 
@@ -1783,7 +1759,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	int rc;
 
 	/* prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_STOP;
 
@@ -1797,7 +1773,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	if (rc) {
 		PMD_DRV_LOG(NOTICE, sc, "FUNC_STOP ramrod failed. "
 			    "Running a dry transaction");
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit64(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 		return ecore_func_state_change(sc, &func_params);
 	}
 
@@ -1809,7 +1785,7 @@ static int bnx2x_reset_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	struct ecore_func_state_params func_params = { NULL };
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_RESET;
@@ -1866,11 +1842,10 @@ bnx2x_chip_cleanup(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_li
 	 * a race between the completion code and this code.
 	 */
 
-	if (bnx2x_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) {
-		bnx2x_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
-	} else {
+	if (rte_get_bit64(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state))
+		rte_set_bit64(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
+	else
 		bnx2x_set_storm_rx_mode(sc);
-	}
 
 	/* Clean up multicast configuration */
 	rparam.mcast_obj = &sc->mcast_obj;
@@ -1960,12 +1935,12 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Cleanup MACs' object first... */
 
 	/* Wait for completion of requested */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &ramrod_flags);
 	/* Perform a dry cleanup */
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
+	rte_set_bit64(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
 
 	/* Clean ETH primary MAC */
-	bnx2x_set_bit(ECORE_ETH_MAC, &vlan_mac_flags);
+	rte_set_bit64(ECORE_ETH_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, &sc->sp_objs->mac_obj, &vlan_mac_flags,
 				 &ramrod_flags);
 	if (rc != 0) {
@@ -1974,7 +1949,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 
 	/* Cleanup UC list */
 	vlan_mac_flags = 0;
-	bnx2x_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
+	rte_set_bit64(ECORE_UC_LIST_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc != 0) {
 		PMD_DRV_LOG(NOTICE, sc,
@@ -1984,7 +1959,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Now clean mcast object... */
 
 	rparam.mcast_obj = &sc->mcast_obj;
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
+	rte_set_bit64(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
 
 	/* Add a DEL command... */
 	rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL);
@@ -4288,7 +4263,7 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 	struct ecore_vlan_mac_obj *vlan_mac_obj;
 
 	/* always push next commands out, don't wait here */
-	bnx2x_set_bit(RAMROD_CONT, &ramrod_flags);
+	rte_set_bit64(RAMROD_CONT, &ramrod_flags);
 
 	switch (le32toh(elem->message.data.eth_event.echo) >> BNX2X_SWCID_SHIFT) {
 	case ECORE_FILTER_MAC_PENDING:
@@ -4319,12 +4294,12 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 
 static void bnx2x_handle_rx_mode_eqe(struct bnx2x_softc *sc)
 {
-	bnx2x_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	rte_clear_bit64(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	/* send rx_mode command again if was requested */
-	if (bnx2x_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) {
+	if (rte_test_and_clear_bit64(ECORE_FILTER_RX_MODE_SCHED,
+						&sc->sp_state))
 		bnx2x_set_storm_rx_mode(sc);
-	}
 }
 
 static void bnx2x_update_eq_prod(struct bnx2x_softc *sc, uint16_t prod)
@@ -4693,7 +4668,7 @@ static int bnx2x_init_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	PMD_INIT_FUNC_TRACE(sc);
 
 	/* prepare the parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_INIT;
@@ -4988,8 +4963,8 @@ static void bnx2x_init_eth_fp(struct bnx2x_softc *sc, int idx)
 	bnx2x_update_fp_sb_idx(fp);
 
 	/* Configure Queue State object */
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_RX, &q_type);
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_TX, &q_type);
+	rte_set_bit64(ECORE_Q_TYPE_HAS_RX, &q_type);
+	rte_set_bit64(ECORE_Q_TYPE_HAS_TX, &q_type);
 
 	ecore_init_queue_obj(sc,
 			     &sc->sp_objs[idx].q_obj,
@@ -5803,7 +5778,7 @@ static int bnx2x_func_start(struct bnx2x_softc *sc)
 	    &func_params.params.start;
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_START;
@@ -6379,11 +6354,11 @@ bnx2x_pf_q_prep_init(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
 	uint8_t cos;
 	int cxt_index, cxt_offset;
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags);
+	rte_set_bit64(ECORE_Q_FLG_HC, &init_params->rx.flags);
+	rte_set_bit64(ECORE_Q_FLG_HC, &init_params->tx.flags);
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
+	rte_set_bit64(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
+	rte_set_bit64(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
 
 	/* HC rate */
 	init_params->rx.hc_rate =
@@ -6417,7 +6392,7 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	unsigned long flags = 0;
 
 	/* PF driver will always initialize the Queue to an ACTIVE state */
-	bnx2x_set_bit(ECORE_Q_FLG_ACTIVE, &flags);
+	rte_set_bit64(ECORE_Q_FLG_ACTIVE, &flags);
 
 	/*
 	 * tx only connections collect statistics (on the same index as the
@@ -6425,9 +6400,9 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * connection is initialized.
 	 */
 
-	bnx2x_set_bit(ECORE_Q_FLG_STATS, &flags);
+	rte_set_bit64(ECORE_Q_FLG_STATS, &flags);
 	if (zero_stats) {
-		bnx2x_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags);
+		rte_set_bit64(ECORE_Q_FLG_ZERO_STATS, &flags);
 	}
 
 	/*
@@ -6435,10 +6410,10 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * CoS-ness doesn't survive the loopback
 	 */
 	if (sc->flags & BNX2X_TX_SWITCHING) {
-		bnx2x_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags);
+		rte_set_bit64(ECORE_Q_FLG_TX_SWITCH, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
+	rte_set_bit64(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
 
 	return flags;
 }
@@ -6448,15 +6423,15 @@ static unsigned long bnx2x_get_q_flags(struct bnx2x_softc *sc, uint8_t leading)
 	unsigned long flags = 0;
 
 	if (IS_MF_SD(sc)) {
-		bnx2x_set_bit(ECORE_Q_FLG_OV, &flags);
+		rte_set_bit64(ECORE_Q_FLG_OV, &flags);
 	}
 
 	if (leading) {
-		bnx2x_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags);
-		bnx2x_set_bit(ECORE_Q_FLG_MCAST, &flags);
+		rte_set_bit64(ECORE_Q_FLG_LEADING_RSS, &flags);
+		rte_set_bit64(ECORE_Q_FLG_MCAST, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_VLAN, &flags);
+	rte_set_bit64(ECORE_Q_FLG_VLAN, &flags);
 
 	/* merge with common flags */
 	return flags | bnx2x_get_common_flags(sc, TRUE);
@@ -6577,7 +6552,7 @@ bnx2x_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, uint8_t lea
 	q_params.q_obj = &BNX2X_SP_OBJ(sc, fp).q_obj;
 
 	/* we want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* prepare the INIT parameters */
 	bnx2x_pf_q_prep_init(sc, fp, &q_params.params.init);
@@ -6645,20 +6620,20 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 
 	params.rss_obj = rss_obj;
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &params.ramrod_flags);
 
-	bnx2x_set_bit(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
 
 	/* RSS configuration */
-	bnx2x_set_bit(ECORE_RSS_IPV4, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV4_TCP, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6_TCP, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_IPV4, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_IPV4_TCP, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_IPV6, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_IPV6_TCP, &params.rss_flags);
 	if (rss_obj->udp_rss_v4) {
-		bnx2x_set_bit(ECORE_RSS_IPV4_UDP, &params.rss_flags);
+		rte_set_bit64(ECORE_RSS_IPV4_UDP, &params.rss_flags);
 	}
 	if (rss_obj->udp_rss_v6) {
-		bnx2x_set_bit(ECORE_RSS_IPV6_UDP, &params.rss_flags);
+		rte_set_bit64(ECORE_RSS_IPV6_UDP, &params.rss_flags);
 	}
 
 	/* Hash bits */
@@ -6673,7 +6648,7 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 			params.rss_key[i] = (uint32_t) rte_rand();
 		}
 
-		bnx2x_set_bit(ECORE_RSS_SET_SRCH, &params.rss_flags);
+		rte_set_bit64(ECORE_RSS_SET_SRCH, &params.rss_flags);
 	}
 
 	if (IS_PF(sc))
@@ -6730,11 +6705,11 @@ bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
 	ramrod_param.ramrod_flags = *ramrod_flags;
 
 	/* fill a user request section if needed */
-	if (!bnx2x_test_bit(RAMROD_CONT, ramrod_flags)) {
+	if (!rte_get_bit64(RAMROD_CONT, ramrod_flags)) {
 		rte_memcpy(ramrod_param.user_req.u.mac.mac, mac,
 				 ETH_ALEN);
 
-		bnx2x_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags);
+		rte_set_bit64(mac_type, &ramrod_param.user_req.vlan_mac_flags);
 
 /* Set the command: ADD or DEL */
 		ramrod_param.user_req.cmd = (set) ? ECORE_VLAN_MAC_ADD :
@@ -6761,7 +6736,7 @@ static int bnx2x_set_eth_mac(struct bnx2x_softc *sc, uint8_t set)
 
 	PMD_DRV_LOG(DEBUG, sc, "Adding Ethernet MAC");
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	/* Eth MAC is set on RSS leading client (fp[0]) */
 	return bnx2x_set_mac_one(sc, sc->link_params.mac_addr,
@@ -6893,24 +6868,26 @@ bnx2x_fill_report_data(struct bnx2x_softc *sc, struct bnx2x_link_report_data *da
 
 	/* Link is down */
 	if (!sc->link_vars.link_up || (sc->flags & BNX2X_MF_FUNC_DIS)) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+		rte_set_bit64(BNX2X_LINK_REPORT_LINK_DOWN,
 			    &data->link_report_flags);
 	}
 
 	/* Full DUPLEX */
 	if (sc->link_vars.duplex == DUPLEX_FULL) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		rte_set_bit64(BNX2X_LINK_REPORT_FULL_DUPLEX,
 			    &data->link_report_flags);
 	}
 
 	/* Rx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_RX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
+		rte_set_bit64(BNX2X_LINK_REPORT_RX_FC_ON,
+				&data->link_report_flags);
 	}
 
 	/* Tx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
+		rte_set_bit64(BNX2X_LINK_REPORT_TX_FC_ON,
+				&data->link_report_flags);
 	}
 }
 
@@ -6929,9 +6906,9 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 
 	/* Don't report link down or exactly the same link status twice */
 	if (!memcmp(&cur_data, &sc->last_reported_link, sizeof(cur_data)) ||
-	    (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	    (rte_get_bit64(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &sc->last_reported_link.link_report_flags) &&
-	     bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	     rte_get_bit64(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &cur_data.link_report_flags))) {
 		return;
 	}
@@ -6946,14 +6923,14 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 	/* report new link params and remember the state for the next time */
 	rte_memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data));
 
-	if (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	if (rte_get_bit64(BNX2X_LINK_REPORT_LINK_DOWN,
 			 &cur_data.link_report_flags)) {
 		ELINK_DEBUG_P0(sc, "NIC Link is Down");
 	} else {
 		__rte_unused const char *duplex;
 		__rte_unused const char *flow;
 
-		if (bnx2x_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		if (rte_test_and_clear_bit64(BNX2X_LINK_REPORT_FULL_DUPLEX,
 					   &cur_data.link_report_flags)) {
 			duplex = "full";
 				ELINK_DEBUG_P0(sc, "link set to full duplex");
@@ -6968,19 +6945,19 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
  * enabled.
  */
 		if (cur_data.link_report_flags) {
-			if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			if (rte_get_bit64(BNX2X_LINK_REPORT_RX_FC_ON,
 					 &cur_data.link_report_flags) &&
-			    bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+			    rte_get_bit64(BNX2X_LINK_REPORT_TX_FC_ON,
 					 &cur_data.link_report_flags)) {
 				flow = "ON - receive & transmit";
-			} else if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (rte_get_bit64(BNX2X_LINK_REPORT_RX_FC_ON,
 						&cur_data.link_report_flags) &&
-				   !bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   !rte_get_bit64(BNX2X_LINK_REPORT_TX_FC_ON,
 						 &cur_data.link_report_flags)) {
 				flow = "ON - receive";
-			} else if (!bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (!rte_get_bit64(BNX2X_LINK_REPORT_RX_FC_ON,
 						 &cur_data.link_report_flags) &&
-				   bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   rte_get_bit64(BNX2X_LINK_REPORT_TX_FC_ON,
 						&cur_data.link_report_flags)) {
 				flow = "ON - transmit";
 			} else {
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 3383c76..e6e66e8 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -1809,10 +1809,6 @@ static const uint32_t dmae_reg_go_c[] = {
 #define PCI_PM_D0    1
 #define PCI_PM_D3hot 2
 
-int  bnx2x_test_bit(int nr, volatile unsigned long * addr);
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long * addr);
-void bnx2x_clear_bit(int nr, volatile unsigned long * addr);
-int  bnx2x_test_and_clear_bit(int nr, volatile unsigned long * addr);
 int  bnx2x_cmpxchg(volatile int *addr, int old, int new);
 
 int bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size,
diff --git a/drivers/net/bnx2x/ecore_sp.h b/drivers/net/bnx2x/ecore_sp.h
index cc1db37..efbfdad 100644
--- a/drivers/net/bnx2x/ecore_sp.h
+++ b/drivers/net/bnx2x/ecore_sp.h
@@ -15,6 +15,7 @@
 #define ECORE_SP_H
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 #ifndef __LITTLE_ENDIAN
@@ -73,10 +74,10 @@ typedef rte_spinlock_t ECORE_MUTEX_SPIN;
 #define ECORE_SET_BIT_NA(bit, var)         (*var |= (1 << bit))
 #define ECORE_CLEAR_BIT_NA(bit, var)       (*var &= ~(1 << bit))
 
-#define ECORE_TEST_BIT(bit, var)           bnx2x_test_bit(bit, var)
-#define ECORE_SET_BIT(bit, var)            bnx2x_set_bit(bit, var)
-#define ECORE_CLEAR_BIT(bit, var)          bnx2x_clear_bit(bit, var)
-#define ECORE_TEST_AND_CLEAR_BIT(bit, var) bnx2x_test_and_clear_bit(bit, var)
+#define ECORE_TEST_BIT(bit, var)           rte_get_bit64(bit, var)
+#define ECORE_SET_BIT(bit, var)            rte_set_bit64(bit, var)
+#define ECORE_CLEAR_BIT(bit, var)          rte_clear_bit64(bit, var)
+#define ECORE_TEST_AND_CLEAR_BIT(bit, var) rte_test_and_clear_bit64(bit, var)
 
 #define atomic_load_acq_int                (int)*
 #define atomic_store_rel_int(a, v)         (*a = v)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v4 5/6] net/qede: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (24 preceding siblings ...)
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 4/6] net/bnx2x: " Joyce Kong
@ 2019-11-20 10:12 ` " Joyce Kong
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 6/6] net/hinic: " Joyce Kong
                   ` (42 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-20 10:12 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/qede/base/bcm_osal.c | 20 --------------------
 drivers/net/qede/base/bcm_osal.h | 10 ++++------
 2 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 48d016e..19457d7 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -46,26 +46,6 @@ u32 qede_osal_log2(u32 val)
 	return log;
 }
 
-inline void qede_set_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-inline void qede_clr_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-inline bool qede_test_bit(u32 nr, unsigned long *addr)
-{
-	bool res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
 static inline u32 qede_ffb(unsigned long word)
 {
 	unsigned long first_bit;
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 0f09557..e7a7392 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -8,6 +8,7 @@
 #define __BCM_OSAL_H
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_spinlock.h>
 #include <rte_malloc.h>
 #include <rte_atomic.h>
@@ -311,17 +312,14 @@ typedef struct osal_list_t {
 #define OSAL_BITS_PER_UL_MASK		(OSAL_BITS_PER_UL - 1)
 
 /* Bitops */
-void qede_set_bit(u32, unsigned long *);
 #define OSAL_SET_BIT(bit, bitmap) \
-	qede_set_bit(bit, bitmap)
+	rte_set_bit64(bit, bitmap)
 
-void qede_clr_bit(u32, unsigned long *);
 #define OSAL_CLEAR_BIT(bit, bitmap) \
-	qede_clr_bit(bit, bitmap)
+	rte_clear_bit64(bit, bitmap)
 
-bool qede_test_bit(u32, unsigned long *);
 #define OSAL_TEST_BIT(bit, bitmap) \
-	qede_test_bit(bit, bitmap)
+	rte_get_bit64(bit, bitmap)
 
 u32 qede_find_first_bit(unsigned long *, u32);
 #define OSAL_FIND_FIRST_BIT(bitmap, length) \
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v4 6/6] net/hinic: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (25 preceding siblings ...)
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 5/6] net/qede: " Joyce Kong
@ 2019-11-20 10:12 ` " Joyce Kong
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (41 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-20 10:12 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/hinic/Makefile            |  1 +
 drivers/net/hinic/base/hinic_compat.h | 33 +--------------------------------
 drivers/net/hinic/hinic_pmd_ethdev.c  | 16 ++++++++--------
 drivers/net/hinic/meson.build         |  2 ++
 4 files changed, 12 insertions(+), 40 deletions(-)

diff --git a/drivers/net/hinic/Makefile b/drivers/net/hinic/Makefile
index b78fd8d..97f4298 100644
--- a/drivers/net/hinic/Makefile
+++ b/drivers/net/hinic/Makefile
@@ -9,6 +9,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_pmd_hinic.a
 
 CFLAGS += -O3
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += $(WERROR_FLAGS)
 
 ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
diff --git a/drivers/net/hinic/base/hinic_compat.h b/drivers/net/hinic/base/hinic_compat.h
index e4a7f12..c0a0b3e 100644
--- a/drivers/net/hinic/base/hinic_compat.h
+++ b/drivers/net/hinic/base/hinic_compat.h
@@ -11,6 +11,7 @@
 #include <pthread.h>
 #include <rte_common.h>
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memzone.h>
 #include <rte_memcpy.h>
 #include <rte_malloc.h>
@@ -117,38 +118,6 @@ extern int hinic_logtype;
 
 #define HINIC_PAGE_SIZE_DPDK	6
 
-static inline int hinic_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	res = ((*addr) & (1UL << nr)) != 0;
-	return res;
-}
-
-static inline void hinic_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void hinic_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int hinic_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
-static inline int hinic_test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_or(addr, mask) & mask;
-}
-
 void *dma_zalloc_coherent(void *dev, size_t size, dma_addr_t *dma_handle,
 			  gfp_t flag);
 void *dma_zalloc_coherent_aligned(void *dev, size_t size,
diff --git a/drivers/net/hinic/hinic_pmd_ethdev.c b/drivers/net/hinic/hinic_pmd_ethdev.c
index 072fec3..8181564 100644
--- a/drivers/net/hinic/hinic_pmd_ethdev.c
+++ b/drivers/net/hinic/hinic_pmd_ethdev.c
@@ -269,7 +269,7 @@ static void hinic_dev_interrupt_handler(void *param)
 	struct rte_eth_dev *dev = param;
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (!hinic_test_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
+	if (!rte_get_bit64(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device's interrupt is disabled, ignore interrupt event, dev_name: %s, port_id: %d",
 			    nic_dev->proc_dev_name, dev->data->port_id);
 		return;
@@ -1075,7 +1075,7 @@ static int hinic_dev_start(struct rte_eth_dev *dev)
 	if (dev->data->dev_conf.intr_conf.lsc != 0)
 		(void)hinic_link_update(dev, 0);
 
-	hinic_set_bit(HINIC_DEV_START, &nic_dev->dev_status);
+	rte_set_bit64(HINIC_DEV_START, &nic_dev->dev_status);
 
 	return 0;
 
@@ -1200,7 +1200,7 @@ static void hinic_dev_stop(struct rte_eth_dev *dev)
 	name = dev->data->name;
 	port_id = dev->data->port_id;
 
-	if (!hinic_test_and_clear_bit(HINIC_DEV_START, &nic_dev->dev_status)) {
+	if (!rte_test_and_clear_bit64(HINIC_DEV_START, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(INFO, "Device %s already stopped", name);
 		return;
 	}
@@ -1245,7 +1245,7 @@ static void hinic_disable_interrupt(struct rte_eth_dev *dev)
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	int ret, retries = 0;
 
-	hinic_clear_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_clear_bit64(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* disable msix interrupt in hardware */
 	hinic_set_msix_state(nic_dev->hwdev, 0, HINIC_MSIX_DISABLE);
@@ -2844,7 +2844,7 @@ static void hinic_dev_close(struct rte_eth_dev *dev)
 {
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (hinic_test_and_set_bit(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
+	if (rte_test_and_set_bit64(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device %s already closed",
 			    dev->data->name);
 		return;
@@ -3045,7 +3045,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 			    eth_dev->data->name);
 		goto enable_intr_fail;
 	}
-	hinic_set_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_set_bit64(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* initialize filter info */
 	filter_info = &nic_dev->filter;
@@ -3057,7 +3057,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 	TAILQ_INIT(&nic_dev->filter_fdir_rule_list);
 	TAILQ_INIT(&nic_dev->hinic_flow_list);
 
-	hinic_set_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_set_bit64(HINIC_DEV_INIT, &nic_dev->dev_status);
 	PMD_DRV_LOG(INFO, "Initialize %s in primary successfully",
 		    eth_dev->data->name);
 
@@ -3113,7 +3113,7 @@ static int hinic_dev_uninit(struct rte_eth_dev *dev)
 	struct hinic_nic_dev *nic_dev;
 
 	nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
-	hinic_clear_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_clear_bit64(HINIC_DEV_INIT, &nic_dev->dev_status);
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
diff --git a/drivers/net/hinic/meson.build b/drivers/net/hinic/meson.build
index bc7e246..8c7ee9d 100644
--- a/drivers/net/hinic/meson.build
+++ b/drivers/net/hinic/meson.build
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Huawei Technologies Co., Ltd
 
+allow_experimental_apis = true
+
 subdir('base')
 objs = [base_objs]
 
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-11-20 13:40   ` Morten Brørup
  0 siblings, 0 replies; 139+ messages in thread
From: Morten Brørup @ 2019-11-20 13:40 UTC (permalink / raw)
  To: Joyce Kong, dev
  Cc: nd, thomas, jerinj, stephen, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> Sent: Wednesday, November 20, 2019 11:12 AM
> 
> There are a lot functions of bit operations scattered and
> duplicated in PMDs, consolidating them into a common API
> family is necessary. Furthermore, when the bit operation
> is applied to the IO devices, use __ATOMIC_ACQ_REL to
> ensure the ordering for io bit operation.
> 
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> ---
>  MAINTAINERS                                |   5 +
>  doc/api/doxy-api-index.md                  |   5 +-
>  lib/librte_eal/common/Makefile             |   1 +
>  lib/librte_eal/common/include/rte_bitops.h | 474
> +++++++++++++++++++++++++++++
>  lib/librte_eal/common/meson.build          |   3 +-
>  5 files changed, 485 insertions(+), 3 deletions(-)
>  create mode 100644 lib/librte_eal/common/include/rte_bitops.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index f2fdb93..4ee2712 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -233,6 +233,11 @@ M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
>  F: lib/librte_eal/common/include/rte_bitmap.h
>  F: app/test/test_bitmap.c
> 
> +Bitops
> +M: Joyce Kong <joyce.kong@arm.com>
> +F: lib/librte_eal/common/include/rte_bitops.h
> +F: app/test/test_bitops.c
> +
>  MCSlock - EXPERIMENTAL
>  M: Phil Yang <phil.yang@arm.com>
>  F: lib/librte_eal/common/include/generic/rte_mcslock.h
> diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
> index dff496b..ade7c01 100644
> --- a/doc/api/doxy-api-index.md
> +++ b/doc/api/doxy-api-index.md
> @@ -133,12 +133,13 @@ The public API headers are grouped by topics:
>    [BPF]                (@ref rte_bpf.h)
> 
>  - **containers**:
> +  [bitmap]             (@ref rte_bitmap.h),
> +  [bitops]             (@ref rte_bitops.h),
>    [mbuf]               (@ref rte_mbuf.h),
>    [mbuf pool ops]      (@ref rte_mbuf_pool_ops.h),
>    [ring]               (@ref rte_ring.h),
>    [stack]              (@ref rte_stack.h),
> -  [tailq]              (@ref rte_tailq.h),
> -  [bitmap]             (@ref rte_bitmap.h)
> +  [tailq]              (@ref rte_tailq.h)
> 
>  - **packet framework**:
>    * [port]             (@ref rte_port.h):
> diff --git a/lib/librte_eal/common/Makefile
> b/lib/librte_eal/common/Makefile
> index c2c6d92..dd025c1 100644
> --- a/lib/librte_eal/common/Makefile
> +++ b/lib/librte_eal/common/Makefile
> @@ -19,6 +19,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
>  INC += rte_service.h rte_service_component.h
>  INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
>  INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
> +INC += rte_bitops.h
> 
>  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
>  GENERIC_INC += rte_memcpy.h rte_cpuflags.h
> diff --git a/lib/librte_eal/common/include/rte_bitops.h
> b/lib/librte_eal/common/include/rte_bitops.h
> new file mode 100644
> index 0000000..34158d1
> --- /dev/null
> +++ b/lib/librte_eal/common/include/rte_bitops.h
> @@ -0,0 +1,474 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Arm Limited
> + */
> +
> +#ifndef _RTE_BITOPS_H_
> +#define _RTE_BITOPS_H_
> +
> +/**
> + * @file
> + * Bit Operations
> + *
> + * This file defines a API for bit operations without/with memory
> ordering.
> + */
> +
> +#include <stdint.h>
> +#include <rte_debug.h>
> +#include <rte_compat.h>
> +
> +/*---------------------------- 32 bit operations -------------------------
> ---*/
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Get the target bit from a 32-bit value without memory ordering.
> + *
> + * @param nr
> + *   The target bit to get.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The target bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_get_bit32_relaxed(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Set the target bit in a 32-bit value to 1 without memory ordering.
> + *
> + * @param nr
> + *   The target bit to set.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_set_bit32_relaxed(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Clear the target bit in a 32-bit value to 0 without memory ordering.
> + *
> + * @param nr
> + *   The target bit to clear.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_clear_bit32_relaxed(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Return the original bit from a 32-bit value, then set it to 1 without
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and set.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_test_and_set_bit32_relaxed(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Return the original bit from a 32-bit value, then clear it to 0 without
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and clear.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_test_and_clear_bit32_relaxed(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Get the target bit from a 32-bit value with memory ordering.
> + *
> + * @param nr
> + *   The target bit to get.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The target bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_get_bit32(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Set the target bit in a 32-bit value to 1 with memory ordering.
> + *
> + * @param nr
> + *   The target bit to set.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_set_bit32(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Clear the target bit in a 32-bit value to 0 with memory ordering.
> + *
> + * @param nr
> + *   The target bit to clear.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_clear_bit32(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Return the original bit from a 32-bit value, then set it to 1 with
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and set.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_test_and_set_bit32(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Return the original bit from a 32-bit value, then clear it to 0 with
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and clear.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_test_and_clear_bit32(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +
> +/*---------------------------- 64 bit operations -------------------------
> ---*/
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Get the target bit from a 64-bit value without memory ordering.
> + *
> + * @param nr
> + *   The target bit to get.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The target bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_get_bit64_relaxed(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Set the target bit in a 64-bit value to 1 without memory ordering.
> + *
> + * @param nr
> + *   The target bit to set.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_set_bit64_relaxed(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Clear the target bit in a 64-bit value to 0 without memory ordering.
> + *
> + * @param nr
> + *   The target bit to clear.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_clear_bit64_relaxed(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Return the original bit from a 64-bit value, then set it to 1 without
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and set.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_test_and_set_bit64_relaxed(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Return the original bit from a 64-bit value, then clear it to 0 without
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and clear.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_test_and_clear_bit64_relaxed(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Get the target bit from a 64-bit value with memory ordering.
> + *
> + * @param nr
> + *   The target bit to get.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The target bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_get_bit64(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Set the target bit in a 64-bit value to 1 with memory ordering.
> + *
> + * @param nr
> + *   The target bit to set.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_set_bit64(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Clear the target bit in a 64-bit value to 0 with memory ordering.
> + *
> + * @param nr
> + *   The target bit to clear.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_clear_bit64(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Return the original bit from a 64-bit value, then set it to 1 with
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and set.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_test_and_set_bit64(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Return the original bit from a 64-bit value, then clear it to 0 with
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and clear.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_test_and_clear_bit64(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +#endif /* _RTE_BITOPS_H_ */
> diff --git a/lib/librte_eal/common/meson.build
> b/lib/librte_eal/common/meson.build
> index d6a149b..8a5197b 100644
> --- a/lib/librte_eal/common/meson.build
> +++ b/lib/librte_eal/common/meson.build
> @@ -50,9 +50,10 @@ common_objs += eal_common_arch_objs
> 
>  common_headers = files(
>  	'include/rte_alarm.h',
> +	'include/rte_bitmap.h',
> +	'include/rte_bitops.h',
>  	'include/rte_branch_prediction.h',
>  	'include/rte_bus.h',
> -	'include/rte_bitmap.h',
>  	'include/rte_class.h',
>  	'include/rte_common.h',
>  	'include/rte_compat.h',
> --
> 2.7.4
> 

Acked-by: Morten Brørup <mb@smartsharesystems.com>


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v5 0/6] implement common rte bit operation APIs in PMDs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (26 preceding siblings ...)
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 6/6] net/hinic: " Joyce Kong
@ 2019-11-28  6:44 ` Joyce Kong
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
                   ` (40 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-28  6:44 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

There are a lot of bit operation functions scattered across PMDs.
Consolidate them into a common API family and apply it in the different
PMDs to reduce code duplication.

v5:
 Correct the spelling mistake in test_bitops.c

v4:
  Introduce uint32_t/uint64_t *addr when defining bit operation APIs (suggested by
  Morten Brørup).

v3:
  1. Change the API's head file back to rte_bitops.h, then implement both 32-bit and
     64-bit operations with and without C11 atomic memory ordering.
  2. Add multi-core test case for bit operations which implemented with memory ordering.
  3. Modify the doc of both APIs and test cases.

v2:
  1. Add doxygen comments for the rte bit operation API (suggested by Stephen Hemminger).
  2. Add test cases for the common rte bit operation API (suggested by Stephen Hemminger).
  3. Change the header file to rte_io_bitops.h and the operations to rte_io_set_bit() etc.,
     as the API uses barriers inside and the barriers are only needed for IO operations
     (suggested by Jerin Jacob).
  4. Use a well-defined uint_NN_t type (suggested by Morten Brørup).

Joyce Kong (6):
  lib/eal: implement the family of rte bit operation APIs
  test/bitops: add bit operation test case
  net/axgbe: use common rte bit operation APIs instead
  net/bnx2x: use common rte bit operation APIs instead
  net/qede: use common rte bit operation APIs instead
  net/hinic: use common rte bit operation APIs instead

 MAINTAINERS                                |   5 +
 app/test/Makefile                          |   1 +
 app/test/autotest_data.py                  |   6 +
 app/test/meson.build                       |   2 +
 app/test/test_bitops.c                     | 305 +++++++++++++++++++
 doc/api/doxy-api-index.md                  |   5 +-
 drivers/net/axgbe/axgbe_common.h           |  29 +-
 drivers/net/axgbe/axgbe_ethdev.c           |  14 +-
 drivers/net/axgbe/axgbe_mdio.c             |  14 +-
 drivers/net/bnx2x/bnx2x.c                  | 209 ++++++-------
 drivers/net/bnx2x/bnx2x.h                  |   4 -
 drivers/net/bnx2x/ecore_sp.h               |   9 +-
 drivers/net/hinic/Makefile                 |   1 +
 drivers/net/hinic/base/hinic_compat.h      |  33 +-
 drivers/net/hinic/hinic_pmd_ethdev.c       |  16 +-
 drivers/net/hinic/meson.build              |   2 +
 drivers/net/qede/base/bcm_osal.c           |  20 --
 drivers/net/qede/base/bcm_osal.h           |  10 +-
 lib/librte_eal/common/Makefile             |   1 +
 lib/librte_eal/common/include/rte_bitops.h | 474 +++++++++++++++++++++++++++++
 lib/librte_eal/common/meson.build          |   3 +-
 21 files changed, 928 insertions(+), 235 deletions(-)
 create mode 100644 app/test/test_bitops.c
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v5 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (27 preceding siblings ...)
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
@ 2019-11-28  6:44 ` Joyce Kong
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 2/6] test/bitops: add bit operation test case Joyce Kong
                   ` (39 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-28  6:44 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

There are a lot of bit operation functions scattered and
duplicated across PMDs, so consolidating them into a common API
family is necessary. Furthermore, when a bit operation is
applied to IO devices, __ATOMIC_ACQ_REL is used to ensure
the ordering of the IO bit operation.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 MAINTAINERS                                |   5 +
 doc/api/doxy-api-index.md                  |   5 +-
 lib/librte_eal/common/Makefile             |   1 +
 lib/librte_eal/common/include/rte_bitops.h | 474 +++++++++++++++++++++++++++++
 lib/librte_eal/common/meson.build          |   3 +-
 5 files changed, 485 insertions(+), 3 deletions(-)
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 4a0c9a4..043902b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -236,6 +236,11 @@ M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
 F: lib/librte_eal/common/include/rte_bitmap.h
 F: app/test/test_bitmap.c
 
+Bitops
+M: Joyce Kong <joyce.kong@arm.com>
+F: lib/librte_eal/common/include/rte_bitops.h
+F: app/test/test_bitops.c
+
 MCSlock - EXPERIMENTAL
 M: Phil Yang <phil.yang@arm.com>
 F: lib/librte_eal/common/include/generic/rte_mcslock.h
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index dff496b..ade7c01 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -133,12 +133,13 @@ The public API headers are grouped by topics:
   [BPF]                (@ref rte_bpf.h)
 
 - **containers**:
+  [bitmap]             (@ref rte_bitmap.h),
+  [bitops]             (@ref rte_bitops.h),
   [mbuf]               (@ref rte_mbuf.h),
   [mbuf pool ops]      (@ref rte_mbuf_pool_ops.h),
   [ring]               (@ref rte_ring.h),
   [stack]              (@ref rte_stack.h),
-  [tailq]              (@ref rte_tailq.h),
-  [bitmap]             (@ref rte_bitmap.h)
+  [tailq]              (@ref rte_tailq.h)
 
 - **packet framework**:
   * [port]             (@ref rte_port.h):
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index c2c6d92..dd025c1 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -19,6 +19,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
 INC += rte_service.h rte_service_component.h
 INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
 INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
+INC += rte_bitops.h
 
 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
 GENERIC_INC += rte_memcpy.h rte_cpuflags.h
diff --git a/lib/librte_eal/common/include/rte_bitops.h b/lib/librte_eal/common/include/rte_bitops.h
new file mode 100644
index 0000000..34158d1
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_bitops.h
@@ -0,0 +1,474 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_BITOPS_H_
+#define _RTE_BITOPS_H_
+
+/**
+ * @file
+ * Bit Operations
+ *
+ * This file defines an API for bit operations with and without memory ordering.
+ */
+
+#include <stdint.h>
+#include <rte_debug.h>
+#include <rte_compat.h>
+
+/*---------------------------- 32 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 32-bit value without memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_get_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 32-bit value to 1 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 32-bit value to 0 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then set it to 1 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_set_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then clear it to 0 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_clear_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 32-bit value with memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_get_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 32-bit value to 1 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 32-bit value to 0 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then set it to 1 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_set_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then clear it to 0 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_clear_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/*---------------------------- 64 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 64-bit value without memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_get_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 64-bit value to 1 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 64-bit value to 0 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then set it to 1 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_set_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then clear it to 0 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_clear_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 64-bit value with memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_get_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 64-bit value to 1 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 64-bit value to 0 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then set it to 1 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_set_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then clear it to 0 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_clear_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+#endif /* _RTE_BITOPS_H_ */
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index 2b97715..766edbd 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -50,9 +50,10 @@ common_objs += eal_common_arch_objs
 
 common_headers = files(
 	'include/rte_alarm.h',
+	'include/rte_bitmap.h',
+	'include/rte_bitops.h',
 	'include/rte_branch_prediction.h',
 	'include/rte_bus.h',
-	'include/rte_bitmap.h',
 	'include/rte_class.h',
 	'include/rte_common.h',
 	'include/rte_compat.h',
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v5 2/6] test/bitops: add bit operation test case
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (28 preceding siblings ...)
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-11-28  6:44 ` Joyce Kong
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
                   ` (38 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-28  6:44 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Add test cases for set bit, clear bit, test and set bit,
test and clear bit operations.

Change-Id: I5a00e885ea8455636c11cb9455b7e6102c9312b4
Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 app/test/Makefile         |   1 +
 app/test/autotest_data.py |   6 +
 app/test/meson.build      |   2 +
 app/test/test_bitops.c    | 305 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 314 insertions(+)
 create mode 100644 app/test/test_bitops.c

diff --git a/app/test/Makefile b/app/test/Makefile
index 57930c0..4f33274 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -70,6 +70,7 @@ SRCS-y += test_ticketlock.c
 SRCS-y += test_memory.c
 SRCS-y += test_memzone.c
 SRCS-y += test_bitmap.c
+SRCS-y += test_bitops.c
 SRCS-y += test_reciprocal_division.c
 SRCS-y += test_reciprocal_division_perf.c
 SRCS-y += test_fbarray.c
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 6deb97b..7db2df1 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -405,6 +405,12 @@
         "Report":  None,
     },
     {
+        "Name":    "Bitops test",
+        "Command": "bitops_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
+    {
         "Name":    "Hash multiwriter autotest",
         "Command": "hash_multiwriter_autotest",
         "Func":    default_autotest,
diff --git a/app/test/meson.build b/app/test/meson.build
index fb49d80..ef340ca 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -14,6 +14,7 @@ test_sources = files('commands.c',
 	'test_atomic.c',
 	'test_barrier.c',
 	'test_bitratestats.c',
+	'test_bitops.c',
 	'test_bpf.c',
 	'test_byteorder.c',
 	'test_cmdline.c',
@@ -167,6 +168,7 @@ fast_test_names = [
         'alarm_autotest',
         'atomic_autotest',
         'byteorder_autotest',
+        'bitops_autotest',
         'cmdline_autotest',
         'common_autotest',
         'cpuflags_autotest',
diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
new file mode 100644
index 0000000..2a648aa
--- /dev/null
+++ b/app/test/test_bitops.c
@@ -0,0 +1,305 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#include <rte_bitops.h>
+#include <rte_launch.h>
+#include "test.h"
+
+uint32_t val32;
+uint64_t val64;
+unsigned int synchro;
+unsigned int count32;
+unsigned int count64;
+
+#define MAX_BITS_32 32
+#define MAX_BITS_64 64
+/*
+ * Bitops functions
+ * ================
+ *
+ * - The main test function performs several subtests.
+ * - For relaxed version, check bit operations on one core.
+ *   - Initialize valXX to specified values, then set each bit of valXX
+ *     to 1 one by one in "test_bitops_set_relaxed".
+ *
+ *   - Clear each bit of valXX to 0 one by one in "test_bitops_clear_relaxed".
+ *
+ *   - Function "test_bitops_test_set_clear_relaxed" checks whether each bit
+ *     of valXX can do "test and set" and "test and clear" correctly.
+ *
+ * - For C11 atomic barrier version, check bit operations on multi cores.
+ *   - Each bit of valXX is set to 1, then cleared to 0 on each core in
+ *     "test_bitops_set_clear". Once all lcores finish their set_clear, the
+ *     main test function checks that the value of valXX is still zero.
+ *
+ *   - The cores wait for a synchro flag which is set by the main test
+ *     function. Then all cores call "rte_test_and_set_bitXX" or
+ *     "rte_test_and_clear_bitXX" at the same time; "countXX", which is
+ *     checked as the result later, is incremented by one or not according
+ *     to the original bit value.
+ *
+ */
+
+static int
+test_bitops_set_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_set_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_set_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_clear_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_clear_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_clear_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_test_set_clear_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_test_and_set_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_test_and_clear_bit32_relaxed(i, &val32)) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+	}
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_test_and_set_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_test_and_clear_bit64_relaxed(i, &val64)) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_set_clear(__attribute__((unused)) void *arg)
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		;
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_set_bit32(i, &val32);
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_clear_bit32(i, &val32);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_set_bit64(i, &val64);
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_clear_bit64(i, &val64);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * rte_test_and_set_bitXX() returns the original bit value, then sets it to 1.
+ * If the original bit was 0, this function increases "countXX" by one; if it
+ * was already 1, "countXX" is left unchanged. The value of "countXX" is
+ * checked as the result later.
+ */
+static int
+test_bitops_test_set(__attribute__((unused)) void *arg)
+
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		;
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_test_and_set_bit32(i, &val32))
+			__atomic_fetch_add(&count32, 1, __ATOMIC_ACQ_REL);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_test_and_set_bit64(i, &val64))
+			__atomic_fetch_add(&count64, 1, __ATOMIC_ACQ_REL);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * rte_test_and_clear_bitXX() returns the original bit value, then clears it
+ * to 0. If the original bit was 1, this function increases "countXX" by one;
+ * if it was already 0, "countXX" is left unchanged. The value of "countXX" is
+ * checked as the result later.
+ */
+static int
+test_bitops_test_clear(__attribute__((unused)) void *arg)
+
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		;
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_test_and_clear_bit32(i, &val32))
+			__atomic_fetch_add(&count32, 1, __ATOMIC_ACQ_REL);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_test_and_clear_bit64(i, &val64))
+			__atomic_fetch_add(&count64, 1, __ATOMIC_ACQ_REL);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops(void)
+{
+	__atomic_store_n(&val32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&val64, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&synchro, 0,  __ATOMIC_RELAXED);
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	if (test_bitops_set_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_test_set_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+
+	rte_eal_mp_remote_launch(test_bitops_set_clear, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32(i, &val32)) {
+			printf("Failed to set and clear bit on multi cores.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64(i, &val64)) {
+			printf("Failed to set and clear bit on multi cores.\n");
+			return TEST_FAILED;
+		}
+
+	/*
+	 * Launch all slave lcores to do "rte_test_and_set_bitXX"
+	 * respectively.
+	 * Each lcore should have MAX_BITS_XX chances to check the target bit.
+	 * If it's equal to 0, set it to 1 and "countXX (which is initialized
+	 * to 0)" would be increased by one. If the target bit is 1, still set
+	 * it to 1 and do nothing for "countXX". There would be only one lcore
+	 * that finds the target bit is 0.
+	 * If the final value of "countXX" is equal to MAX_BITS_XX, all slave
+	 * lcores performed "rte_test_and_set_bitXX" correctly.
+	 */
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	rte_eal_mp_remote_launch(test_bitops_test_set, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	if (__atomic_load_n(&count32, __ATOMIC_RELAXED) != MAX_BITS_32) {
+		printf("Failed to test and set on multi cores.\n");
+		return TEST_FAILED;
+	}
+	if (__atomic_load_n(&count64, __ATOMIC_RELAXED) != MAX_BITS_64) {
+		printf("Failed to test and set on multi cores.\n");
+		return TEST_FAILED;
+	}
+
+	/*
+	 * Launch all slave lcores to do "rte_test_and_clear_bitXX"
+	 * respectively.
+	 * Each lcore should have MAX_BITS_XX chances to check the target bit.
+	 * If it's equal to 1, clear it to 0 and "countXX (which is initialized
+	 * to 0)" would be increased by one. If the target bit is 0, still clear
+	 * it to 0 and do nothing for "countXX". There would be only one lcore
+	 * that finds the target bit is 1.
+	 * If the final value of "countXX" is equal to MAX_BITS_XX, all slave
+	 * lcores performed "rte_test_and_clear_bitXX" correctly.
+	 */
+
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	rte_eal_mp_remote_launch(test_bitops_test_clear, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	if (__atomic_load_n(&count32, __ATOMIC_RELAXED) != MAX_BITS_32) {
+		printf("Failed to test and clear on multi cores.\n");
+		return TEST_FAILED;
+	}
+	if (__atomic_load_n(&count64, __ATOMIC_RELAXED) != MAX_BITS_64) {
+		printf("Failed to test and clear on multi cores.\n");
+		return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(bitops_autotest, test_bitops);
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (29 preceding siblings ...)
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 2/6] test/bitops: add bit operation test case Joyce Kong
@ 2019-11-28  6:44 ` Joyce Kong
  2019-12-02  6:09   ` Gavin Hu (Arm Technology China)
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 4/6] net/bnx2x: use common rte bit operation " Joyce Kong
                   ` (37 subsequent siblings)
  68 siblings, 1 reply; 139+ messages in thread
From: Joyce Kong @ 2019-11-28  6:44 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/axgbe/axgbe_common.h | 29 +----------------------------
 drivers/net/axgbe/axgbe_ethdev.c | 14 +++++++-------
 drivers/net/axgbe/axgbe_mdio.c   | 14 +++++++-------
 3 files changed, 15 insertions(+), 42 deletions(-)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index 34f60f1..9cabda8 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -22,6 +22,7 @@
 #include <pthread.h>
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memory.h>
 #include <rte_malloc.h>
 #include <rte_hexdump.h>
@@ -1674,34 +1675,6 @@ do {									\
 #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)
 #define time_before_eq(a, b)	time_after_eq(b, a)
 
-/*---bitmap support apis---*/
-static inline int axgbe_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
-static inline void axgbe_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void axgbe_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int axgbe_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 static inline unsigned long msecs_to_timer_cycles(unsigned int m)
 {
 	return rte_get_timer_hz() * (m / 1000);
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index d1f160e..fa597f3 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -201,8 +201,8 @@ axgbe_dev_start(struct rte_eth_dev *dev)
 	axgbe_dev_enable_tx(dev);
 	axgbe_dev_enable_rx(dev);
 
-	axgbe_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
-	axgbe_clear_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_clear_bit64(AXGBE_STOPPED, &pdata->dev_state);
+	rte_clear_bit64(AXGBE_DOWN, &pdata->dev_state);
 	return 0;
 }
 
@@ -216,17 +216,17 @@ axgbe_dev_stop(struct rte_eth_dev *dev)
 
 	rte_intr_disable(&pdata->pci_dev->intr_handle);
 
-	if (axgbe_test_bit(AXGBE_STOPPED, &pdata->dev_state))
+	if (rte_get_bit64(AXGBE_STOPPED, &pdata->dev_state))
 		return;
 
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit64(AXGBE_STOPPED, &pdata->dev_state);
 	axgbe_dev_disable_tx(dev);
 	axgbe_dev_disable_rx(dev);
 
 	pdata->phy_if.phy_stop(pdata);
 	pdata->hw_if.exit(pdata);
 	memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit64(AXGBE_DOWN, &pdata->dev_state);
 }
 
 /* Clear all resources like TX/RX queues. */
@@ -598,8 +598,8 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
 
 	pdata = eth_dev->data->dev_private;
 	/* initial state */
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit64(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit64(AXGBE_STOPPED, &pdata->dev_state);
 	pdata->eth_dev = eth_dev;
 
 	pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
diff --git a/drivers/net/axgbe/axgbe_mdio.c b/drivers/net/axgbe/axgbe_mdio.c
index 2721e5c..00394a7 100644
--- a/drivers/net/axgbe/axgbe_mdio.c
+++ b/drivers/net/axgbe/axgbe_mdio.c
@@ -743,7 +743,7 @@ static int __axgbe_phy_config_aneg(struct axgbe_port *pdata)
 {
 	int ret;
 
-	axgbe_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+	rte_set_bit64(AXGBE_LINK_INIT, &pdata->dev_state);
 	pdata->link_check = rte_get_timer_cycles();
 
 	ret = pdata->phy_if.phy_impl.an_config(pdata);
@@ -807,9 +807,9 @@ static int axgbe_phy_config_aneg(struct axgbe_port *pdata)
 
 	ret = __axgbe_phy_config_aneg(pdata);
 	if (ret)
-		axgbe_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_set_bit64(AXGBE_LINK_ERR, &pdata->dev_state);
 	else
-		axgbe_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_clear_bit64(AXGBE_LINK_ERR, &pdata->dev_state);
 
 	pthread_mutex_unlock(&pdata->an_mutex);
 
@@ -880,7 +880,7 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 	unsigned int link_aneg;
 	int an_restart;
 
-	if (axgbe_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
+	if (rte_get_bit64(AXGBE_LINK_ERR, &pdata->dev_state)) {
 		pdata->phy.link = 0;
 		goto adjust_link;
 	}
@@ -900,10 +900,10 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 			return;
 		}
 		axgbe_phy_status_result(pdata);
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
-			axgbe_clear_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+		if (rte_get_bit64(AXGBE_LINK_INIT, &pdata->dev_state))
+			rte_clear_bit64(AXGBE_LINK_INIT, &pdata->dev_state);
 	} else {
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
+		if (rte_get_bit64(AXGBE_LINK_INIT, &pdata->dev_state)) {
 			axgbe_check_link_timeout(pdata);
 
 			if (link_aneg)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v5 4/6] net/bnx2x: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (30 preceding siblings ...)
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
@ 2019-11-28  6:44 ` " Joyce Kong
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 5/6] net/qede: " Joyce Kong
                   ` (36 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-28  6:44 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/bnx2x/bnx2x.c    | 209 +++++++++++++++++++------------------------
 drivers/net/bnx2x/bnx2x.h    |   4 -
 drivers/net/bnx2x/ecore_sp.h |   9 +-
 3 files changed, 98 insertions(+), 124 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c
index ed31335..1c00a67 100644
--- a/drivers/net/bnx2x/bnx2x.c
+++ b/drivers/net/bnx2x/bnx2x.c
@@ -26,6 +26,7 @@
 #include <fcntl.h>
 #include <zlib.h>
 #include <rte_string_fns.h>
+#include <rte_bitops.h>
 
 #define BNX2X_PMD_VER_PREFIX "BNX2X PMD"
 #define BNX2X_PMD_VERSION_MAJOR 1
@@ -129,32 +130,6 @@ static void bnx2x_ack_sb(struct bnx2x_softc *sc, uint8_t igu_sb_id,
 			 uint8_t storm, uint16_t index, uint8_t op,
 			 uint8_t update);
 
-int bnx2x_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	mb();
-	return res;
-}
-
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-void bnx2x_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-int bnx2x_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 int bnx2x_cmpxchg(volatile int *addr, int old, int new)
 {
 	return __sync_val_compare_and_swap(addr, old, new);
@@ -1427,11 +1402,11 @@ bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj,
 
 	/* wait for completion of requested */
 	if (wait_for_comp) {
-		bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+		rte_set_bit64(RAMROD_COMP_WAIT, &ramrod_flags);
 	}
 
 	/* Set the mac type of addresses we want to clear */
-	bnx2x_set_bit(mac_type, &vlan_mac_flags);
+	rte_set_bit64(mac_type, &vlan_mac_flags);
 
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc < 0)
@@ -1458,26 +1433,26 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		break;
 
 	case BNX2X_RX_MODE_NORMAL:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
 	case BNX2X_RX_MODE_ALLMULTI:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
@@ -1488,19 +1463,20 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		 * should receive matched and unmatched (in resolution of port)
 		 * unicast packets.
 		 */
-		bnx2x_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		if (IS_MF_SI(sc)) {
-			bnx2x_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags);
+			rte_set_bit64(ECORE_ACCEPT_ALL_UNICAST,
+					tx_accept_flags);
 		} else {
-			bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+			rte_set_bit64(ECORE_ACCEPT_UNICAST, tx_accept_flags);
 		}
 
 		break;
@@ -1512,8 +1488,8 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 
 	/* Set ACCEPT_ANY_VLAN as we do not enable filtering by VLAN */
 	if (rx_mode != BNX2X_RX_MODE_NONE) {
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
 	}
 
 	return 0;
@@ -1542,7 +1518,7 @@ bnx2x_set_q_rx_mode(struct bnx2x_softc *sc, uint8_t cl_id,
 	ramrod_param.rdata = BNX2X_SP(sc, rx_mode_rdata);
 	ramrod_param.rdata_mapping =
 	    (rte_iova_t)BNX2X_SP_MAPPING(sc, rx_mode_rdata),
-	    bnx2x_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	    rte_set_bit64(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	ramrod_param.ramrod_flags = ramrod_flags;
 	ramrod_param.rx_mode_flags = rx_mode_flags;
@@ -1571,9 +1547,9 @@ int bnx2x_set_storm_rx_mode(struct bnx2x_softc *sc)
 		return rc;
 	}
 
-	bnx2x_set_bit(RAMROD_RX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_TX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit64(RAMROD_RX, &ramrod_flags);
+	rte_set_bit64(RAMROD_TX, &ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	return bnx2x_set_q_rx_mode(sc, sc->fp[0].cl_id, rx_mode_flags,
 				 rx_accept_flags, tx_accept_flags,
@@ -1698,7 +1674,7 @@ static int bnx2x_func_wait_started(struct bnx2x_softc *sc)
 			    "Forcing STARTED-->TX_STOPPED-->STARTED");
 
 		func_params.f_obj = &sc->func_obj;
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit64(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 
 		/* STARTED-->TX_STOPPED */
 		func_params.cmd = ECORE_F_CMD_TX_STOP;
@@ -1722,7 +1698,7 @@ static int bnx2x_stop_queue(struct bnx2x_softc *sc, int index)
 
 	q_params.q_obj = &sc->sp_objs[fp->index].q_obj;
 	/* We want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* Stop the primary connection: */
 
@@ -1783,7 +1759,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	int rc;
 
 	/* prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_STOP;
 
@@ -1797,7 +1773,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	if (rc) {
 		PMD_DRV_LOG(NOTICE, sc, "FUNC_STOP ramrod failed. "
 			    "Running a dry transaction");
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit64(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 		return ecore_func_state_change(sc, &func_params);
 	}
 
@@ -1809,7 +1785,7 @@ static int bnx2x_reset_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	struct ecore_func_state_params func_params = { NULL };
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_RESET;
@@ -1866,11 +1842,10 @@ bnx2x_chip_cleanup(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_li
 	 * a race between the completion code and this code.
 	 */
 
-	if (bnx2x_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) {
-		bnx2x_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
-	} else {
+	if (rte_get_bit64(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state))
+		rte_set_bit64(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
+	else
 		bnx2x_set_storm_rx_mode(sc);
-	}
 
 	/* Clean up multicast configuration */
 	rparam.mcast_obj = &sc->mcast_obj;
@@ -1960,12 +1935,12 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Cleanup MACs' object first... */
 
 	/* Wait for completion of requested */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &ramrod_flags);
 	/* Perform a dry cleanup */
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
+	rte_set_bit64(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
 
 	/* Clean ETH primary MAC */
-	bnx2x_set_bit(ECORE_ETH_MAC, &vlan_mac_flags);
+	rte_set_bit64(ECORE_ETH_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, &sc->sp_objs->mac_obj, &vlan_mac_flags,
 				 &ramrod_flags);
 	if (rc != 0) {
@@ -1974,7 +1949,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 
 	/* Cleanup UC list */
 	vlan_mac_flags = 0;
-	bnx2x_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
+	rte_set_bit64(ECORE_UC_LIST_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc != 0) {
 		PMD_DRV_LOG(NOTICE, sc,
@@ -1984,7 +1959,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Now clean mcast object... */
 
 	rparam.mcast_obj = &sc->mcast_obj;
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
+	rte_set_bit64(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
 
 	/* Add a DEL command... */
 	rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL);
@@ -4288,7 +4263,7 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 	struct ecore_vlan_mac_obj *vlan_mac_obj;
 
 	/* always push next commands out, don't wait here */
-	bnx2x_set_bit(RAMROD_CONT, &ramrod_flags);
+	rte_set_bit64(RAMROD_CONT, &ramrod_flags);
 
 	switch (le32toh(elem->message.data.eth_event.echo) >> BNX2X_SWCID_SHIFT) {
 	case ECORE_FILTER_MAC_PENDING:
@@ -4319,12 +4294,12 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 
 static void bnx2x_handle_rx_mode_eqe(struct bnx2x_softc *sc)
 {
-	bnx2x_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	rte_clear_bit64(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	/* send rx_mode command again if was requested */
-	if (bnx2x_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) {
+	if (rte_test_and_clear_bit64(ECORE_FILTER_RX_MODE_SCHED,
+						&sc->sp_state))
 		bnx2x_set_storm_rx_mode(sc);
-	}
 }
 
 static void bnx2x_update_eq_prod(struct bnx2x_softc *sc, uint16_t prod)
@@ -4693,7 +4668,7 @@ static int bnx2x_init_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	PMD_INIT_FUNC_TRACE(sc);
 
 	/* prepare the parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_INIT;
@@ -4988,8 +4963,8 @@ static void bnx2x_init_eth_fp(struct bnx2x_softc *sc, int idx)
 	bnx2x_update_fp_sb_idx(fp);
 
 	/* Configure Queue State object */
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_RX, &q_type);
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_TX, &q_type);
+	rte_set_bit64(ECORE_Q_TYPE_HAS_RX, &q_type);
+	rte_set_bit64(ECORE_Q_TYPE_HAS_TX, &q_type);
 
 	ecore_init_queue_obj(sc,
 			     &sc->sp_objs[idx].q_obj,
@@ -5803,7 +5778,7 @@ static int bnx2x_func_start(struct bnx2x_softc *sc)
 	    &func_params.params.start;
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_START;
@@ -6379,11 +6354,11 @@ bnx2x_pf_q_prep_init(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
 	uint8_t cos;
 	int cxt_index, cxt_offset;
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags);
+	rte_set_bit64(ECORE_Q_FLG_HC, &init_params->rx.flags);
+	rte_set_bit64(ECORE_Q_FLG_HC, &init_params->tx.flags);
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
+	rte_set_bit64(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
+	rte_set_bit64(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
 
 	/* HC rate */
 	init_params->rx.hc_rate =
@@ -6417,7 +6392,7 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	unsigned long flags = 0;
 
 	/* PF driver will always initialize the Queue to an ACTIVE state */
-	bnx2x_set_bit(ECORE_Q_FLG_ACTIVE, &flags);
+	rte_set_bit64(ECORE_Q_FLG_ACTIVE, &flags);
 
 	/*
 	 * tx only connections collect statistics (on the same index as the
@@ -6425,9 +6400,9 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * connection is initialized.
 	 */
 
-	bnx2x_set_bit(ECORE_Q_FLG_STATS, &flags);
+	rte_set_bit64(ECORE_Q_FLG_STATS, &flags);
 	if (zero_stats) {
-		bnx2x_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags);
+		rte_set_bit64(ECORE_Q_FLG_ZERO_STATS, &flags);
 	}
 
 	/*
@@ -6435,10 +6410,10 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * CoS-ness doesn't survive the loopback
 	 */
 	if (sc->flags & BNX2X_TX_SWITCHING) {
-		bnx2x_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags);
+		rte_set_bit64(ECORE_Q_FLG_TX_SWITCH, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
+	rte_set_bit64(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
 
 	return flags;
 }
@@ -6448,15 +6423,15 @@ static unsigned long bnx2x_get_q_flags(struct bnx2x_softc *sc, uint8_t leading)
 	unsigned long flags = 0;
 
 	if (IS_MF_SD(sc)) {
-		bnx2x_set_bit(ECORE_Q_FLG_OV, &flags);
+		rte_set_bit64(ECORE_Q_FLG_OV, &flags);
 	}
 
 	if (leading) {
-		bnx2x_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags);
-		bnx2x_set_bit(ECORE_Q_FLG_MCAST, &flags);
+		rte_set_bit64(ECORE_Q_FLG_LEADING_RSS, &flags);
+		rte_set_bit64(ECORE_Q_FLG_MCAST, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_VLAN, &flags);
+	rte_set_bit64(ECORE_Q_FLG_VLAN, &flags);
 
 	/* merge with common flags */
 	return flags | bnx2x_get_common_flags(sc, TRUE);
@@ -6577,7 +6552,7 @@ bnx2x_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, uint8_t lea
 	q_params.q_obj = &BNX2X_SP_OBJ(sc, fp).q_obj;
 
 	/* we want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* prepare the INIT parameters */
 	bnx2x_pf_q_prep_init(sc, fp, &q_params.params.init);
@@ -6645,20 +6620,20 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 
 	params.rss_obj = rss_obj;
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &params.ramrod_flags);
 
-	bnx2x_set_bit(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
 
 	/* RSS configuration */
-	bnx2x_set_bit(ECORE_RSS_IPV4, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV4_TCP, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6_TCP, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_IPV4, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_IPV4_TCP, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_IPV6, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_IPV6_TCP, &params.rss_flags);
 	if (rss_obj->udp_rss_v4) {
-		bnx2x_set_bit(ECORE_RSS_IPV4_UDP, &params.rss_flags);
+		rte_set_bit64(ECORE_RSS_IPV4_UDP, &params.rss_flags);
 	}
 	if (rss_obj->udp_rss_v6) {
-		bnx2x_set_bit(ECORE_RSS_IPV6_UDP, &params.rss_flags);
+		rte_set_bit64(ECORE_RSS_IPV6_UDP, &params.rss_flags);
 	}
 
 	/* Hash bits */
@@ -6673,7 +6648,7 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 			params.rss_key[i] = (uint32_t) rte_rand();
 		}
 
-		bnx2x_set_bit(ECORE_RSS_SET_SRCH, &params.rss_flags);
+		rte_set_bit64(ECORE_RSS_SET_SRCH, &params.rss_flags);
 	}
 
 	if (IS_PF(sc))
@@ -6730,11 +6705,11 @@ bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
 	ramrod_param.ramrod_flags = *ramrod_flags;
 
 	/* fill a user request section if needed */
-	if (!bnx2x_test_bit(RAMROD_CONT, ramrod_flags)) {
+	if (!rte_get_bit64(RAMROD_CONT, ramrod_flags)) {
 		rte_memcpy(ramrod_param.user_req.u.mac.mac, mac,
 				 ETH_ALEN);
 
-		bnx2x_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags);
+		rte_set_bit64(mac_type, &ramrod_param.user_req.vlan_mac_flags);
 
 /* Set the command: ADD or DEL */
 		ramrod_param.user_req.cmd = (set) ? ECORE_VLAN_MAC_ADD :
@@ -6761,7 +6736,7 @@ static int bnx2x_set_eth_mac(struct bnx2x_softc *sc, uint8_t set)
 
 	PMD_DRV_LOG(DEBUG, sc, "Adding Ethernet MAC");
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	/* Eth MAC is set on RSS leading client (fp[0]) */
 	return bnx2x_set_mac_one(sc, sc->link_params.mac_addr,
@@ -6893,24 +6868,26 @@ bnx2x_fill_report_data(struct bnx2x_softc *sc, struct bnx2x_link_report_data *da
 
 	/* Link is down */
 	if (!sc->link_vars.link_up || (sc->flags & BNX2X_MF_FUNC_DIS)) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+		rte_set_bit64(BNX2X_LINK_REPORT_LINK_DOWN,
 			    &data->link_report_flags);
 	}
 
 	/* Full DUPLEX */
 	if (sc->link_vars.duplex == DUPLEX_FULL) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		rte_set_bit64(BNX2X_LINK_REPORT_FULL_DUPLEX,
 			    &data->link_report_flags);
 	}
 
 	/* Rx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_RX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
+		rte_set_bit64(BNX2X_LINK_REPORT_RX_FC_ON,
+				&data->link_report_flags);
 	}
 
 	/* Tx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
+		rte_set_bit64(BNX2X_LINK_REPORT_TX_FC_ON,
+				&data->link_report_flags);
 	}
 }
 
@@ -6929,9 +6906,9 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 
 	/* Don't report link down or exactly the same link status twice */
 	if (!memcmp(&cur_data, &sc->last_reported_link, sizeof(cur_data)) ||
-	    (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	    (rte_get_bit64(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &sc->last_reported_link.link_report_flags) &&
-	     bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	     rte_get_bit64(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &cur_data.link_report_flags))) {
 		return;
 	}
@@ -6946,14 +6923,14 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 	/* report new link params and remember the state for the next time */
 	rte_memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data));
 
-	if (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	if (rte_get_bit64(BNX2X_LINK_REPORT_LINK_DOWN,
 			 &cur_data.link_report_flags)) {
 		ELINK_DEBUG_P0(sc, "NIC Link is Down");
 	} else {
 		__rte_unused const char *duplex;
 		__rte_unused const char *flow;
 
-		if (bnx2x_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		if (rte_test_and_clear_bit64(BNX2X_LINK_REPORT_FULL_DUPLEX,
 					   &cur_data.link_report_flags)) {
 			duplex = "full";
 				ELINK_DEBUG_P0(sc, "link set to full duplex");
@@ -6968,19 +6945,19 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
  * enabled.
  */
 		if (cur_data.link_report_flags) {
-			if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			if (rte_get_bit64(BNX2X_LINK_REPORT_RX_FC_ON,
 					 &cur_data.link_report_flags) &&
-			    bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+			    rte_get_bit64(BNX2X_LINK_REPORT_TX_FC_ON,
 					 &cur_data.link_report_flags)) {
 				flow = "ON - receive & transmit";
-			} else if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (rte_get_bit64(BNX2X_LINK_REPORT_RX_FC_ON,
 						&cur_data.link_report_flags) &&
-				   !bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   !rte_get_bit64(BNX2X_LINK_REPORT_TX_FC_ON,
 						 &cur_data.link_report_flags)) {
 				flow = "ON - receive";
-			} else if (!bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (!rte_get_bit64(BNX2X_LINK_REPORT_RX_FC_ON,
 						 &cur_data.link_report_flags) &&
-				   bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   rte_get_bit64(BNX2X_LINK_REPORT_TX_FC_ON,
 						&cur_data.link_report_flags)) {
 				flow = "ON - transmit";
 			} else {
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 3383c76..e6e66e8 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -1809,10 +1809,6 @@ static const uint32_t dmae_reg_go_c[] = {
 #define PCI_PM_D0    1
 #define PCI_PM_D3hot 2
 
-int  bnx2x_test_bit(int nr, volatile unsigned long * addr);
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long * addr);
-void bnx2x_clear_bit(int nr, volatile unsigned long * addr);
-int  bnx2x_test_and_clear_bit(int nr, volatile unsigned long * addr);
 int  bnx2x_cmpxchg(volatile int *addr, int old, int new);
 
 int bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size,
diff --git a/drivers/net/bnx2x/ecore_sp.h b/drivers/net/bnx2x/ecore_sp.h
index cc1db37..efbfdad 100644
--- a/drivers/net/bnx2x/ecore_sp.h
+++ b/drivers/net/bnx2x/ecore_sp.h
@@ -15,6 +15,7 @@
 #define ECORE_SP_H
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 #ifndef __LITTLE_ENDIAN
@@ -73,10 +74,10 @@ typedef rte_spinlock_t ECORE_MUTEX_SPIN;
 #define ECORE_SET_BIT_NA(bit, var)         (*var |= (1 << bit))
 #define ECORE_CLEAR_BIT_NA(bit, var)       (*var &= ~(1 << bit))
 
-#define ECORE_TEST_BIT(bit, var)           bnx2x_test_bit(bit, var)
-#define ECORE_SET_BIT(bit, var)            bnx2x_set_bit(bit, var)
-#define ECORE_CLEAR_BIT(bit, var)          bnx2x_clear_bit(bit, var)
-#define ECORE_TEST_AND_CLEAR_BIT(bit, var) bnx2x_test_and_clear_bit(bit, var)
+#define ECORE_TEST_BIT(bit, var)           rte_get_bit64(bit, var)
+#define ECORE_SET_BIT(bit, var)            rte_set_bit64(bit, var)
+#define ECORE_CLEAR_BIT(bit, var)          rte_clear_bit64(bit, var)
+#define ECORE_TEST_AND_CLEAR_BIT(bit, var) rte_test_and_clear_bit64(bit, var)
 
 #define atomic_load_acq_int                (int)*
 #define atomic_store_rel_int(a, v)         (*a = v)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v5 5/6] net/qede: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (31 preceding siblings ...)
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 4/6] net/bnx2x: use common rte bit operation " Joyce Kong
@ 2019-11-28  6:44 ` " Joyce Kong
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 6/6] net/hinic: " Joyce Kong
                   ` (35 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-28  6:44 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's private bit operation APIs and use the common
ones; this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/qede/base/bcm_osal.c | 20 --------------------
 drivers/net/qede/base/bcm_osal.h | 10 ++++------
 2 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 48d016e..19457d7 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -46,26 +46,6 @@ u32 qede_osal_log2(u32 val)
 	return log;
 }
 
-inline void qede_set_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-inline void qede_clr_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-inline bool qede_test_bit(u32 nr, unsigned long *addr)
-{
-	bool res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
 static inline u32 qede_ffb(unsigned long word)
 {
 	unsigned long first_bit;
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 0f09557..e7a7392 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -8,6 +8,7 @@
 #define __BCM_OSAL_H
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_spinlock.h>
 #include <rte_malloc.h>
 #include <rte_atomic.h>
@@ -311,17 +312,14 @@ typedef struct osal_list_t {
 #define OSAL_BITS_PER_UL_MASK		(OSAL_BITS_PER_UL - 1)
 
 /* Bitops */
-void qede_set_bit(u32, unsigned long *);
 #define OSAL_SET_BIT(bit, bitmap) \
-	qede_set_bit(bit, bitmap)
+	rte_set_bit64(bit, bitmap)
 
-void qede_clr_bit(u32, unsigned long *);
 #define OSAL_CLEAR_BIT(bit, bitmap) \
-	qede_clr_bit(bit, bitmap)
+	rte_clear_bit64(bit, bitmap)
 
-bool qede_test_bit(u32, unsigned long *);
 #define OSAL_TEST_BIT(bit, bitmap) \
-	qede_test_bit(bit, bitmap)
+	rte_get_bit64(bit, bitmap)
 
 u32 qede_find_first_bit(unsigned long *, u32);
 #define OSAL_FIND_FIRST_BIT(bitmap, length) \
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v5 6/6] net/hinic: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (32 preceding siblings ...)
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 5/6] net/qede: " Joyce Kong
@ 2019-11-28  6:44 ` " Joyce Kong
  2019-12-18  6:00 ` [dpdk-dev] [PATCH v6 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (34 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-11-28  6:44 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's private bit operation APIs and use the common
ones; this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/hinic/Makefile            |  1 +
 drivers/net/hinic/base/hinic_compat.h | 33 +--------------------------------
 drivers/net/hinic/hinic_pmd_ethdev.c  | 16 ++++++++--------
 drivers/net/hinic/meson.build         |  2 ++
 4 files changed, 12 insertions(+), 40 deletions(-)

diff --git a/drivers/net/hinic/Makefile b/drivers/net/hinic/Makefile
index 87fd843..f087baa 100644
--- a/drivers/net/hinic/Makefile
+++ b/drivers/net/hinic/Makefile
@@ -9,6 +9,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_pmd_hinic.a
 
 CFLAGS += -O3
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += $(WERROR_FLAGS)
 
 ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
diff --git a/drivers/net/hinic/base/hinic_compat.h b/drivers/net/hinic/base/hinic_compat.h
index e4a7f12..c0a0b3e 100644
--- a/drivers/net/hinic/base/hinic_compat.h
+++ b/drivers/net/hinic/base/hinic_compat.h
@@ -11,6 +11,7 @@
 #include <pthread.h>
 #include <rte_common.h>
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memzone.h>
 #include <rte_memcpy.h>
 #include <rte_malloc.h>
@@ -117,38 +118,6 @@ extern int hinic_logtype;
 
 #define HINIC_PAGE_SIZE_DPDK	6
 
-static inline int hinic_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	res = ((*addr) & (1UL << nr)) != 0;
-	return res;
-}
-
-static inline void hinic_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void hinic_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int hinic_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
-static inline int hinic_test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_or(addr, mask) & mask;
-}
-
 void *dma_zalloc_coherent(void *dev, size_t size, dma_addr_t *dma_handle,
 			  gfp_t flag);
 void *dma_zalloc_coherent_aligned(void *dev, size_t size,
diff --git a/drivers/net/hinic/hinic_pmd_ethdev.c b/drivers/net/hinic/hinic_pmd_ethdev.c
index 803a39e..6858535 100644
--- a/drivers/net/hinic/hinic_pmd_ethdev.c
+++ b/drivers/net/hinic/hinic_pmd_ethdev.c
@@ -269,7 +269,7 @@ static void hinic_dev_interrupt_handler(void *param)
 	struct rte_eth_dev *dev = param;
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (!hinic_test_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
+	if (!rte_get_bit64(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device's interrupt is disabled, ignore interrupt event, dev_name: %s, port_id: %d",
 			    nic_dev->proc_dev_name, dev->data->port_id);
 		return;
@@ -1076,7 +1076,7 @@ static int hinic_dev_start(struct rte_eth_dev *dev)
 	if (dev->data->dev_conf.intr_conf.lsc != 0)
 		(void)hinic_link_update(dev, 0);
 
-	hinic_set_bit(HINIC_DEV_START, &nic_dev->dev_status);
+	rte_set_bit64(HINIC_DEV_START, &nic_dev->dev_status);
 
 	return 0;
 
@@ -1201,7 +1201,7 @@ static void hinic_dev_stop(struct rte_eth_dev *dev)
 	name = dev->data->name;
 	port_id = dev->data->port_id;
 
-	if (!hinic_test_and_clear_bit(HINIC_DEV_START, &nic_dev->dev_status)) {
+	if (!rte_test_and_clear_bit64(HINIC_DEV_START, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(INFO, "Device %s already stopped", name);
 		return;
 	}
@@ -1246,7 +1246,7 @@ static void hinic_disable_interrupt(struct rte_eth_dev *dev)
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	int ret, retries = 0;
 
-	hinic_clear_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_clear_bit64(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* disable msix interrupt in hardware */
 	hinic_set_msix_state(nic_dev->hwdev, 0, HINIC_MSIX_DISABLE);
@@ -2845,7 +2845,7 @@ static void hinic_dev_close(struct rte_eth_dev *dev)
 {
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (hinic_test_and_set_bit(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
+	if (rte_test_and_set_bit64(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device %s already closed",
 			    dev->data->name);
 		return;
@@ -3042,7 +3042,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 			    eth_dev->data->name);
 		goto enable_intr_fail;
 	}
-	hinic_set_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_set_bit64(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* initialize filter info */
 	filter_info = &nic_dev->filter;
@@ -3054,7 +3054,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 	TAILQ_INIT(&nic_dev->filter_fdir_rule_list);
 	TAILQ_INIT(&nic_dev->hinic_flow_list);
 
-	hinic_set_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_set_bit64(HINIC_DEV_INIT, &nic_dev->dev_status);
 	PMD_DRV_LOG(INFO, "Initialize %s in primary successfully",
 		    eth_dev->data->name);
 
@@ -3110,7 +3110,7 @@ static int hinic_dev_uninit(struct rte_eth_dev *dev)
 	struct hinic_nic_dev *nic_dev;
 
 	nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
-	hinic_clear_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_clear_bit64(HINIC_DEV_INIT, &nic_dev->dev_status);
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
diff --git a/drivers/net/hinic/meson.build b/drivers/net/hinic/meson.build
index bc7e246..8c7ee9d 100644
--- a/drivers/net/hinic/meson.build
+++ b/drivers/net/hinic/meson.build
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Huawei Technologies Co., Ltd
 
+allow_experimental_apis = true
+
 subdir('base')
 objs = [base_objs]
 
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bit operation APIs instead
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
@ 2019-12-02  6:09   ` Gavin Hu (Arm Technology China)
  2019-12-02  9:12     ` Thomas Monjalon
  0 siblings, 1 reply; 139+ messages in thread
From: Gavin Hu (Arm Technology China) @ 2019-12-02  6:09 UTC (permalink / raw)
  To: Joyce Kong (Arm Technology China),
	dev, jerinj, stephen, thomas, Bruce Richardson,
	Morten Brørup
  Cc: nd, mb, david.marchand, Honnappa Nagarahalli, ravi1.kumar, rmody,
	shshaikh, xuanziyang2, cloud.wangxiaoyun, zhouguoyang, nd

Hi Bruce, Thomas,

This series of patches was reported to have a compilation issue[1] on 32-bit Ubuntu.
On mainstream 64-bit OSes, 'unsigned long' is 64 bits in size, so we use the 64-bit variant of the APIs. On a 32-bit OS, however, 'unsigned long' is only 32 bits in size, so the arguments no longer match the 64-bit APIs.
This is where the error happens.

My question is to what extent we shall support 32-bit OSes; put another way, can we ignore this compilation issue?
If we still need to care, how about making 'unsigned long' obsolete and using 'uint32' instead, to be multi-OS friendly?

*Meson Build Failed #1:
OS: UB1604-32
Target:build-gcc-static
[1] http://mails.dpdk.org/archives/test-report/2019-November/109515.html 

> -----Original Message-----
> From: Joyce Kong <joyce.kong@arm.com>
> Sent: Thursday, November 28, 2019 2:44 PM
> To: dev@dpdk.org
> Cc: nd <nd@arm.com>; thomas@monjalon.net; jerinj@marvell.com;
> stephen@networkplumber.org; mb@smartsharesystems.com;
> david.marchand@redhat.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology China)
> <Gavin.Hu@arm.com>; ravi1.kumar@amd.com; rmody@marvell.com;
> shshaikh@marvell.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com
> Subject: [PATCH v5 3/6] net/axgbe: use common rte bit operation APIs
> instead
> 
> Remove its own bit operation APIs and use the common one,
> this can reduce the code duplication largely.
> 
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> ---
>  drivers/net/axgbe/axgbe_common.h | 29 +----------------------------
>  drivers/net/axgbe/axgbe_ethdev.c | 14 +++++++-------
>  drivers/net/axgbe/axgbe_mdio.c   | 14 +++++++-------
>  3 files changed, 15 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/net/axgbe/axgbe_common.h
> b/drivers/net/axgbe/axgbe_common.h
> index 34f60f1..9cabda8 100644
> --- a/drivers/net/axgbe/axgbe_common.h
> +++ b/drivers/net/axgbe/axgbe_common.h
> @@ -22,6 +22,7 @@
>  #include <pthread.h>
> 
>  #include <rte_byteorder.h>
> +#include <rte_bitops.h>
>  #include <rte_memory.h>
>  #include <rte_malloc.h>
>  #include <rte_hexdump.h>
> @@ -1674,34 +1675,6 @@ do {
> 			\
>  #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)
>  #define time_before_eq(a, b)	time_after_eq(b, a)
> 
> -/*---bitmap support apis---*/
> -static inline int axgbe_test_bit(int nr, volatile unsigned long *addr)
> -{
> -	int res;
> -
> -	rte_mb();
> -	res = ((*addr) & (1UL << nr)) != 0;
> -	rte_mb();
> -	return res;
> -}
> -
> -static inline void axgbe_set_bit(unsigned int nr, volatile unsigned long *addr)
> -{
> -	__sync_fetch_and_or(addr, (1UL << nr));
> -}
> -
> -static inline void axgbe_clear_bit(int nr, volatile unsigned long *addr)
> -{
> -	__sync_fetch_and_and(addr, ~(1UL << nr));
> -}
> -
> -static inline int axgbe_test_and_clear_bit(int nr, volatile unsigned long
> *addr)
> -{
> -	unsigned long mask = (1UL << nr);
> -
> -	return __sync_fetch_and_and(addr, ~mask) & mask;
> -}
> -
>  static inline unsigned long msecs_to_timer_cycles(unsigned int m)
>  {
>  	return rte_get_timer_hz() * (m / 1000);
> diff --git a/drivers/net/axgbe/axgbe_ethdev.c
> b/drivers/net/axgbe/axgbe_ethdev.c
> index d1f160e..fa597f3 100644
> --- a/drivers/net/axgbe/axgbe_ethdev.c
> +++ b/drivers/net/axgbe/axgbe_ethdev.c
> @@ -201,8 +201,8 @@ axgbe_dev_start(struct rte_eth_dev *dev)
>  	axgbe_dev_enable_tx(dev);
>  	axgbe_dev_enable_rx(dev);
> 
> -	axgbe_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
> -	axgbe_clear_bit(AXGBE_DOWN, &pdata->dev_state);
> +	rte_clear_bit64(AXGBE_STOPPED, &pdata->dev_state);
> +	rte_clear_bit64(AXGBE_DOWN, &pdata->dev_state);
>  	return 0;
>  }
> 
> @@ -216,17 +216,17 @@ axgbe_dev_stop(struct rte_eth_dev *dev)
> 
>  	rte_intr_disable(&pdata->pci_dev->intr_handle);
> 
> -	if (axgbe_test_bit(AXGBE_STOPPED, &pdata->dev_state))
> +	if (rte_get_bit64(AXGBE_STOPPED, &pdata->dev_state))
>  		return;
> 
> -	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
> +	rte_set_bit64(AXGBE_STOPPED, &pdata->dev_state);
>  	axgbe_dev_disable_tx(dev);
>  	axgbe_dev_disable_rx(dev);
> 
>  	pdata->phy_if.phy_stop(pdata);
>  	pdata->hw_if.exit(pdata);
>  	memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
> -	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
> +	rte_set_bit64(AXGBE_DOWN, &pdata->dev_state);
>  }
> 
>  /* Clear all resources like TX/RX queues. */
> @@ -598,8 +598,8 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
> 
>  	pdata = eth_dev->data->dev_private;
>  	/* initial state */
> -	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
> -	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
> +	rte_set_bit64(AXGBE_DOWN, &pdata->dev_state);
> +	rte_set_bit64(AXGBE_STOPPED, &pdata->dev_state);
>  	pdata->eth_dev = eth_dev;
> 
>  	pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
> diff --git a/drivers/net/axgbe/axgbe_mdio.c
> b/drivers/net/axgbe/axgbe_mdio.c
> index 2721e5c..00394a7 100644
> --- a/drivers/net/axgbe/axgbe_mdio.c
> +++ b/drivers/net/axgbe/axgbe_mdio.c
> @@ -743,7 +743,7 @@ static int __axgbe_phy_config_aneg(struct
> axgbe_port *pdata)
>  {
>  	int ret;
> 
> -	axgbe_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
> +	rte_set_bit64(AXGBE_LINK_INIT, &pdata->dev_state);
>  	pdata->link_check = rte_get_timer_cycles();
> 
>  	ret = pdata->phy_if.phy_impl.an_config(pdata);
> @@ -807,9 +807,9 @@ static int axgbe_phy_config_aneg(struct axgbe_port
> *pdata)
> 
>  	ret = __axgbe_phy_config_aneg(pdata);
>  	if (ret)
> -		axgbe_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
> +		rte_set_bit64(AXGBE_LINK_ERR, &pdata->dev_state);
>  	else
> -		axgbe_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
> +		rte_clear_bit64(AXGBE_LINK_ERR, &pdata->dev_state);
> 
>  	pthread_mutex_unlock(&pdata->an_mutex);
> 
> @@ -880,7 +880,7 @@ static void axgbe_phy_status(struct axgbe_port
> *pdata)
>  	unsigned int link_aneg;
>  	int an_restart;
> 
> -	if (axgbe_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
> +	if (rte_get_bit64(AXGBE_LINK_ERR, &pdata->dev_state)) {
>  		pdata->phy.link = 0;
>  		goto adjust_link;
>  	}
> @@ -900,10 +900,10 @@ static void axgbe_phy_status(struct axgbe_port
> *pdata)
>  			return;
>  		}
>  		axgbe_phy_status_result(pdata);
> -		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
> -			axgbe_clear_bit(AXGBE_LINK_INIT, &pdata-
> >dev_state);
> +		if (rte_get_bit64(AXGBE_LINK_INIT, &pdata->dev_state))
> +			rte_clear_bit64(AXGBE_LINK_INIT, &pdata-
> >dev_state);
>  	} else {
> -		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
> +		if (rte_get_bit64(AXGBE_LINK_INIT, &pdata->dev_state)) {
>  			axgbe_check_link_timeout(pdata);
> 
>  			if (link_aneg)
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bit operation APIs instead
  2019-12-02  6:09   ` Gavin Hu (Arm Technology China)
@ 2019-12-02  9:12     ` Thomas Monjalon
  2019-12-02  9:24       ` [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bitoperation " Morten Brørup
  0 siblings, 1 reply; 139+ messages in thread
From: Thomas Monjalon @ 2019-12-02  9:12 UTC (permalink / raw)
  To: Gavin Hu (Arm Technology China)
  Cc: Joyce Kong (Arm Technology China),
	dev, jerinj, stephen, Bruce Richardson, Morten Brørup, nd,
	david.marchand, Honnappa Nagarahalli, ravi1.kumar, rmody,
	shshaikh, xuanziyang2, cloud.wangxiaoyun, zhouguoyang

02/12/2019 07:09, Gavin Hu (Arm Technology China):
> Hi Bruce, Thomas,
> 
> This series of patches was reported a compilation issue[1] on 32bit Ubuntu. 
> On mainstream 64-bit OS,  "unsigned long" is 64-bit in size and we uses the 64-bit variant of APIs. But the 32-bit OS expect 32-bit 'unsigned long' arguments. 
> This is where the error happens. 

Please could you be more specific? What is the exact error?

> My question is how 32-bit OSes shall we support, put another way, can we ignore this compilation issue? 
> If we still need to care, how about making 'obsolete' of 'unsigned long' and use 'uint32' instead to be multi-OS friendly? 

Which unsigned long?
If it is in the (not merged) bit API, it can still be changed no?




^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bitoperation APIs instead
  2019-12-02  9:12     ` Thomas Monjalon
@ 2019-12-02  9:24       ` " Morten Brørup
  2019-12-02  9:30         ` Thomas Monjalon
  2019-12-02 16:53         ` Stephen Hemminger
  0 siblings, 2 replies; 139+ messages in thread
From: Morten Brørup @ 2019-12-02  9:24 UTC (permalink / raw)
  To: Thomas Monjalon, Gavin Hu (Arm Technology China)
  Cc: Joyce Kong (Arm Technology China),
	dev, jerinj, stephen, Bruce Richardson, nd, david.marchand,
	Honnappa Nagarahalli, ravi1.kumar, rmody, shshaikh, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang

Thomas,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> Sent: Monday, December 2, 2019 10:12 AM
> 
> 02/12/2019 07:09, Gavin Hu (Arm Technology China):
> > Hi Bruce, Thomas,
> >
> > This series of patches was reported a compilation issue[1] on 32bit
> Ubuntu.
> > On mainstream 64-bit OS,  "unsigned long" is 64-bit in size and we
> uses the 64-bit variant of APIs. But the 32-bit OS expect 32-bit
> 'unsigned long' arguments.
> > This is where the error happens.
> 
> Please could you be more specific? What is the exact error?

The PMD has a private structure with an unsigned long field.

The patch for the PMD uses the 64 bit operations on this field. The patch fails to compile for a 32 bit target, because the struct field is only 32 bit there.

> 
> > My question is how 32-bit OSes shall we support, put another way, can
> we ignore this compilation issue?
> > If we still need to care, how about making 'obsolete' of 'unsigned
> long' and use 'uint32' instead to be multi-OS friendly?
> 
> Which unsigned long?
> If it is in the (not merged) bit API, it can still be changed no?
> 

The patch for the PMD can be changed to use the 64 or 32 bit operations depending on whether it is being compiled for a 64 or 32 bit target.

However, the question seems to be if we want to either 1) do something like that, or 2) drop support for 32 bit targets, or 3) make these target dependent fields obsolete (i.e. ban the use of unsigned long) and require explicit sizes, e.g. uint32_t.



^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bitoperation APIs instead
  2019-12-02  9:24       ` [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bitoperation " Morten Brørup
@ 2019-12-02  9:30         ` Thomas Monjalon
  2019-12-02 16:53         ` Stephen Hemminger
  1 sibling, 0 replies; 139+ messages in thread
From: Thomas Monjalon @ 2019-12-02  9:30 UTC (permalink / raw)
  To: Morten Brørup
  Cc: Gavin Hu (Arm Technology China),
	Joyce Kong (Arm Technology China),
	dev, jerinj, stephen, Bruce Richardson, nd, david.marchand,
	Honnappa Nagarahalli, ravi1.kumar, rmody, shshaikh, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang

02/12/2019 10:24, Morten Brørup:
> Thomas,
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> > Sent: Monday, December 2, 2019 10:12 AM
> > 
> > 02/12/2019 07:09, Gavin Hu (Arm Technology China):
> > > Hi Bruce, Thomas,
> > >
> > > This series of patches was reported a compilation issue[1] on 32bit
> > Ubuntu.
> > > On mainstream 64-bit OS,  "unsigned long" is 64-bit in size and we
> > uses the 64-bit variant of APIs. But the 32-bit OS expect 32-bit
> > 'unsigned long' arguments.
> > > This is where the error happens.
> > 
> > Please could you be more specific? What is the exact error?
> 
> The PMD has a private structure with an unsigned long field.
> 
> The patch for the PMD uses the 64 bit operations on this field. The patch fails to compile for a 32 bit target, because the struct field is only 32 bit there.
> 
> > 
> > > My question is how 32-bit OSes shall we support, put another way, can
> > we ignore this compilation issue?
> > > If we still need to care, how about making 'obsolete' of 'unsigned
> > long' and use 'uint32' instead to be multi-OS friendly?
> > 
> > Which unsigned long?
> > If it is in the (not merged) bit API, it can still be changed no?
> > 
> 
> The patch for the PMD can be changed to use the 64 or 32 bit operations depending on whether it is being compiled for a 64 or 32 bit target.
> 
> However, the question seems to be if we want to either 1) do something like that, or 2) drop support for 32 bit targets, or 3) make these target dependent fields obsolete (i.e. ban the use of unsigned long) and require explicit sizes, e.g. uint32_t.

We should support both,
and use the appropriate instruction.

But I wonder why this field does not have a fixed size.
It would probably be better to change the field to uint32_t or uint64_t.



^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bitoperation APIs instead
  2019-12-02  9:24       ` [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bitoperation " Morten Brørup
  2019-12-02  9:30         ` Thomas Monjalon
@ 2019-12-02 16:53         ` Stephen Hemminger
  2019-12-03  6:52           ` Gavin Hu (Arm Technology China)
  1 sibling, 1 reply; 139+ messages in thread
From: Stephen Hemminger @ 2019-12-02 16:53 UTC (permalink / raw)
  To: Morten Brørup
  Cc: Thomas Monjalon, Gavin Hu (Arm Technology China),
	Joyce Kong (Arm Technology China),
	dev, jerinj, Bruce Richardson, nd, david.marchand,
	Honnappa Nagarahalli, ravi1.kumar, rmody, shshaikh, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang

On Mon, 2 Dec 2019 10:24:32 +0100
Morten Brørup <mb@smartsharesystems.com> wrote:

> Thomas,
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> > Sent: Monday, December 2, 2019 10:12 AM
> > 
> > 02/12/2019 07:09, Gavin Hu (Arm Technology China):  
> > > Hi Bruce, Thomas,
> > >
> > > This series of patches was reported a compilation issue[1] on 32bit  
> > Ubuntu.  
> > > On mainstream 64-bit OS,  "unsigned long" is 64-bit in size and we  
> > uses the 64-bit variant of APIs. But the 32-bit OS expect 32-bit
> > 'unsigned long' arguments.  
> > > This is where the error happens.  
> > 
> > Please could you be more specific? What is the exact error?  
> 
> The PMD has a private structure with an unsigned long field.
> 
> The patch for the PMD uses the 64 bit operations on this field. The patch fails to compile for a 32 bit target, because the struct field is only 32 bit there.
> 
> >   
> > > My question is how 32-bit OSes shall we support, put another way, can  
> > we ignore this compilation issue?  
> > > If we still need to care, how about making 'obsolete' of 'unsigned  
> > long' and use 'uint32' instead to be multi-OS friendly?
> > 
> > Which unsigned long?
> > If it is in the (not merged) bit API, it can still be changed no?
> >   
> 
> The patch for the PMD can be changed to use the 64 or 32 bit operations depending on whether it is being compiled for a 64 or 32 bit target.
> 
> However, the question seems to be if we want to either 1) do something like that, or 2) drop support for 32 bit targets, or 3) make these target dependent fields obsolete (i.e. ban the use of unsigned long) and require explicit sizes, e.g. uint32_t.

The bitop library should not assume sizeof(unsigned long) == 32 bit.

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bitoperation APIs instead
  2019-12-02 16:53         ` Stephen Hemminger
@ 2019-12-03  6:52           ` Gavin Hu (Arm Technology China)
  0 siblings, 0 replies; 139+ messages in thread
From: Gavin Hu (Arm Technology China) @ 2019-12-03  6:52 UTC (permalink / raw)
  To: Stephen Hemminger, Morten Brørup
  Cc: thomas, Joyce Kong (Arm Technology China),
	dev, jerinj, Bruce Richardson, nd, david.marchand,
	Honnappa Nagarahalli, ravi1.kumar, rmody, shshaikh, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, Gavin Hu (Arm Technology China),
	nd



> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Tuesday, December 3, 2019 12:53 AM
> To: Morten Brørup <mb@smartsharesystems.com>
> Cc: thomas@monjalon.net; Gavin Hu (Arm Technology China)
> <Gavin.Hu@arm.com>; Joyce Kong (Arm Technology China)
> <Joyce.Kong@arm.com>; dev@dpdk.org; jerinj@marvell.com; Bruce
> Richardson <bruce.richardson@intel.com>; nd <nd@arm.com>;
> david.marchand@redhat.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; ravi1.kumar@amd.com;
> rmody@marvell.com; shshaikh@marvell.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com
> Subject: Re: [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte
> bitoperation APIs instead
> 
> On Mon, 2 Dec 2019 10:24:32 +0100
> Morten Brørup <mb@smartsharesystems.com> wrote:
> 
> > Thomas,
> >
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas
> Monjalon
> > > Sent: Monday, December 2, 2019 10:12 AM
> > >
> > > 02/12/2019 07:09, Gavin Hu (Arm Technology China):
> > > > Hi Bruce, Thomas,
> > > >
> > > > This series of patches was reported a compilation issue[1] on 32bit
> > > Ubuntu.
> > > > On mainstream 64-bit OS,  "unsigned long" is 64-bit in size and we
> > > uses the 64-bit variant of APIs. But the 32-bit OS expect 32-bit
> > > 'unsigned long' arguments.
> > > > This is where the error happens.
> > >
> > > Please could you be more specific? What is the exact error?
> >
> > The PMD has a private structure with an unsigned long field.
> >
> > The patch for the PMD uses the 64 bit operations on this field. The patch
> fails to compile for a 32 bit target, because the struct field is only 32 bit there.
> >
> > >
> > > > My question is how 32-bit OSes shall we support, put another way, can
> > > we ignore this compilation issue?
> > > > If we still need to care, how about making 'obsolete' of 'unsigned
> > > long' and use 'uint32' instead to be multi-OS friendly?
> > >
> > > Which unsigned long?
> > > If it is in the (not merged) bit API, it can still be changed no?
> > >
> >
> > The patch for the PMD can be changed to use the 64 or 32 bit operations
> depending on whether it is being compiled for a 64 or 32 bit target.
> >
> > However, the question seems to be if we want to either 1) do something like
> that, or 2) drop support for 32 bit targets, or 3) make these target dependent
> fields obsolete (i.e. ban the use of unsigned long) and require explicit sizes, e.g.
> uint32_t.
> 
> The bitop library should not assume sizeof(unsigned long) == 32 bit.
As discussed, both 32-bit and 64-bit OSes should be supported, and the size of "unsigned long" differs between them.
Taking all this into consideration, we will use "unsigned int" or uint32_t instead of "unsigned long" in the PMDs to be compatible across 32-bit and 64-bit OSes.
/Gavin

^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v6 0/6] implement common rte bit operation APIs in PMDs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (33 preceding siblings ...)
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 6/6] net/hinic: " Joyce Kong
@ 2019-12-18  6:00 ` Joyce Kong
  2019-12-18  6:55   ` Gavin Hu
  2020-01-17 13:03   ` David Marchand
  2019-12-18  6:00 ` [dpdk-dev] [PATCH v6 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
                   ` (33 subsequent siblings)
  68 siblings, 2 replies; 139+ messages in thread
From: Joyce Kong @ 2019-12-18  6:00 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, phil.yang, gavin.hu
  Cc: nd, dev

There are a lot of bit operation functions scattered across PMDs. Consolidate
them into a common API family and apply it in the different PMDs to reduce code
duplication.

v6:
 Change 'unsigned long' fields in PMDs to 'uint32_t': on mainstream 64-bit OSes
 'unsigned long' is 64 bits wide, but on 32-bit OSes it is only 32 bits wide,
 so using the 64-bit API variant on such fields failed to compile for 32-bit targets.

v5:
 Correct the spelling mistake in test_bitops.c

v4:
  Introduce uint32_t/uint64_t *addr when defining bit operation APIs (suggested by
  Morten Brørup).

v3:
  1. Change the API's header file back to rte_bitops.h, then implement both 32-bit and
     64-bit operations with and without C11 atomic memory ordering.
  2. Add a multi-core test case for the bit operations which are implemented with memory ordering.
  3. Modify the doc of both the APIs and the test cases.

v2:
  1. Add doxygen comments for the rte bit operation API (suggested by Stephen Hemminger).
  2. Add test cases for the common rte bit operation API (suggested by Stephen Hemminger).
  3. Change the header file to rte_io_bitops.h and the operations to rte_io_set_bit() etc.,
     as the API uses barriers inside and the barriers are only needed for IO operations
     (suggested by Jerin Jacob).
  4. Use a well-defined uint_NN_t type (suggested by Morten Brørup).

Joyce Kong (6):
  lib/eal: implement the family of rte bit operation APIs
  test/bitops: add bit operation test case
  net/axgbe: use common rte bit operation APIs instead
  net/bnx2x: use common rte bit operation APIs instead
  net/qede: use common rte bit operation APIs instead
  net/hinic: use common rte bit operation APIs instead

 MAINTAINERS                                |   5 +
 app/test/Makefile                          |   1 +
 app/test/autotest_data.py                  |   6 +
 app/test/meson.build                       |   2 +
 app/test/test_bitops.c                     | 305 +++++++++++++++++++
 doc/api/doxy-api-index.md                  |   5 +-
 drivers/net/axgbe/axgbe_common.h           |  29 +-
 drivers/net/axgbe/axgbe_ethdev.c           |  14 +-
 drivers/net/axgbe/axgbe_ethdev.h           |   2 +-
 drivers/net/axgbe/axgbe_mdio.c             |  14 +-
 drivers/net/bnx2x/bnx2x.c                  | 232 +++++++-------
 drivers/net/bnx2x/bnx2x.h                  |  10 +-
 drivers/net/bnx2x/ecore_sp.h               |  47 +--
 drivers/net/hinic/Makefile                 |   1 +
 drivers/net/hinic/base/hinic_compat.h      |  33 +-
 drivers/net/hinic/hinic_pmd_ethdev.c       |  16 +-
 drivers/net/hinic/hinic_pmd_ethdev.h       |   2 +-
 drivers/net/hinic/meson.build              |   2 +
 drivers/net/qede/base/bcm_osal.c           |  22 +-
 drivers/net/qede/base/bcm_osal.h           |  14 +-
 drivers/net/qede/base/ecore.h              |   6 +-
 drivers/net/qede/base/ecore_cxt.c          |   6 +-
 drivers/net/qede/base/ecore_dcbx.c         |   8 +-
 drivers/net/qede/base/ecore_dev.c          |  38 +--
 drivers/net/qede/base/ecore_dev_api.h      |   2 +-
 drivers/net/qede/base/ecore_l2.c           |   6 +-
 drivers/net/qede/base/ecore_mcp.c          |   4 +-
 drivers/net/qede/base/ecore_sp_commands.c  |  12 +-
 drivers/net/qede/base/ecore_spq.c          |   2 +-
 drivers/net/qede/base/ecore_spq.h          |  10 +-
 drivers/net/qede/qede_main.c               |   4 +-
 lib/librte_eal/common/Makefile             |   1 +
 lib/librte_eal/common/include/rte_bitops.h | 474 +++++++++++++++++++++++++++++
 lib/librte_eal/common/meson.build          |   3 +-
 34 files changed, 1015 insertions(+), 323 deletions(-)
 create mode 100644 app/test/test_bitops.c
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v6 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (34 preceding siblings ...)
  2019-12-18  6:00 ` [dpdk-dev] [PATCH v6 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
@ 2019-12-18  6:00 ` Joyce Kong
  2019-12-20  6:52   ` Honnappa Nagarahalli
  2019-12-18  6:00 ` [dpdk-dev] [PATCH v6 2/6] test/bitops: add bit operation test case Joyce Kong
                   ` (32 subsequent siblings)
  68 siblings, 1 reply; 139+ messages in thread
From: Joyce Kong @ 2019-12-18  6:00 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, phil.yang, gavin.hu
  Cc: nd, dev

There are a lot of bit operation functions scattered and
duplicated in PMDs, so consolidating them into a common API
family is necessary. Furthermore, when a bit operation
is applied to IO devices, use __ATOMIC_ACQ_REL to
ensure the ordering of the IO bit operation.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 MAINTAINERS                                |   5 +
 doc/api/doxy-api-index.md                  |   5 +-
 lib/librte_eal/common/Makefile             |   1 +
 lib/librte_eal/common/include/rte_bitops.h | 474 +++++++++++++++++++++++++++++
 lib/librte_eal/common/meson.build          |   3 +-
 5 files changed, 485 insertions(+), 3 deletions(-)
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 4395d8d..d2a29a2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -236,6 +236,11 @@ M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
 F: lib/librte_eal/common/include/rte_bitmap.h
 F: app/test/test_bitmap.c
 
+Bitops
+M: Joyce Kong <joyce.kong@arm.com>
+F: lib/librte_eal/common/include/rte_bitops.h
+F: app/test/test_bitops.c
+
 MCSlock - EXPERIMENTAL
 M: Phil Yang <phil.yang@arm.com>
 F: lib/librte_eal/common/include/generic/rte_mcslock.h
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index dff496b..ade7c01 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -133,12 +133,13 @@ The public API headers are grouped by topics:
   [BPF]                (@ref rte_bpf.h)
 
 - **containers**:
+  [bitmap]             (@ref rte_bitmap.h),
+  [bitops]             (@ref rte_bitops.h),
   [mbuf]               (@ref rte_mbuf.h),
   [mbuf pool ops]      (@ref rte_mbuf_pool_ops.h),
   [ring]               (@ref rte_ring.h),
   [stack]              (@ref rte_stack.h),
-  [tailq]              (@ref rte_tailq.h),
-  [bitmap]             (@ref rte_bitmap.h)
+  [tailq]              (@ref rte_tailq.h)
 
 - **packet framework**:
   * [port]             (@ref rte_port.h):
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index c2c6d92..dd025c1 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -19,6 +19,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
 INC += rte_service.h rte_service_component.h
 INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
 INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
+INC += rte_bitops.h
 
 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
 GENERIC_INC += rte_memcpy.h rte_cpuflags.h
diff --git a/lib/librte_eal/common/include/rte_bitops.h b/lib/librte_eal/common/include/rte_bitops.h
new file mode 100644
index 0000000..34158d1
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_bitops.h
@@ -0,0 +1,474 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_BITOPS_H_
+#define _RTE_BITOPS_H_
+
+/**
+ * @file
+ * Bit Operations
+ *
+ * This file defines an API for bit operations with and without memory ordering.
+ */
+
+#include <stdint.h>
+#include <rte_debug.h>
+#include <rte_compat.h>
+
+/*---------------------------- 32 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 32-bit value without memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit, still at position nr: non-zero if set, 0 if clear.
+ */
+__rte_experimental
+static inline uint32_t
+rte_get_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 32-bit value to 1 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 32-bit value to 0 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then set it to 1 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit, still at position nr: non-zero if it was set, 0 if clear.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_set_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then clear it to 0 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit, still at position nr: non-zero if it was set, 0 if clear.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_clear_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 32-bit value with memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit, still at position nr: non-zero if set, 0 if clear.
+ */
+__rte_experimental
+static inline uint32_t
+rte_get_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 32-bit value to 1 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 32-bit value to 0 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then set it to 1 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit, still at position nr: non-zero if it was set, 0 if clear.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_set_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then clear it to 0 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit, still at position nr: non-zero if it was set, 0 if clear.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_clear_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/*---------------------------- 64 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 64-bit value without memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit, still at position nr: non-zero if set, 0 if clear.
+ */
+__rte_experimental
+static inline uint64_t
+rte_get_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 64-bit value to 1 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 64-bit value to 0 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then set it to 1 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit, still at position nr: non-zero if it was set, 0 if clear.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_set_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then clear it to 0 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit, still at position nr: non-zero if it was set, 0 if clear.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_clear_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 64-bit value with memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit, still at position nr: non-zero if set, 0 if clear.
+ */
+__rte_experimental
+static inline uint64_t
+rte_get_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 64-bit value to 1 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 64-bit value to 0 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then set it to 1 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit, still at position nr: non-zero if it was set, 0 if clear.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_set_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then clear it to 0 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit, still at position nr: non-zero if it was set, 0 if clear.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_clear_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+#endif /* _RTE_BITOPS_H_ */
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index 2b97715..766edbd 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -50,9 +50,10 @@ common_objs += eal_common_arch_objs
 
 common_headers = files(
 	'include/rte_alarm.h',
+	'include/rte_bitmap.h',
+	'include/rte_bitops.h',
 	'include/rte_branch_prediction.h',
 	'include/rte_bus.h',
-	'include/rte_bitmap.h',
 	'include/rte_class.h',
 	'include/rte_common.h',
 	'include/rte_compat.h',
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v6 2/6] test/bitops: add bit operation test case
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (35 preceding siblings ...)
  2019-12-18  6:00 ` [dpdk-dev] [PATCH v6 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-12-18  6:00 ` Joyce Kong
  2019-12-18  6:00 ` [dpdk-dev] [PATCH v6 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
                   ` (31 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-12-18  6:00 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, phil.yang, gavin.hu
  Cc: nd, dev

Add test cases for set bit, clear bit, test and set bit,
test and clear bit operations.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 app/test/Makefile         |   1 +
 app/test/autotest_data.py |   6 +
 app/test/meson.build      |   2 +
 app/test/test_bitops.c    | 305 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 314 insertions(+)
 create mode 100644 app/test/test_bitops.c

diff --git a/app/test/Makefile b/app/test/Makefile
index 57930c0..4f33274 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -70,6 +70,7 @@ SRCS-y += test_ticketlock.c
 SRCS-y += test_memory.c
 SRCS-y += test_memzone.c
 SRCS-y += test_bitmap.c
+SRCS-y += test_bitops.c
 SRCS-y += test_reciprocal_division.c
 SRCS-y += test_reciprocal_division_perf.c
 SRCS-y += test_fbarray.c
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 6deb97b..7db2df1 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -405,6 +405,12 @@
         "Report":  None,
     },
     {
+        "Name":    "Bitops test",
+        "Command": "bitops_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
+    {
         "Name":    "Hash multiwriter autotest",
         "Command": "hash_multiwriter_autotest",
         "Func":    default_autotest,
diff --git a/app/test/meson.build b/app/test/meson.build
index fb49d80..ef340ca 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -14,6 +14,7 @@ test_sources = files('commands.c',
 	'test_atomic.c',
 	'test_barrier.c',
 	'test_bitratestats.c',
+	'test_bitops.c',
 	'test_bpf.c',
 	'test_byteorder.c',
 	'test_cmdline.c',
@@ -167,6 +168,7 @@ fast_test_names = [
         'alarm_autotest',
         'atomic_autotest',
         'byteorder_autotest',
+        'bitops_autotest',
         'cmdline_autotest',
         'common_autotest',
         'cpuflags_autotest',
diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
new file mode 100644
index 0000000..2a648aa
--- /dev/null
+++ b/app/test/test_bitops.c
@@ -0,0 +1,305 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#include <rte_bitops.h>
+#include <rte_launch.h>
+#include "test.h"
+
+uint32_t val32;
+uint64_t val64;
+unsigned int synchro;
+unsigned int count32;
+unsigned int count64;
+
+#define MAX_BITS_32 32
+#define MAX_BITS_64 64
+/*
+ * Bitops functions
+ * ================
+ *
+ * - The main test function performs several subtests.
+ * - For relaxed version, check bit operations on one core.
+ *   - Initialize valXX to specified values, then set each bit of valXX
+ *     to 1 one by one in "test_bitops_set_relaxed".
+ *
+ *   - Clear each bit of valXX to 0 one by one in "test_bitops_clear_relaxed".
+ *
+ *   - Function "test_bitops_test_set_clear_relaxed" checks whether each bit
+ *     of valXX can do "test and set" and "test and clear" correctly.
+ *
+ * - For C11 atomic barrier version, check bit operations on multi cores.
+ *   - Each bit of valXX is set to 1, then cleared to 0, on every core in
+ *     "test_bitops_set_clear". The test checks that once all lcores finish
+ *     their set_clear, the value of valXX is still zero.
+ *
+ *   - The cores are waiting for a synchro which is triggered by the main test
+ *     function. Then all cores would do "rte_test_and_set_bitXX" or
+ *     "rte_test_and_clear_bitXX" at the same time, "countXX" which is checked
+ *     as the result later would inc by one or not according to the original
+ *     bit value.
+ *
+ */
+
+static int
+test_bitops_set_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_set_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_set_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_clear_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_clear_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_clear_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_test_set_clear_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_test_and_set_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_test_and_clear_bit32_relaxed(i, &val32)) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+	}
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_test_and_set_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_test_and_clear_bit64_relaxed(i, &val64)) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_set_clear(__attribute__((unused)) void *arg)
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		;
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_set_bit32(i, &val32);
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_clear_bit32(i, &val32);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_set_bit64(i, &val64);
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_clear_bit64(i, &val64);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * rte_test_and_set_bitXX() returns the original bit value, then sets it to 1.
+ * This function checks whether the target bit was 0 and, if so, increases
+ * "countXX" by one. If the bit was already 1, "countXX" is left unchanged.
+ * The value of "countXX" is checked as the result later.
+ */
+static int
+test_bitops_test_set(__attribute__((unused)) void *arg)
+
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		;
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_test_and_set_bit32(i, &val32))
+			__atomic_fetch_add(&count32, 1, __ATOMIC_ACQ_REL);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_test_and_set_bit64(i, &val64))
+			__atomic_fetch_add(&count64, 1, __ATOMIC_ACQ_REL);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * rte_test_and_clear_bitXX() returns the original bit value, then clears it.
+ * This function checks whether the target bit was 1 and, if so, increases
+ * "countXX" by one. If the bit was already 0, "countXX" is left unchanged.
+ * The value of "countXX" is checked as the result later.
+ */
+static int
+test_bitops_test_clear(__attribute__((unused)) void *arg)
+
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		;
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_test_and_clear_bit32(i, &val32))
+			__atomic_fetch_add(&count32, 1, __ATOMIC_ACQ_REL);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_test_and_clear_bit64(i, &val64))
+			__atomic_fetch_add(&count64, 1, __ATOMIC_ACQ_REL);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops(void)
+{
+	__atomic_store_n(&val32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&val64, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&synchro, 0,  __ATOMIC_RELAXED);
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	if (test_bitops_set_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_test_set_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+
+	rte_eal_mp_remote_launch(test_bitops_set_clear, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32(i, &val32)) {
+			printf("Failed to set and clear bit on multi cores.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64(i, &val64)) {
+			printf("Failed to set and clear bit on multi cores.\n");
+			return TEST_FAILED;
+		}
+
+	/*
+	 * Launch all slave lcores to do "rte_test_and_set_bitXX"
+	 * respectively.
+	 * Each lcore should have MAX_BITS_XX chances to check the target bit.
+	 * If it's equal to 0, set it to 1 and "countXX (which is initialized
+	 * to 0)" would be increased by one. If the target bit is 1, still set
+	 * it to 1 and do nothing for "countXX". There would be only one lcore
+	 * that finds the target bit is 0.
+	 * If the final value of "countXX" is equal to MAX_BITS_XX, all slave
+	 * lcores performed "rte_test_and_set_bitXX" correctly.
+	 */
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	rte_eal_mp_remote_launch(test_bitops_test_set, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	if (__atomic_load_n(&count32, __ATOMIC_RELAXED) != MAX_BITS_32) {
+		printf("Failed to test and set on multi cores.\n");
+		return TEST_FAILED;
+	}
+	if (__atomic_load_n(&count64, __ATOMIC_RELAXED) != MAX_BITS_64) {
+		printf("Failed to test and set on multi cores.\n");
+		return TEST_FAILED;
+	}
+
+	/*
+	 * Launch all slave lcores to do "rte_test_and_clear_bitXX"
+	 * respectively.
+	 * Each lcore should have MAX_BITS_XX chances to check the target bit.
+	 * If it's equal to 1, clear it to 0 and "countXX (which is initialized
+	 * to 0)" would be increased by one. If the target bit is 0, still clear
+	 * it to 0 and do nothing for "countXX". There would be only one lcore
+	 * that finds the target bit is 1.
+	 * If the final value of "countXX" is equal to MAX_BITS_XX, all slave
+	 * lcores performed "rte_test_and_clear_bitXX" correctly.
+	 */
+
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	rte_eal_mp_remote_launch(test_bitops_test_clear, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	if (__atomic_load_n(&count32, __ATOMIC_RELAXED) != MAX_BITS_32) {
+		printf("Failed to test and clear on multi cores.\n");
+		return TEST_FAILED;
+	}
+	if (__atomic_load_n(&count64, __ATOMIC_RELAXED) != MAX_BITS_64) {
+		printf("Failed to test and clear on multi cores.\n");
+		return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(bitops_autotest, test_bitops);
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v6 3/6] net/axgbe: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (36 preceding siblings ...)
  2019-12-18  6:00 ` [dpdk-dev] [PATCH v6 2/6] test/bitops: add bit operation test case Joyce Kong
@ 2019-12-18  6:00 ` Joyce Kong
  2019-12-18  6:00 ` [dpdk-dev] [PATCH v6 4/6] net/bnx2x: " Joyce Kong
                   ` (30 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-12-18  6:00 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, phil.yang, gavin.hu
  Cc: nd, dev

Remove the driver's own bit operation APIs and use the common ones;
this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/axgbe/axgbe_common.h | 29 +----------------------------
 drivers/net/axgbe/axgbe_ethdev.c | 14 +++++++-------
 drivers/net/axgbe/axgbe_ethdev.h |  2 +-
 drivers/net/axgbe/axgbe_mdio.c   | 14 +++++++-------
 4 files changed, 16 insertions(+), 43 deletions(-)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index 34f60f1..9cabda8 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -22,6 +22,7 @@
 #include <pthread.h>
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memory.h>
 #include <rte_malloc.h>
 #include <rte_hexdump.h>
@@ -1674,34 +1675,6 @@ do {									\
 #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)
 #define time_before_eq(a, b)	time_after_eq(b, a)
 
-/*---bitmap support apis---*/
-static inline int axgbe_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
-static inline void axgbe_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void axgbe_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int axgbe_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 static inline unsigned long msecs_to_timer_cycles(unsigned int m)
 {
 	return rte_get_timer_hz() * (m / 1000);
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index d1f160e..c3744bb 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -201,8 +201,8 @@ axgbe_dev_start(struct rte_eth_dev *dev)
 	axgbe_dev_enable_tx(dev);
 	axgbe_dev_enable_rx(dev);
 
-	axgbe_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
-	axgbe_clear_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_clear_bit32(AXGBE_STOPPED, &pdata->dev_state);
+	rte_clear_bit32(AXGBE_DOWN, &pdata->dev_state);
 	return 0;
 }
 
@@ -216,17 +216,17 @@ axgbe_dev_stop(struct rte_eth_dev *dev)
 
 	rte_intr_disable(&pdata->pci_dev->intr_handle);
 
-	if (axgbe_test_bit(AXGBE_STOPPED, &pdata->dev_state))
+	if (rte_get_bit32(AXGBE_STOPPED, &pdata->dev_state))
 		return;
 
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit32(AXGBE_STOPPED, &pdata->dev_state);
 	axgbe_dev_disable_tx(dev);
 	axgbe_dev_disable_rx(dev);
 
 	pdata->phy_if.phy_stop(pdata);
 	pdata->hw_if.exit(pdata);
 	memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit32(AXGBE_DOWN, &pdata->dev_state);
 }
 
 /* Clear all resources like TX/RX queues. */
@@ -598,8 +598,8 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
 
 	pdata = eth_dev->data->dev_private;
 	/* initial state */
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit32(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit32(AXGBE_STOPPED, &pdata->dev_state);
 	pdata->eth_dev = eth_dev;
 
 	pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
diff --git a/drivers/net/axgbe/axgbe_ethdev.h b/drivers/net/axgbe/axgbe_ethdev.h
index e3cfaf3..5e2c7e4 100644
--- a/drivers/net/axgbe/axgbe_ethdev.h
+++ b/drivers/net/axgbe/axgbe_ethdev.h
@@ -464,7 +464,7 @@ struct axgbe_port {
 	unsigned int xpcs_window_mask;
 
 	/* Flags representing axgbe_state */
-	unsigned long dev_state;
+	uint32_t dev_state;
 
 	struct axgbe_hw_if hw_if;
 	struct axgbe_phy_if phy_if;
diff --git a/drivers/net/axgbe/axgbe_mdio.c b/drivers/net/axgbe/axgbe_mdio.c
index 2721e5c..af7a1ec 100644
--- a/drivers/net/axgbe/axgbe_mdio.c
+++ b/drivers/net/axgbe/axgbe_mdio.c
@@ -743,7 +743,7 @@ static int __axgbe_phy_config_aneg(struct axgbe_port *pdata)
 {
 	int ret;
 
-	axgbe_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+	rte_set_bit32(AXGBE_LINK_INIT, &pdata->dev_state);
 	pdata->link_check = rte_get_timer_cycles();
 
 	ret = pdata->phy_if.phy_impl.an_config(pdata);
@@ -807,9 +807,9 @@ static int axgbe_phy_config_aneg(struct axgbe_port *pdata)
 
 	ret = __axgbe_phy_config_aneg(pdata);
 	if (ret)
-		axgbe_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_set_bit32(AXGBE_LINK_ERR, &pdata->dev_state);
 	else
-		axgbe_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_clear_bit32(AXGBE_LINK_ERR, &pdata->dev_state);
 
 	pthread_mutex_unlock(&pdata->an_mutex);
 
@@ -880,7 +880,7 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 	unsigned int link_aneg;
 	int an_restart;
 
-	if (axgbe_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
+	if (rte_get_bit32(AXGBE_LINK_ERR, &pdata->dev_state)) {
 		pdata->phy.link = 0;
 		goto adjust_link;
 	}
@@ -900,10 +900,10 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 			return;
 		}
 		axgbe_phy_status_result(pdata);
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
-			axgbe_clear_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+		if (rte_get_bit32(AXGBE_LINK_INIT, &pdata->dev_state))
+			rte_clear_bit32(AXGBE_LINK_INIT, &pdata->dev_state);
 	} else {
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
+		if (rte_get_bit32(AXGBE_LINK_INIT, &pdata->dev_state)) {
 			axgbe_check_link_timeout(pdata);
 
 			if (link_aneg)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v6 4/6] net/bnx2x: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (37 preceding siblings ...)
  2019-12-18  6:00 ` [dpdk-dev] [PATCH v6 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
@ 2019-12-18  6:00 ` " Joyce Kong
  2019-12-18  6:00 ` [dpdk-dev] [PATCH v6 5/6] net/qede: " Joyce Kong
                   ` (29 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-12-18  6:00 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, phil.yang, gavin.hu
  Cc: nd, dev

Remove the driver's own bit operation APIs and use the common ones;
this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/bnx2x/bnx2x.c    | 232 +++++++++++++++++++------------------------
 drivers/net/bnx2x/bnx2x.h    |  10 +-
 drivers/net/bnx2x/ecore_sp.h |  47 ++++-----
 3 files changed, 131 insertions(+), 158 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c
index ed31335..9760d94 100644
--- a/drivers/net/bnx2x/bnx2x.c
+++ b/drivers/net/bnx2x/bnx2x.c
@@ -26,6 +26,7 @@
 #include <fcntl.h>
 #include <zlib.h>
 #include <rte_string_fns.h>
+#include <rte_bitops.h>
 
 #define BNX2X_PMD_VER_PREFIX "BNX2X PMD"
 #define BNX2X_PMD_VERSION_MAJOR 1
@@ -129,32 +130,6 @@ static void bnx2x_ack_sb(struct bnx2x_softc *sc, uint8_t igu_sb_id,
 			 uint8_t storm, uint16_t index, uint8_t op,
 			 uint8_t update);
 
-int bnx2x_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	mb();
-	return res;
-}
-
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-void bnx2x_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-int bnx2x_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 int bnx2x_cmpxchg(volatile int *addr, int old, int new)
 {
 	return __sync_val_compare_and_swap(addr, old, new);
@@ -1422,16 +1397,16 @@ static int
 bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj,
 		 int mac_type, uint8_t wait_for_comp)
 {
-	unsigned long ramrod_flags = 0, vlan_mac_flags = 0;
+	uint32_t ramrod_flags = 0, vlan_mac_flags = 0;
 	int rc;
 
 	/* wait for completion of requested */
 	if (wait_for_comp) {
-		bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+		rte_set_bit32(RAMROD_COMP_WAIT, &ramrod_flags);
 	}
 
 	/* Set the mac type of addresses we want to clear */
-	bnx2x_set_bit(mac_type, &vlan_mac_flags);
+	rte_set_bit32(mac_type, &vlan_mac_flags);
 
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc < 0)
@@ -1442,8 +1417,7 @@ bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj,
 
 static int
 bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
-			unsigned long *rx_accept_flags,
-			unsigned long *tx_accept_flags)
+			uint32_t *rx_accept_flags, uint32_t *tx_accept_flags)
 {
 	/* Clear the flags first */
 	*rx_accept_flags = 0;
@@ -1458,26 +1432,26 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		break;
 
 	case BNX2X_RX_MODE_NORMAL:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
 	case BNX2X_RX_MODE_ALLMULTI:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
@@ -1488,19 +1462,20 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		 * should receive matched and unmatched (in resolution of port)
 		 * unicast packets.
 		 */
-		bnx2x_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		if (IS_MF_SI(sc)) {
-			bnx2x_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags);
+			rte_set_bit32(ECORE_ACCEPT_ALL_UNICAST,
+					tx_accept_flags);
 		} else {
-			bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+			rte_set_bit32(ECORE_ACCEPT_UNICAST, tx_accept_flags);
 		}
 
 		break;
@@ -1512,8 +1487,8 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 
 	/* Set ACCEPT_ANY_VLAN as we do not enable filtering by VLAN */
 	if (rx_mode != BNX2X_RX_MODE_NONE) {
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
 	}
 
 	return 0;
@@ -1542,7 +1517,7 @@ bnx2x_set_q_rx_mode(struct bnx2x_softc *sc, uint8_t cl_id,
 	ramrod_param.rdata = BNX2X_SP(sc, rx_mode_rdata);
 	ramrod_param.rdata_mapping =
 	    (rte_iova_t)BNX2X_SP_MAPPING(sc, rx_mode_rdata),
-	    bnx2x_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	    rte_set_bit32(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	ramrod_param.ramrod_flags = ramrod_flags;
 	ramrod_param.rx_mode_flags = rx_mode_flags;
@@ -1561,8 +1536,8 @@ bnx2x_set_q_rx_mode(struct bnx2x_softc *sc, uint8_t cl_id,
 
 int bnx2x_set_storm_rx_mode(struct bnx2x_softc *sc)
 {
-	unsigned long rx_mode_flags = 0, ramrod_flags = 0;
-	unsigned long rx_accept_flags = 0, tx_accept_flags = 0;
+	uint32_t rx_mode_flags = 0, ramrod_flags = 0;
+	uint32_t rx_accept_flags = 0, tx_accept_flags = 0;
 	int rc;
 
 	rc = bnx2x_fill_accept_flags(sc, sc->rx_mode, &rx_accept_flags,
@@ -1571,9 +1546,9 @@ int bnx2x_set_storm_rx_mode(struct bnx2x_softc *sc)
 		return rc;
 	}
 
-	bnx2x_set_bit(RAMROD_RX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_TX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit32(RAMROD_RX, &ramrod_flags);
+	rte_set_bit32(RAMROD_TX, &ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	return bnx2x_set_q_rx_mode(sc, sc->fp[0].cl_id, rx_mode_flags,
 				 rx_accept_flags, tx_accept_flags,
@@ -1698,7 +1673,7 @@ static int bnx2x_func_wait_started(struct bnx2x_softc *sc)
 			    "Forcing STARTED-->TX_STOPPED-->STARTED");
 
 		func_params.f_obj = &sc->func_obj;
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit32(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 
 		/* STARTED-->TX_STOPPED */
 		func_params.cmd = ECORE_F_CMD_TX_STOP;
@@ -1722,7 +1697,7 @@ static int bnx2x_stop_queue(struct bnx2x_softc *sc, int index)
 
 	q_params.q_obj = &sc->sp_objs[fp->index].q_obj;
 	/* We want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* Stop the primary connection: */
 
@@ -1783,7 +1758,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	int rc;
 
 	/* prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_STOP;
 
@@ -1797,7 +1772,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	if (rc) {
 		PMD_DRV_LOG(NOTICE, sc, "FUNC_STOP ramrod failed. "
 			    "Running a dry transaction");
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit32(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 		return ecore_func_state_change(sc, &func_params);
 	}
 
@@ -1809,7 +1784,7 @@ static int bnx2x_reset_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	struct ecore_func_state_params func_params = { NULL };
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_RESET;
@@ -1866,11 +1841,10 @@ bnx2x_chip_cleanup(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_li
 	 * a race between the completion code and this code.
 	 */
 
-	if (bnx2x_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) {
-		bnx2x_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
-	} else {
+	if (rte_get_bit32(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state))
+		rte_set_bit32(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
+	else
 		bnx2x_set_storm_rx_mode(sc);
-	}
 
 	/* Clean up multicast configuration */
 	rparam.mcast_obj = &sc->mcast_obj;
@@ -1952,7 +1926,7 @@ static void bnx2x_disable_close_the_gate(struct bnx2x_softc *sc)
  */
 static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 {
-	unsigned long ramrod_flags = 0, vlan_mac_flags = 0;
+	uint32_t ramrod_flags = 0, vlan_mac_flags = 0;
 	struct ecore_mcast_ramrod_params rparam = { NULL };
 	struct ecore_vlan_mac_obj *mac_obj = &sc->sp_objs->mac_obj;
 	int rc;
@@ -1960,12 +1934,12 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Cleanup MACs' object first... */
 
 	/* Wait for completion of requested */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &ramrod_flags);
 	/* Perform a dry cleanup */
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
+	rte_set_bit32(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
 
 	/* Clean ETH primary MAC */
-	bnx2x_set_bit(ECORE_ETH_MAC, &vlan_mac_flags);
+	rte_set_bit32(ECORE_ETH_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, &sc->sp_objs->mac_obj, &vlan_mac_flags,
 				 &ramrod_flags);
 	if (rc != 0) {
@@ -1974,7 +1948,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 
 	/* Cleanup UC list */
 	vlan_mac_flags = 0;
-	bnx2x_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
+	rte_set_bit32(ECORE_UC_LIST_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc != 0) {
 		PMD_DRV_LOG(NOTICE, sc,
@@ -1984,7 +1958,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Now clean mcast object... */
 
 	rparam.mcast_obj = &sc->mcast_obj;
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
+	rte_set_bit32(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
 
 	/* Add a DEL command... */
 	rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL);
@@ -4282,13 +4256,13 @@ static void bnx2x_handle_mcast_eqe(struct bnx2x_softc *sc)
 static void
 bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *elem)
 {
-	unsigned long ramrod_flags = 0;
+	uint32_t ramrod_flags = 0;
 	int rc = 0;
 	uint32_t cid = elem->message.data.eth_event.echo & BNX2X_SWCID_MASK;
 	struct ecore_vlan_mac_obj *vlan_mac_obj;
 
 	/* always push next commands out, don't wait here */
-	bnx2x_set_bit(RAMROD_CONT, &ramrod_flags);
+	rte_set_bit32(RAMROD_CONT, &ramrod_flags);
 
 	switch (le32toh(elem->message.data.eth_event.echo) >> BNX2X_SWCID_SHIFT) {
 	case ECORE_FILTER_MAC_PENDING:
@@ -4319,12 +4293,12 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 
 static void bnx2x_handle_rx_mode_eqe(struct bnx2x_softc *sc)
 {
-	bnx2x_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	rte_clear_bit32(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	/* send rx_mode command again if was requested */
-	if (bnx2x_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) {
+	if (rte_test_and_clear_bit32(ECORE_FILTER_RX_MODE_SCHED,
+						&sc->sp_state))
 		bnx2x_set_storm_rx_mode(sc);
-	}
 }
 
 static void bnx2x_update_eq_prod(struct bnx2x_softc *sc, uint16_t prod)
@@ -4693,7 +4667,7 @@ static int bnx2x_init_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	PMD_INIT_FUNC_TRACE(sc);
 
 	/* prepare the parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_INIT;
@@ -4941,7 +4915,7 @@ static void bnx2x_init_eth_fp(struct bnx2x_softc *sc, int idx)
 {
 	struct bnx2x_fastpath *fp = &sc->fp[idx];
 	uint32_t cids[ECORE_MULTI_TX_COS] = { 0 };
-	unsigned long q_type = 0;
+	uint32_t q_type = 0;
 	int cos;
 
 	fp->sc = sc;
@@ -4988,8 +4962,8 @@ static void bnx2x_init_eth_fp(struct bnx2x_softc *sc, int idx)
 	bnx2x_update_fp_sb_idx(fp);
 
 	/* Configure Queue State object */
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_RX, &q_type);
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_TX, &q_type);
+	rte_set_bit32(ECORE_Q_TYPE_HAS_RX, &q_type);
+	rte_set_bit32(ECORE_Q_TYPE_HAS_TX, &q_type);
 
 	ecore_init_queue_obj(sc,
 			     &sc->sp_objs[idx].q_obj,
@@ -5803,7 +5777,7 @@ static int bnx2x_func_start(struct bnx2x_softc *sc)
 	    &func_params.params.start;
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_START;
@@ -6379,11 +6353,11 @@ bnx2x_pf_q_prep_init(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
 	uint8_t cos;
 	int cxt_index, cxt_offset;
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags);
+	rte_set_bit32(ECORE_Q_FLG_HC, &init_params->rx.flags);
+	rte_set_bit32(ECORE_Q_FLG_HC, &init_params->tx.flags);
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
+	rte_set_bit32(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
+	rte_set_bit32(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
 
 	/* HC rate */
 	init_params->rx.hc_rate =
@@ -6414,10 +6388,10 @@ bnx2x_pf_q_prep_init(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
 static unsigned long
 bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 {
-	unsigned long flags = 0;
+	uint32_t flags = 0;
 
 	/* PF driver will always initialize the Queue to an ACTIVE state */
-	bnx2x_set_bit(ECORE_Q_FLG_ACTIVE, &flags);
+	rte_set_bit32(ECORE_Q_FLG_ACTIVE, &flags);
 
 	/*
 	 * tx only connections collect statistics (on the same index as the
@@ -6425,9 +6399,9 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * connection is initialized.
 	 */
 
-	bnx2x_set_bit(ECORE_Q_FLG_STATS, &flags);
+	rte_set_bit32(ECORE_Q_FLG_STATS, &flags);
 	if (zero_stats) {
-		bnx2x_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags);
+		rte_set_bit32(ECORE_Q_FLG_ZERO_STATS, &flags);
 	}
 
 	/*
@@ -6435,28 +6409,28 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * CoS-ness doesn't survive the loopback
 	 */
 	if (sc->flags & BNX2X_TX_SWITCHING) {
-		bnx2x_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags);
+		rte_set_bit32(ECORE_Q_FLG_TX_SWITCH, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
+	rte_set_bit32(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
 
 	return flags;
 }
 
 static unsigned long bnx2x_get_q_flags(struct bnx2x_softc *sc, uint8_t leading)
 {
-	unsigned long flags = 0;
+	uint32_t flags = 0;
 
 	if (IS_MF_SD(sc)) {
-		bnx2x_set_bit(ECORE_Q_FLG_OV, &flags);
+		rte_set_bit32(ECORE_Q_FLG_OV, &flags);
 	}
 
 	if (leading) {
-		bnx2x_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags);
-		bnx2x_set_bit(ECORE_Q_FLG_MCAST, &flags);
+		rte_set_bit32(ECORE_Q_FLG_LEADING_RSS, &flags);
+		rte_set_bit32(ECORE_Q_FLG_MCAST, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_VLAN, &flags);
+	rte_set_bit32(ECORE_Q_FLG_VLAN, &flags);
 
 	/* merge with common flags */
 	return flags | bnx2x_get_common_flags(sc, TRUE);
@@ -6577,7 +6551,7 @@ bnx2x_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, uint8_t lea
 	q_params.q_obj = &BNX2X_SP_OBJ(sc, fp).q_obj;
 
 	/* we want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* prepare the INIT parameters */
 	bnx2x_pf_q_prep_init(sc, fp, &q_params.params.init);
@@ -6645,20 +6619,20 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 
 	params.rss_obj = rss_obj;
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &params.ramrod_flags);
 
-	bnx2x_set_bit(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
+	rte_set_bit32(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
 
 	/* RSS configuration */
-	bnx2x_set_bit(ECORE_RSS_IPV4, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV4_TCP, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6_TCP, &params.rss_flags);
+	rte_set_bit32(ECORE_RSS_IPV4, &params.rss_flags);
+	rte_set_bit32(ECORE_RSS_IPV4_TCP, &params.rss_flags);
+	rte_set_bit32(ECORE_RSS_IPV6, &params.rss_flags);
+	rte_set_bit32(ECORE_RSS_IPV6_TCP, &params.rss_flags);
 	if (rss_obj->udp_rss_v4) {
-		bnx2x_set_bit(ECORE_RSS_IPV4_UDP, &params.rss_flags);
+		rte_set_bit32(ECORE_RSS_IPV4_UDP, &params.rss_flags);
 	}
 	if (rss_obj->udp_rss_v6) {
-		bnx2x_set_bit(ECORE_RSS_IPV6_UDP, &params.rss_flags);
+		rte_set_bit32(ECORE_RSS_IPV6_UDP, &params.rss_flags);
 	}
 
 	/* Hash bits */
@@ -6673,7 +6647,7 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 			params.rss_key[i] = (uint32_t) rte_rand();
 		}
 
-		bnx2x_set_bit(ECORE_RSS_SET_SRCH, &params.rss_flags);
+		rte_set_bit32(ECORE_RSS_SET_SRCH, &params.rss_flags);
 	}
 
 	if (IS_PF(sc))
@@ -6718,7 +6692,7 @@ static int bnx2x_init_rss_pf(struct bnx2x_softc *sc)
 static int
 bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
 		struct ecore_vlan_mac_obj *obj, uint8_t set, int mac_type,
-		unsigned long *ramrod_flags)
+		uint32_t *ramrod_flags)
 {
 	struct ecore_vlan_mac_ramrod_params ramrod_param;
 	int rc;
@@ -6730,11 +6704,11 @@ bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
 	ramrod_param.ramrod_flags = *ramrod_flags;
 
 	/* fill a user request section if needed */
-	if (!bnx2x_test_bit(RAMROD_CONT, ramrod_flags)) {
+	if (!rte_get_bit32(RAMROD_CONT, ramrod_flags)) {
 		rte_memcpy(ramrod_param.user_req.u.mac.mac, mac,
 				 ETH_ALEN);
 
-		bnx2x_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags);
+		rte_set_bit32(mac_type, &ramrod_param.user_req.vlan_mac_flags);
 
 /* Set the command: ADD or DEL */
 		ramrod_param.user_req.cmd = (set) ? ECORE_VLAN_MAC_ADD :
@@ -6757,11 +6731,11 @@ bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
 
 static int bnx2x_set_eth_mac(struct bnx2x_softc *sc, uint8_t set)
 {
-	unsigned long ramrod_flags = 0;
+	uint32_t ramrod_flags = 0;
 
 	PMD_DRV_LOG(DEBUG, sc, "Adding Ethernet MAC");
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	/* Eth MAC is set on RSS leading client (fp[0]) */
 	return bnx2x_set_mac_one(sc, sc->link_params.mac_addr,
@@ -6893,24 +6867,26 @@ bnx2x_fill_report_data(struct bnx2x_softc *sc, struct bnx2x_link_report_data *da
 
 	/* Link is down */
 	if (!sc->link_vars.link_up || (sc->flags & BNX2X_MF_FUNC_DIS)) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+		rte_set_bit32(BNX2X_LINK_REPORT_LINK_DOWN,
 			    &data->link_report_flags);
 	}
 
 	/* Full DUPLEX */
 	if (sc->link_vars.duplex == DUPLEX_FULL) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		rte_set_bit32(BNX2X_LINK_REPORT_FULL_DUPLEX,
 			    &data->link_report_flags);
 	}
 
 	/* Rx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_RX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
+		rte_set_bit32(BNX2X_LINK_REPORT_RX_FC_ON,
+				&data->link_report_flags);
 	}
 
 	/* Tx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
+		rte_set_bit32(BNX2X_LINK_REPORT_TX_FC_ON,
+				&data->link_report_flags);
 	}
 }
 
@@ -6929,9 +6905,9 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 
 	/* Don't report link down or exactly the same link status twice */
 	if (!memcmp(&cur_data, &sc->last_reported_link, sizeof(cur_data)) ||
-	    (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	    (rte_get_bit32(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &sc->last_reported_link.link_report_flags) &&
-	     bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	     rte_get_bit32(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &cur_data.link_report_flags))) {
 		return;
 	}
@@ -6946,14 +6922,14 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 	/* report new link params and remember the state for the next time */
 	rte_memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data));
 
-	if (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	if (rte_get_bit32(BNX2X_LINK_REPORT_LINK_DOWN,
 			 &cur_data.link_report_flags)) {
 		ELINK_DEBUG_P0(sc, "NIC Link is Down");
 	} else {
 		__rte_unused const char *duplex;
 		__rte_unused const char *flow;
 
-		if (bnx2x_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		if (rte_test_and_clear_bit32(BNX2X_LINK_REPORT_FULL_DUPLEX,
 					   &cur_data.link_report_flags)) {
 			duplex = "full";
 				ELINK_DEBUG_P0(sc, "link set to full duplex");
@@ -6968,19 +6944,19 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
  * enabled.
  */
 		if (cur_data.link_report_flags) {
-			if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			if (rte_get_bit32(BNX2X_LINK_REPORT_RX_FC_ON,
 					 &cur_data.link_report_flags) &&
-			    bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+			    rte_get_bit32(BNX2X_LINK_REPORT_TX_FC_ON,
 					 &cur_data.link_report_flags)) {
 				flow = "ON - receive & transmit";
-			} else if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (rte_get_bit32(BNX2X_LINK_REPORT_RX_FC_ON,
 						&cur_data.link_report_flags) &&
-				   !bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   !rte_get_bit32(BNX2X_LINK_REPORT_TX_FC_ON,
 						 &cur_data.link_report_flags)) {
 				flow = "ON - receive";
-			} else if (!bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (!rte_get_bit32(BNX2X_LINK_REPORT_RX_FC_ON,
 						 &cur_data.link_report_flags) &&
-				   bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   rte_get_bit32(BNX2X_LINK_REPORT_TX_FC_ON,
 						&cur_data.link_report_flags)) {
 				flow = "ON - transmit";
 			} else {
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 3383c76..4e71c75 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -997,8 +997,8 @@ struct bnx2x_sp_objs {
  * link parameters twice.
  */
 struct bnx2x_link_report_data {
-	uint16_t      line_speed;        /* Effective line speed */
-	unsigned long link_report_flags; /* BNX2X_LINK_REPORT_XXX flags */
+	uint16_t line_speed;        /* Effective line speed */
+	uint32_t link_report_flags; /* BNX2X_LINK_REPORT_XXX flags */
 };
 
 enum {
@@ -1229,7 +1229,7 @@ struct bnx2x_softc {
 	/* slow path */
 	struct bnx2x_dma      sp_dma;
 	struct bnx2x_slowpath *sp;
-	unsigned long       sp_state;
+	uint32_t	    sp_state;
 
 	/* slow path queue */
 	struct bnx2x_dma spq_dma;
@@ -1809,10 +1809,6 @@ static const uint32_t dmae_reg_go_c[] = {
 #define PCI_PM_D0    1
 #define PCI_PM_D3hot 2
 
-int  bnx2x_test_bit(int nr, volatile unsigned long * addr);
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long * addr);
-void bnx2x_clear_bit(int nr, volatile unsigned long * addr);
-int  bnx2x_test_and_clear_bit(int nr, volatile unsigned long * addr);
 int  bnx2x_cmpxchg(volatile int *addr, int old, int new);
 
 int bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size,
diff --git a/drivers/net/bnx2x/ecore_sp.h b/drivers/net/bnx2x/ecore_sp.h
index cc1db37..ce869f8 100644
--- a/drivers/net/bnx2x/ecore_sp.h
+++ b/drivers/net/bnx2x/ecore_sp.h
@@ -15,6 +15,7 @@
 #define ECORE_SP_H
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 #ifndef __LITTLE_ENDIAN
@@ -73,10 +74,10 @@ typedef rte_spinlock_t ECORE_MUTEX_SPIN;
 #define ECORE_SET_BIT_NA(bit, var)         (*var |= (1 << bit))
 #define ECORE_CLEAR_BIT_NA(bit, var)       (*var &= ~(1 << bit))
 
-#define ECORE_TEST_BIT(bit, var)           bnx2x_test_bit(bit, var)
-#define ECORE_SET_BIT(bit, var)            bnx2x_set_bit(bit, var)
-#define ECORE_CLEAR_BIT(bit, var)          bnx2x_clear_bit(bit, var)
-#define ECORE_TEST_AND_CLEAR_BIT(bit, var) bnx2x_test_and_clear_bit(bit, var)
+#define ECORE_TEST_BIT(bit, var)           rte_get_bit32(bit, var)
+#define ECORE_SET_BIT(bit, var)            rte_set_bit32(bit, var)
+#define ECORE_CLEAR_BIT(bit, var)          rte_clear_bit32(bit, var)
+#define ECORE_TEST_AND_CLEAR_BIT(bit, var) rte_test_and_clear_bit32(bit, var)
 
 #define atomic_load_acq_int                (int)*
 #define atomic_store_rel_int(a, v)         (*a = v)
@@ -538,7 +539,7 @@ struct ecore_vlan_mac_data {
 	/* used to contain the data related vlan_mac_flags bits from
 	 * ramrod parameters.
 	 */
-	unsigned long vlan_mac_flags;
+	uint32_t vlan_mac_flags;
 
 	/* Needed for MOVE command */
 	struct ecore_vlan_mac_obj *target_obj;
@@ -688,7 +689,7 @@ struct ecore_vlan_mac_ramrod_params {
 	struct ecore_vlan_mac_obj *vlan_mac_obj;
 
 	/* General command flags: COMP_WAIT, etc. */
-	unsigned long ramrod_flags;
+	uint32_t ramrod_flags;
 
 	/* Command specific configuration request */
 	struct ecore_vlan_mac_data user_req;
@@ -928,7 +929,7 @@ struct ecore_mcast_ramrod_params {
 	struct ecore_mcast_obj *mcast_obj;
 
 	/* Relevant options are RAMROD_COMP_WAIT and RAMROD_DRV_CLR_ONLY */
-	unsigned long ramrod_flags;
+	uint32_t ramrod_flags;
 
 	ecore_list_t mcast_list; /* list of struct ecore_mcast_list_elem */
 	/** TODO:
@@ -1144,22 +1145,22 @@ struct ecore_config_rss_params {
 	struct ecore_rss_config_obj *rss_obj;
 
 	/* may have RAMROD_COMP_WAIT set only */
-	unsigned long	ramrod_flags;
+	uint32_t ramrod_flags;
 
 	/* ECORE_RSS_X bits */
-	unsigned long	rss_flags;
+	uint32_t rss_flags;
 
 	/* Number hash bits to take into an account */
-	uint8_t		rss_result_mask;
+	uint8_t	 rss_result_mask;
 
 	/* Indirection table */
-	uint8_t		ind_table[T_ETH_INDIRECTION_TABLE_SIZE];
+	uint8_t	 ind_table[T_ETH_INDIRECTION_TABLE_SIZE];
 
 	/* RSS hash values */
-	uint32_t		rss_key[10];
+	uint32_t rss_key[10];
 
 	/* valid only if ECORE_RSS_UPDATE_TOE is set */
-	uint16_t		toe_rss_bitmap;
+	uint16_t toe_rss_bitmap;
 };
 
 struct ecore_rss_config_obj {
@@ -1290,17 +1291,17 @@ enum ecore_q_type {
 
 struct ecore_queue_init_params {
 	struct {
-		unsigned long	flags;
-		uint16_t		hc_rate;
-		uint8_t		fw_sb_id;
-		uint8_t		sb_cq_index;
+		uint32_t flags;
+		uint16_t hc_rate;
+		uint8_t	 fw_sb_id;
+		uint8_t	 sb_cq_index;
 	} tx;
 
 	struct {
-		unsigned long	flags;
-		uint16_t		hc_rate;
-		uint8_t		fw_sb_id;
-		uint8_t		sb_cq_index;
+		uint32_t flags;
+		uint16_t hc_rate;
+		uint8_t	 fw_sb_id;
+		uint8_t	 sb_cq_index;
 	} rx;
 
 	/* CID context in the host memory */
@@ -1440,7 +1441,7 @@ struct ecore_queue_state_params {
 	enum ecore_queue_cmd cmd;
 
 	/* may have RAMROD_COMP_WAIT set only */
-	unsigned long ramrod_flags;
+	uint32_t ramrod_flags;
 
 	/* Params according to the current command */
 	union {
@@ -1704,7 +1705,7 @@ struct ecore_func_state_params {
 	enum ecore_func_cmd cmd;
 
 	/* may have RAMROD_COMP_WAIT set only */
-	unsigned long	ramrod_flags;
+	uint32_t ramrod_flags;
 
 	/* Params according to the current command */
 	union {
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v6 5/6] net/qede: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (38 preceding siblings ...)
  2019-12-18  6:00 ` [dpdk-dev] [PATCH v6 4/6] net/bnx2x: " Joyce Kong
@ 2019-12-18  6:00 ` " Joyce Kong
  2019-12-18  6:00 ` [dpdk-dev] [PATCH v6 6/6] net/hinic: " Joyce Kong
                   ` (28 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-12-18  6:00 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, phil.yang, gavin.hu
  Cc: nd, dev

Remove the driver's own bit operation APIs and use the common rte
bit operation APIs instead; this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/qede/base/bcm_osal.c          | 22 +-----------------
 drivers/net/qede/base/bcm_osal.h          | 14 +++++-------
 drivers/net/qede/base/ecore.h             |  6 ++---
 drivers/net/qede/base/ecore_cxt.c         |  6 ++---
 drivers/net/qede/base/ecore_dcbx.c        |  8 +++----
 drivers/net/qede/base/ecore_dev.c         | 38 +++++++++++++++----------------
 drivers/net/qede/base/ecore_dev_api.h     |  2 +-
 drivers/net/qede/base/ecore_l2.c          |  6 ++---
 drivers/net/qede/base/ecore_mcp.c         |  4 ++--
 drivers/net/qede/base/ecore_sp_commands.c | 12 +++++-----
 drivers/net/qede/base/ecore_spq.c         |  2 +-
 drivers/net/qede/base/ecore_spq.h         | 10 ++++----
 drivers/net/qede/qede_main.c              |  4 ++--
 13 files changed, 56 insertions(+), 78 deletions(-)

diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 48d016e..54e5e4f 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -46,26 +46,6 @@ u32 qede_osal_log2(u32 val)
 	return log;
 }
 
-inline void qede_set_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-inline void qede_clr_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-inline bool qede_test_bit(u32 nr, unsigned long *addr)
-{
-	bool res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
 static inline u32 qede_ffb(unsigned long word)
 {
 	unsigned long first_bit;
@@ -95,7 +75,7 @@ static inline u32 qede_ffz(unsigned long word)
 	return first_zero ? (first_zero - 1) : OSAL_BITS_PER_UL;
 }
 
-inline u32 qede_find_first_zero_bit(unsigned long *addr, u32 limit)
+inline u32 qede_find_first_zero_bit(u32 *addr, u32 limit)
 {
 	u32 i;
 	u32 nwords = 0;
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 0f09557..023ca06 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -8,6 +8,7 @@
 #define __BCM_OSAL_H
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_spinlock.h>
 #include <rte_malloc.h>
 #include <rte_atomic.h>
@@ -311,23 +312,20 @@ typedef struct osal_list_t {
 #define OSAL_BITS_PER_UL_MASK		(OSAL_BITS_PER_UL - 1)
 
 /* Bitops */
-void qede_set_bit(u32, unsigned long *);
 #define OSAL_SET_BIT(bit, bitmap) \
-	qede_set_bit(bit, bitmap)
+	rte_set_bit32(bit, bitmap)
 
-void qede_clr_bit(u32, unsigned long *);
 #define OSAL_CLEAR_BIT(bit, bitmap) \
-	qede_clr_bit(bit, bitmap)
+	rte_clear_bit32(bit, bitmap)
 
-bool qede_test_bit(u32, unsigned long *);
-#define OSAL_TEST_BIT(bit, bitmap) \
-	qede_test_bit(bit, bitmap)
+#define OSAL_GET_BIT(bit, bitmap) \
+	rte_get_bit32(bit, bitmap)
 
 u32 qede_find_first_bit(unsigned long *, u32);
 #define OSAL_FIND_FIRST_BIT(bitmap, length) \
 	qede_find_first_bit(bitmap, length)
 
-u32 qede_find_first_zero_bit(unsigned long *, u32);
+u32 qede_find_first_zero_bit(u32 *addr, u32 limit);
 #define OSAL_FIND_FIRST_ZERO_BIT(bitmap, length) \
 	qede_find_first_zero_bit(bitmap, length)
 
diff --git a/drivers/net/qede/base/ecore.h b/drivers/net/qede/base/ecore.h
index b2077bc..498bb6f 100644
--- a/drivers/net/qede/base/ecore.h
+++ b/drivers/net/qede/base/ecore.h
@@ -422,8 +422,8 @@ struct ecore_hw_info {
 	u8 max_chains_per_vf;
 
 	u32 port_mode;
-	u32	hw_mode;
-	unsigned long device_capabilities;
+	u32 hw_mode;
+	u32 device_capabilities;
 
 	/* Default DCBX mode */
 	u8 dcbx_mode;
@@ -807,7 +807,7 @@ struct ecore_dev {
 
 	u8				path_id;
 
-	unsigned long			mf_bits;
+	u32				mf_bits;
 	enum ecore_mf_mode		mf_mode;
 #define IS_MF_DEFAULT(_p_hwfn)	\
 	(((_p_hwfn)->p_dev)->mf_mode == ECORE_MF_DEFAULT)
diff --git a/drivers/net/qede/base/ecore_cxt.c b/drivers/net/qede/base/ecore_cxt.c
index 773b75e..dda47ea 100644
--- a/drivers/net/qede/base/ecore_cxt.c
+++ b/drivers/net/qede/base/ecore_cxt.c
@@ -154,7 +154,7 @@ struct ecore_ilt_client_cfg {
 struct ecore_cid_acquired_map {
 	u32 start_cid;
 	u32 max_count;
-	unsigned long *cid_map;
+	u32 *cid_map;
 };
 
 struct ecore_src_t2 {
@@ -1991,7 +1991,7 @@ static bool ecore_cxt_test_cid_acquired(struct ecore_hwfn *p_hwfn,
 	}
 
 	rel_cid = cid - (*pp_map)->start_cid;
-	if (!OSAL_TEST_BIT(rel_cid, (*pp_map)->cid_map)) {
+	if (!OSAL_GET_BIT(rel_cid, (*pp_map)->cid_map)) {
 		DP_NOTICE(p_hwfn, true,
 			  "CID %d [vifd %02x] not acquired", cid, vfid);
 		goto fail;
@@ -2102,7 +2102,7 @@ enum _ecore_status_t ecore_cxt_set_pf_params(struct ecore_hwfn *p_hwfn)
 
 		count = p_params->num_arfs_filters;
 
-		if (!OSAL_TEST_BIT(ECORE_MF_DISABLE_ARFS,
+		if (!OSAL_GET_BIT(ECORE_MF_DISABLE_ARFS,
 				   &p_hwfn->p_dev->mf_bits))
 			p_hwfn->p_cxt_mngr->arfs_count = count;
 
diff --git a/drivers/net/qede/base/ecore_dcbx.c b/drivers/net/qede/base/ecore_dcbx.c
index ccd4383..31234f1 100644
--- a/drivers/net/qede/base/ecore_dcbx.c
+++ b/drivers/net/qede/base/ecore_dcbx.c
@@ -148,7 +148,7 @@ ecore_dcbx_set_params(struct ecore_dcbx_results *p_data,
 	p_data->arr[type].update = UPDATE_DCB_DSCP;
 
 	/* Do not add valn tag 0 when DCB is enabled and port is in UFP mode */
-	if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits))
+	if (OSAL_GET_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits))
 		p_data->arr[type].dont_add_vlan0 = true;
 
 	/* QM reconf data */
@@ -156,8 +156,8 @@ ecore_dcbx_set_params(struct ecore_dcbx_results *p_data,
 		p_hwfn->hw_info.offload_tc = tc;
 
 	/* Configure dcbx vlan priority in doorbell block for roce EDPM */
-	if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits) &&
-	    (type == DCBX_PROTOCOL_ROCE)) {
+	if (OSAL_GET_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits) &&
+	    type == DCBX_PROTOCOL_ROCE) {
 		ecore_wr(p_hwfn, p_ptt, DORQ_REG_TAG1_OVRD_MODE, 1);
 		ecore_wr(p_hwfn, p_ptt, DORQ_REG_PF_PCP, prio << 1);
 	}
@@ -293,7 +293,7 @@ ecore_dcbx_process_tlv(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
 	}
 
 	/* If Eth TLV is not detected, use UFP TC as default TC */
-	if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC,
+	if (OSAL_GET_BIT(ECORE_MF_UFP_SPECIFIC,
 			  &p_hwfn->p_dev->mf_bits) && !eth_tlv)
 		p_data->arr[DCBX_PROTOCOL_ETH].tc = p_hwfn->ufp_info.tc;
 
diff --git a/drivers/net/qede/base/ecore_dev.c b/drivers/net/qede/base/ecore_dev.c
index 9d1db14..e292299 100644
--- a/drivers/net/qede/base/ecore_dev.c
+++ b/drivers/net/qede/base/ecore_dev.c
@@ -805,7 +805,7 @@ static enum _ecore_status_t ecore_llh_hw_init_pf(struct ecore_hwfn *p_hwfn,
 		ecore_wr(p_hwfn, p_ptt, addr, p_hwfn->rel_pf_id);
 	}
 
-	if (OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits) &&
+	if (OSAL_GET_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits) &&
 	    !ECORE_IS_FCOE_PERSONALITY(p_hwfn)) {
 		rc = ecore_llh_add_mac_filter(p_dev, 0,
 					      p_hwfn->hw_info.hw_mac_addr);
@@ -1044,7 +1044,7 @@ ecore_llh_add_filter(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
 	filter_details.enable = 1;
 	filter_details.value = ((u64)high << 32) | low;
 	filter_details.hdr_sel =
-		OSAL_TEST_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits) ?
+		OSAL_GET_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits) ?
 		1 : /* inner/encapsulated header */
 		0;  /* outer/tunnel header */
 	filter_details.protocol_type = filter_prot_type;
@@ -1083,7 +1083,7 @@ enum _ecore_status_t ecore_llh_add_mac_filter(struct ecore_dev *p_dev, u8 ppfid,
 	if (p_ptt == OSAL_NULL)
 		return ECORE_AGAIN;
 
-	if (!OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
+	if (!OSAL_GET_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
 		goto out;
 
 	OSAL_MEM_ZERO(&filter, sizeof(filter));
@@ -1220,7 +1220,7 @@ ecore_llh_add_protocol_filter(struct ecore_dev *p_dev, u8 ppfid,
 	if (p_ptt == OSAL_NULL)
 		return ECORE_AGAIN;
 
-	if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits))
+	if (!OSAL_GET_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits))
 		goto out;
 
 	rc = ecore_llh_protocol_filter_stringify(p_dev, type,
@@ -1287,7 +1287,7 @@ void ecore_llh_remove_mac_filter(struct ecore_dev *p_dev, u8 ppfid,
 	if (p_ptt == OSAL_NULL)
 		return;
 
-	if (!OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
+	if (!OSAL_GET_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
 		goto out;
 
 	OSAL_MEM_ZERO(&filter, sizeof(filter));
@@ -1342,7 +1342,7 @@ void ecore_llh_remove_protocol_filter(struct ecore_dev *p_dev, u8 ppfid,
 	if (p_ptt == OSAL_NULL)
 		return;
 
-	if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits))
+	if (!OSAL_GET_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits))
 		goto out;
 
 	rc = ecore_llh_protocol_filter_stringify(p_dev, type,
@@ -1396,8 +1396,8 @@ void ecore_llh_clear_ppfid_filters(struct ecore_dev *p_dev, u8 ppfid)
 	if (p_ptt == OSAL_NULL)
 		return;
 
-	if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits) &&
-	    !OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
+	if (!OSAL_GET_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits) &&
+	    !OSAL_GET_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
 		goto out;
 
 	rc = ecore_abs_ppfid(p_dev, ppfid, &abs_ppfid);
@@ -1423,8 +1423,8 @@ void ecore_llh_clear_all_filters(struct ecore_dev *p_dev)
 {
 	u8 ppfid;
 
-	if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits) &&
-	    !OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
+	if (!OSAL_GET_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits) &&
+	    !OSAL_GET_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
 		return;
 
 	for (ppfid = 0; ppfid < p_dev->p_llh_info->num_ppfid; ppfid++)
@@ -2674,7 +2674,7 @@ static enum _ecore_status_t ecore_calc_hw_mode(struct ecore_hwfn *p_hwfn)
 		return ECORE_INVAL;
 	}
 
-	if (OSAL_TEST_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits))
+	if (OSAL_GET_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits))
 		hw_mode |= 1 << MODE_MF_SD;
 	else
 		hw_mode |= 1 << MODE_MF_SI;
@@ -3382,7 +3382,7 @@ static enum _ecore_status_t ecore_hw_init_port(struct ecore_hwfn *p_hwfn,
 		 * The ppfid should be set in the vector, except in BB which has
 		 * a bug in the LLH where the ppfid is actually engine based.
 		 */
-		if (OSAL_TEST_BIT(ECORE_MF_NEED_DEF_PF, &p_dev->mf_bits)) {
+		if (OSAL_GET_BIT(ECORE_MF_NEED_DEF_PF, &p_dev->mf_bits)) {
 			u8 pf_id = p_hwfn->rel_pf_id;
 
 			if (!ECORE_IS_BB(p_dev))
@@ -3715,11 +3715,11 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
 		if (rc != ECORE_SUCCESS)
 			return rc;
 
-		if (IS_PF(p_dev) && (OSAL_TEST_BIT(ECORE_MF_8021Q_TAGGING,
+		if (IS_PF(p_dev) && (OSAL_GET_BIT(ECORE_MF_8021Q_TAGGING,
 						   &p_dev->mf_bits) ||
-				     OSAL_TEST_BIT(ECORE_MF_8021AD_TAGGING,
+				     OSAL_GET_BIT(ECORE_MF_8021AD_TAGGING,
 						   &p_dev->mf_bits))) {
-			if (OSAL_TEST_BIT(ECORE_MF_8021Q_TAGGING,
+			if (OSAL_GET_BIT(ECORE_MF_8021Q_TAGGING,
 					  &p_dev->mf_bits))
 				ether_type = ETHER_TYPE_VLAN;
 			else
@@ -4119,7 +4119,7 @@ enum _ecore_status_t ecore_hw_stop(struct ecore_dev *p_dev)
 		OSAL_MSLEEP(1);
 
 		if (IS_LEAD_HWFN(p_hwfn) &&
-		    OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits) &&
+		    OSAL_GET_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits) &&
 		    !ECORE_IS_FCOE_PERSONALITY(p_hwfn))
 			ecore_llh_remove_mac_filter(p_dev, 0,
 						   p_hwfn->hw_info.hw_mac_addr);
@@ -5113,7 +5113,7 @@ ecore_hw_get_nvm_info(struct ecore_hwfn *p_hwfn,
 			p_hwfn->p_dev->mf_bits |= 1 << ECORE_MF_NEED_DEF_PF;
 		break;
 	}
-	DP_INFO(p_hwfn, "Multi function mode is 0x%lx\n",
+	DP_INFO(p_hwfn, "Multi function mode is 0x%x\n",
 		p_hwfn->p_dev->mf_bits);
 
 	if (ECORE_IS_CMT(p_hwfn->p_dev))
@@ -6202,7 +6202,7 @@ enum _ecore_status_t
 ecore_llh_set_function_as_default(struct ecore_hwfn *p_hwfn,
 				  struct ecore_ptt *p_ptt)
 {
-	if (OSAL_TEST_BIT(ECORE_MF_NEED_DEF_PF, &p_hwfn->p_dev->mf_bits)) {
+	if (OSAL_GET_BIT(ECORE_MF_NEED_DEF_PF, &p_hwfn->p_dev->mf_bits)) {
 		ecore_wr(p_hwfn, p_ptt,
 			 NIG_REG_LLH_TAGMAC_DEF_PF_VECTOR,
 			 1 << p_hwfn->abs_pf_id / 2);
@@ -6779,5 +6779,5 @@ void ecore_set_fw_mac_addr(__le16 *fw_msb,
 
 bool ecore_is_mf_fip_special(struct ecore_dev *p_dev)
 {
-	return !!OSAL_TEST_BIT(ECORE_MF_FIP_SPECIAL, &p_dev->mf_bits);
+	return !!OSAL_GET_BIT(ECORE_MF_FIP_SPECIAL, &p_dev->mf_bits);
 }
diff --git a/drivers/net/qede/base/ecore_dev_api.h b/drivers/net/qede/base/ecore_dev_api.h
index 4d5cc1a..83cfcf7 100644
--- a/drivers/net/qede/base/ecore_dev_api.h
+++ b/drivers/net/qede/base/ecore_dev_api.h
@@ -212,7 +212,7 @@ enum _ecore_status_t ecore_db_recovery_del(struct ecore_dev *p_dev,
 
 static OSAL_INLINE bool ecore_is_mf_ufp(struct ecore_hwfn *p_hwfn)
 {
-	return !!OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits);
+	return !!OSAL_GET_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits);
 }
 
 #endif
diff --git a/drivers/net/qede/base/ecore_l2.c b/drivers/net/qede/base/ecore_l2.c
index b20d837..af234de 100644
--- a/drivers/net/qede/base/ecore_l2.c
+++ b/drivers/net/qede/base/ecore_l2.c
@@ -29,7 +29,7 @@
 
 struct ecore_l2_info {
 	u32 queues;
-	unsigned long **pp_qid_usage;
+	u32 **pp_qid_usage;
 
 	/* The lock is meant to synchronize access to the qid usage */
 	osal_mutex_t lock;
@@ -38,7 +38,7 @@ struct ecore_l2_info {
 enum _ecore_status_t ecore_l2_alloc(struct ecore_hwfn *p_hwfn)
 {
 	struct ecore_l2_info *p_l2_info;
-	unsigned long **pp_qids;
+	u32 **pp_qids;
 	u32 i;
 
 	if (!ECORE_IS_L2_PERSONALITY(p_hwfn))
@@ -2116,7 +2116,7 @@ void ecore_arfs_mode_configure(struct ecore_hwfn *p_hwfn,
 			       struct ecore_ptt *p_ptt,
 			       struct ecore_arfs_config_params *p_cfg_params)
 {
-	if (OSAL_TEST_BIT(ECORE_MF_DISABLE_ARFS, &p_hwfn->p_dev->mf_bits))
+	if (OSAL_GET_BIT(ECORE_MF_DISABLE_ARFS, &p_hwfn->p_dev->mf_bits))
 		return;
 
 	if (p_cfg_params->mode != ECORE_FILTER_CONFIG_MODE_DISABLE) {
diff --git a/drivers/net/qede/base/ecore_mcp.c b/drivers/net/qede/base/ecore_mcp.c
index 7518765..a748596 100644
--- a/drivers/net/qede/base/ecore_mcp.c
+++ b/drivers/net/qede/base/ecore_mcp.c
@@ -1732,7 +1732,7 @@ static void ecore_mcp_update_stag(struct ecore_hwfn *p_hwfn,
 	p_hwfn->mcp_info->func_info.ovlan = (u16)shmem_info.ovlan_stag &
 						 FUNC_MF_CFG_OV_STAG_MASK;
 	p_hwfn->hw_info.ovlan = p_hwfn->mcp_info->func_info.ovlan;
-	if (OSAL_TEST_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits)) {
+	if (OSAL_GET_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits)) {
 		if (p_hwfn->hw_info.ovlan != ECORE_MCP_VLAN_UNSET) {
 			ecore_wr(p_hwfn, p_ptt, NIG_REG_LLH_FUNC_TAG_VALUE,
 				 p_hwfn->hw_info.ovlan);
@@ -2026,7 +2026,7 @@ ecore_mcp_read_ufp_config(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
 	struct public_func shmem_info;
 	u32 port_cfg, val;
 
-	if (!OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits))
+	if (!OSAL_GET_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits))
 		return;
 
 	OSAL_MEMSET(&p_hwfn->ufp_info, 0, sizeof(p_hwfn->ufp_info));
diff --git a/drivers/net/qede/base/ecore_sp_commands.c b/drivers/net/qede/base/ecore_sp_commands.c
index 9860a62..44ced13 100644
--- a/drivers/net/qede/base/ecore_sp_commands.c
+++ b/drivers/net/qede/base/ecore_sp_commands.c
@@ -335,16 +335,16 @@ enum _ecore_status_t ecore_sp_pf_start(struct ecore_hwfn *p_hwfn,
 	p_ramrod->dont_log_ramrods = 0;
 	p_ramrod->log_type_mask = OSAL_CPU_TO_LE16(0x8f);
 
-	if (OSAL_TEST_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits))
+	if (OSAL_GET_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits))
 		p_ramrod->mf_mode = MF_OVLAN;
 	else
 		p_ramrod->mf_mode = MF_NPAR;
 
 	p_ramrod->outer_tag_config.outer_tag.tci =
 		OSAL_CPU_TO_LE16(p_hwfn->hw_info.ovlan);
-	if (OSAL_TEST_BIT(ECORE_MF_8021Q_TAGGING, &p_hwfn->p_dev->mf_bits)) {
+	if (OSAL_GET_BIT(ECORE_MF_8021Q_TAGGING, &p_hwfn->p_dev->mf_bits)) {
 		p_ramrod->outer_tag_config.outer_tag.tpid = ETH_P_8021Q;
-	} else if (OSAL_TEST_BIT(ECORE_MF_8021AD_TAGGING,
+	} else if (OSAL_GET_BIT(ECORE_MF_8021AD_TAGGING,
 		 &p_hwfn->p_dev->mf_bits)) {
 		p_ramrod->outer_tag_config.outer_tag.tpid = ETH_P_8021AD;
 		p_ramrod->outer_tag_config.enable_stag_pri_change = 1;
@@ -357,7 +357,7 @@ enum _ecore_status_t ecore_sp_pf_start(struct ecore_hwfn *p_hwfn,
 	/* enable_stag_pri_change should be set if port is in BD mode or,
 	 * UFP with Host Control mode.
 	 */
-	if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits)) {
+	if (OSAL_GET_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits)) {
 		if (p_hwfn->ufp_info.pri_type == ECORE_UFP_PRI_OS)
 			p_ramrod->outer_tag_config.enable_stag_pri_change = 1;
 		else
@@ -378,7 +378,7 @@ enum _ecore_status_t ecore_sp_pf_start(struct ecore_hwfn *p_hwfn,
 	ecore_tunn_set_pf_start_params(p_hwfn, p_tunn,
 				       &p_ramrod->tunnel_config);
 
-	if (OSAL_TEST_BIT(ECORE_MF_INTER_PF_SWITCH,
+	if (OSAL_GET_BIT(ECORE_MF_INTER_PF_SWITCH,
 			  &p_hwfn->p_dev->mf_bits))
 		p_ramrod->allow_npar_tx_switching = allow_npar_tx_switch;
 
@@ -638,7 +638,7 @@ enum _ecore_status_t ecore_sp_heartbeat_ramrod(struct ecore_hwfn *p_hwfn)
 	if (rc != ECORE_SUCCESS)
 		return rc;
 
-	if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits))
+	if (OSAL_GET_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits))
 		p_ent->ramrod.pf_update.mf_vlan |=
 			OSAL_CPU_TO_LE16(((u16)p_hwfn->ufp_info.tc << 13));
 
diff --git a/drivers/net/qede/base/ecore_spq.c b/drivers/net/qede/base/ecore_spq.c
index 6c38682..02f6136 100644
--- a/drivers/net/qede/base/ecore_spq.c
+++ b/drivers/net/qede/base/ecore_spq.c
@@ -977,7 +977,7 @@ enum _ecore_status_t ecore_spq_completion(struct ecore_hwfn *p_hwfn,
 			 * for the first successive completed entries.
 			 */
 			SPQ_COMP_BMAP_SET_BIT(p_spq, echo);
-			while (SPQ_COMP_BMAP_TEST_BIT(p_spq,
+			while (SPQ_COMP_BMAP_GET_BIT(p_spq,
 						      p_spq->comp_bitmap_idx)) {
 				SPQ_COMP_BMAP_CLEAR_BIT(p_spq,
 							p_spq->comp_bitmap_idx);
diff --git a/drivers/net/qede/base/ecore_spq.h b/drivers/net/qede/base/ecore_spq.h
index 6142c39..0958e5a 100644
--- a/drivers/net/qede/base/ecore_spq.h
+++ b/drivers/net/qede/base/ecore_spq.h
@@ -121,17 +121,17 @@ struct ecore_spq {
 #define SPQ_RING_SIZE		\
 	(CORE_SPQE_PAGE_SIZE_BYTES / sizeof(struct slow_path_element))
 /* BITS_PER_LONG */
-#define SPQ_COMP_BMAP_SIZE	(SPQ_RING_SIZE / (sizeof(unsigned long) * 8))
-	unsigned long			p_comp_bitmap[SPQ_COMP_BMAP_SIZE];
-	u8				comp_bitmap_idx;
+#define SPQ_COMP_BMAP_SIZE	(SPQ_RING_SIZE / (sizeof(u32) * 8))
+	u32			p_comp_bitmap[SPQ_COMP_BMAP_SIZE];
+	u8			comp_bitmap_idx;
 #define SPQ_COMP_BMAP_SET_BIT(p_spq, idx)				\
 	(OSAL_SET_BIT(((idx) % SPQ_RING_SIZE), (p_spq)->p_comp_bitmap))
 
 #define SPQ_COMP_BMAP_CLEAR_BIT(p_spq, idx)				\
 	(OSAL_CLEAR_BIT(((idx) % SPQ_RING_SIZE), (p_spq)->p_comp_bitmap))
 
-#define SPQ_COMP_BMAP_TEST_BIT(p_spq, idx)	\
-	(OSAL_TEST_BIT(((idx) % SPQ_RING_SIZE), (p_spq)->p_comp_bitmap))
+#define SPQ_COMP_BMAP_GET_BIT(p_spq, idx)	\
+	(OSAL_GET_BIT(((idx) % SPQ_RING_SIZE), (p_spq)->p_comp_bitmap))
 
 	/* Statistics */
 	u32				unlimited_pending_count;
diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c
index 4eb79d0..c5b909e 100644
--- a/drivers/net/qede/qede_main.c
+++ b/drivers/net/qede/qede_main.c
@@ -378,8 +378,8 @@ qed_fill_dev_info(struct ecore_dev *edev, struct qed_dev_info *dev_info)
 
 	if (IS_PF(edev)) {
 		dev_info->b_inter_pf_switch =
-			OSAL_TEST_BIT(ECORE_MF_INTER_PF_SWITCH, &edev->mf_bits);
-		if (!OSAL_TEST_BIT(ECORE_MF_DISABLE_ARFS, &edev->mf_bits))
+			OSAL_GET_BIT(ECORE_MF_INTER_PF_SWITCH, &edev->mf_bits);
+		if (!OSAL_GET_BIT(ECORE_MF_DISABLE_ARFS, &edev->mf_bits))
 			dev_info->b_arfs_capable = true;
 		dev_info->tx_switching = false;
 
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v6 6/6] net/hinic: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (39 preceding siblings ...)
  2019-12-18  6:00 ` [dpdk-dev] [PATCH v6 5/6] net/qede: " Joyce Kong
@ 2019-12-18  6:00 ` " Joyce Kong
  2020-03-09  9:54 ` [dpdk-dev] [PATCH v7 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (27 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2019-12-18  6:00 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, phil.yang, gavin.hu
  Cc: nd, dev

Remove the driver's own bit operation APIs and use the common ones
instead; this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/hinic/Makefile            |  1 +
 drivers/net/hinic/base/hinic_compat.h | 33 +--------------------------------
 drivers/net/hinic/hinic_pmd_ethdev.c  | 16 ++++++++--------
 drivers/net/hinic/hinic_pmd_ethdev.h  |  2 +-
 drivers/net/hinic/meson.build         |  2 ++
 5 files changed, 13 insertions(+), 41 deletions(-)

diff --git a/drivers/net/hinic/Makefile b/drivers/net/hinic/Makefile
index 87fd843..f087baa 100644
--- a/drivers/net/hinic/Makefile
+++ b/drivers/net/hinic/Makefile
@@ -9,6 +9,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_pmd_hinic.a
 
 CFLAGS += -O3
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += $(WERROR_FLAGS)
 
 ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
diff --git a/drivers/net/hinic/base/hinic_compat.h b/drivers/net/hinic/base/hinic_compat.h
index e4a7f12..c0a0b3e 100644
--- a/drivers/net/hinic/base/hinic_compat.h
+++ b/drivers/net/hinic/base/hinic_compat.h
@@ -11,6 +11,7 @@
 #include <pthread.h>
 #include <rte_common.h>
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memzone.h>
 #include <rte_memcpy.h>
 #include <rte_malloc.h>
@@ -117,38 +118,6 @@ extern int hinic_logtype;
 
 #define HINIC_PAGE_SIZE_DPDK	6
 
-static inline int hinic_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	res = ((*addr) & (1UL << nr)) != 0;
-	return res;
-}
-
-static inline void hinic_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void hinic_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int hinic_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
-static inline int hinic_test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_or(addr, mask) & mask;
-}
-
 void *dma_zalloc_coherent(void *dev, size_t size, dma_addr_t *dma_handle,
 			  gfp_t flag);
 void *dma_zalloc_coherent_aligned(void *dev, size_t size,
diff --git a/drivers/net/hinic/hinic_pmd_ethdev.c b/drivers/net/hinic/hinic_pmd_ethdev.c
index 803a39e..17f235f 100644
--- a/drivers/net/hinic/hinic_pmd_ethdev.c
+++ b/drivers/net/hinic/hinic_pmd_ethdev.c
@@ -269,7 +269,7 @@ static void hinic_dev_interrupt_handler(void *param)
 	struct rte_eth_dev *dev = param;
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (!hinic_test_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
+	if (!rte_get_bit32(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device's interrupt is disabled, ignore interrupt event, dev_name: %s, port_id: %d",
 			    nic_dev->proc_dev_name, dev->data->port_id);
 		return;
@@ -1076,7 +1076,7 @@ static int hinic_dev_start(struct rte_eth_dev *dev)
 	if (dev->data->dev_conf.intr_conf.lsc != 0)
 		(void)hinic_link_update(dev, 0);
 
-	hinic_set_bit(HINIC_DEV_START, &nic_dev->dev_status);
+	rte_set_bit32(HINIC_DEV_START, &nic_dev->dev_status);
 
 	return 0;
 
@@ -1201,7 +1201,7 @@ static void hinic_dev_stop(struct rte_eth_dev *dev)
 	name = dev->data->name;
 	port_id = dev->data->port_id;
 
-	if (!hinic_test_and_clear_bit(HINIC_DEV_START, &nic_dev->dev_status)) {
+	if (!rte_test_and_clear_bit32(HINIC_DEV_START, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(INFO, "Device %s already stopped", name);
 		return;
 	}
@@ -1246,7 +1246,7 @@ static void hinic_disable_interrupt(struct rte_eth_dev *dev)
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	int ret, retries = 0;
 
-	hinic_clear_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_clear_bit32(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* disable msix interrupt in hardware */
 	hinic_set_msix_state(nic_dev->hwdev, 0, HINIC_MSIX_DISABLE);
@@ -2845,7 +2845,7 @@ static void hinic_dev_close(struct rte_eth_dev *dev)
 {
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (hinic_test_and_set_bit(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
+	if (rte_test_and_set_bit32(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device %s already closed",
 			    dev->data->name);
 		return;
@@ -3042,7 +3042,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 			    eth_dev->data->name);
 		goto enable_intr_fail;
 	}
-	hinic_set_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_set_bit32(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* initialize filter info */
 	filter_info = &nic_dev->filter;
@@ -3054,7 +3054,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 	TAILQ_INIT(&nic_dev->filter_fdir_rule_list);
 	TAILQ_INIT(&nic_dev->hinic_flow_list);
 
-	hinic_set_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_set_bit32(HINIC_DEV_INIT, &nic_dev->dev_status);
 	PMD_DRV_LOG(INFO, "Initialize %s in primary successfully",
 		    eth_dev->data->name);
 
@@ -3110,7 +3110,7 @@ static int hinic_dev_uninit(struct rte_eth_dev *dev)
 	struct hinic_nic_dev *nic_dev;
 
 	nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
-	hinic_clear_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_clear_bit32(HINIC_DEV_INIT, &nic_dev->dev_status);
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
diff --git a/drivers/net/hinic/hinic_pmd_ethdev.h b/drivers/net/hinic/hinic_pmd_ethdev.h
index 3e3f3b3..114f1df 100644
--- a/drivers/net/hinic/hinic_pmd_ethdev.h
+++ b/drivers/net/hinic/hinic_pmd_ethdev.h
@@ -171,7 +171,7 @@ struct hinic_nic_dev {
 	unsigned int flags;
 	struct nic_service_cap nic_cap;
 	u32 rx_mode_status;	/* promisc or allmulticast */
-	unsigned long dev_status;
+	u32 dev_status;
 
 	char proc_dev_name[HINIC_DEV_NAME_LEN];
 	/* PF0->COS4, PF1->COS5, PF2->COS6, PF3->COS7,
diff --git a/drivers/net/hinic/meson.build b/drivers/net/hinic/meson.build
index bc7e246..8c7ee9d 100644
--- a/drivers/net/hinic/meson.build
+++ b/drivers/net/hinic/meson.build
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Huawei Technologies Co., Ltd
 
+allow_experimental_apis = true
+
 subdir('base')
 objs = [base_objs]
 
-- 
2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v6 0/6] implement common rte bit operation APIs in PMDs
  2019-12-18  6:00 ` [dpdk-dev] [PATCH v6 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
@ 2019-12-18  6:55   ` Gavin Hu
  2020-01-17 13:03   ` David Marchand
  1 sibling, 0 replies; 139+ messages in thread
From: Gavin Hu @ 2019-12-18  6:55 UTC (permalink / raw)
  To: Joyce Kong, thomas, stephen, david.marchand, mb, jerinj,
	bruce.richardson, ravi1.kumar, rmody, shshaikh, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, Honnappa Nagarahalli, Phil Yang,
	Stephen Hemminger, Morten Brørup
  Cc: nd, dev, nd

Hi Maintainers, 

This series of patches consolidates the rte bitops APIs (to reduce duplication), with the aim that they be used by all PMDs.
At this stage, a few of the PMDs you maintain have been converted as a pilot to stabilize the APIs.

Before we expand this to all PMDs, could you please share your opinions?
The APIs have already evolved in response to community feedback; please have a look at the earlier revisions to learn more about the background.

Best Regards,
Gavin

> -----Original Message-----
> From: Joyce Kong <joyce.kong@arm.com>
> Sent: Wednesday, December 18, 2019 2:00 PM
> To: thomas@monjalon.net; stephen@networkplumber.org;
> david.marchand@redhat.com; mb@smartsharesystems.com;
> jerinj@marvell.com; bruce.richardson@intel.com; ravi1.kumar@amd.com;
> rmody@marvell.com; shshaikh@marvell.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; Phil Yang
> <Phil.Yang@arm.com>; Gavin Hu <Gavin.Hu@arm.com>
> Cc: nd <nd@arm.com>; dev@dpdk.org
> Subject: [PATCH v6 0/6] implement common rte bit operation APIs in PMDs
> 
> There are a lot functions of bit operations scattered in PMDs, consolidate
> them into a common API family and applied in different PMDs to reduce
> code
> duplication.
> 
> v6:
>  Trim 'unsigned long' in PMDs down to 'uint32_t', as on mainstream 64-bit OS,
>  'unsigned long' is 64-bit in size, but the 32-bit OS expects 32-bit 'unsigned
>  long' argument.
> 
> v5:
>  Correct the spelling mistake in test_bitops.c
> 
> v4:
>   Introduce uint32_t/uint64_t *addr when definiting bit operation
> APIs(suggested by
>   Morten Brørup).
> 
> v3:
>   1. Change the API's head file back to rte_bitops.h, then implement both 32-
> bit and
>      64-bit operations with and without C11 atomic memory ordering.
>   2. Add multi-core test case for bit operations which implemented with
> memory ordering.
>   3. Modify the doc of both APIs and test cases.
> 
> v2:
>   1. Add doxygen comments for the rte bit operation API(suggested by
> Stephen Hemminger).
>   2. Add test cases for common rte bit operation API(suggested by Stephen
> Hemminger).
>   3. Change the header file to rte_io_bitops.h and the operation to
> rte_io_set_bit()etc.,
>      as the API uses barriers inside and the barriers are only needed for IO
> operations
>      (suggested by Jerin Jacob).
>   4. Use an well defined uint_NN_t type(suggested by Morten Brørup).
> 
> Joyce Kong (6):
>   lib/eal: implement the family of rte bit operation APIs
>   test/bitops: add bit operation test case
>   net/axgbe: use common rte bit operation APIs instead
>   net/bnx2x: use common rte bit operation APIs instead
>   net/qede: use common rte bit operation APIs instead
>   net/hinic: use common rte bit operation APIs instead
> 
>  MAINTAINERS                                |   5 +
>  app/test/Makefile                          |   1 +
>  app/test/autotest_data.py                  |   6 +
>  app/test/meson.build                       |   2 +
>  app/test/test_bitops.c                     | 305 +++++++++++++++++++
>  doc/api/doxy-api-index.md                  |   5 +-
>  drivers/net/axgbe/axgbe_common.h           |  29 +-
>  drivers/net/axgbe/axgbe_ethdev.c           |  14 +-
>  drivers/net/axgbe/axgbe_ethdev.h           |   2 +-
>  drivers/net/axgbe/axgbe_mdio.c             |  14 +-
>  drivers/net/bnx2x/bnx2x.c                  | 232 +++++++-------
>  drivers/net/bnx2x/bnx2x.h                  |  10 +-
>  drivers/net/bnx2x/ecore_sp.h               |  47 +--
>  drivers/net/hinic/Makefile                 |   1 +
>  drivers/net/hinic/base/hinic_compat.h      |  33 +-
>  drivers/net/hinic/hinic_pmd_ethdev.c       |  16 +-
>  drivers/net/hinic/hinic_pmd_ethdev.h       |   2 +-
>  drivers/net/hinic/meson.build              |   2 +
>  drivers/net/qede/base/bcm_osal.c           |  22 +-
>  drivers/net/qede/base/bcm_osal.h           |  14 +-
>  drivers/net/qede/base/ecore.h              |   6 +-
>  drivers/net/qede/base/ecore_cxt.c          |   6 +-
>  drivers/net/qede/base/ecore_dcbx.c         |   8 +-
>  drivers/net/qede/base/ecore_dev.c          |  38 +--
>  drivers/net/qede/base/ecore_dev_api.h      |   2 +-
>  drivers/net/qede/base/ecore_l2.c           |   6 +-
>  drivers/net/qede/base/ecore_mcp.c          |   4 +-
>  drivers/net/qede/base/ecore_sp_commands.c  |  12 +-
>  drivers/net/qede/base/ecore_spq.c          |   2 +-
>  drivers/net/qede/base/ecore_spq.h          |  10 +-
>  drivers/net/qede/qede_main.c               |   4 +-
>  lib/librte_eal/common/Makefile             |   1 +
>  lib/librte_eal/common/include/rte_bitops.h | 474
> +++++++++++++++++++++++++++++
>  lib/librte_eal/common/meson.build          |   3 +-
>  34 files changed, 1015 insertions(+), 323 deletions(-)
>  create mode 100644 app/test/test_bitops.c
>  create mode 100644 lib/librte_eal/common/include/rte_bitops.h
> 
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v6 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-12-18  6:00 ` [dpdk-dev] [PATCH v6 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-12-20  6:52   ` Honnappa Nagarahalli
  2019-12-21 16:07     ` Honnappa Nagarahalli
  0 siblings, 1 reply; 139+ messages in thread
From: Honnappa Nagarahalli @ 2019-12-20  6:52 UTC (permalink / raw)
  To: Joyce Kong, thomas, stephen, david.marchand, mb, jerinj,
	bruce.richardson, ravi1.kumar, rmody, shshaikh, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, Phil Yang, Gavin Hu,
	Honnappa Nagarahalli
  Cc: nd, dev, nd

Hi Joyce,
	These APIs seem to have been written with the PMD requirements in mind. Is there a need to expose them to applications (external to DPDK)?

> -----Original Message-----
> From: Joyce Kong <joyce.kong@arm.com>
> Sent: Wednesday, December 18, 2019 12:00 AM
> To: thomas@monjalon.net; stephen@networkplumber.org;
> david.marchand@redhat.com; mb@smartsharesystems.com;
> jerinj@marvell.com; bruce.richardson@intel.com; ravi1.kumar@amd.com;
> rmody@marvell.com; shshaikh@marvell.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; Phil Yang
> <Phil.Yang@arm.com>; Gavin Hu <Gavin.Hu@arm.com>
> Cc: nd <nd@arm.com>; dev@dpdk.org
> Subject: [PATCH v6 1/6] lib/eal: implement the family of rte bit operation
> APIs
> 
> There are a lot functions of bit operations scattered and duplicated in PMDs,
> consolidating them into a common API family is necessary. Furthermore,
> when the bit operation is applied to the IO devices, use __ATOMIC_ACQ_REL
> to ensure the ordering for io bit operation.
> 
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> ---
>  MAINTAINERS                                |   5 +
>  doc/api/doxy-api-index.md                  |   5 +-
>  lib/librte_eal/common/Makefile             |   1 +
>  lib/librte_eal/common/include/rte_bitops.h | 474
> +++++++++++++++++++++++++++++
>  lib/librte_eal/common/meson.build          |   3 +-
>  5 files changed, 485 insertions(+), 3 deletions(-)  create mode 100644
> lib/librte_eal/common/include/rte_bitops.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 4395d8d..d2a29a2 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -236,6 +236,11 @@ M: Cristian Dumitrescu
> <cristian.dumitrescu@intel.com>
>  F: lib/librte_eal/common/include/rte_bitmap.h
>  F: app/test/test_bitmap.c
> 
> +Bitops
> +M: Joyce Kong <joyce.kong@arm.com>
> +F: lib/librte_eal/common/include/rte_bitops.h
> +F: app/test/test_bitops.c
> +
>  MCSlock - EXPERIMENTAL
>  M: Phil Yang <phil.yang@arm.com>
>  F: lib/librte_eal/common/include/generic/rte_mcslock.h
> diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md index
> dff496b..ade7c01 100644
> --- a/doc/api/doxy-api-index.md
> +++ b/doc/api/doxy-api-index.md
> @@ -133,12 +133,13 @@ The public API headers are grouped by topics:
>    [BPF]                (@ref rte_bpf.h)
> 
>  - **containers**:
> +  [bitmap]             (@ref rte_bitmap.h),
> +  [bitops]             (@ref rte_bitops.h),
>    [mbuf]               (@ref rte_mbuf.h),
>    [mbuf pool ops]      (@ref rte_mbuf_pool_ops.h),
>    [ring]               (@ref rte_ring.h),
>    [stack]              (@ref rte_stack.h),
> -  [tailq]              (@ref rte_tailq.h),
> -  [bitmap]             (@ref rte_bitmap.h)
> +  [tailq]              (@ref rte_tailq.h)
> 
>  - **packet framework**:
>    * [port]             (@ref rte_port.h):
> diff --git a/lib/librte_eal/common/Makefile
> b/lib/librte_eal/common/Makefile index c2c6d92..dd025c1 100644
> --- a/lib/librte_eal/common/Makefile
> +++ b/lib/librte_eal/common/Makefile
> @@ -19,6 +19,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h  INC +=
> rte_service.h rte_service_component.h  INC += rte_bitmap.h rte_vfio.h
> rte_hypervisor.h rte_test.h  INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
> +INC += rte_bitops.h
> 
>  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
> GENERIC_INC += rte_memcpy.h rte_cpuflags.h diff --git
> a/lib/librte_eal/common/include/rte_bitops.h
> b/lib/librte_eal/common/include/rte_bitops.h
> new file mode 100644
> index 0000000..34158d1
> --- /dev/null
> +++ b/lib/librte_eal/common/include/rte_bitops.h
> @@ -0,0 +1,474 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Arm Limited
> + */
> +
> +#ifndef _RTE_BITOPS_H_
> +#define _RTE_BITOPS_H_
> +
> +/**
> + * @file
> + * Bit Operations
> + *
> + * This file defines a API for bit operations without/with memory ordering.
> + */
> +
> +#include <stdint.h>
> +#include <rte_debug.h>
> +#include <rte_compat.h>
> +
> +/*---------------------------- 32 bit operations
> +----------------------------*/
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Get the target bit from a 32-bit value without memory ordering.
> + *
> + * @param nr
> + *   The target bit to get.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The target bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_get_bit32_relaxed(unsigned int nr, uint32_t *addr) {
Why not pass the memory order as a parameter? It would reduce the number of API calls by half.

> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask; }
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Set the target bit in a 32-bit value to 1 without memory ordering.
> + *
> + * @param nr
> + *   The target bit to set.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_set_bit32_relaxed(unsigned int nr, uint32_t *addr) {
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED); }
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Clear the target bit in a 32-bit value to 0 without memory ordering.
> + *
> + * @param nr
> + *   The target bit to clear.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_clear_bit32_relaxed(unsigned int nr, uint32_t *addr) {
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED); }
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Return the original bit from a 32-bit value, then set it to 1
> +without
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and set.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_test_and_set_bit32_relaxed(unsigned int nr, uint32_t *addr) {
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask; }
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Return the original bit from a 32-bit value, then clear it to 0
> +without
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and clear.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_test_and_clear_bit32_relaxed(unsigned int nr, uint32_t *addr) {
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) &
> mask; }
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Get the target bit from a 32-bit value with memory ordering.
> + *
> + * @param nr
> + *   The target bit to get.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The target bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_get_bit32(unsigned int nr, uint32_t *addr) {
__atomic_load_n accepts other memory orders in addition to relaxed and acquire. Should the API name change to indicate the acquire memory order here?

> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask; }
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Set the target bit in a 32-bit value to 1 with memory ordering.
> + *
> + * @param nr
> + *   The target bit to set.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_set_bit32(unsigned int nr, uint32_t *addr) {
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL); }
Why not use just '__ATOMIC_RELEASE' here? The full barrier might not be required in all use cases.

> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Clear the target bit in a 32-bit value to 0 with memory ordering.
> + *
> + * @param nr
> + *   The target bit to clear.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_clear_bit32(unsigned int nr, uint32_t *addr) {
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL); }
Why not use just '__ATOMIC_RELEASE' here? The full barrier might not be required in all use cases. I see a similar issue in the other APIs below.

> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Return the original bit from a 32-bit value, then set it to 1 with
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and set.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_test_and_set_bit32(unsigned int nr, uint32_t *addr) {
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> mask; }
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Return the original bit from a 32-bit value, then clear it to 0 with
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and clear.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_test_and_clear_bit32(unsigned int nr, uint32_t *addr) {
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) &
> mask; }
> +
> +/*---------------------------- 64 bit operations
> +----------------------------*/
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Get the target bit from a 64-bit value without memory ordering.
> + *
> + * @param nr
> + *   The target bit to get.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The target bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_get_bit64_relaxed(unsigned int nr, uint64_t *addr) {
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask; }
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Set the target bit in a 64-bit value to 1 without memory ordering.
> + *
> + * @param nr
> + *   The target bit to set.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_set_bit64_relaxed(unsigned int nr, uint64_t *addr) {
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED); }
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Clear the target bit in a 64-bit value to 0 without memory ordering.
> + *
> + * @param nr
> + *   The target bit to clear.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_clear_bit64_relaxed(unsigned int nr, uint64_t *addr) {
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED); }
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Return the original bit from a 64-bit value, then set it to 1
> +without
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and set.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_test_and_set_bit64_relaxed(unsigned int nr, uint64_t *addr) {
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask; }
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Return the original bit from a 64-bit value, then clear it to 0
> +without
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and clear.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_test_and_clear_bit64_relaxed(unsigned int nr, uint64_t *addr) {
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) &
> mask; }
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Get the target bit from a 64-bit value with memory ordering.
> + *
> + * @param nr
> + *   The target bit to get.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The target bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_get_bit64(unsigned int nr, uint64_t *addr) {
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask; }
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Set the target bit in a 64-bit value to 1 with memory ordering.
> + *
> + * @param nr
> + *   The target bit to set.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_set_bit64(unsigned int nr, uint64_t *addr) {
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL); }
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Clear the target bit in a 64-bit value to 0 with memory ordering.
> + *
> + * @param nr
> + *   The target bit to clear.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_clear_bit64(unsigned int nr, uint64_t *addr) {
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL); }
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Return the original bit from a 64-bit value, then set it to 1 with
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and set.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_test_and_set_bit64(unsigned int nr, uint64_t *addr) {
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> mask; }
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> +notice
> + *
> + * Return the original bit from a 64-bit value, then clear it to 0 with
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and clear.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_test_and_clear_bit64(unsigned int nr, uint64_t *addr) {
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) &
> mask; }
> +#endif /* _RTE_BITOPS_H_ */
> diff --git a/lib/librte_eal/common/meson.build
> b/lib/librte_eal/common/meson.build
> index 2b97715..766edbd 100644
> --- a/lib/librte_eal/common/meson.build
> +++ b/lib/librte_eal/common/meson.build
> @@ -50,9 +50,10 @@ common_objs += eal_common_arch_objs
> 
>  common_headers = files(
>  	'include/rte_alarm.h',
> +	'include/rte_bitmap.h',
> +	'include/rte_bitops.h',
>  	'include/rte_branch_prediction.h',
>  	'include/rte_bus.h',
> -	'include/rte_bitmap.h',
>  	'include/rte_class.h',
>  	'include/rte_common.h',
>  	'include/rte_compat.h',
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v6 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-12-20  6:52   ` Honnappa Nagarahalli
@ 2019-12-21 16:07     ` Honnappa Nagarahalli
  2019-12-21 18:07       ` Stephen Hemminger
                         ` (2 more replies)
  0 siblings, 3 replies; 139+ messages in thread
From: Honnappa Nagarahalli @ 2019-12-21 16:07 UTC (permalink / raw)
  To: Joyce Kong, thomas, stephen, david.marchand, mb, jerinj,
	bruce.richardson, ravi1.kumar, rmody, shshaikh, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, Phil Yang, Gavin Hu
  Cc: nd, dev, Honnappa Nagarahalli, nd

<snip>

> > Subject: [PATCH v6 1/6] lib/eal: implement the family of rte bit
> > operation APIs
> >
> > There are a lot functions of bit operations scattered and duplicated
> > in PMDs, consolidating them into a common API family is necessary.
> > Furthermore, when the bit operation is applied to the IO devices, use
> > __ATOMIC_ACQ_REL to ensure the ordering for io bit operation.
> >
> > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> > Reviewed-by: Phil Yang <phil.yang@arm.com>
> > Acked-by: Morten Brørup <mb@smartsharesystems.com>
> > ---
> >  MAINTAINERS                                |   5 +
> >  doc/api/doxy-api-index.md                  |   5 +-
> >  lib/librte_eal/common/Makefile             |   1 +
> >  lib/librte_eal/common/include/rte_bitops.h | 474
> > +++++++++++++++++++++++++++++
> >  lib/librte_eal/common/meson.build          |   3 +-
> >  5 files changed, 485 insertions(+), 3 deletions(-)  create mode
> > 100644 lib/librte_eal/common/include/rte_bitops.h
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS index 4395d8d..d2a29a2 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -236,6 +236,11 @@ M: Cristian Dumitrescu
> > <cristian.dumitrescu@intel.com>
> >  F: lib/librte_eal/common/include/rte_bitmap.h
> >  F: app/test/test_bitmap.c
> >
> > +Bitops
> > +M: Joyce Kong <joyce.kong@arm.com>
> > +F: lib/librte_eal/common/include/rte_bitops.h
> > +F: app/test/test_bitops.c
> > +
> >  MCSlock - EXPERIMENTAL
> >  M: Phil Yang <phil.yang@arm.com>
> >  F: lib/librte_eal/common/include/generic/rte_mcslock.h
> > diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
> > index
> > dff496b..ade7c01 100644
> > --- a/doc/api/doxy-api-index.md
> > +++ b/doc/api/doxy-api-index.md
> > @@ -133,12 +133,13 @@ The public API headers are grouped by topics:
> >    [BPF]                (@ref rte_bpf.h)
> >
> >  - **containers**:
> > +  [bitmap]             (@ref rte_bitmap.h),
> > +  [bitops]             (@ref rte_bitops.h),
> >    [mbuf]               (@ref rte_mbuf.h),
> >    [mbuf pool ops]      (@ref rte_mbuf_pool_ops.h),
> >    [ring]               (@ref rte_ring.h),
> >    [stack]              (@ref rte_stack.h),
> > -  [tailq]              (@ref rte_tailq.h),
> > -  [bitmap]             (@ref rte_bitmap.h)
> > +  [tailq]              (@ref rte_tailq.h)
> >
> >  - **packet framework**:
> >    * [port]             (@ref rte_port.h):
> > diff --git a/lib/librte_eal/common/Makefile
> > b/lib/librte_eal/common/Makefile index c2c6d92..dd025c1 100644
> > --- a/lib/librte_eal/common/Makefile
> > +++ b/lib/librte_eal/common/Makefile
> > @@ -19,6 +19,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h  INC
> > += rte_service.h rte_service_component.h  INC += rte_bitmap.h
> > rte_vfio.h rte_hypervisor.h rte_test.h  INC += rte_reciprocal.h
> > rte_fbarray.h rte_uuid.h
> > +INC += rte_bitops.h
> >
> >  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> > rte_prefetch.h GENERIC_INC += rte_memcpy.h rte_cpuflags.h diff --git
> > a/lib/librte_eal/common/include/rte_bitops.h
> > b/lib/librte_eal/common/include/rte_bitops.h
> > new file mode 100644
> > index 0000000..34158d1
> > --- /dev/null
> > +++ b/lib/librte_eal/common/include/rte_bitops.h
> > @@ -0,0 +1,474 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2019 Arm Limited
> > + */
> > +
> > +#ifndef _RTE_BITOPS_H_
> > +#define _RTE_BITOPS_H_
> > +
> > +/**
> > + * @file
> > + * Bit Operations
> > + *
> > + * This file defines a API for bit operations without/with memory ordering.
> > + */
> > +
> > +#include <stdint.h>
> > +#include <rte_debug.h>
> > +#include <rte_compat.h>
> > +
> > +/*---------------------------- 32 bit operations
> > +----------------------------*/
> > +
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> > +notice
> > + *
> > + * Get the target bit from a 32-bit value without memory ordering.
> > + *
> > + * @param nr
> > + *   The target bit to get.
> > + * @param addr
> > + *   The address holding the bit.
> > + * @return
> > + *   The target bit.
> > + */
> > +__rte_experimental
> > +static inline uint32_t
> > +rte_get_bit32_relaxed(unsigned int nr, uint32_t *addr) {
> Why not pass the memory order as a parameter? It would reduce the number
> of API calls by half.
I think these APIs should be modelled according to C11 __atomic_xxx APIs. Otherwise, the programmers have to understand another interface. It will also help reduce the number of APIs.
Converting these into macros will help remove the size based duplication of APIs. I came up with the following macro:

#define RTE_GET_BIT(nr, var, ret, memorder) \
({ \
    if (sizeof(var) == sizeof(uint32_t)) { \
        uint32_t mask1 = 1U << (nr)%32; \
        ret = __atomic_load_n(&var, (memorder)) & mask1;\
    } \
    else {\
        uint64_t mask2 = 1UL << (nr)%64;\
        ret = __atomic_load_n(&var, (memorder)) & mask2;\
    } \
})

The '%' is required to avoid a compiler warning/error. But the '%' operation will get removed by the compiler since 'nr' is a constant.
IMO, the macro itself is not complex and should not be a pain for debugging.

Currently, we have 20 APIs in this patch (possibly more coming in the future and creating an explosion with memory order/size combinations). The above macro will reduce this to 5 macros without further explosion in number of combinations.

Any thoughts? What do others think?

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v6 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-12-21 16:07     ` Honnappa Nagarahalli
@ 2019-12-21 18:07       ` Stephen Hemminger
  2019-12-23  5:04         ` Honnappa Nagarahalli
  2019-12-21 18:08       ` Stephen Hemminger
  2019-12-23  8:59       ` Jerin Jacob
  2 siblings, 1 reply; 139+ messages in thread
From: Stephen Hemminger @ 2019-12-21 18:07 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: Joyce Kong, thomas, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, Phil Yang, Gavin Hu, nd, dev

On Sat, 21 Dec 2019 16:07:23 +0000
Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:

> Converting these into macros will help remove the size based duplication of APIs. I came up with the following macro:
> 
> #define RTE_GET_BIT(nr, var, ret, memorder) \
> ({ \
>     if (sizeof(var) == sizeof(uint32_t)) { \
>         uint32_t mask1 = 1U << (nr)%32; \
>         ret = __atomic_load_n(&var, (memorder)) & mask1;\
>     } \
>     else {\
>         uint64_t mask2 = 1UL << (nr)%64;\
>         ret = __atomic_load_n(&var, (memorder)) & mask2;\
>     } \
> })

Macros are more error-prone, especially because this is in an exposed header file.

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v6 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-12-21 16:07     ` Honnappa Nagarahalli
  2019-12-21 18:07       ` Stephen Hemminger
@ 2019-12-21 18:08       ` Stephen Hemminger
  2019-12-23  5:45         ` Honnappa Nagarahalli
  2019-12-23  8:59       ` Jerin Jacob
  2 siblings, 1 reply; 139+ messages in thread
From: Stephen Hemminger @ 2019-12-21 18:08 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: Joyce Kong, thomas, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, Phil Yang, Gavin Hu, nd, dev

On Sat, 21 Dec 2019 16:07:23 +0000
Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:

> Converting these into macros will help remove the size based duplication of APIs. I came up with the following macro:
> 
> #define RTE_GET_BIT(nr, var, ret, memorder) \
> ({ \
>     if (sizeof(var) == sizeof(uint32_t)) { \
>         uint32_t mask1 = 1U << (nr)%32; \
>         ret = __atomic_load_n(&var, (memorder)) & mask1;\
>     } \
>     else {\
>         uint64_t mask2 = 1UL << (nr)%64;\
>         ret = __atomic_load_n(&var, (memorder)) & mask2;\
>     } \
> })


Follow-on: if you want to do it as macros, then use typeof() to make the
mask any size.

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v6 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-12-21 18:07       ` Stephen Hemminger
@ 2019-12-23  5:04         ` Honnappa Nagarahalli
  2019-12-23 16:36           ` Stephen Hemminger
  0 siblings, 1 reply; 139+ messages in thread
From: Honnappa Nagarahalli @ 2019-12-23  5:04 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Joyce Kong, thomas, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, Phil Yang, Gavin Hu, nd, dev, Honnappa Nagarahalli,
	nd

<snip>

> 
> On Sat, 21 Dec 2019 16:07:23 +0000
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> 
> > Converting these into macros will help remove the size based duplication of
> APIs. I came up with the following macro:
> >
> > #define RTE_GET_BIT(nr, var, ret, memorder) \ ({ \
> >     if (sizeof(var) == sizeof(uint32_t)) { \
> >         uint32_t mask1 = 1U << (nr)%32; \
> >         ret = __atomic_load_n(&var, (memorder)) & mask1;\
> >     } \
> >     else {\
> >         uint64_t mask2 = 1UL << (nr)%64;\
> >         ret = __atomic_load_n(&var, (memorder)) & mask2;\
> >     } \
> > })
> 
> Macros are more error prone. Especially because this is in exposed header file
That's another question I have. Why do we need to have these APIs in a public header file? These will add to the ABI burden as well. These APIs should be in a common-but-not-public header file. I am also not sure how helpful these APIs are for applications as these APIs seem to have considered requirements only from the PMDs.

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v6 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-12-21 18:08       ` Stephen Hemminger
@ 2019-12-23  5:45         ` Honnappa Nagarahalli
  0 siblings, 0 replies; 139+ messages in thread
From: Honnappa Nagarahalli @ 2019-12-23  5:45 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Joyce Kong, thomas, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, Phil Yang, Gavin Hu, nd, dev, Honnappa Nagarahalli,
	nd

> 
> On Sat, 21 Dec 2019 16:07:23 +0000
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> 
> > Converting these into macros will help remove the size based duplication of
> APIs. I came up with the following macro:
> >
> > #define RTE_GET_BIT(nr, var, ret, memorder) \ ({ \
> >     if (sizeof(var) == sizeof(uint32_t)) { \
> >         uint32_t mask1 = 1U << (nr)%32; \
> >         ret = __atomic_load_n(&var, (memorder)) & mask1;\
> >     } \
> >     else {\
> >         uint64_t mask2 = 1UL << (nr)%64;\
> >         ret = __atomic_load_n(&var, (memorder)) & mask2;\
> >     } \
> > })
> 
> 
> Follow on if you want to do it as macros, then use typeof() to make the mask
> any size.
Yes, that makes it much simpler:
#define RTE_GET_BIT(nr, var, ret, memorder) \
({ \
     typeof(var) mask; \
     if (sizeof(var) == sizeof(uint32_t)) { \
         mask = 1U << (nr)%32; \
     } else {\
         mask = 1UL << (nr)%64;\
     } \
     ret = __atomic_load_n(&var, (memorder)) & mask;\
})

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v6 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-12-21 16:07     ` Honnappa Nagarahalli
  2019-12-21 18:07       ` Stephen Hemminger
  2019-12-21 18:08       ` Stephen Hemminger
@ 2019-12-23  8:59       ` Jerin Jacob
  2 siblings, 0 replies; 139+ messages in thread
From: Jerin Jacob @ 2019-12-23  8:59 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: Joyce Kong, thomas, stephen, david.marchand, mb, jerinj,
	bruce.richardson, ravi1.kumar, rmody, shshaikh, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, Phil Yang, Gavin Hu, nd, dev

On Sat, Dec 21, 2019 at 9:37 PM Honnappa Nagarahalli
<Honnappa.Nagarahalli@arm.com> wrote:

> > > +__rte_experimental
> > > +static inline uint32_t
> > > +rte_get_bit32_relaxed(unsigned int nr, uint32_t *addr) {
> > Why not pass the memory order as a parameter? It would reduce the number
> > of API calls by half.
> I think these APIs should be modelled according to C11 __atomic_xxx APIs. Otherwise, the programmers have to understand another interface. It will also help reduce the number of APIs.
> Converting these into macros will help remove the size based duplication of APIs. I came up with the following macro:
>
> #define RTE_GET_BIT(nr, var, ret, memorder) \
> ({ \
>     if (sizeof(var) == sizeof(uint32_t)) { \
>         uint32_t mask1 = 1U << (nr)%32; \
>         ret = __atomic_load_n(&var, (memorder)) & mask1;\
>     } \
>     else {\
>         uint64_t mask2 = 1UL << (nr)%64;\
>         ret = __atomic_load_n(&var, (memorder)) & mask2;\
>     } \
> })
>
> The '%' is required to avoid a compiler warning/error. But the '%' operation will get removed by the compiler since 'nr' is a constant.
> IMO, the macro itself is not complex and should not be a pain for debugging.
>
> Currently, we have 20 APIs in this patch (possibly more coming in the future and creating an explosion with memory order/size combinations). The above macro will reduce this to 5 macros without further explosion in number of combinations.
>
> Any thoughts? What do others think?

# I think the most common use case for register manipulation is
getting/setting "fields" (sets of consecutive bits). IMO, the Linux
kernel bit manipulation APIs make more sense.
At least have an implementation similar to FIELD_GET() and FIELD_SET().
https://github.com/torvalds/linux/blob/master/include/linux/bitfield.h

# FIELD_GET would be a superset API: a single bit can be read by using width = 1.

# I think it is good to have two versions of the macro/API: RTE_FIELD_GET
(without atomics) and RTE_FIELD_GET_ATOMIC (with C11 atomics).

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v6 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-12-23  5:04         ` Honnappa Nagarahalli
@ 2019-12-23 16:36           ` Stephen Hemminger
  2019-12-30  3:02             ` Gavin Hu
  2020-01-07  0:41             ` Honnappa Nagarahalli
  0 siblings, 2 replies; 139+ messages in thread
From: Stephen Hemminger @ 2019-12-23 16:36 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: Joyce Kong, thomas, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, Phil Yang, Gavin Hu, nd, dev

On Mon, 23 Dec 2019 05:04:12 +0000
Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:

> <snip>
> 
> > 
> > On Sat, 21 Dec 2019 16:07:23 +0000
> > Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> >   
> > > Converting these into macros will help remove the size based duplication of  
> > APIs. I came up with the following macro:  
> > >
> > > #define RTE_GET_BIT(nr, var, ret, memorder) \ ({ \
> > >     if (sizeof(var) == sizeof(uint32_t)) { \
> > >         uint32_t mask1 = 1U << (nr)%32; \
> > >         ret = __atomic_load_n(&var, (memorder)) & mask1;\
> > >     } \
> > >     else {\
> > >         uint64_t mask2 = 1UL << (nr)%64;\
> > >         ret = __atomic_load_n(&var, (memorder)) & mask2;\
> > >     } \
> > > })  
> > 
> > Macros are more error prone. Especially because this is in exposed header file  
> That's another question I have. Why do we need to have these APIs in a public header file? These will add to the ABI burden as well. These APIs should be in a common-but-not-public header file. I am also not sure how helpful these APIs are for applications as these APIs seem to have considered requirements only from the PMDs.

Why do we have to wrap every C atomic builtin? What value is there in that?

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v6 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-12-23 16:36           ` Stephen Hemminger
@ 2019-12-30  3:02             ` Gavin Hu
  2020-01-07  0:44               ` Honnappa Nagarahalli
  2020-01-07  0:41             ` Honnappa Nagarahalli
  1 sibling, 1 reply; 139+ messages in thread
From: Gavin Hu @ 2019-12-30  3:02 UTC (permalink / raw)
  To: Stephen Hemminger, Honnappa Nagarahalli
  Cc: Joyce Kong, thomas, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, Phil Yang, nd, dev, nd

Hi Stephen, Honnappa,

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Tuesday, December 24, 2019 12:37 AM
> To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Cc: Joyce Kong <Joyce.Kong@arm.com>; thomas@monjalon.net;
> david.marchand@redhat.com; mb@smartsharesystems.com;
> jerinj@marvell.com; bruce.richardson@intel.com; ravi1.kumar@amd.com;
> rmody@marvell.com; shshaikh@marvell.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com; Phil Yang
> <Phil.Yang@arm.com>; Gavin Hu <Gavin.Hu@arm.com>; nd <nd@arm.com>;
> dev@dpdk.org
> Subject: Re: [PATCH v6 1/6] lib/eal: implement the family of rte bit operation
> APIs
> 
> On Mon, 23 Dec 2019 05:04:12 +0000
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> 
> > <snip>
> >
> > >
> > > On Sat, 21 Dec 2019 16:07:23 +0000
> > > Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> > >
> > > > Converting these into macros will help remove the size based duplication
> of
> > > APIs. I came up with the following macro:
> > > >
> > > > #define RTE_GET_BIT(nr, var, ret, memorder) \ ({ \
> > > >     if (sizeof(var) == sizeof(uint32_t)) { \
> > > >         uint32_t mask1 = 1U << (nr)%32; \
> > > >         ret = __atomic_load_n(&var, (memorder)) & mask1;\
> > > >     } \
> > > >     else {\
> > > >         uint64_t mask2 = 1UL << (nr)%64;\
> > > >         ret = __atomic_load_n(&var, (memorder)) & mask2;\
> > > >     } \
> > > > })
> > >
> > > Macros are more error prone. Especially because this is in exposed header
> file
> > That's another question I have. Why do we need to have these APIs in a
> public header file? These will add to the ABI burden as well. These APIs should
> be in a common-but-not-public header file. I am also not sure how helpful
> these APIs are for applications as these APIs seem to have considered
> requirements only from the PMDs.
> 
> Why do we have to wrap every C atomic builtin? What value is there in that?

The wrapping is aimed at reducing code duplication: on average, 3 lines are cut down to 1 line for a single core.
Overall, I think these bitops APIs are targeted for use by PMDs only; applications can use C11 freely.
The initial thought for the new APIs came from the idea of consolidating the bit operations scattered all over the PMDs. It is unwise to expand them to applications or libraries, as different memory orderings would be required and complexity would grow.

If the use cases are limited to PMDs, a 'volatile' or a compiler barrier is sufficient; therefore, the number of APIs can be cut by half.
http://inbox.dpdk.org/dev/VI1PR08MB53766C30B5CDA00FB9FCE9678F2E0@VI1PR08MB5376.eurprd08.prod.outlook.com/

Any thoughts and comments are welcome!


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v6 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-12-23 16:36           ` Stephen Hemminger
  2019-12-30  3:02             ` Gavin Hu
@ 2020-01-07  0:41             ` Honnappa Nagarahalli
  1 sibling, 0 replies; 139+ messages in thread
From: Honnappa Nagarahalli @ 2020-01-07  0:41 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Joyce Kong, thomas, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, Phil Yang, Gavin Hu, nd, dev, Honnappa Nagarahalli,
	nd

<snip>
> >
> > >
> > > On Sat, 21 Dec 2019 16:07:23 +0000
> > > Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> > >
> > > > Converting these into macros will help remove the size based
> duplication of
> > > APIs. I came up with the following macro:
> > > >
> > > > #define RTE_GET_BIT(nr, var, ret, memorder) \ ({ \
> > > >     if (sizeof(var) == sizeof(uint32_t)) { \
> > > >         uint32_t mask1 = 1U << (nr)%32; \
> > > >         ret = __atomic_load_n(&var, (memorder)) & mask1;\
> > > >     } \
> > > >     else {\
> > > >         uint64_t mask2 = 1UL << (nr)%64;\
> > > >         ret = __atomic_load_n(&var, (memorder)) & mask2;\
> > > >     } \
> > > > })
> > >
> > > Macros are more error prone. Especially because this is in exposed header
> file
> > That's another question I have. Why do we need to have these APIs in a
> public header file? These will add to the ABI burden as well. These APIs
> should be in a common-but-not-public header file. I am also not sure how
> helpful these APIs are for applications as these APIs seem to have considered
> requirements only from the PMDs.
> 
> Why do we have to wrap every C atomic builtin? What value is there in that?
As long as we stick to requirements from PMD we do not need to worry about every atomic builtin. We seem to be making these APIs public, which requires us to keep these APIs generic considering possible future requirements.

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v6 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-12-30  3:02             ` Gavin Hu
@ 2020-01-07  0:44               ` Honnappa Nagarahalli
  2020-01-07  1:26                 ` Stephen Hemminger
  0 siblings, 1 reply; 139+ messages in thread
From: Honnappa Nagarahalli @ 2020-01-07  0:44 UTC (permalink / raw)
  To: Gavin Hu, Stephen Hemminger
  Cc: Joyce Kong, thomas, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, Phil Yang, nd, dev, Honnappa Nagarahalli, nd

<snip>

> > >
> > > >
> > > > On Sat, 21 Dec 2019 16:07:23 +0000 Honnappa Nagarahalli
> > > > <Honnappa.Nagarahalli@arm.com> wrote:
> > > >
> > > > > Converting these into macros will help remove the size based
> > > > > duplication
> > of
> > > > APIs. I came up with the following macro:
> > > > >
> > > > > #define RTE_GET_BIT(nr, var, ret, memorder) \ ({ \
> > > > >     if (sizeof(var) == sizeof(uint32_t)) { \
> > > > >         uint32_t mask1 = 1U << (nr)%32; \
> > > > >         ret = __atomic_load_n(&var, (memorder)) & mask1;\
> > > > >     } \
> > > > >     else {\
> > > > >         uint64_t mask2 = 1UL << (nr)%64;\
> > > > >         ret = __atomic_load_n(&var, (memorder)) & mask2;\
> > > > >     } \
> > > > > })
> > > >
> > > > Macros are more error prone. Especially because this is in exposed
> > > > header
> > file
> > > That's another question I have. Why do we need to have these APIs in
> > > a
> > public header file? These will add to the ABI burden as well. These
> > APIs should be in a common-but-not-public header file. I am also not
> > sure how helpful these APIs are for applications as these APIs seem to
> > have considered requirements only from the PMDs.
> >
> > Why do we have to wrap every C atomic builtin? What value is there in that?
> 
> The wrapping is aimed to reduce code duplication, on average 3 lines cut
> down to 1 line for a single core.
> Overall I am thinking this bitops APIs are targeted for use by PMDs only,
> applications can use C11 freely.
> The initial thought for the new APIs came from the idea of consolidating the
> scattered bit operations all over the PMDs. It is unwise to expanding to
> applications or libraries, as different memory orderings are required and
> complexity generate.
> 
> If the use cases are limited to PMDs, a 'volatile' or a compiler barrier is
> sufficient therefore the number of APIs can be saved by half.
> http://inbox.dpdk.org/dev/VI1PR08MB53766C30B5CDA00FB9FCE9678F2E0
> @VI1PR08MB5376.eurprd08.prod.outlook.com/
> 
> Any thoughts and comments are welcome!
I would prefer that the APIs/Macros just address PMD's requirements. These also should be kept private (through naming conventions?). Given that the current PMDs are not using C11, we can skip using C11 atomics in these APIs.

> 


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v6 1/6] lib/eal: implement the family of rte bit operation APIs
  2020-01-07  0:44               ` Honnappa Nagarahalli
@ 2020-01-07  1:26                 ` Stephen Hemminger
  2020-01-07  4:41                   ` Honnappa Nagarahalli
  0 siblings, 1 reply; 139+ messages in thread
From: Stephen Hemminger @ 2020-01-07  1:26 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: Gavin Hu, Joyce Kong, thomas, david.marchand, mb, jerinj,
	bruce.richardson, ravi1.kumar, rmody, shshaikh, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, Phil Yang, nd, dev

On Tue, 7 Jan 2020 00:44:51 +0000
Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:

> <snip>
> 
> > > >  
> > > > >
> > > > > On Sat, 21 Dec 2019 16:07:23 +0000 Honnappa Nagarahalli
> > > > > <Honnappa.Nagarahalli@arm.com> wrote:
> > > > >  
> > > > > > Converting these into macros will help remove the size based
> > > > > > duplication  
> > > of  
> > > > > APIs. I came up with the following macro:  
> > > > > >
> > > > > > #define RTE_GET_BIT(nr, var, ret, memorder) \ ({ \
> > > > > >     if (sizeof(var) == sizeof(uint32_t)) { \
> > > > > >         uint32_t mask1 = 1U << (nr)%32; \
> > > > > >         ret = __atomic_load_n(&var, (memorder)) & mask1;\
> > > > > >     } \
> > > > > >     else {\
> > > > > >         uint64_t mask2 = 1UL << (nr)%64;\
> > > > > >         ret = __atomic_load_n(&var, (memorder)) & mask2;\
> > > > > >     } \
> > > > > > })  
> > > > >
> > > > > Macros are more error prone. Especially because this is in exposed
> > > > > header  
> > > file  
> > > > That's another question I have. Why do we need to have these APIs in
> > > > a  
> > > public header file? These will add to the ABI burden as well. These
> > > APIs should be in a common-but-not-public header file. I am also not
> > > sure how helpful these APIs are for applications as these APIs seem to
> > > have considered requirements only from the PMDs.
> > >
> > > Why do we have to wrap every C atomic builtin? What value is there in that?  
> > 
> > The wrapping is aimed to reduce code duplication, on average 3 lines cut
> > down to 1 line for a single core.
> > Overall I am thinking this bitops APIs are targeted for use by PMDs only,
> > applications can use C11 freely.
> > The initial thought for the new APIs came from the idea of consolidating the
> > scattered bit operations all over the PMDs. It is unwise to expanding to
> > applications or libraries, as different memory orderings are required and
> > complexity generate.
> > 
> > If the use cases are limited to PMDs, a 'volatile' or a compiler barrier is
> > sufficient therefore the number of APIs can be saved by half.
> > http://inbox.dpdk.org/dev/VI1PR08MB53766C30B5CDA00FB9FCE9678F2E0
> > @VI1PR08MB5376.eurprd08.prod.outlook.com/
> > 
> > Any thoughts and comments are welcome!  
> I would prefer that the APIs/Macros just address PMD's requirements. These also should be kept private (through naming conventions?). Given that the current PMDs are not using C11, we can skip using C11 atomics in these APIs.

Not in favor; just use the existing GCC/clang/icc atomics instead of creating
unnecessary wrapper bloat.

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v6 1/6] lib/eal: implement the family of rte bit operation APIs
  2020-01-07  1:26                 ` Stephen Hemminger
@ 2020-01-07  4:41                   ` Honnappa Nagarahalli
  0 siblings, 0 replies; 139+ messages in thread
From: Honnappa Nagarahalli @ 2020-01-07  4:41 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Gavin Hu, Joyce Kong, thomas, david.marchand, mb, jerinj,
	bruce.richardson, ravi1.kumar, rmody, shshaikh, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, Phil Yang, nd, dev,
	Honnappa Nagarahalli, nd

<snip>
> >
> > > > >
> > > > > >
> > > > > > On Sat, 21 Dec 2019 16:07:23 +0000 Honnappa Nagarahalli
> > > > > > <Honnappa.Nagarahalli@arm.com> wrote:
> > > > > >
> > > > > > > Converting these into macros will help remove the size based
> > > > > > > duplication
> > > > of
> > > > > > APIs. I came up with the following macro:
> > > > > > >
> > > > > > > #define RTE_GET_BIT(nr, var, ret, memorder) \ ({ \
> > > > > > >     if (sizeof(var) == sizeof(uint32_t)) { \
> > > > > > >         uint32_t mask1 = 1U << (nr)%32; \
> > > > > > >         ret = __atomic_load_n(&var, (memorder)) & mask1;\
> > > > > > >     } \
> > > > > > >     else {\
> > > > > > >         uint64_t mask2 = 1UL << (nr)%64;\
> > > > > > >         ret = __atomic_load_n(&var, (memorder)) & mask2;\
> > > > > > >     } \
> > > > > > > })
> > > > > >
> > > > > > Macros are more error prone. Especially because this is in
> > > > > > exposed header
> > > > file
> > > > > That's another question I have. Why do we need to have these
> > > > > APIs in a
> > > > public header file? These will add to the ABI burden as well.
> > > > These APIs should be in a common-but-not-public header file. I am
> > > > also not sure how helpful these APIs are for applications as these
> > > > APIs seem to have considered requirements only from the PMDs.
> > > >
> > > > Why do we have to wrap every C atomic builtin? What value is there in
> that?
> > >
> > > The wrapping is aimed to reduce code duplication, on average 3 lines
> > > cut down to 1 line for a single core.
> > > Overall I am thinking this bitops APIs are targeted for use by PMDs
> > > only, applications can use C11 freely.
> > > The initial thought for the new APIs came from the idea of
> > > consolidating the scattered bit operations all over the PMDs. It is
> > > unwise to expanding to applications or libraries, as different
> > > memory orderings are required and complexity generate.
> > >
> > > If the use cases are limited to PMDs, a 'volatile' or a compiler
> > > barrier is sufficient therefore the number of APIs can be saved by half.
> > >
> http://inbox.dpdk.org/dev/VI1PR08MB53766C30B5CDA00FB9FCE9678F2E0
> > > @VI1PR08MB5376.eurprd08.prod.outlook.com/
> > >
> > > Any thoughts and comments are welcome!
> > I would prefer that the APIs/Macros just address PMD's requirements.
> These also should be kept private (through naming conventions?). Given that
> the current PMDs are not using C11, we can skip using C11 atomics in these
> APIs.
> 
> Not in favor, just use existing Gcc/clang/icc atomics instead of creating
> unnecessary bloat wrappers.
I thought you had blessed this patch [1].

[1] http://mails.dpdk.org/archives/dev/2019-October/147297.html

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v6 0/6] implement common rte bit operation APIs in PMDs
  2019-12-18  6:00 ` [dpdk-dev] [PATCH v6 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
  2019-12-18  6:55   ` Gavin Hu
@ 2020-01-17 13:03   ` David Marchand
  1 sibling, 0 replies; 139+ messages in thread
From: David Marchand @ 2020-01-17 13:03 UTC (permalink / raw)
  To: Joyce Kong
  Cc: Thomas Monjalon, Stephen Hemminger, mb,
	Jerin Jacob Kollanukkaran, Bruce Richardson, Ravi Kumar,
	Rasesh Mody, Shahed Shaikh, Ziyang Xuan, Xiaoyun Wang,
	Guoyang Zhou, Honnappa Nagarahalli, Phil Yang, Gavin Hu, nd, dev

On Wed, Dec 18, 2019 at 7:00 AM Joyce Kong <joyce.kong@arm.com> wrote:
>
> There are a lot functions of bit operations scattered in PMDs, consolidate
> them into a common API family and applied in different PMDs to reduce code
> duplication.
>
> v6:
>  Trim 'unsigned long' in PMDs down to 'uint32_t', as on mainstream 64-bit OS,
>  'unsigned long' is 64-bit in size, but the 32-bit OS expects 32-bit 'unsigned
>  long' argument.
>
> v5:
>  Correct the spelling mistake in test_bitops.c
>
> v4:
>   Introduce uint32_t/uint64_t *addr when definiting bit operation APIs(suggested by
>   Morten Brørup).
>
> v3:
>   1. Change the API's head file back to rte_bitops.h, then implement both 32-bit and
>      64-bit operations with and without C11 atomic memory ordering.
>   2. Add multi-core test case for bit operations which implemented with memory ordering.
>   3. Modify the doc of both APIs and test cases.
>
> v2:
>   1. Add doxygen comments for the rte bit operation API(suggested by Stephen Hemminger).
>   2. Add test cases for common rte bit operation API(suggested by Stephen Hemminger).
>   3. Change the header file to rte_io_bitops.h and the operation to rte_io_set_bit()etc.,
>      as the API uses barriers inside and the barriers are only needed for IO operations
>      (suggested by Jerin Jacob).
>   4. Use an well defined uint_NN_t type(suggested by Morten Brørup).


I did not wait for travis to finish but afaics, this series only
builds on 32bits and AARCH64.
https://travis-ci.com/david-marchand/dpdk/builds/144927372


-- 
David Marchand


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v7 0/6] implement common rte bit operation APIs in PMDs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (40 preceding siblings ...)
  2019-12-18  6:00 ` [dpdk-dev] [PATCH v6 6/6] net/hinic: " Joyce Kong
@ 2020-03-09  9:54 ` Joyce Kong
  2020-03-09  9:54 ` [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD bit operation APIs Joyce Kong
                   ` (26 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2020-03-09  9:54 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, phil.yang, gavin.hu
  Cc: nd, dev

Bitwise operation APIs are defined and used in a lot of PMDs,
which causes a lot of code duplication. To reduce the duplication,
this series consolidates them into a common API family and uses
it in the PMDs (starting with a selected few).

v7:
  1. Change the API's header file to 'rte_pmd_bitops.h' as a common-
     but-not-public file.
  2. Replace C11 atomic operations with 'volatile'. As the use cases
     are limited to PMDs, a 'volatile' is sufficient.

v5,v6:
  Trim 'unsigned long' in PMDs down to 'uint32_t', as on mainstream
  64-bit OS, 'unsigned long' is 64-bit in size, but the 32-bit OS
  expects 32-bit 'unsigned long' argument.

v4:
  Introduce uint32_t/uint64_t *addr when defining bit operation
  APIs.

v3:
  1. Change the API's header file back to rte_bitops.h, then implement
     both 32-bit and 64-bit operations with and without C11 atomic
     memory ordering.
  2. Add multi-core test case for bit operations which are implemented
     with memory ordering.
  3. Modify the doc of both APIs and test cases.

v2:
  1. Add doxygen comments for the rte bit operation API.
  2. Add test cases for common rte bit operation API.
  3. Change the header file to rte_io_bitops.h and the operation to
     rte_io_set_bit() etc., as the API uses barriers inside and the
     barriers are only needed for IO operations.
  4. Use a well-defined uint_NN_t type.

Joyce Kong (6):
  lib/eal: implement the family of PMD bit operation APIs
  test/pmdbitops: add PMD bit operation test case
  net/axgbe: use common rte bit operation APIs instead
  net/bnx2x: use common rte bit operation APIs instead
  net/qede: use common rte bit operation APIs instead
  net/hinic: use common rte bit operation APIs instead

 MAINTAINERS                                   |   5 +
 app/test/Makefile                             |   1 +
 app/test/autotest_data.py                     |   6 +
 app/test/meson.build                          |   2 +
 app/test/test_pmd_bitops.c                    | 137 ++++++++++
 doc/api/doxy-api-index.md                     |   5 +-
 drivers/net/Makefile                          |   1 +
 drivers/net/axgbe/axgbe_common.h              |  29 +-
 drivers/net/axgbe/axgbe_ethdev.c              |  14 +-
 drivers/net/axgbe/axgbe_ethdev.h              |   2 +-
 drivers/net/axgbe/axgbe_mdio.c                |  15 +-
 drivers/net/bnx2x/bnx2x.c                     | 253 +++++++++--------
 drivers/net/bnx2x/bnx2x.h                     |  10 +-
 drivers/net/bnx2x/ecore_sp.h                  |  48 ++--
 drivers/net/hinic/Makefile                    |   1 +
 drivers/net/hinic/base/hinic_compat.h         |  33 +--
 drivers/net/hinic/hinic_pmd_ethdev.c          |  18 +-
 drivers/net/hinic/hinic_pmd_ethdev.h          |   2 +-
 drivers/net/hinic/meson.build                 |   2 +
 drivers/net/qede/base/bcm_osal.c              |  22 +-
 drivers/net/qede/base/bcm_osal.h              |  14 +-
 drivers/net/qede/base/ecore.h                 |   6 +-
 drivers/net/qede/base/ecore_cxt.c             |   6 +-
 drivers/net/qede/base/ecore_dcbx.c            |   8 +-
 drivers/net/qede/base/ecore_dev.c             |  38 +--
 drivers/net/qede/base/ecore_dev_api.h         |   2 +-
 drivers/net/qede/base/ecore_l2.c              |   6 +-
 drivers/net/qede/base/ecore_mcp.c             |   4 +-
 drivers/net/qede/base/ecore_sp_commands.c     |  12 +-
 drivers/net/qede/base/ecore_spq.c             |   2 +-
 drivers/net/qede/base/ecore_spq.h             |  10 +-
 drivers/net/qede/qede_main.c                  |   4 +-
 lib/librte_eal/common/Makefile                |   1 +
 .../common/include/rte_pmd_bitops.h           | 257 ++++++++++++++++++
 lib/librte_eal/common/meson.build             |   3 +-
 35 files changed, 653 insertions(+), 326 deletions(-)
 create mode 100644 app/test/test_pmd_bitops.c
 create mode 100644 lib/librte_eal/common/include/rte_pmd_bitops.h

-- 
2.17.1


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD bit operation APIs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (41 preceding siblings ...)
  2020-03-09  9:54 ` [dpdk-dev] [PATCH v7 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
@ 2020-03-09  9:54 ` Joyce Kong
  2020-03-09 15:50   ` Stephen Hemminger
  2020-03-31 22:35   ` Thomas Monjalon
  2020-03-09  9:54 ` [dpdk-dev] [PATCH v7 2/6] test/pmdbitops: add PMD bit operation test case Joyce Kong
                   ` (25 subsequent siblings)
  68 siblings, 2 replies; 139+ messages in thread
From: Joyce Kong @ 2020-03-09  9:54 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, phil.yang, gavin.hu
  Cc: nd, dev

Bitwise operation APIs are defined and used in a lot of PMDs,
which caused a huge code duplication. To reduce duplication,
this patch consolidates them into a common API family.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 MAINTAINERS                                   |   4 +
 doc/api/doxy-api-index.md                     |   5 +-
 drivers/net/Makefile                          |   1 +
 lib/librte_eal/common/Makefile                |   1 +
 .../common/include/rte_pmd_bitops.h           | 257 ++++++++++++++++++
 lib/librte_eal/common/meson.build             |   3 +-
 6 files changed, 268 insertions(+), 3 deletions(-)
 create mode 100644 lib/librte_eal/common/include/rte_pmd_bitops.h

diff --git a/MAINTAINERS b/MAINTAINERS
index f4e0ed8e0..071daf887 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -244,6 +244,10 @@ M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
 F: lib/librte_eal/common/include/rte_bitmap.h
 F: app/test/test_bitmap.c
 
+PMD Bitops
+M: Joyce Kong <joyce.kong@arm.com>
+F: lib/librte_eal/common/include/rte_pmd_bitops.h
+
 MCSlock - EXPERIMENTAL
 M: Phil Yang <phil.yang@arm.com>
 F: lib/librte_eal/common/include/generic/rte_mcslock.h
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index dff496be0..64ab142f3 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -35,6 +35,7 @@ The public API headers are grouped by topics:
   [vfio]               (@ref rte_vfio.h)
 
 - **device specific**:
+  [pmd_bitops]         (@ref rte_pmd_bitops.h),
   [softnic]            (@ref rte_eth_softnic.h),
   [bond]               (@ref rte_eth_bond.h),
   [vhost]              (@ref rte_vhost.h),
@@ -133,12 +134,12 @@ The public API headers are grouped by topics:
   [BPF]                (@ref rte_bpf.h)
 
 - **containers**:
+  [bitmap]             (@ref rte_bitmap.h),
   [mbuf]               (@ref rte_mbuf.h),
   [mbuf pool ops]      (@ref rte_mbuf_pool_ops.h),
   [ring]               (@ref rte_ring.h),
   [stack]              (@ref rte_stack.h),
-  [tailq]              (@ref rte_tailq.h),
-  [bitmap]             (@ref rte_bitmap.h)
+  [tailq]              (@ref rte_tailq.h)
 
 - **packet framework**:
   * [port]             (@ref rte_port.h):
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 4a7f155fc..1a3202d1c 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -3,6 +3,7 @@
 
 include $(RTE_SDK)/mk/rte.vars.mk
 
+INC := rte_pmd_bitops.h
 # set in mk/toolchain/xxx/rte.toolchain-compat.mk
 ifeq ($(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD),d)
         $(warning thunderx pmd is not supported by old compilers)
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index c2c6d92cd..24a5ae94f 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -19,6 +19,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
 INC += rte_service.h rte_service_component.h
 INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
 INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
+INC += rte_pmd_bitops.h
 
 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
 GENERIC_INC += rte_memcpy.h rte_cpuflags.h
diff --git a/lib/librte_eal/common/include/rte_pmd_bitops.h b/lib/librte_eal/common/include/rte_pmd_bitops.h
new file mode 100644
index 000000000..b7801e01e
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_pmd_bitops.h
@@ -0,0 +1,257 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Arm Limited
+ */
+
+#ifndef _RTE_PMD_BITOPS_H_
+#define _RTE_PMD_BITOPS_H_
+
+/**
+ * @file
+ * Bit Operations
+ *
+ * This file defines an API for bit operations without memory ordering.
+ */
+
+#include <stdint.h>
+#include <rte_debug.h>
+#include <rte_compat.h>
+
+/*---------------------------- 32 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Read one bit of a 32-bit word; no memory ordering is applied.
+ *
+ * @param nr
+ *   Index of the bit to read; must be below 32 (checked with RTE_ASSERT).
+ * @param addr
+ *   Pointer to the 32-bit word that contains the bit.
+ * @return
+ *   The word masked to bit @p nr: non-zero if the bit is set, 0 otherwise.
+ */
+__rte_experimental
+static inline uint32_t
+rte_get_bit32_relaxed(unsigned int nr, volatile uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	const uint32_t bit = UINT32_C(1) << nr;
+	return *addr & bit;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Turn on one bit of a 32-bit word; no memory ordering is applied.
+ *
+ * @param nr
+ *   Index of the bit to set; must be below 32 (checked with RTE_ASSERT).
+ * @param addr
+ *   Pointer to the 32-bit word that contains the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit32_relaxed(unsigned int nr, volatile uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	const uint32_t bit = UINT32_C(1) << nr;
+	*addr = *addr | bit;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Turn off one bit of a 32-bit word; no memory ordering is applied.
+ *
+ * @param nr
+ *   Index of the bit to clear; must be below 32 (checked with RTE_ASSERT).
+ * @param addr
+ *   Pointer to the 32-bit word that contains the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit32_relaxed(unsigned int nr, volatile uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	const uint32_t bit = UINT32_C(1) << nr;
+	*addr = *addr & ~bit;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Report the previous state of one bit of a 32-bit word and then turn that
+ * bit on; no memory ordering is applied.
+ *
+ * @param nr
+ *   Index of the bit to test and set; must be below 32 (RTE_ASSERT).
+ * @param addr
+ *   Pointer to the 32-bit word that contains the bit.
+ * @return
+ *   The old word masked to bit @p nr: non-zero if it was already set.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_set_bit32_relaxed(unsigned int nr, volatile uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	const uint32_t bit = UINT32_C(1) << nr;
+	const uint32_t old = *addr;
+	*addr = *addr | bit;
+	return old & bit;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Report the previous state of one bit of a 32-bit word and then turn that
+ * bit off; no memory ordering is applied.
+ *
+ * @param nr
+ *   Index of the bit to test and clear; must be below 32 (RTE_ASSERT).
+ * @param addr
+ *   Pointer to the 32-bit word that contains the bit.
+ * @return
+ *   The old word masked to bit @p nr: non-zero if it was set before.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_clear_bit32_relaxed(unsigned int nr, volatile uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	const uint32_t bit = UINT32_C(1) << nr;
+	const uint32_t old = *addr;
+	*addr = *addr & ~bit;
+	return old & bit;
+}
+
+/*---------------------------- 64 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Read one bit of a 64-bit word; no memory ordering is applied.
+ *
+ * @param nr
+ *   Index of the bit to read; must be below 64 (checked with RTE_ASSERT).
+ * @param addr
+ *   Pointer to the 64-bit word that contains the bit.
+ * @return
+ *   The word masked to bit @p nr: non-zero if the bit is set, 0 otherwise.
+ */
+__rte_experimental
+static inline uint64_t
+rte_get_bit64_relaxed(unsigned int nr, volatile uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	const uint64_t bit = UINT64_C(1) << nr;
+	return *addr & bit;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Turn on one bit of a 64-bit word; no memory ordering is applied.
+ *
+ * @param nr
+ *   Index of the bit to set; must be below 64 (checked with RTE_ASSERT).
+ * @param addr
+ *   Pointer to the 64-bit word that contains the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit64_relaxed(unsigned int nr, volatile uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	const uint64_t bit = UINT64_C(1) << nr;
+	*addr = *addr | bit;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Turn off one bit of a 64-bit word; no memory ordering is applied.
+ *
+ * @param nr
+ *   Index of the bit to clear; must be below 64 (checked with RTE_ASSERT).
+ * @param addr
+ *   Pointer to the 64-bit word that contains the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit64_relaxed(unsigned int nr, volatile uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	const uint64_t bit = UINT64_C(1) << nr;
+	*addr = *addr & ~bit;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then set it to 1 without
+ * memory ordering. Only the requested bit is reported, not the whole word.
+ *
+ * @param nr
+ *   The target bit to get and set; must be below 64 (RTE_ASSERT).
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The old word masked to bit @p nr: non-zero if it was already set.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_set_bit64_relaxed(unsigned int nr, volatile uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	uint64_t val = *addr;
+	*addr = (*addr) | mask;
+	return val & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Report the previous state of one bit of a 64-bit word and then turn that
+ * bit off; no memory ordering is applied.
+ *
+ * @param nr
+ *   Index of the bit to test and clear; must be below 64 (RTE_ASSERT).
+ * @param addr
+ *   Pointer to the 64-bit word that contains the bit.
+ * @return
+ *   The old word masked to bit @p nr: non-zero if it was set before.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_clear_bit64_relaxed(unsigned int nr, volatile uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	const uint64_t bit = UINT64_C(1) << nr;
+	const uint64_t old = *addr;
+	*addr = *addr & ~bit;
+	return old & bit;
+}
+
+#endif /* _RTE_PMD_BITOPS_H_ */
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index 2b97715a2..0862ae64c 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -50,9 +50,9 @@ common_objs += eal_common_arch_objs
 
 common_headers = files(
 	'include/rte_alarm.h',
+	'include/rte_bitmap.h',
 	'include/rte_branch_prediction.h',
 	'include/rte_bus.h',
-	'include/rte_bitmap.h',
 	'include/rte_class.h',
 	'include/rte_common.h',
 	'include/rte_compat.h',
@@ -78,6 +78,7 @@ common_headers = files(
 	'include/rte_pci_dev_feature_defs.h',
 	'include/rte_pci_dev_features.h',
 	'include/rte_per_lcore.h',
+	'include/rte_pmd_bitops.h',
 	'include/rte_random.h',
 	'include/rte_reciprocal.h',
 	'include/rte_service.h',
-- 
2.17.1


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v7 2/6] test/pmdbitops: add PMD bit operation test case
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (42 preceding siblings ...)
  2020-03-09  9:54 ` [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD bit operation APIs Joyce Kong
@ 2020-03-09  9:54 ` Joyce Kong
  2020-03-09  9:54 ` [dpdk-dev] [PATCH v7 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
                   ` (24 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2020-03-09  9:54 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, phil.yang, gavin.hu
  Cc: nd, dev

Add test cases for the bit operations which are used by PMDs.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 MAINTAINERS                |   1 +
 app/test/Makefile          |   1 +
 app/test/autotest_data.py  |   6 ++
 app/test/meson.build       |   2 +
 app/test/test_pmd_bitops.c | 137 +++++++++++++++++++++++++++++++++++++
 5 files changed, 147 insertions(+)
 create mode 100644 app/test/test_pmd_bitops.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 071daf887..876cb2bfd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -247,6 +247,7 @@ F: app/test/test_bitmap.c
 PMD Bitops
 M: Joyce Kong <joyce.kong@arm.com>
 F: lib/librte_eal/common/include/rte_pmd_bitops.h
+F: app/test/test_pmd_bitops.c
 
 MCSlock - EXPERIMENTAL
 M: Phil Yang <phil.yang@arm.com>
diff --git a/app/test/Makefile b/app/test/Makefile
index 1f080d162..3bdcbfdcf 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -78,6 +78,7 @@ SRCS-y += test_rand_perf.c
 
 SRCS-y += test_ring.c
 SRCS-y += test_ring_perf.c
+SRCS-y += test_pmd_bitops.c
 SRCS-y += test_pmd_perf.c
 
 ifeq ($(CONFIG_RTE_LIBRTE_TABLE),y)
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 7b1d01389..143b59097 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -284,6 +284,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "PMD bitops autotest",
+        "Command": "pmd_bitops_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    }
     {
         "Name":    "PMD ring autotest",
         "Command": "ring_pmd_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index 0a2ce710f..5567b9d1c 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -84,6 +84,7 @@ test_sources = files('commands.c',
 	'test_mcslock.c',
 	'test_mp_secondary.c',
 	'test_per_lcore.c',
+	'test_pmd_bitops.c',
 	'test_pmd_perf.c',
 	'test_power.c',
 	'test_power_cpufreq.c',
@@ -198,6 +199,7 @@ fast_test_names = [
         'meter_autotest',
         'multiprocess_autotest',
         'per_lcore_autotest',
+        'pmd_bitops_autotest',
         'prefetch_autotest',
         'rcu_qsbr_autotest',
         'red_autotest',
diff --git a/app/test/test_pmd_bitops.c b/app/test/test_pmd_bitops.c
new file mode 100644
index 000000000..f84c582be
--- /dev/null
+++ b/app/test/test_pmd_bitops.c
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#include <rte_launch.h>
+#include <rte_pmd_bitops.h>
+#include "test.h"
+
+uint32_t val32;
+uint64_t val64;
+
+#define MAX_BITS_32 32
+#define MAX_BITS_64 64
+/*
+ * Bitops functions
+ * ================
+ *
+ * - The main test function performs several subtests.
+ * - For relaxed version, check bit operations on one core.
+ *   - Initialize valXX to specified values, then set each bit of valXX
+ *     to 1 one by one in "test_bitops_set_relaxed".
+ *
+ *   - Clear each bit of valXX to 0 one by one in "test_bitops_clear_relaxed".
+ *
+ *   - Function "test_bitops_test_set_clear_relaxed" checks whether each bit
+ *     of valXX can do "test and set" and "test and clear" correctly.
+ */
+
+static int
+test_bitops_set_relaxed(void)
+{
+	unsigned int bit;
+
+	/* Set each bit of val32, then confirm every bit reads as set. */
+	for (bit = 0; bit < MAX_BITS_32; bit++)
+		rte_set_bit32_relaxed(bit, &val32);
+	for (bit = 0; bit < MAX_BITS_32; bit++)
+		if (rte_get_bit32_relaxed(bit, &val32) == 0) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	/* Same procedure for val64. */
+	for (bit = 0; bit < MAX_BITS_64; bit++)
+		rte_set_bit64_relaxed(bit, &val64);
+	for (bit = 0; bit < MAX_BITS_64; bit++)
+		if (rte_get_bit64_relaxed(bit, &val64) == 0) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_clear_relaxed(void)
+{
+	unsigned int bit;
+
+	/* Clear each bit of val32, then confirm every bit reads as zero. */
+	for (bit = 0; bit < MAX_BITS_32; bit++)
+		rte_clear_bit32_relaxed(bit, &val32);
+	for (bit = 0; bit < MAX_BITS_32; bit++)
+		if (rte_get_bit32_relaxed(bit, &val32) != 0) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	/* Same procedure for val64. */
+	for (bit = 0; bit < MAX_BITS_64; bit++)
+		rte_clear_bit64_relaxed(bit, &val64);
+	for (bit = 0; bit < MAX_BITS_64; bit++)
+		if (rte_get_bit64_relaxed(bit, &val64) != 0) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_test_set_clear_relaxed(void)
+{
+	unsigned int bit;
+
+	/* val32: set all bits; test-and-clear must then see each one set. */
+	for (bit = 0; bit < MAX_BITS_32; bit++)
+		rte_test_and_set_bit32_relaxed(bit, &val32);
+	for (bit = 0; bit < MAX_BITS_32; bit++)
+		if (rte_test_and_clear_bit32_relaxed(bit, &val32) == 0) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (bit = 0; bit < MAX_BITS_32; bit++)
+		if (rte_get_bit32_relaxed(bit, &val32) != 0) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	/* val64: same sequence on the 64-bit word. */
+	for (bit = 0; bit < MAX_BITS_64; bit++)
+		rte_test_and_set_bit64_relaxed(bit, &val64);
+	for (bit = 0; bit < MAX_BITS_64; bit++)
+		if (rte_test_and_clear_bit64_relaxed(bit, &val64) == 0) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (bit = 0; bit < MAX_BITS_64; bit++)
+		if (rte_get_bit64_relaxed(bit, &val64) != 0) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops(void)
+{
+	/* Both words start at zero before the subtests modify them. */
+	val32 = 0;
+	val64 = 0;
+	if (test_bitops_set_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_test_set_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+/* Name must match the meson.build and autotest_data.py entries. */
+REGISTER_TEST_COMMAND(pmd_bitops_autotest, test_bitops);
-- 
2.17.1


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v7 3/6] net/axgbe: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (43 preceding siblings ...)
  2020-03-09  9:54 ` [dpdk-dev] [PATCH v7 2/6] test/pmdbitops: add PMD bit operation test case Joyce Kong
@ 2020-03-09  9:54 ` Joyce Kong
  2020-03-09  9:54 ` [dpdk-dev] [PATCH v7 4/6] net/bnx2x: " Joyce Kong
                   ` (23 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2020-03-09  9:54 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, phil.yang, gavin.hu
  Cc: nd, dev

Remove the driver's own bit operation APIs and use the common ones;
this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/axgbe/axgbe_common.h | 29 +----------------------------
 drivers/net/axgbe/axgbe_ethdev.c | 14 +++++++-------
 drivers/net/axgbe/axgbe_ethdev.h |  2 +-
 drivers/net/axgbe/axgbe_mdio.c   | 15 ++++++++-------
 4 files changed, 17 insertions(+), 43 deletions(-)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index fdb037dd5..a4a685181 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -38,6 +38,7 @@
 #include <rte_common.h>
 #include <rte_cycles.h>
 #include <rte_io.h>
+#include <rte_pmd_bitops.h>
 
 #define BIT(nr)	                       (1 << (nr))
 #ifndef ARRAY_SIZE
@@ -1677,34 +1678,6 @@ do {									\
 #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)
 #define time_before_eq(a, b)	time_after_eq(b, a)
 
-/*---bitmap support apis---*/
-static inline int axgbe_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
-static inline void axgbe_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void axgbe_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int axgbe_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 static inline unsigned long msecs_to_timer_cycles(unsigned int m)
 {
 	return rte_get_timer_hz() * (m / 1000);
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index d0b6f091f..fe1f92843 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -277,8 +277,8 @@ axgbe_dev_start(struct rte_eth_dev *dev)
 	axgbe_dev_enable_tx(dev);
 	axgbe_dev_enable_rx(dev);
 
-	axgbe_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
-	axgbe_clear_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_clear_bit32_relaxed(AXGBE_STOPPED, &pdata->dev_state);
+	rte_clear_bit32_relaxed(AXGBE_DOWN, &pdata->dev_state);
 	return 0;
 }
 
@@ -292,17 +292,17 @@ axgbe_dev_stop(struct rte_eth_dev *dev)
 
 	rte_intr_disable(&pdata->pci_dev->intr_handle);
 
-	if (axgbe_test_bit(AXGBE_STOPPED, &pdata->dev_state))
+	if (rte_get_bit32_relaxed(AXGBE_STOPPED, &pdata->dev_state))
 		return;
 
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit32_relaxed(AXGBE_STOPPED, &pdata->dev_state);
 	axgbe_dev_disable_tx(dev);
 	axgbe_dev_disable_rx(dev);
 
 	pdata->phy_if.phy_stop(pdata);
 	pdata->hw_if.exit(pdata);
 	memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit32_relaxed(AXGBE_DOWN, &pdata->dev_state);
 }
 
 /* Clear all resources like TX/RX queues. */
@@ -1031,8 +1031,8 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
 
 	pdata = eth_dev->data->dev_private;
 	/* initial state */
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit32_relaxed(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit32_relaxed(AXGBE_STOPPED, &pdata->dev_state);
 	pdata->eth_dev = eth_dev;
 
 	pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
diff --git a/drivers/net/axgbe/axgbe_ethdev.h b/drivers/net/axgbe/axgbe_ethdev.h
index a1083b17b..259221964 100644
--- a/drivers/net/axgbe/axgbe_ethdev.h
+++ b/drivers/net/axgbe/axgbe_ethdev.h
@@ -511,7 +511,7 @@ struct axgbe_port {
 	unsigned int xpcs_window_mask;
 
 	/* Flags representing axgbe_state */
-	unsigned long dev_state;
+	uint32_t dev_state;
 
 	struct axgbe_hw_if hw_if;
 	struct axgbe_phy_if phy_if;
diff --git a/drivers/net/axgbe/axgbe_mdio.c b/drivers/net/axgbe/axgbe_mdio.c
index 4b280b72d..2bb7f5263 100644
--- a/drivers/net/axgbe/axgbe_mdio.c
+++ b/drivers/net/axgbe/axgbe_mdio.c
@@ -875,7 +875,7 @@ static int __axgbe_phy_config_aneg(struct axgbe_port *pdata)
 {
 	int ret;
 
-	axgbe_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+	rte_set_bit32_relaxed(AXGBE_LINK_INIT, &pdata->dev_state);
 	pdata->link_check = rte_get_timer_cycles();
 
 	ret = pdata->phy_if.phy_impl.an_config(pdata);
@@ -937,9 +937,9 @@ static int axgbe_phy_config_aneg(struct axgbe_port *pdata)
 
 	ret = __axgbe_phy_config_aneg(pdata);
 	if (ret)
-		axgbe_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_set_bit32_relaxed(AXGBE_LINK_ERR, &pdata->dev_state);
 	else
-		axgbe_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_clear_bit32_relaxed(AXGBE_LINK_ERR, &pdata->dev_state);
 
 	pthread_mutex_unlock(&pdata->an_mutex);
 
@@ -1026,7 +1026,7 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 	unsigned int reg = 0;
 	unsigned long autoneg_start_time;
 
-	if (axgbe_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
+	if (rte_get_bit32_relaxed(AXGBE_LINK_ERR, &pdata->dev_state)) {
 		pdata->phy.link = 0;
 		goto adjust_link;
 	}
@@ -1070,10 +1070,11 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 			}
 		}
 		axgbe_phy_status_result(pdata);
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
-			axgbe_clear_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+		if (rte_get_bit32_relaxed(AXGBE_LINK_INIT, &pdata->dev_state))
+			rte_clear_bit32_relaxed(AXGBE_LINK_INIT,
+						&pdata->dev_state);
 	} else {
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
+		if (rte_get_bit32_relaxed(AXGBE_LINK_INIT, &pdata->dev_state)) {
 			axgbe_check_link_timeout(pdata);
 
 			if (link_aneg)
-- 
2.17.1


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v7 4/6] net/bnx2x: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (44 preceding siblings ...)
  2020-03-09  9:54 ` [dpdk-dev] [PATCH v7 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
@ 2020-03-09  9:54 ` " Joyce Kong
  2020-03-09  9:54 ` [dpdk-dev] [PATCH v7 5/6] net/qede: " Joyce Kong
                   ` (22 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2020-03-09  9:54 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, phil.yang, gavin.hu
  Cc: nd, dev

Remove the driver's private bit operation helpers and use the common
rte bit operation APIs instead, which greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/bnx2x/bnx2x.c    | 253 +++++++++++++++++------------------
 drivers/net/bnx2x/bnx2x.h    |  10 +-
 drivers/net/bnx2x/ecore_sp.h |  48 +++----
 3 files changed, 150 insertions(+), 161 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c
index 0b4030e2b..2ea23b2eb 100644
--- a/drivers/net/bnx2x/bnx2x.c
+++ b/drivers/net/bnx2x/bnx2x.c
@@ -25,6 +25,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <zlib.h>
+#include <rte_pmd_bitops.h>
 #include <rte_string_fns.h>
 
 #define BNX2X_PMD_VER_PREFIX "BNX2X PMD"
@@ -129,32 +130,6 @@ static void bnx2x_ack_sb(struct bnx2x_softc *sc, uint8_t igu_sb_id,
 			 uint8_t storm, uint16_t index, uint8_t op,
 			 uint8_t update);
 
-int bnx2x_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	mb();
-	return res;
-}
-
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-void bnx2x_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-int bnx2x_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 int bnx2x_cmpxchg(volatile int *addr, int old, int new)
 {
 	return __sync_val_compare_and_swap(addr, old, new);
@@ -1434,16 +1409,16 @@ static int
 bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj,
 		 int mac_type, uint8_t wait_for_comp)
 {
-	unsigned long ramrod_flags = 0, vlan_mac_flags = 0;
+	uint32_t ramrod_flags = 0, vlan_mac_flags = 0;
 	int rc;
 
 	/* wait for completion of requested */
 	if (wait_for_comp) {
-		bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+		rte_set_bit32_relaxed(RAMROD_COMP_WAIT, &ramrod_flags);
 	}
 
 	/* Set the mac type of addresses we want to clear */
-	bnx2x_set_bit(mac_type, &vlan_mac_flags);
+	rte_set_bit32_relaxed(mac_type, &vlan_mac_flags);
 
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc < 0)
@@ -1454,8 +1429,7 @@ bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj,
 
 static int
 bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
-			unsigned long *rx_accept_flags,
-			unsigned long *tx_accept_flags)
+			uint32_t *rx_accept_flags, uint32_t *tx_accept_flags)
 {
 	/* Clear the flags first */
 	*rx_accept_flags = 0;
@@ -1470,26 +1444,28 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		break;
 
 	case BNX2X_RX_MODE_NORMAL:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
 	case BNX2X_RX_MODE_ALLMULTI:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_ALL_MULTICAST,
+				      rx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_ALL_MULTICAST,
+				      tx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
@@ -1500,19 +1476,23 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		 * should receive matched and unmatched (in resolution of port)
 		 * unicast packets.
 		 */
-		bnx2x_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_ALL_MULTICAST,
+				      rx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_ALL_MULTICAST,
+				      tx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		if (IS_MF_SI(sc)) {
-			bnx2x_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags);
+			rte_set_bit32_relaxed(ECORE_ACCEPT_ALL_UNICAST,
+					tx_accept_flags);
 		} else {
-			bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+			rte_set_bit32_relaxed(ECORE_ACCEPT_UNICAST,
+					      tx_accept_flags);
 		}
 
 		break;
@@ -1524,8 +1504,8 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 
 	/* Set ACCEPT_ANY_VLAN as we do not enable filtering by VLAN */
 	if (rx_mode != BNX2X_RX_MODE_NONE) {
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
+		rte_set_bit32_relaxed(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
 	}
 
 	return 0;
@@ -1554,7 +1534,7 @@ bnx2x_set_q_rx_mode(struct bnx2x_softc *sc, uint8_t cl_id,
 	ramrod_param.rdata = BNX2X_SP(sc, rx_mode_rdata);
 	ramrod_param.rdata_mapping =
 	    (rte_iova_t)BNX2X_SP_MAPPING(sc, rx_mode_rdata),
-	    bnx2x_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	    rte_set_bit32_relaxed(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	ramrod_param.ramrod_flags = ramrod_flags;
 	ramrod_param.rx_mode_flags = rx_mode_flags;
@@ -1573,8 +1553,8 @@ bnx2x_set_q_rx_mode(struct bnx2x_softc *sc, uint8_t cl_id,
 
 int bnx2x_set_storm_rx_mode(struct bnx2x_softc *sc)
 {
-	unsigned long rx_mode_flags = 0, ramrod_flags = 0;
-	unsigned long rx_accept_flags = 0, tx_accept_flags = 0;
+	uint32_t rx_mode_flags = 0, ramrod_flags = 0;
+	uint32_t rx_accept_flags = 0, tx_accept_flags = 0;
 	int rc;
 
 	rc = bnx2x_fill_accept_flags(sc, sc->rx_mode, &rx_accept_flags,
@@ -1583,9 +1563,9 @@ int bnx2x_set_storm_rx_mode(struct bnx2x_softc *sc)
 		return rc;
 	}
 
-	bnx2x_set_bit(RAMROD_RX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_TX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit32_relaxed(RAMROD_RX, &ramrod_flags);
+	rte_set_bit32_relaxed(RAMROD_TX, &ramrod_flags);
+	rte_set_bit32_relaxed(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	return bnx2x_set_q_rx_mode(sc, sc->fp[0].cl_id, rx_mode_flags,
 				 rx_accept_flags, tx_accept_flags,
@@ -1710,7 +1690,8 @@ static int bnx2x_func_wait_started(struct bnx2x_softc *sc)
 			    "Forcing STARTED-->TX_STOPPED-->STARTED");
 
 		func_params.f_obj = &sc->func_obj;
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit32_relaxed(RAMROD_DRV_CLR_ONLY,
+				      &func_params.ramrod_flags);
 
 		/* STARTED-->TX_STOPPED */
 		func_params.cmd = ECORE_F_CMD_TX_STOP;
@@ -1734,7 +1715,7 @@ static int bnx2x_stop_queue(struct bnx2x_softc *sc, int index)
 
 	q_params.q_obj = &sc->sp_objs[fp->index].q_obj;
 	/* We want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit32_relaxed(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* Stop the primary connection: */
 
@@ -1795,7 +1776,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	int rc;
 
 	/* prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit32_relaxed(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_STOP;
 
@@ -1809,7 +1790,8 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	if (rc) {
 		PMD_DRV_LOG(NOTICE, sc, "FUNC_STOP ramrod failed. "
 			    "Running a dry transaction");
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit32_relaxed(RAMROD_DRV_CLR_ONLY,
+				      &func_params.ramrod_flags);
 		return ecore_func_state_change(sc, &func_params);
 	}
 
@@ -1821,7 +1803,7 @@ static int bnx2x_reset_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	struct ecore_func_state_params func_params = { NULL };
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit32_relaxed(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_RESET;
@@ -1878,11 +1860,12 @@ bnx2x_chip_cleanup(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_li
 	 * a race between the completion code and this code.
 	 */
 
-	if (bnx2x_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) {
-		bnx2x_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
-	} else {
+	if (rte_get_bit32_relaxed(ECORE_FILTER_RX_MODE_PENDING,
+				  &sc->sp_state))
+		rte_set_bit32_relaxed(ECORE_FILTER_RX_MODE_SCHED,
+				      &sc->sp_state);
+	else
 		bnx2x_set_storm_rx_mode(sc);
-	}
 
 	/* Clean up multicast configuration */
 	rparam.mcast_obj = &sc->mcast_obj;
@@ -1964,7 +1947,7 @@ static void bnx2x_disable_close_the_gate(struct bnx2x_softc *sc)
  */
 static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 {
-	unsigned long ramrod_flags = 0, vlan_mac_flags = 0;
+	uint32_t ramrod_flags = 0, vlan_mac_flags = 0;
 	struct ecore_mcast_ramrod_params rparam = { NULL };
 	struct ecore_vlan_mac_obj *mac_obj = &sc->sp_objs->mac_obj;
 	int rc;
@@ -1972,12 +1955,12 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Cleanup MACs' object first... */
 
 	/* Wait for completion of requested */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit32_relaxed(RAMROD_COMP_WAIT, &ramrod_flags);
 	/* Perform a dry cleanup */
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
+	rte_set_bit32_relaxed(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
 
 	/* Clean ETH primary MAC */
-	bnx2x_set_bit(ECORE_ETH_MAC, &vlan_mac_flags);
+	rte_set_bit32_relaxed(ECORE_ETH_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, &sc->sp_objs->mac_obj, &vlan_mac_flags,
 				 &ramrod_flags);
 	if (rc != 0) {
@@ -1986,7 +1969,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 
 	/* Cleanup UC list */
 	vlan_mac_flags = 0;
-	bnx2x_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
+	rte_set_bit32_relaxed(ECORE_UC_LIST_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc != 0) {
 		PMD_DRV_LOG(NOTICE, sc,
@@ -1996,7 +1979,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Now clean mcast object... */
 
 	rparam.mcast_obj = &sc->mcast_obj;
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
+	rte_set_bit32_relaxed(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
 
 	/* Add a DEL command... */
 	rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL);
@@ -4294,13 +4277,13 @@ static void bnx2x_handle_mcast_eqe(struct bnx2x_softc *sc)
 static void
 bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *elem)
 {
-	unsigned long ramrod_flags = 0;
+	uint32_t ramrod_flags = 0;
 	int rc = 0;
 	uint32_t cid = elem->message.data.eth_event.echo & BNX2X_SWCID_MASK;
 	struct ecore_vlan_mac_obj *vlan_mac_obj;
 
 	/* always push next commands out, don't wait here */
-	bnx2x_set_bit(RAMROD_CONT, &ramrod_flags);
+	rte_set_bit32_relaxed(RAMROD_CONT, &ramrod_flags);
 
 	switch (le32toh(elem->message.data.eth_event.echo) >> BNX2X_SWCID_SHIFT) {
 	case ECORE_FILTER_MAC_PENDING:
@@ -4331,12 +4314,12 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 
 static void bnx2x_handle_rx_mode_eqe(struct bnx2x_softc *sc)
 {
-	bnx2x_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	rte_clear_bit32_relaxed(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	/* send rx_mode command again if was requested */
-	if (bnx2x_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) {
+	if (rte_test_and_clear_bit32_relaxed(ECORE_FILTER_RX_MODE_SCHED,
+						&sc->sp_state))
 		bnx2x_set_storm_rx_mode(sc);
-	}
 }
 
 static void bnx2x_update_eq_prod(struct bnx2x_softc *sc, uint16_t prod)
@@ -4705,7 +4688,7 @@ static int bnx2x_init_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	PMD_INIT_FUNC_TRACE(sc);
 
 	/* prepare the parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit32_relaxed(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_INIT;
@@ -4953,7 +4936,7 @@ static void bnx2x_init_eth_fp(struct bnx2x_softc *sc, int idx)
 {
 	struct bnx2x_fastpath *fp = &sc->fp[idx];
 	uint32_t cids[ECORE_MULTI_TX_COS] = { 0 };
-	unsigned long q_type = 0;
+	uint32_t q_type = 0;
 	int cos;
 
 	fp->sc = sc;
@@ -5000,8 +4983,8 @@ static void bnx2x_init_eth_fp(struct bnx2x_softc *sc, int idx)
 	bnx2x_update_fp_sb_idx(fp);
 
 	/* Configure Queue State object */
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_RX, &q_type);
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_TX, &q_type);
+	rte_set_bit32_relaxed(ECORE_Q_TYPE_HAS_RX, &q_type);
+	rte_set_bit32_relaxed(ECORE_Q_TYPE_HAS_TX, &q_type);
 
 	ecore_init_queue_obj(sc,
 			     &sc->sp_objs[idx].q_obj,
@@ -5815,7 +5798,7 @@ static int bnx2x_func_start(struct bnx2x_softc *sc)
 	    &func_params.params.start;
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit32_relaxed(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_START;
@@ -6391,11 +6374,11 @@ bnx2x_pf_q_prep_init(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
 	uint8_t cos;
 	int cxt_index, cxt_offset;
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags);
+	rte_set_bit32_relaxed(ECORE_Q_FLG_HC, &init_params->rx.flags);
+	rte_set_bit32_relaxed(ECORE_Q_FLG_HC, &init_params->tx.flags);
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
+	rte_set_bit32_relaxed(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
+	rte_set_bit32_relaxed(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
 
 	/* HC rate */
 	init_params->rx.hc_rate =
@@ -6426,10 +6409,10 @@ bnx2x_pf_q_prep_init(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
 static unsigned long
 bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 {
-	unsigned long flags = 0;
+	uint32_t flags = 0;
 
 	/* PF driver will always initialize the Queue to an ACTIVE state */
-	bnx2x_set_bit(ECORE_Q_FLG_ACTIVE, &flags);
+	rte_set_bit32_relaxed(ECORE_Q_FLG_ACTIVE, &flags);
 
 	/*
 	 * tx only connections collect statistics (on the same index as the
@@ -6437,9 +6420,9 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * connection is initialized.
 	 */
 
-	bnx2x_set_bit(ECORE_Q_FLG_STATS, &flags);
+	rte_set_bit32_relaxed(ECORE_Q_FLG_STATS, &flags);
 	if (zero_stats) {
-		bnx2x_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags);
+		rte_set_bit32_relaxed(ECORE_Q_FLG_ZERO_STATS, &flags);
 	}
 
 	/*
@@ -6447,28 +6430,28 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * CoS-ness doesn't survive the loopback
 	 */
 	if (sc->flags & BNX2X_TX_SWITCHING) {
-		bnx2x_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags);
+		rte_set_bit32_relaxed(ECORE_Q_FLG_TX_SWITCH, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
+	rte_set_bit32_relaxed(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
 
 	return flags;
 }
 
 static unsigned long bnx2x_get_q_flags(struct bnx2x_softc *sc, uint8_t leading)
 {
-	unsigned long flags = 0;
+	uint32_t flags = 0;
 
 	if (IS_MF_SD(sc)) {
-		bnx2x_set_bit(ECORE_Q_FLG_OV, &flags);
+		rte_set_bit32_relaxed(ECORE_Q_FLG_OV, &flags);
 	}
 
 	if (leading) {
-		bnx2x_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags);
-		bnx2x_set_bit(ECORE_Q_FLG_MCAST, &flags);
+		rte_set_bit32_relaxed(ECORE_Q_FLG_LEADING_RSS, &flags);
+		rte_set_bit32_relaxed(ECORE_Q_FLG_MCAST, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_VLAN, &flags);
+	rte_set_bit32_relaxed(ECORE_Q_FLG_VLAN, &flags);
 
 	/* merge with common flags */
 	return flags | bnx2x_get_common_flags(sc, TRUE);
@@ -6589,7 +6572,7 @@ bnx2x_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, uint8_t lea
 	q_params.q_obj = &BNX2X_SP_OBJ(sc, fp).q_obj;
 
 	/* we want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit32_relaxed(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* prepare the INIT parameters */
 	bnx2x_pf_q_prep_init(sc, fp, &q_params.params.init);
@@ -6657,20 +6640,20 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 
 	params.rss_obj = rss_obj;
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
+	rte_set_bit32_relaxed(RAMROD_COMP_WAIT, &params.ramrod_flags);
 
-	bnx2x_set_bit(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
+	rte_set_bit32_relaxed(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
 
 	/* RSS configuration */
-	bnx2x_set_bit(ECORE_RSS_IPV4, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV4_TCP, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6_TCP, &params.rss_flags);
+	rte_set_bit32_relaxed(ECORE_RSS_IPV4, &params.rss_flags);
+	rte_set_bit32_relaxed(ECORE_RSS_IPV4_TCP, &params.rss_flags);
+	rte_set_bit32_relaxed(ECORE_RSS_IPV6, &params.rss_flags);
+	rte_set_bit32_relaxed(ECORE_RSS_IPV6_TCP, &params.rss_flags);
 	if (rss_obj->udp_rss_v4) {
-		bnx2x_set_bit(ECORE_RSS_IPV4_UDP, &params.rss_flags);
+		rte_set_bit32_relaxed(ECORE_RSS_IPV4_UDP, &params.rss_flags);
 	}
 	if (rss_obj->udp_rss_v6) {
-		bnx2x_set_bit(ECORE_RSS_IPV6_UDP, &params.rss_flags);
+		rte_set_bit32_relaxed(ECORE_RSS_IPV6_UDP, &params.rss_flags);
 	}
 
 	/* Hash bits */
@@ -6685,7 +6668,7 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 			params.rss_key[i] = (uint32_t) rte_rand();
 		}
 
-		bnx2x_set_bit(ECORE_RSS_SET_SRCH, &params.rss_flags);
+		rte_set_bit32_relaxed(ECORE_RSS_SET_SRCH, &params.rss_flags);
 	}
 
 	if (IS_PF(sc))
@@ -6730,7 +6713,7 @@ static int bnx2x_init_rss_pf(struct bnx2x_softc *sc)
 static int
 bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
 		struct ecore_vlan_mac_obj *obj, uint8_t set, int mac_type,
-		unsigned long *ramrod_flags)
+		uint32_t *ramrod_flags)
 {
 	struct ecore_vlan_mac_ramrod_params ramrod_param;
 	int rc;
@@ -6742,11 +6725,12 @@ bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
 	ramrod_param.ramrod_flags = *ramrod_flags;
 
 	/* fill a user request section if needed */
-	if (!bnx2x_test_bit(RAMROD_CONT, ramrod_flags)) {
+	if (!rte_get_bit32_relaxed(RAMROD_CONT, ramrod_flags)) {
 		rte_memcpy(ramrod_param.user_req.u.mac.mac, mac,
 				 ETH_ALEN);
 
-		bnx2x_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags);
+		rte_set_bit32_relaxed(mac_type,
+				      &ramrod_param.user_req.vlan_mac_flags);
 
 /* Set the command: ADD or DEL */
 		ramrod_param.user_req.cmd = (set) ? ECORE_VLAN_MAC_ADD :
@@ -6769,11 +6753,11 @@ bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
 
 static int bnx2x_set_eth_mac(struct bnx2x_softc *sc, uint8_t set)
 {
-	unsigned long ramrod_flags = 0;
+	uint32_t ramrod_flags = 0;
 
 	PMD_DRV_LOG(DEBUG, sc, "Adding Ethernet MAC");
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit32_relaxed(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	/* Eth MAC is set on RSS leading client (fp[0]) */
 	return bnx2x_set_mac_one(sc, sc->link_params.mac_addr,
@@ -6905,24 +6889,26 @@ bnx2x_fill_report_data(struct bnx2x_softc *sc, struct bnx2x_link_report_data *da
 
 	/* Link is down */
 	if (!sc->link_vars.link_up || (sc->flags & BNX2X_MF_FUNC_DIS)) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+		rte_set_bit32_relaxed(BNX2X_LINK_REPORT_LINK_DOWN,
 			    &data->link_report_flags);
 	}
 
 	/* Full DUPLEX */
 	if (sc->link_vars.duplex == DUPLEX_FULL) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		rte_set_bit32_relaxed(BNX2X_LINK_REPORT_FULL_DUPLEX,
 			    &data->link_report_flags);
 	}
 
 	/* Rx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_RX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
+		rte_set_bit32_relaxed(BNX2X_LINK_REPORT_RX_FC_ON,
+				&data->link_report_flags);
 	}
 
 	/* Tx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
+		rte_set_bit32_relaxed(BNX2X_LINK_REPORT_TX_FC_ON,
+				&data->link_report_flags);
 	}
 }
 
@@ -6941,9 +6927,9 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 
 	/* Don't report link down or exactly the same link status twice */
 	if (!memcmp(&cur_data, &sc->last_reported_link, sizeof(cur_data)) ||
-	    (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	    (rte_get_bit32_relaxed(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &sc->last_reported_link.link_report_flags) &&
-	     bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	     rte_get_bit32_relaxed(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &cur_data.link_report_flags))) {
 		return;
 	}
@@ -6958,15 +6944,16 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 	/* report new link params and remember the state for the next time */
 	rte_memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data));
 
-	if (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	if (rte_get_bit32_relaxed(BNX2X_LINK_REPORT_LINK_DOWN,
 			 &cur_data.link_report_flags)) {
 		ELINK_DEBUG_P0(sc, "NIC Link is Down");
 	} else {
 		__rte_unused const char *duplex;
 		__rte_unused const char *flow;
 
-		if (bnx2x_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
-					   &cur_data.link_report_flags)) {
+		if (rte_test_and_clear_bit32_relaxed
+				(BNX2X_LINK_REPORT_FULL_DUPLEX,
+				 &cur_data.link_report_flags)) {
 			duplex = "full";
 				ELINK_DEBUG_P0(sc, "link set to full duplex");
 		} else {
@@ -6980,20 +6967,24 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
  * enabled.
  */
 		if (cur_data.link_report_flags) {
-			if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			if (rte_get_bit32_relaxed(BNX2X_LINK_REPORT_RX_FC_ON,
 					 &cur_data.link_report_flags) &&
-			    bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+			    rte_get_bit32_relaxed(BNX2X_LINK_REPORT_TX_FC_ON,
 					 &cur_data.link_report_flags)) {
 				flow = "ON - receive & transmit";
-			} else if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
-						&cur_data.link_report_flags) &&
-				   !bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+			} else if (rte_get_bit32_relaxed
+						(BNX2X_LINK_REPORT_RX_FC_ON,
+						 &cur_data.link_report_flags) &&
+				   !rte_get_bit32_relaxed
+						(BNX2X_LINK_REPORT_TX_FC_ON,
 						 &cur_data.link_report_flags)) {
 				flow = "ON - receive";
-			} else if (!bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (!rte_get_bit32_relaxed
+						(BNX2X_LINK_REPORT_RX_FC_ON,
 						 &cur_data.link_report_flags) &&
-				   bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
-						&cur_data.link_report_flags)) {
+				   rte_get_bit32_relaxed
+						(BNX2X_LINK_REPORT_TX_FC_ON,
+						 &cur_data.link_report_flags)) {
 				flow = "ON - transmit";
 			} else {
 				flow = "none";	/* possible? */
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 1dbc98197..63be72a5b 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -1000,8 +1000,8 @@ struct bnx2x_sp_objs {
  * link parameters twice.
  */
 struct bnx2x_link_report_data {
-	uint16_t      line_speed;        /* Effective line speed */
-	unsigned long link_report_flags; /* BNX2X_LINK_REPORT_XXX flags */
+	uint16_t line_speed;        /* Effective line speed */
+	uint32_t link_report_flags; /* BNX2X_LINK_REPORT_XXX flags */
 };
 
 enum {
@@ -1232,7 +1232,7 @@ struct bnx2x_softc {
 	/* slow path */
 	struct bnx2x_dma      sp_dma;
 	struct bnx2x_slowpath *sp;
-	unsigned long       sp_state;
+	uint32_t	    sp_state;
 
 	/* slow path queue */
 	struct bnx2x_dma spq_dma;
@@ -1812,10 +1812,6 @@ static const uint32_t dmae_reg_go_c[] = {
 #define PCI_PM_D0    1
 #define PCI_PM_D3hot 2
 
-int  bnx2x_test_bit(int nr, volatile unsigned long * addr);
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long * addr);
-void bnx2x_clear_bit(int nr, volatile unsigned long * addr);
-int  bnx2x_test_and_clear_bit(int nr, volatile unsigned long * addr);
 int  bnx2x_cmpxchg(volatile int *addr, int old, int new);
 
 int bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size,
diff --git a/drivers/net/bnx2x/ecore_sp.h b/drivers/net/bnx2x/ecore_sp.h
index cc1db377a..d3f1dac36 100644
--- a/drivers/net/bnx2x/ecore_sp.h
+++ b/drivers/net/bnx2x/ecore_sp.h
@@ -15,6 +15,7 @@
 #define ECORE_SP_H
 
 #include <rte_byteorder.h>
+#include <rte_pmd_bitops.h>
 
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 #ifndef __LITTLE_ENDIAN
@@ -73,10 +74,11 @@ typedef rte_spinlock_t ECORE_MUTEX_SPIN;
 #define ECORE_SET_BIT_NA(bit, var)         (*var |= (1 << bit))
 #define ECORE_CLEAR_BIT_NA(bit, var)       (*var &= ~(1 << bit))
 
-#define ECORE_TEST_BIT(bit, var)           bnx2x_test_bit(bit, var)
-#define ECORE_SET_BIT(bit, var)            bnx2x_set_bit(bit, var)
-#define ECORE_CLEAR_BIT(bit, var)          bnx2x_clear_bit(bit, var)
-#define ECORE_TEST_AND_CLEAR_BIT(bit, var) bnx2x_test_and_clear_bit(bit, var)
+#define ECORE_TEST_BIT(bit, var)           rte_get_bit32_relaxed(bit, var)
+#define ECORE_SET_BIT(bit, var)            rte_set_bit32_relaxed(bit, var)
+#define ECORE_CLEAR_BIT(bit, var)          rte_clear_bit32_relaxed(bit, var)
+#define ECORE_TEST_AND_CLEAR_BIT(bit, var) \
+	rte_test_and_clear_bit32_relaxed(bit, var)
 
 #define atomic_load_acq_int                (int)*
 #define atomic_store_rel_int(a, v)         (*a = v)
@@ -538,7 +540,7 @@ struct ecore_vlan_mac_data {
 	/* used to contain the data related vlan_mac_flags bits from
 	 * ramrod parameters.
 	 */
-	unsigned long vlan_mac_flags;
+	uint32_t vlan_mac_flags;
 
 	/* Needed for MOVE command */
 	struct ecore_vlan_mac_obj *target_obj;
@@ -688,7 +690,7 @@ struct ecore_vlan_mac_ramrod_params {
 	struct ecore_vlan_mac_obj *vlan_mac_obj;
 
 	/* General command flags: COMP_WAIT, etc. */
-	unsigned long ramrod_flags;
+	uint32_t ramrod_flags;
 
 	/* Command specific configuration request */
 	struct ecore_vlan_mac_data user_req;
@@ -928,7 +930,7 @@ struct ecore_mcast_ramrod_params {
 	struct ecore_mcast_obj *mcast_obj;
 
 	/* Relevant options are RAMROD_COMP_WAIT and RAMROD_DRV_CLR_ONLY */
-	unsigned long ramrod_flags;
+	uint32_t ramrod_flags;
 
 	ecore_list_t mcast_list; /* list of struct ecore_mcast_list_elem */
 	/** TODO:
@@ -1144,22 +1146,22 @@ struct ecore_config_rss_params {
 	struct ecore_rss_config_obj *rss_obj;
 
 	/* may have RAMROD_COMP_WAIT set only */
-	unsigned long	ramrod_flags;
+	uint32_t ramrod_flags;
 
 	/* ECORE_RSS_X bits */
-	unsigned long	rss_flags;
+	uint32_t rss_flags;
 
 	/* Number hash bits to take into an account */
-	uint8_t		rss_result_mask;
+	uint8_t	 rss_result_mask;
 
 	/* Indirection table */
-	uint8_t		ind_table[T_ETH_INDIRECTION_TABLE_SIZE];
+	uint8_t	 ind_table[T_ETH_INDIRECTION_TABLE_SIZE];
 
 	/* RSS hash values */
-	uint32_t		rss_key[10];
+	uint32_t rss_key[10];
 
 	/* valid only if ECORE_RSS_UPDATE_TOE is set */
-	uint16_t		toe_rss_bitmap;
+	uint16_t toe_rss_bitmap;
 };
 
 struct ecore_rss_config_obj {
@@ -1290,17 +1292,17 @@ enum ecore_q_type {
 
 struct ecore_queue_init_params {
 	struct {
-		unsigned long	flags;
-		uint16_t		hc_rate;
-		uint8_t		fw_sb_id;
-		uint8_t		sb_cq_index;
+		uint32_t flags;
+		uint16_t hc_rate;
+		uint8_t	 fw_sb_id;
+		uint8_t	 sb_cq_index;
 	} tx;
 
 	struct {
-		unsigned long	flags;
-		uint16_t		hc_rate;
-		uint8_t		fw_sb_id;
-		uint8_t		sb_cq_index;
+		uint32_t flags;
+		uint16_t hc_rate;
+		uint8_t	 fw_sb_id;
+		uint8_t	 sb_cq_index;
 	} rx;
 
 	/* CID context in the host memory */
@@ -1440,7 +1442,7 @@ struct ecore_queue_state_params {
 	enum ecore_queue_cmd cmd;
 
 	/* may have RAMROD_COMP_WAIT set only */
-	unsigned long ramrod_flags;
+	uint32_t ramrod_flags;
 
 	/* Params according to the current command */
 	union {
@@ -1704,7 +1706,7 @@ struct ecore_func_state_params {
 	enum ecore_func_cmd cmd;
 
 	/* may have RAMROD_COMP_WAIT set only */
-	unsigned long	ramrod_flags;
+	uint32_t ramrod_flags;
 
 	/* Params according to the current command */
 	union {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v7 5/6] net/qede: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (45 preceding siblings ...)
  2020-03-09  9:54 ` [dpdk-dev] [PATCH v7 4/6] net/bnx2x: " Joyce Kong
@ 2020-03-09  9:54 ` " Joyce Kong
  2020-03-09  9:54 ` [dpdk-dev] [PATCH v7 6/6] net/hinic: " Joyce Kong
                   ` (21 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2020-03-09  9:54 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, phil.yang, gavin.hu
  Cc: nd, dev

Remove the driver's private bit operation helpers and use the common
rte bit operation APIs instead, which greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/qede/base/bcm_osal.c          | 22 +------------
 drivers/net/qede/base/bcm_osal.h          | 14 ++++-----
 drivers/net/qede/base/ecore.h             |  6 ++--
 drivers/net/qede/base/ecore_cxt.c         |  6 ++--
 drivers/net/qede/base/ecore_dcbx.c        |  8 ++---
 drivers/net/qede/base/ecore_dev.c         | 38 +++++++++++------------
 drivers/net/qede/base/ecore_dev_api.h     |  2 +-
 drivers/net/qede/base/ecore_l2.c          |  6 ++--
 drivers/net/qede/base/ecore_mcp.c         |  4 +--
 drivers/net/qede/base/ecore_sp_commands.c | 12 +++----
 drivers/net/qede/base/ecore_spq.c         |  2 +-
 drivers/net/qede/base/ecore_spq.h         | 10 +++---
 drivers/net/qede/qede_main.c              |  4 +--
 13 files changed, 56 insertions(+), 78 deletions(-)

diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 48d016e24..54e5e4f98 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -46,26 +46,6 @@ u32 qede_osal_log2(u32 val)
 	return log;
 }
 
-inline void qede_set_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-inline void qede_clr_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-inline bool qede_test_bit(u32 nr, unsigned long *addr)
-{
-	bool res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
 static inline u32 qede_ffb(unsigned long word)
 {
 	unsigned long first_bit;
@@ -95,7 +75,7 @@ static inline u32 qede_ffz(unsigned long word)
 	return first_zero ? (first_zero - 1) : OSAL_BITS_PER_UL;
 }
 
-inline u32 qede_find_first_zero_bit(unsigned long *addr, u32 limit)
+inline u32 qede_find_first_zero_bit(u32 *addr, u32 limit)
 {
 	u32 i;
 	u32 nwords = 0;
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 513f6a8b4..51598a62b 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -18,6 +18,7 @@
 #include <rte_debug.h>
 #include <rte_ether.h>
 #include <rte_io.h>
+#include <rte_pmd_bitops.h>
 
 /* Forward declaration */
 struct ecore_dev;
@@ -308,23 +309,20 @@ typedef struct osal_list_t {
 #define OSAL_BITS_PER_UL_MASK		(OSAL_BITS_PER_UL - 1)
 
 /* Bitops */
-void qede_set_bit(u32, unsigned long *);
 #define OSAL_SET_BIT(bit, bitmap) \
-	qede_set_bit(bit, bitmap)
+	rte_set_bit32_relaxed(bit, bitmap)
 
-void qede_clr_bit(u32, unsigned long *);
 #define OSAL_CLEAR_BIT(bit, bitmap) \
-	qede_clr_bit(bit, bitmap)
+	rte_clear_bit32_relaxed(bit, bitmap)
 
-bool qede_test_bit(u32, unsigned long *);
-#define OSAL_TEST_BIT(bit, bitmap) \
-	qede_test_bit(bit, bitmap)
+#define OSAL_GET_BIT(bit, bitmap) \
+	rte_get_bit32_relaxed(bit, bitmap)
 
 u32 qede_find_first_bit(unsigned long *, u32);
 #define OSAL_FIND_FIRST_BIT(bitmap, length) \
 	qede_find_first_bit(bitmap, length)
 
-u32 qede_find_first_zero_bit(unsigned long *, u32);
+u32 qede_find_first_zero_bit(u32 *bitmap, u32 length);
 #define OSAL_FIND_FIRST_ZERO_BIT(bitmap, length) \
 	qede_find_first_zero_bit(bitmap, length)
 
diff --git a/drivers/net/qede/base/ecore.h b/drivers/net/qede/base/ecore.h
index b2077bc46..498bb6f09 100644
--- a/drivers/net/qede/base/ecore.h
+++ b/drivers/net/qede/base/ecore.h
@@ -422,8 +422,8 @@ struct ecore_hw_info {
 	u8 max_chains_per_vf;
 
 	u32 port_mode;
-	u32	hw_mode;
-	unsigned long device_capabilities;
+	u32 hw_mode;
+	u32 device_capabilities;
 
 	/* Default DCBX mode */
 	u8 dcbx_mode;
@@ -807,7 +807,7 @@ struct ecore_dev {
 
 	u8				path_id;
 
-	unsigned long			mf_bits;
+	u32				mf_bits;
 	enum ecore_mf_mode		mf_mode;
 #define IS_MF_DEFAULT(_p_hwfn)	\
 	(((_p_hwfn)->p_dev)->mf_mode == ECORE_MF_DEFAULT)
diff --git a/drivers/net/qede/base/ecore_cxt.c b/drivers/net/qede/base/ecore_cxt.c
index 773b75ecd..dda47ea67 100644
--- a/drivers/net/qede/base/ecore_cxt.c
+++ b/drivers/net/qede/base/ecore_cxt.c
@@ -154,7 +154,7 @@ struct ecore_ilt_client_cfg {
 struct ecore_cid_acquired_map {
 	u32 start_cid;
 	u32 max_count;
-	unsigned long *cid_map;
+	u32 *cid_map;
 };
 
 struct ecore_src_t2 {
@@ -1991,7 +1991,7 @@ static bool ecore_cxt_test_cid_acquired(struct ecore_hwfn *p_hwfn,
 	}
 
 	rel_cid = cid - (*pp_map)->start_cid;
-	if (!OSAL_TEST_BIT(rel_cid, (*pp_map)->cid_map)) {
+	if (!OSAL_GET_BIT(rel_cid, (*pp_map)->cid_map)) {
 		DP_NOTICE(p_hwfn, true,
 			  "CID %d [vifd %02x] not acquired", cid, vfid);
 		goto fail;
@@ -2102,7 +2102,7 @@ enum _ecore_status_t ecore_cxt_set_pf_params(struct ecore_hwfn *p_hwfn)
 
 		count = p_params->num_arfs_filters;
 
-		if (!OSAL_TEST_BIT(ECORE_MF_DISABLE_ARFS,
+		if (!OSAL_GET_BIT(ECORE_MF_DISABLE_ARFS,
 				   &p_hwfn->p_dev->mf_bits))
 			p_hwfn->p_cxt_mngr->arfs_count = count;
 
diff --git a/drivers/net/qede/base/ecore_dcbx.c b/drivers/net/qede/base/ecore_dcbx.c
index ccd4383bb..31234f18c 100644
--- a/drivers/net/qede/base/ecore_dcbx.c
+++ b/drivers/net/qede/base/ecore_dcbx.c
@@ -148,7 +148,7 @@ ecore_dcbx_set_params(struct ecore_dcbx_results *p_data,
 	p_data->arr[type].update = UPDATE_DCB_DSCP;
 
 	/* Do not add valn tag 0 when DCB is enabled and port is in UFP mode */
-	if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits))
+	if (OSAL_GET_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits))
 		p_data->arr[type].dont_add_vlan0 = true;
 
 	/* QM reconf data */
@@ -156,8 +156,8 @@ ecore_dcbx_set_params(struct ecore_dcbx_results *p_data,
 		p_hwfn->hw_info.offload_tc = tc;
 
 	/* Configure dcbx vlan priority in doorbell block for roce EDPM */
-	if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits) &&
-	    (type == DCBX_PROTOCOL_ROCE)) {
+	if (OSAL_GET_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits) &&
+	    type == DCBX_PROTOCOL_ROCE) {
 		ecore_wr(p_hwfn, p_ptt, DORQ_REG_TAG1_OVRD_MODE, 1);
 		ecore_wr(p_hwfn, p_ptt, DORQ_REG_PF_PCP, prio << 1);
 	}
@@ -293,7 +293,7 @@ ecore_dcbx_process_tlv(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
 	}
 
 	/* If Eth TLV is not detected, use UFP TC as default TC */
-	if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC,
+	if (OSAL_GET_BIT(ECORE_MF_UFP_SPECIFIC,
 			  &p_hwfn->p_dev->mf_bits) && !eth_tlv)
 		p_data->arr[DCBX_PROTOCOL_ETH].tc = p_hwfn->ufp_info.tc;
 
diff --git a/drivers/net/qede/base/ecore_dev.c b/drivers/net/qede/base/ecore_dev.c
index 86ecfb269..e18c2fa89 100644
--- a/drivers/net/qede/base/ecore_dev.c
+++ b/drivers/net/qede/base/ecore_dev.c
@@ -805,7 +805,7 @@ static enum _ecore_status_t ecore_llh_hw_init_pf(struct ecore_hwfn *p_hwfn,
 		ecore_wr(p_hwfn, p_ptt, addr, p_hwfn->rel_pf_id);
 	}
 
-	if (OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits) &&
+	if (OSAL_GET_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits) &&
 	    !ECORE_IS_FCOE_PERSONALITY(p_hwfn)) {
 		rc = ecore_llh_add_mac_filter(p_dev, 0,
 					      p_hwfn->hw_info.hw_mac_addr);
@@ -1044,7 +1044,7 @@ ecore_llh_add_filter(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
 	filter_details.enable = 1;
 	filter_details.value = ((u64)high << 32) | low;
 	filter_details.hdr_sel =
-		OSAL_TEST_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits) ?
+		OSAL_GET_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits) ?
 		1 : /* inner/encapsulated header */
 		0;  /* outer/tunnel header */
 	filter_details.protocol_type = filter_prot_type;
@@ -1083,7 +1083,7 @@ enum _ecore_status_t ecore_llh_add_mac_filter(struct ecore_dev *p_dev, u8 ppfid,
 	if (p_ptt == OSAL_NULL)
 		return ECORE_AGAIN;
 
-	if (!OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
+	if (!OSAL_GET_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
 		goto out;
 
 	OSAL_MEM_ZERO(&filter, sizeof(filter));
@@ -1220,7 +1220,7 @@ ecore_llh_add_protocol_filter(struct ecore_dev *p_dev, u8 ppfid,
 	if (p_ptt == OSAL_NULL)
 		return ECORE_AGAIN;
 
-	if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits))
+	if (!OSAL_GET_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits))
 		goto out;
 
 	rc = ecore_llh_protocol_filter_stringify(p_dev, type,
@@ -1287,7 +1287,7 @@ void ecore_llh_remove_mac_filter(struct ecore_dev *p_dev, u8 ppfid,
 	if (p_ptt == OSAL_NULL)
 		return;
 
-	if (!OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
+	if (!OSAL_GET_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
 		goto out;
 
 	OSAL_MEM_ZERO(&filter, sizeof(filter));
@@ -1342,7 +1342,7 @@ void ecore_llh_remove_protocol_filter(struct ecore_dev *p_dev, u8 ppfid,
 	if (p_ptt == OSAL_NULL)
 		return;
 
-	if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits))
+	if (!OSAL_GET_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits))
 		goto out;
 
 	rc = ecore_llh_protocol_filter_stringify(p_dev, type,
@@ -1396,8 +1396,8 @@ void ecore_llh_clear_ppfid_filters(struct ecore_dev *p_dev, u8 ppfid)
 	if (p_ptt == OSAL_NULL)
 		return;
 
-	if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits) &&
-	    !OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
+	if (!OSAL_GET_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits) &&
+	    !OSAL_GET_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
 		goto out;
 
 	rc = ecore_abs_ppfid(p_dev, ppfid, &abs_ppfid);
@@ -1423,8 +1423,8 @@ void ecore_llh_clear_all_filters(struct ecore_dev *p_dev)
 {
 	u8 ppfid;
 
-	if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits) &&
-	    !OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
+	if (!OSAL_GET_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits) &&
+	    !OSAL_GET_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
 		return;
 
 	for (ppfid = 0; ppfid < p_dev->p_llh_info->num_ppfid; ppfid++)
@@ -2674,7 +2674,7 @@ static enum _ecore_status_t ecore_calc_hw_mode(struct ecore_hwfn *p_hwfn)
 		return ECORE_INVAL;
 	}
 
-	if (OSAL_TEST_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits))
+	if (OSAL_GET_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits))
 		hw_mode |= 1 << MODE_MF_SD;
 	else
 		hw_mode |= 1 << MODE_MF_SI;
@@ -3382,7 +3382,7 @@ static enum _ecore_status_t ecore_hw_init_port(struct ecore_hwfn *p_hwfn,
 		 * The ppfid should be set in the vector, except in BB which has
 		 * a bug in the LLH where the ppfid is actually engine based.
 		 */
-		if (OSAL_TEST_BIT(ECORE_MF_NEED_DEF_PF, &p_dev->mf_bits)) {
+		if (OSAL_GET_BIT(ECORE_MF_NEED_DEF_PF, &p_dev->mf_bits)) {
 			u8 pf_id = p_hwfn->rel_pf_id;
 
 			if (!ECORE_IS_BB(p_dev))
@@ -3715,11 +3715,11 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
 		if (rc != ECORE_SUCCESS)
 			return rc;
 
-		if (IS_PF(p_dev) && (OSAL_TEST_BIT(ECORE_MF_8021Q_TAGGING,
+		if (IS_PF(p_dev) && (OSAL_GET_BIT(ECORE_MF_8021Q_TAGGING,
 						   &p_dev->mf_bits) ||
-				     OSAL_TEST_BIT(ECORE_MF_8021AD_TAGGING,
+				     OSAL_GET_BIT(ECORE_MF_8021AD_TAGGING,
 						   &p_dev->mf_bits))) {
-			if (OSAL_TEST_BIT(ECORE_MF_8021Q_TAGGING,
+			if (OSAL_GET_BIT(ECORE_MF_8021Q_TAGGING,
 					  &p_dev->mf_bits))
 				ether_type = ETHER_TYPE_VLAN;
 			else
@@ -4119,7 +4119,7 @@ enum _ecore_status_t ecore_hw_stop(struct ecore_dev *p_dev)
 		OSAL_MSLEEP(1);
 
 		if (IS_LEAD_HWFN(p_hwfn) &&
-		    OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits) &&
+		    OSAL_GET_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits) &&
 		    !ECORE_IS_FCOE_PERSONALITY(p_hwfn))
 			ecore_llh_remove_mac_filter(p_dev, 0,
 						   p_hwfn->hw_info.hw_mac_addr);
@@ -5113,7 +5113,7 @@ ecore_hw_get_nvm_info(struct ecore_hwfn *p_hwfn,
 			p_hwfn->p_dev->mf_bits |= 1 << ECORE_MF_NEED_DEF_PF;
 		break;
 	}
-	DP_INFO(p_hwfn, "Multi function mode is 0x%lx\n",
+	DP_INFO(p_hwfn, "Multi function mode is 0x%x\n",
 		p_hwfn->p_dev->mf_bits);
 
 	if (ECORE_IS_CMT(p_hwfn->p_dev))
@@ -6218,7 +6218,7 @@ enum _ecore_status_t
 ecore_llh_set_function_as_default(struct ecore_hwfn *p_hwfn,
 				  struct ecore_ptt *p_ptt)
 {
-	if (OSAL_TEST_BIT(ECORE_MF_NEED_DEF_PF, &p_hwfn->p_dev->mf_bits)) {
+	if (OSAL_GET_BIT(ECORE_MF_NEED_DEF_PF, &p_hwfn->p_dev->mf_bits)) {
 		ecore_wr(p_hwfn, p_ptt,
 			 NIG_REG_LLH_TAGMAC_DEF_PF_VECTOR,
 			 1 << p_hwfn->abs_pf_id / 2);
@@ -6795,5 +6795,5 @@ void ecore_set_fw_mac_addr(__le16 *fw_msb,
 
 bool ecore_is_mf_fip_special(struct ecore_dev *p_dev)
 {
-	return !!OSAL_TEST_BIT(ECORE_MF_FIP_SPECIAL, &p_dev->mf_bits);
+	return !!OSAL_GET_BIT(ECORE_MF_FIP_SPECIAL, &p_dev->mf_bits);
 }
diff --git a/drivers/net/qede/base/ecore_dev_api.h b/drivers/net/qede/base/ecore_dev_api.h
index 5ea8427a0..9ddf502eb 100644
--- a/drivers/net/qede/base/ecore_dev_api.h
+++ b/drivers/net/qede/base/ecore_dev_api.h
@@ -212,7 +212,7 @@ enum _ecore_status_t ecore_db_recovery_del(struct ecore_dev *p_dev,
 
 static OSAL_INLINE bool ecore_is_mf_ufp(struct ecore_hwfn *p_hwfn)
 {
-	return !!OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits);
+	return !!OSAL_GET_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits);
 }
 
 #endif
diff --git a/drivers/net/qede/base/ecore_l2.c b/drivers/net/qede/base/ecore_l2.c
index b20d83762..af234dec8 100644
--- a/drivers/net/qede/base/ecore_l2.c
+++ b/drivers/net/qede/base/ecore_l2.c
@@ -29,7 +29,7 @@
 
 struct ecore_l2_info {
 	u32 queues;
-	unsigned long **pp_qid_usage;
+	u32 **pp_qid_usage;
 
 	/* The lock is meant to synchronize access to the qid usage */
 	osal_mutex_t lock;
@@ -38,7 +38,7 @@ struct ecore_l2_info {
 enum _ecore_status_t ecore_l2_alloc(struct ecore_hwfn *p_hwfn)
 {
 	struct ecore_l2_info *p_l2_info;
-	unsigned long **pp_qids;
+	u32 **pp_qids;
 	u32 i;
 
 	if (!ECORE_IS_L2_PERSONALITY(p_hwfn))
@@ -2116,7 +2116,7 @@ void ecore_arfs_mode_configure(struct ecore_hwfn *p_hwfn,
 			       struct ecore_ptt *p_ptt,
 			       struct ecore_arfs_config_params *p_cfg_params)
 {
-	if (OSAL_TEST_BIT(ECORE_MF_DISABLE_ARFS, &p_hwfn->p_dev->mf_bits))
+	if (OSAL_GET_BIT(ECORE_MF_DISABLE_ARFS, &p_hwfn->p_dev->mf_bits))
 		return;
 
 	if (p_cfg_params->mode != ECORE_FILTER_CONFIG_MODE_DISABLE) {
diff --git a/drivers/net/qede/base/ecore_mcp.c b/drivers/net/qede/base/ecore_mcp.c
index 7518765a0..a7485967b 100644
--- a/drivers/net/qede/base/ecore_mcp.c
+++ b/drivers/net/qede/base/ecore_mcp.c
@@ -1732,7 +1732,7 @@ static void ecore_mcp_update_stag(struct ecore_hwfn *p_hwfn,
 	p_hwfn->mcp_info->func_info.ovlan = (u16)shmem_info.ovlan_stag &
 						 FUNC_MF_CFG_OV_STAG_MASK;
 	p_hwfn->hw_info.ovlan = p_hwfn->mcp_info->func_info.ovlan;
-	if (OSAL_TEST_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits)) {
+	if (OSAL_GET_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits)) {
 		if (p_hwfn->hw_info.ovlan != ECORE_MCP_VLAN_UNSET) {
 			ecore_wr(p_hwfn, p_ptt, NIG_REG_LLH_FUNC_TAG_VALUE,
 				 p_hwfn->hw_info.ovlan);
@@ -2026,7 +2026,7 @@ ecore_mcp_read_ufp_config(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
 	struct public_func shmem_info;
 	u32 port_cfg, val;
 
-	if (!OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits))
+	if (!OSAL_GET_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits))
 		return;
 
 	OSAL_MEMSET(&p_hwfn->ufp_info, 0, sizeof(p_hwfn->ufp_info));
diff --git a/drivers/net/qede/base/ecore_sp_commands.c b/drivers/net/qede/base/ecore_sp_commands.c
index 9860a62b5..44ced135d 100644
--- a/drivers/net/qede/base/ecore_sp_commands.c
+++ b/drivers/net/qede/base/ecore_sp_commands.c
@@ -335,16 +335,16 @@ enum _ecore_status_t ecore_sp_pf_start(struct ecore_hwfn *p_hwfn,
 	p_ramrod->dont_log_ramrods = 0;
 	p_ramrod->log_type_mask = OSAL_CPU_TO_LE16(0x8f);
 
-	if (OSAL_TEST_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits))
+	if (OSAL_GET_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits))
 		p_ramrod->mf_mode = MF_OVLAN;
 	else
 		p_ramrod->mf_mode = MF_NPAR;
 
 	p_ramrod->outer_tag_config.outer_tag.tci =
 		OSAL_CPU_TO_LE16(p_hwfn->hw_info.ovlan);
-	if (OSAL_TEST_BIT(ECORE_MF_8021Q_TAGGING, &p_hwfn->p_dev->mf_bits)) {
+	if (OSAL_GET_BIT(ECORE_MF_8021Q_TAGGING, &p_hwfn->p_dev->mf_bits)) {
 		p_ramrod->outer_tag_config.outer_tag.tpid = ETH_P_8021Q;
-	} else if (OSAL_TEST_BIT(ECORE_MF_8021AD_TAGGING,
+	} else if (OSAL_GET_BIT(ECORE_MF_8021AD_TAGGING,
 		 &p_hwfn->p_dev->mf_bits)) {
 		p_ramrod->outer_tag_config.outer_tag.tpid = ETH_P_8021AD;
 		p_ramrod->outer_tag_config.enable_stag_pri_change = 1;
@@ -357,7 +357,7 @@ enum _ecore_status_t ecore_sp_pf_start(struct ecore_hwfn *p_hwfn,
 	/* enable_stag_pri_change should be set if port is in BD mode or,
 	 * UFP with Host Control mode.
 	 */
-	if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits)) {
+	if (OSAL_GET_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits)) {
 		if (p_hwfn->ufp_info.pri_type == ECORE_UFP_PRI_OS)
 			p_ramrod->outer_tag_config.enable_stag_pri_change = 1;
 		else
@@ -378,7 +378,7 @@ enum _ecore_status_t ecore_sp_pf_start(struct ecore_hwfn *p_hwfn,
 	ecore_tunn_set_pf_start_params(p_hwfn, p_tunn,
 				       &p_ramrod->tunnel_config);
 
-	if (OSAL_TEST_BIT(ECORE_MF_INTER_PF_SWITCH,
+	if (OSAL_GET_BIT(ECORE_MF_INTER_PF_SWITCH,
 			  &p_hwfn->p_dev->mf_bits))
 		p_ramrod->allow_npar_tx_switching = allow_npar_tx_switch;
 
@@ -638,7 +638,7 @@ enum _ecore_status_t ecore_sp_heartbeat_ramrod(struct ecore_hwfn *p_hwfn)
 	if (rc != ECORE_SUCCESS)
 		return rc;
 
-	if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits))
+	if (OSAL_GET_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits))
 		p_ent->ramrod.pf_update.mf_vlan |=
 			OSAL_CPU_TO_LE16(((u16)p_hwfn->ufp_info.tc << 13));
 
diff --git a/drivers/net/qede/base/ecore_spq.c b/drivers/net/qede/base/ecore_spq.c
index 6c386821f..02f613688 100644
--- a/drivers/net/qede/base/ecore_spq.c
+++ b/drivers/net/qede/base/ecore_spq.c
@@ -977,7 +977,7 @@ enum _ecore_status_t ecore_spq_completion(struct ecore_hwfn *p_hwfn,
 			 * for the first successive completed entries.
 			 */
 			SPQ_COMP_BMAP_SET_BIT(p_spq, echo);
-			while (SPQ_COMP_BMAP_TEST_BIT(p_spq,
+			while (SPQ_COMP_BMAP_GET_BIT(p_spq,
 						      p_spq->comp_bitmap_idx)) {
 				SPQ_COMP_BMAP_CLEAR_BIT(p_spq,
 							p_spq->comp_bitmap_idx);
diff --git a/drivers/net/qede/base/ecore_spq.h b/drivers/net/qede/base/ecore_spq.h
index 6142c399a..0958e5a0a 100644
--- a/drivers/net/qede/base/ecore_spq.h
+++ b/drivers/net/qede/base/ecore_spq.h
@@ -121,17 +121,17 @@ struct ecore_spq {
 #define SPQ_RING_SIZE		\
 	(CORE_SPQE_PAGE_SIZE_BYTES / sizeof(struct slow_path_element))
 /* BITS_PER_LONG */
-#define SPQ_COMP_BMAP_SIZE	(SPQ_RING_SIZE / (sizeof(unsigned long) * 8))
-	unsigned long			p_comp_bitmap[SPQ_COMP_BMAP_SIZE];
-	u8				comp_bitmap_idx;
+#define SPQ_COMP_BMAP_SIZE	(SPQ_RING_SIZE / (sizeof(u32) * 8))
+	u32			p_comp_bitmap[SPQ_COMP_BMAP_SIZE];
+	u8			comp_bitmap_idx;
 #define SPQ_COMP_BMAP_SET_BIT(p_spq, idx)				\
 	(OSAL_SET_BIT(((idx) % SPQ_RING_SIZE), (p_spq)->p_comp_bitmap))
 
 #define SPQ_COMP_BMAP_CLEAR_BIT(p_spq, idx)				\
 	(OSAL_CLEAR_BIT(((idx) % SPQ_RING_SIZE), (p_spq)->p_comp_bitmap))
 
-#define SPQ_COMP_BMAP_TEST_BIT(p_spq, idx)	\
-	(OSAL_TEST_BIT(((idx) % SPQ_RING_SIZE), (p_spq)->p_comp_bitmap))
+#define SPQ_COMP_BMAP_GET_BIT(p_spq, idx)	\
+	(OSAL_GET_BIT(((idx) % SPQ_RING_SIZE), (p_spq)->p_comp_bitmap))
 
 	/* Statistics */
 	u32				unlimited_pending_count;
diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c
index 8580cbcd7..bd63c0ac7 100644
--- a/drivers/net/qede/qede_main.c
+++ b/drivers/net/qede/qede_main.c
@@ -382,8 +382,8 @@ qed_fill_dev_info(struct ecore_dev *edev, struct qed_dev_info *dev_info)
 
 	if (IS_PF(edev)) {
 		dev_info->b_inter_pf_switch =
-			OSAL_TEST_BIT(ECORE_MF_INTER_PF_SWITCH, &edev->mf_bits);
-		if (!OSAL_TEST_BIT(ECORE_MF_DISABLE_ARFS, &edev->mf_bits))
+			OSAL_GET_BIT(ECORE_MF_INTER_PF_SWITCH, &edev->mf_bits);
+		if (!OSAL_GET_BIT(ECORE_MF_DISABLE_ARFS, &edev->mf_bits))
 			dev_info->b_arfs_capable = true;
 		dev_info->tx_switching = false;
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v7 6/6] net/hinic: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (46 preceding siblings ...)
  2020-03-09  9:54 ` [dpdk-dev] [PATCH v7 5/6] net/qede: " Joyce Kong
@ 2020-03-09  9:54 ` " Joyce Kong
  2020-04-16  5:38 ` [dpdk-dev] [PATCH v8 0/6] implement common bit operation APIs Joyce Kong
                   ` (20 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2020-03-09  9:54 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, phil.yang, gavin.hu
  Cc: nd, dev

Remove the driver's own bit operation APIs and use the common ones,
which greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/hinic/Makefile            |  1 +
 drivers/net/hinic/base/hinic_compat.h | 33 +--------------------------
 drivers/net/hinic/hinic_pmd_ethdev.c  | 18 ++++++++-------
 drivers/net/hinic/hinic_pmd_ethdev.h  |  2 +-
 drivers/net/hinic/meson.build         |  2 ++
 5 files changed, 15 insertions(+), 41 deletions(-)

diff --git a/drivers/net/hinic/Makefile b/drivers/net/hinic/Makefile
index 87fd843e4..f087baac5 100644
--- a/drivers/net/hinic/Makefile
+++ b/drivers/net/hinic/Makefile
@@ -9,6 +9,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_pmd_hinic.a
 
 CFLAGS += -O3
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += $(WERROR_FLAGS)
 
 ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
diff --git a/drivers/net/hinic/base/hinic_compat.h b/drivers/net/hinic/base/hinic_compat.h
index b72e8cfe2..7fab6dc29 100644
--- a/drivers/net/hinic/base/hinic_compat.h
+++ b/drivers/net/hinic/base/hinic_compat.h
@@ -18,6 +18,7 @@
 #include <rte_spinlock.h>
 #include <rte_cycles.h>
 #include <rte_log.h>
+#include <rte_pmd_bitops.h>
 
 typedef uint8_t   u8;
 typedef int8_t    s8;
@@ -116,38 +117,6 @@ extern int hinic_logtype;
 
 #define HINIC_PAGE_SIZE_DPDK	6
 
-static inline int hinic_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	res = ((*addr) & (1UL << nr)) != 0;
-	return res;
-}
-
-static inline void hinic_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void hinic_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int hinic_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
-static inline int hinic_test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_or(addr, mask) & mask;
-}
-
 void *dma_zalloc_coherent(void *dev, size_t size, dma_addr_t *dma_handle,
 			  gfp_t flag);
 void *dma_zalloc_coherent_aligned(void *dev, size_t size,
diff --git a/drivers/net/hinic/hinic_pmd_ethdev.c b/drivers/net/hinic/hinic_pmd_ethdev.c
index 803a39e2d..c53980f44 100644
--- a/drivers/net/hinic/hinic_pmd_ethdev.c
+++ b/drivers/net/hinic/hinic_pmd_ethdev.c
@@ -269,7 +269,7 @@ static void hinic_dev_interrupt_handler(void *param)
 	struct rte_eth_dev *dev = param;
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (!hinic_test_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
+	if (!rte_get_bit32_relaxed(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device's interrupt is disabled, ignore interrupt event, dev_name: %s, port_id: %d",
 			    nic_dev->proc_dev_name, dev->data->port_id);
 		return;
@@ -1076,7 +1076,7 @@ static int hinic_dev_start(struct rte_eth_dev *dev)
 	if (dev->data->dev_conf.intr_conf.lsc != 0)
 		(void)hinic_link_update(dev, 0);
 
-	hinic_set_bit(HINIC_DEV_START, &nic_dev->dev_status);
+	rte_set_bit32_relaxed(HINIC_DEV_START, &nic_dev->dev_status);
 
 	return 0;
 
@@ -1201,7 +1201,8 @@ static void hinic_dev_stop(struct rte_eth_dev *dev)
 	name = dev->data->name;
 	port_id = dev->data->port_id;
 
-	if (!hinic_test_and_clear_bit(HINIC_DEV_START, &nic_dev->dev_status)) {
+	if (!rte_test_and_clear_bit32_relaxed(HINIC_DEV_START,
+					      &nic_dev->dev_status)) {
 		PMD_DRV_LOG(INFO, "Device %s already stopped", name);
 		return;
 	}
@@ -1246,7 +1247,7 @@ static void hinic_disable_interrupt(struct rte_eth_dev *dev)
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	int ret, retries = 0;
 
-	hinic_clear_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_clear_bit32_relaxed(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* disable msix interrupt in hardware */
 	hinic_set_msix_state(nic_dev->hwdev, 0, HINIC_MSIX_DISABLE);
@@ -2845,7 +2846,8 @@ static void hinic_dev_close(struct rte_eth_dev *dev)
 {
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (hinic_test_and_set_bit(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
+	if (rte_test_and_set_bit32_relaxed(HINIC_DEV_CLOSE,
+					   &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device %s already closed",
 			    dev->data->name);
 		return;
@@ -3042,7 +3044,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 			    eth_dev->data->name);
 		goto enable_intr_fail;
 	}
-	hinic_set_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_set_bit32_relaxed(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* initialize filter info */
 	filter_info = &nic_dev->filter;
@@ -3054,7 +3056,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 	TAILQ_INIT(&nic_dev->filter_fdir_rule_list);
 	TAILQ_INIT(&nic_dev->hinic_flow_list);
 
-	hinic_set_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_set_bit32_relaxed(HINIC_DEV_INIT, &nic_dev->dev_status);
 	PMD_DRV_LOG(INFO, "Initialize %s in primary successfully",
 		    eth_dev->data->name);
 
@@ -3110,7 +3112,7 @@ static int hinic_dev_uninit(struct rte_eth_dev *dev)
 	struct hinic_nic_dev *nic_dev;
 
 	nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
-	hinic_clear_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_clear_bit32_relaxed(HINIC_DEV_INIT, &nic_dev->dev_status);
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
diff --git a/drivers/net/hinic/hinic_pmd_ethdev.h b/drivers/net/hinic/hinic_pmd_ethdev.h
index 3e3f3b360..114f1df92 100644
--- a/drivers/net/hinic/hinic_pmd_ethdev.h
+++ b/drivers/net/hinic/hinic_pmd_ethdev.h
@@ -171,7 +171,7 @@ struct hinic_nic_dev {
 	unsigned int flags;
 	struct nic_service_cap nic_cap;
 	u32 rx_mode_status;	/* promisc or allmulticast */
-	unsigned long dev_status;
+	u32 dev_status;
 
 	char proc_dev_name[HINIC_DEV_NAME_LEN];
 	/* PF0->COS4, PF1->COS5, PF2->COS6, PF3->COS7,
diff --git a/drivers/net/hinic/meson.build b/drivers/net/hinic/meson.build
index bc7e24639..8c7ee9dfc 100644
--- a/drivers/net/hinic/meson.build
+++ b/drivers/net/hinic/meson.build
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Huawei Technologies Co., Ltd
 
+allow_experimental_apis = true
+
 subdir('base')
 objs = [base_objs]
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD bit operation APIs
  2020-03-09  9:54 ` [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD bit operation APIs Joyce Kong
@ 2020-03-09 15:50   ` Stephen Hemminger
  2020-03-31 22:35   ` Thomas Monjalon
  1 sibling, 0 replies; 139+ messages in thread
From: Stephen Hemminger @ 2020-03-09 15:50 UTC (permalink / raw)
  To: Joyce Kong
  Cc: thomas, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, phil.yang, gavin.hu, nd, dev

On Mon,  9 Mar 2020 17:54:05 +0800
Joyce Kong <joyce.kong@arm.com> wrote:

> /**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * Return the original bit from a 64-bit value, then clear it to 0 without
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and clear.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_test_and_clear_bit64_relaxed(unsigned int nr, volatile uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	uint64_t val = *addr;
> +	*addr = (*addr) & (~mask);
> +	return val & mask;
> +}
> +

This is not thread safe. You should use the existing GCC builtins.

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD bit operation APIs
  2020-03-09  9:54 ` [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD bit operation APIs Joyce Kong
  2020-03-09 15:50   ` Stephen Hemminger
@ 2020-03-31 22:35   ` Thomas Monjalon
  2020-04-01  8:27     ` Gavin Hu
  1 sibling, 1 reply; 139+ messages in thread
From: Thomas Monjalon @ 2020-03-31 22:35 UTC (permalink / raw)
  To: Joyce Kong
  Cc: stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, phil.yang, gavin.hu, nd, dev

Hi,

09/03/2020 10:54, Joyce Kong:
> Bitwise operation APIs are defined and used in a lot of PMDs,
> which caused a huge code duplication.

Statistics of the series: 653 insertions(+), 326 deletions(-)
I would not say it is a huge duplication.

> To reduce duplication,
> this patch consolidates them into a common API family.
[...]
> +PMD Bitops
> +M: Joyce Kong <joyce.kong@arm.com>
> +F: lib/librte_eal/common/include/rte_pmd_bitops.h

Why is it called PMD bitops and not simply bitops?



^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD bit operation APIs
  2020-03-31 22:35   ` Thomas Monjalon
@ 2020-04-01  8:27     ` Gavin Hu
  2020-04-01  9:45       ` Thomas Monjalon
  0 siblings, 1 reply; 139+ messages in thread
From: Gavin Hu @ 2020-04-01  8:27 UTC (permalink / raw)
  To: thomas, Joyce Kong
  Cc: stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, Honnappa Nagarahalli, Phil Yang, nd, dev, nd

Hi Thomas,

> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Wednesday, April 1, 2020 6:36 AM
> To: Joyce Kong <Joyce.Kong@arm.com>
> Cc: stephen@networkplumber.org; david.marchand@redhat.com;
> mb@smartsharesystems.com; jerinj@marvell.com;
> bruce.richardson@intel.com; ravi1.kumar@amd.com; rmody@marvell.com;
> shshaikh@marvell.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; Phil Yang
> <Phil.Yang@arm.com>; Gavin Hu <Gavin.Hu@arm.com>; nd
> <nd@arm.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD
> bit operation APIs
> 
> Hi,
> 
> 09/03/2020 10:54, Joyce Kong:
> > Bitwise operation APIs are defined and used in a lot of PMDs,
> > which caused a huge code duplication.
> 
> Statistics of the series: 653 insertions(+), 326 deletions(-)
> I would not say it is a huge duplication.
We did not include all PMDs, just a few for piloting and seeking opinions.
It is a huge duplication when counting all the PMDs.
> 
> > To reduce duplication,
> > this patch consolidates them into a common API family.
> [...]
> > +PMD Bitops
> > +M: Joyce Kong <joyce.kong@arm.com>
> > +F: lib/librte_eal/common/include/rte_pmd_bitops.h
> 
> Why is it called PMD bitops and not simply bitops?
The scope of these APIs is reduced to PMD use only; for libraries/applications, it is recommended to use C11 directly, as more ordering models are involved there, which adds complications.
> 


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD bit operation APIs
  2020-04-01  8:27     ` Gavin Hu
@ 2020-04-01  9:45       ` Thomas Monjalon
  2020-04-02  7:20         ` Gavin Hu
  0 siblings, 1 reply; 139+ messages in thread
From: Thomas Monjalon @ 2020-04-01  9:45 UTC (permalink / raw)
  To: Joyce Kong, Gavin Hu
  Cc: stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, Honnappa Nagarahalli, Phil Yang, nd, dev, nd

01/04/2020 10:27, Gavin Hu:
> Hi Thomas,
> 
> From: Thomas Monjalon <thomas@monjalon.net>
> > 
> > Hi,
> > 
> > 09/03/2020 10:54, Joyce Kong:
> > > Bitwise operation APIs are defined and used in a lot of PMDs,
> > > which caused a huge code duplication.
> > 
> > Statistics of the series: 653 insertions(+), 326 deletions(-)
> > I would not say it is a huge duplication.
> We did not include all PMDs, just a few for piloting and seeking opinions.
> It is a huge duplication when counting all the PMDs.
> > 
> > > To reduce duplication,
> > > this patch consolidates them into a common API family.
> > [...]
> > > +PMD Bitops
> > > +M: Joyce Kong <joyce.kong@arm.com>
> > > +F: lib/librte_eal/common/include/rte_pmd_bitops.h
> > 
> > Why is it called PMD bitops and not simply bitops?
> 
> The scope of these APIs is reduced to PMD use only; for libraries/applications, it is recommended to use C11 directly, as more ordering models are involved there, which adds complications.

OK, but PMD means nothing, except this is where it is used *now*.
Please describe and name the API with memory ordering words.



^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD bit operation APIs
  2020-04-01  9:45       ` Thomas Monjalon
@ 2020-04-02  7:20         ` Gavin Hu
  2020-04-02  8:07           ` Thomas Monjalon
  0 siblings, 1 reply; 139+ messages in thread
From: Gavin Hu @ 2020-04-02  7:20 UTC (permalink / raw)
  To: thomas, Joyce Kong
  Cc: stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, Honnappa Nagarahalli, Phil Yang, nd, dev, nd, nd

Hi Thomas,

> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Wednesday, April 1, 2020 5:45 PM
> To: Joyce Kong <Joyce.Kong@arm.com>; Gavin Hu <Gavin.Hu@arm.com>
> Cc: stephen@networkplumber.org; david.marchand@redhat.com;
> mb@smartsharesystems.com; jerinj@marvell.com;
> bruce.richardson@intel.com; ravi1.kumar@amd.com; rmody@marvell.com;
> shshaikh@marvell.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; Phil Yang
> <Phil.Yang@arm.com>; nd <nd@arm.com>; dev@dpdk.org; nd
> <nd@arm.com>
> Subject: Re: [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD
> bit operation APIs
> 
> 01/04/2020 10:27, Gavin Hu:
> > Hi Thomas,
> >
> > From: Thomas Monjalon <thomas@monjalon.net>
> > >
> > > Hi,
> > >
> > > 09/03/2020 10:54, Joyce Kong:
> > > > Bitwise operation APIs are defined and used in a lot of PMDs,
> > > > which caused a huge code duplication.
> > >
> > > Statistics of the series: 653 insertions(+), 326 deletions(-)
> > > I would not say it is a huge duplication.
> > We did not include all PMDs, just a few for piloting and seeking opinions.
> > It is a huge duplication when counting all the PMDs.
> > >
> > > > To reduce duplication,
> > > > this patch consolidates them into a common API family.
> > > [...]
> > > > +PMD Bitops
> > > > +M: Joyce Kong <joyce.kong@arm.com>
> > > > +F: lib/librte_eal/common/include/rte_pmd_bitops.h
> > >
> > > Why is it called PMD bitops and not simply bitops?
> >
> > The scope of these APIs are decreased to PMD use only, for
> libraries/applications, it is recommended to use C11 directly as there are
> complications of more ordering models involved.
> 
> OK, but PMD means nothing, except this is where it is used *now*.
> Please describe and name the API with memory ordering words.
Will remove 'PMD' in v8.
The APIs were already named with a '_relaxed' suffix, for example 'rte_get_bit64_relaxed'. 
According to Honnappa, this patch set just address PMD's requirement, and the current PMDs are not using C11, so only '_relaxed' version is offered.
http://inbox.dpdk.org/dev/VE1PR08MB514983C3200859B27F166EBB983F0@VE1PR08MB5149.eurprd08.prod.outlook.com/


^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD bit operation APIs
  2020-04-02  7:20         ` Gavin Hu
@ 2020-04-02  8:07           ` Thomas Monjalon
  2020-04-02  8:11             ` Jerin Jacob
  0 siblings, 1 reply; 139+ messages in thread
From: Thomas Monjalon @ 2020-04-02  8:07 UTC (permalink / raw)
  To: Joyce Kong, Gavin Hu
  Cc: stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, Honnappa Nagarahalli, Phil Yang, nd, dev

02/04/2020 09:20, Gavin Hu:
> Hi Thomas,
> 
> > -----Original Message-----
> > From: Thomas Monjalon <thomas@monjalon.net>
> > Sent: Wednesday, April 1, 2020 5:45 PM
> > To: Joyce Kong <Joyce.Kong@arm.com>; Gavin Hu <Gavin.Hu@arm.com>
> > Cc: stephen@networkplumber.org; david.marchand@redhat.com;
> > mb@smartsharesystems.com; jerinj@marvell.com;
> > bruce.richardson@intel.com; ravi1.kumar@amd.com; rmody@marvell.com;
> > shshaikh@marvell.com; xuanziyang2@huawei.com;
> > cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com; Honnappa
> > Nagarahalli <Honnappa.Nagarahalli@arm.com>; Phil Yang
> > <Phil.Yang@arm.com>; nd <nd@arm.com>; dev@dpdk.org; nd
> > <nd@arm.com>
> > Subject: Re: [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD
> > bit operation APIs
> > 
> > 01/04/2020 10:27, Gavin Hu:
> > > Hi Thomas,
> > >
> > > From: Thomas Monjalon <thomas@monjalon.net>
> > > >
> > > > Hi,
> > > >
> > > > 09/03/2020 10:54, Joyce Kong:
> > > > > Bitwise operation APIs are defined and used in a lot of PMDs,
> > > > > which caused a huge code duplication.
> > > >
> > > > Statistics of the series: 653 insertions(+), 326 deletions(-)
> > > > I would not say it is a huge duplication.
> > > We did not include all PMDs, just a few for piloting and seeking opinions.
> > > It is a huge duplication when counting all the PMDs.
> > > >
> > > > > To reduce duplication,
> > > > > this patch consolidates them into a common API family.
> > > > [...]
> > > > > +PMD Bitops
> > > > > +M: Joyce Kong <joyce.kong@arm.com>
> > > > > +F: lib/librte_eal/common/include/rte_pmd_bitops.h
> > > >
> > > > Why is it called PMD bitops and not simply bitops?
> > >
> > > The scope of these APIs are decreased to PMD use only, for
> > libraries/applications, it is recommended to use C11 directly as there are
> > complications of more ordering models involved.
> > 
> > OK, but PMD means nothing, except this is where it is used *now*.
> > Please describe and name the API with memory ordering words.
> Will remove 'PMD' in v8.
> The APIs were already named with a '_relaxed' suffix, for example 'rte_get_bit64_relaxed'. 
> According to Honnappa, this patch set just address PMD's requirement, and the current PMDs are not using C11, so only '_relaxed' version is offered.
> http://inbox.dpdk.org/dev/VE1PR08MB514983C3200859B27F166EBB983F0@VE1PR08MB5149.eurprd08.prod.outlook.com/

So why not calling this component "relaxed bitops"?





^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD bit operation APIs
  2020-04-02  8:07           ` Thomas Monjalon
@ 2020-04-02  8:11             ` Jerin Jacob
  2020-04-02  9:02               ` Gavin Hu
  0 siblings, 1 reply; 139+ messages in thread
From: Jerin Jacob @ 2020-04-02  8:11 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: Joyce Kong, Gavin Hu, stephen, david.marchand, mb, jerinj,
	bruce.richardson, ravi1.kumar, rmody, shshaikh, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, Honnappa Nagarahalli, Phil Yang,
	nd, dev

On Thu, Apr 2, 2020 at 1:37 PM Thomas Monjalon <thomas@monjalon.net> wrote:
>
> 02/04/2020 09:20, Gavin Hu:
> > Hi Thomas,
> >
> > > -----Original Message-----
> > > From: Thomas Monjalon <thomas@monjalon.net>
> > > Sent: Wednesday, April 1, 2020 5:45 PM
> > > To: Joyce Kong <Joyce.Kong@arm.com>; Gavin Hu <Gavin.Hu@arm.com>
> > > Cc: stephen@networkplumber.org; david.marchand@redhat.com;
> > > mb@smartsharesystems.com; jerinj@marvell.com;
> > > bruce.richardson@intel.com; ravi1.kumar@amd.com; rmody@marvell.com;
> > > shshaikh@marvell.com; xuanziyang2@huawei.com;
> > > cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com; Honnappa
> > > Nagarahalli <Honnappa.Nagarahalli@arm.com>; Phil Yang
> > > <Phil.Yang@arm.com>; nd <nd@arm.com>; dev@dpdk.org; nd
> > > <nd@arm.com>
> > > Subject: Re: [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD
> > > bit operation APIs
> > >
> > > 01/04/2020 10:27, Gavin Hu:
> > > > Hi Thomas,
> > > >
> > > > From: Thomas Monjalon <thomas@monjalon.net>
> > > > >
> > > > > Hi,
> > > > >
> > > > > 09/03/2020 10:54, Joyce Kong:
> > > > > > Bitwise operation APIs are defined and used in a lot of PMDs,
> > > > > > which caused a huge code duplication.
> > > > >
> > > > > Statistics of the series: 653 insertions(+), 326 deletions(-)
> > > > > I would not say it is a huge duplication.
> > > > We did not include all PMDs, just a few for piloting and seeking opinions.
> > > > It is a huge duplication when counting all the PMDs.
> > > > >
> > > > > > To reduce duplication,
> > > > > > this patch consolidates them into a common API family.
> > > > > [...]
> > > > > > +PMD Bitops
> > > > > > +M: Joyce Kong <joyce.kong@arm.com>
> > > > > > +F: lib/librte_eal/common/include/rte_pmd_bitops.h

Change to lib/librte_eal/include/rte_pmd_bitops.h. Check top of tree.

> > > > >
> > > > > Why is it called PMD bitops and not simply bitops?
> > > >
> > > > The scope of these APIs are decreased to PMD use only, for
> > > libraries/applications, it is recommended to use C11 directly as there are
> > > complications of more ordering models involved.
> > >
> > > OK, but PMD means nothing, except this is where it is used *now*.
> > > Please describe and name the API with memory ordering words.
> > Will remove 'PMD' in v8.
> > The APIs were already named with a '_relaxed' suffix, for example 'rte_get_bit64_relaxed'.
> > According to Honnappa, this patch set just address PMD's requirement, and the current PMDs are not using C11, so only '_relaxed' version is offered.
> > http://inbox.dpdk.org/dev/VE1PR08MB514983C3200859B27F166EBB983F0@VE1PR08MB5149.eurprd08.prod.outlook.com/
>
> So why not calling this component "relaxed bitops"?

In the future, we can extend to more memory orders as needed. IMO,
Just changing to rte_bitops.h is enough.

>
>
>
>

^ permalink raw reply	[flat|nested] 139+ messages in thread

* Re: [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD bit operation APIs
  2020-04-02  8:11             ` Jerin Jacob
@ 2020-04-02  9:02               ` Gavin Hu
  0 siblings, 0 replies; 139+ messages in thread
From: Gavin Hu @ 2020-04-02  9:02 UTC (permalink / raw)
  To: Jerin Jacob, thomas
  Cc: Joyce Kong, stephen, david.marchand, mb, jerinj,
	bruce.richardson, ravi1.kumar, rmody, shshaikh, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, Honnappa Nagarahalli, Phil Yang,
	nd, dev, nd



> -----Original Message-----
> From: Jerin Jacob <jerinjacobk@gmail.com>
> Sent: Thursday, April 2, 2020 4:12 PM
> To: thomas@monjalon.net
> Cc: Joyce Kong <Joyce.Kong@arm.com>; Gavin Hu <Gavin.Hu@arm.com>;
> stephen@networkplumber.org; david.marchand@redhat.com;
> mb@smartsharesystems.com; jerinj@marvell.com;
> bruce.richardson@intel.com; ravi1.kumar@amd.com; rmody@marvell.com;
> shshaikh@marvell.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; Phil Yang
> <Phil.Yang@arm.com>; nd <nd@arm.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of PMD
> bit operation APIs
> 
> On Thu, Apr 2, 2020 at 1:37 PM Thomas Monjalon <thomas@monjalon.net>
> wrote:
> >
> > 02/04/2020 09:20, Gavin Hu:
> > > Hi Thomas,
> > >
> > > > -----Original Message-----
> > > > From: Thomas Monjalon <thomas@monjalon.net>
> > > > Sent: Wednesday, April 1, 2020 5:45 PM
> > > > To: Joyce Kong <Joyce.Kong@arm.com>; Gavin Hu
> <Gavin.Hu@arm.com>
> > > > Cc: stephen@networkplumber.org; david.marchand@redhat.com;
> > > > mb@smartsharesystems.com; jerinj@marvell.com;
> > > > bruce.richardson@intel.com; ravi1.kumar@amd.com;
> rmody@marvell.com;
> > > > shshaikh@marvell.com; xuanziyang2@huawei.com;
> > > > cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
> Honnappa
> > > > Nagarahalli <Honnappa.Nagarahalli@arm.com>; Phil Yang
> > > > <Phil.Yang@arm.com>; nd <nd@arm.com>; dev@dpdk.org; nd
> > > > <nd@arm.com>
> > > > Subject: Re: [dpdk-dev] [PATCH v7 1/6] lib/eal: implement the family of
> PMD
> > > > bit operation APIs
> > > >
> > > > 01/04/2020 10:27, Gavin Hu:
> > > > > Hi Thomas,
> > > > >
> > > > > From: Thomas Monjalon <thomas@monjalon.net>
> > > > > >
> > > > > > Hi,
> > > > > >
> > > > > > 09/03/2020 10:54, Joyce Kong:
> > > > > > > Bitwise operation APIs are defined and used in a lot of PMDs,
> > > > > > > which caused a huge code duplication.
> > > > > >
> > > > > > Statistics of the series: 653 insertions(+), 326 deletions(-)
> > > > > > I would not say it is a huge duplication.
> > > > > We did not include all PMDs, just a few for piloting and seeking
> opinions.
> > > > > It is a huge duplication when counting all the PMDs.
> > > > > >
> > > > > > > To reduce duplication,
> > > > > > > this patch consolidates them into a common API family.
> > > > > > [...]
> > > > > > > +PMD Bitops
> > > > > > > +M: Joyce Kong <joyce.kong@arm.com>
> > > > > > > +F: lib/librte_eal/common/include/rte_pmd_bitops.h
> 
> Change to lib/librte_eal/include/rte_pmd_bitops.h. Check top of tree.
Yes, will rebase in v8.
> 
> > > > > >
> > > > > > Why is it called PMD bitops and not simply bitops?
> > > > >
> > > > > The scope of these APIs are decreased to PMD use only, for
> > > > libraries/applications, it is recommended to use C11 directly as there
> are
> > > > complications of more ordering models involved.
> > > >
> > > > OK, but PMD means nothing, except this is where it is used *now*.
> > > > Please describe and name the API with memory ordering words.
> > > Will remove 'PMD' in v8.
> > > The APIs were already named with a '_relaxed' suffix, for example
> 'rte_get_bit64_relaxed'.
> > > According to Honnappa, this patch set just address PMD's requirement,
> and the current PMDs are not using C11, so only '_relaxed' version is
> offered.
> > >
> http://inbox.dpdk.org/dev/VE1PR08MB514983C3200859B27F166EBB983F0
> @VE1PR08MB5149.eurprd08.prod.outlook.com/
> >
> > So why not calling this component "relaxed bitops"?
> 
> In the future, we can extend to more memory orders as needed. IMO,
> Just changing to rte_bitops.h is enough.
Ok, will change to rte_bitops.h to leave room for future extension. 
> 
> >
> >
> >
> >

^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v8 0/6] implement common bit operation APIs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (47 preceding siblings ...)
  2020-03-09  9:54 ` [dpdk-dev] [PATCH v7 6/6] net/hinic: " Joyce Kong
@ 2020-04-16  5:38 ` Joyce Kong
  2020-04-16  5:38 ` [dpdk-dev] [PATCH v8 1/6] lib/eal: implement the family of " Joyce Kong
                   ` (19 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2020-04-16  5:38 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, gavin.hu, phil.yang
  Cc: nd, dev

Bitwise operation APIs are defined and used in a lot of PMDs,
which caused a huge code duplication. To reduce duplication,
this patch set consolidates them into a common API family and uses
it for all the PMDs (we started with a selected few).

v8:
  1. Rename 'rte_pmd_bitops.h' to 'rte_bitops.h' to allow for
     future extension to use cases beyond PMDs.
  2. Replace 'unsigned long' with 'uint32_t' to fix compilation
     errors: 'unsigned long' has a different size on 32-bit and
     64-bit systems, which caused incompatibility.

v7:
  1. Change the API's header file to 'rte_pmd_bitops.h' as a common-
     but-not-public file.
  2. Replace C11 atomic operations with 'volatile'. As the use cases
     are limited to PMDs, a 'volatile' is sufficient.

v5,v6:
  Trim 'unsigned long' in PMDs down to 'uint32_t', as on mainstream
  64-bit OS, 'unsigned long' is 64-bit in size, but the 32-bit OS
  expects 32-bit 'unsigned long' argument.

v4:
  Introduce uint32_t/uint64_t *addr when defining the bit operation
  APIs.

v3:
  1. Change the API's header file back to rte_bitops.h, then implement
     both 32-bit and 64-bit operations with and without C11 atomic
     memory ordering.
  2. Add multi-core test case for bit operations which implemented
     with memory ordering.
  3. Modify the doc of both APIs and test cases.

v2:
  1. Add doxygen comments for the rte bit operation API.
  2. Add test cases for common rte bit operation API.
  3. Change the header file to rte_io_bitops.h and the operation to
     rte_io_set_bit() etc., as the API uses barriers inside and the
     barriers are only needed for IO operations.
  4. Use a well-defined uint_NN_t type.

Joyce Kong (6):
  lib/eal: implement the family of common bit operation APIs
  test/bitops: add bit operation test case
  net/axgbe: use common rte bit operation APIs instead
  net/bnx2x: use common rte bit operation APIs instead
  net/qede: use common rte bit operation APIs instead
  net/hinic: use common rte bit operation APIs instead

 MAINTAINERS                               |   5 +
 app/test/Makefile                         |   1 +
 app/test/autotest_data.py                 |   6 +
 app/test/meson.build                      |   2 +
 app/test/test_bitops.c                    | 137 +++++++++++
 doc/api/doxy-api-index.md                 |   5 +-
 drivers/net/axgbe/axgbe_common.h          |  29 +--
 drivers/net/axgbe/axgbe_ethdev.c          |  14 +-
 drivers/net/axgbe/axgbe_ethdev.h          |   2 +-
 drivers/net/axgbe/axgbe_mdio.c            |  15 +-
 drivers/net/bnx2x/bnx2x.c                 | 271 +++++++++++-----------
 drivers/net/bnx2x/bnx2x.h                 |  10 +-
 drivers/net/bnx2x/ecore_sp.c              |  68 +++---
 drivers/net/bnx2x/ecore_sp.h              | 106 ++++-----
 drivers/net/hinic/Makefile                |   1 +
 drivers/net/hinic/base/hinic_compat.h     |  33 +--
 drivers/net/hinic/hinic_pmd_ethdev.c      |  18 +-
 drivers/net/hinic/hinic_pmd_ethdev.h      |   2 +-
 drivers/net/hinic/meson.build             |   2 +
 drivers/net/qede/base/bcm_osal.c          |  22 +-
 drivers/net/qede/base/bcm_osal.h          |  14 +-
 drivers/net/qede/base/ecore.h             |   6 +-
 drivers/net/qede/base/ecore_cxt.c         |   6 +-
 drivers/net/qede/base/ecore_dcbx.c        |   8 +-
 drivers/net/qede/base/ecore_dev.c         |  38 +--
 drivers/net/qede/base/ecore_dev_api.h     |   2 +-
 drivers/net/qede/base/ecore_l2.c          |   6 +-
 drivers/net/qede/base/ecore_mcp.c         |   4 +-
 drivers/net/qede/base/ecore_sp_commands.c |  12 +-
 drivers/net/qede/base/ecore_spq.c         |   2 +-
 drivers/net/qede/base/ecore_spq.h         |  10 +-
 drivers/net/qede/qede_main.c              |   4 +-
 lib/librte_eal/include/meson.build        |   1 +
 lib/librte_eal/include/rte_bitops.h       | 258 ++++++++++++++++++++
 34 files changed, 722 insertions(+), 398 deletions(-)
 create mode 100644 app/test/test_bitops.c
 create mode 100644 lib/librte_eal/include/rte_bitops.h

-- 
2.17.1


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v8 1/6] lib/eal: implement the family of common bit operation APIs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (48 preceding siblings ...)
  2020-04-16  5:38 ` [dpdk-dev] [PATCH v8 0/6] implement common bit operation APIs Joyce Kong
@ 2020-04-16  5:38 ` " Joyce Kong
  2020-04-16 18:55   ` [dpdk-dev] [PATCH v8 1/6] lib/eal: implement the family of commonbit " Morten Brørup
  2020-04-17  9:38   ` [dpdk-dev] [PATCH v8 1/6] lib/eal: implement the family of common bit " Jerin Jacob
  2020-04-16  5:38 ` [dpdk-dev] [PATCH v8 2/6] test/bitops: add bit operation test case Joyce Kong
                   ` (18 subsequent siblings)
  68 siblings, 2 replies; 139+ messages in thread
From: Joyce Kong @ 2020-04-16  5:38 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, gavin.hu, phil.yang
  Cc: nd, dev

Bitwise operation APIs are defined and used in a lot of PMDs,
which caused a huge code duplication. To reduce duplication,
this patch consolidates them into a common API family.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 MAINTAINERS                         |   4 +
 doc/api/doxy-api-index.md           |   5 +-
 lib/librte_eal/include/meson.build  |   1 +
 lib/librte_eal/include/rte_bitops.h | 258 ++++++++++++++++++++++++++++
 4 files changed, 266 insertions(+), 2 deletions(-)
 create mode 100644 lib/librte_eal/include/rte_bitops.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 4800f6884..1d5fad28b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -241,6 +241,10 @@ M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
 F: lib/librte_eal/include/rte_bitmap.h
 F: app/test/test_bitmap.c
 
+Bitops
+M: Joyce Kong <joyce.kong@arm.com>
+F: lib/librte_eal/include/rte_bitops.h
+
 MCSlock - EXPERIMENTAL
 M: Phil Yang <phil.yang@arm.com>
 F: lib/librte_eal/include/generic/rte_mcslock.h
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index dff496be0..b63e782ec 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -9,6 +9,7 @@ API {#index}
 The public API headers are grouped by topics:
 
 - **device**:
+  [bitops]             (@ref rte_bitops.h),
   [dev]                (@ref rte_dev.h),
   [ethdev]             (@ref rte_ethdev.h),
   [ethctrl]            (@ref rte_eth_ctrl.h),
@@ -133,12 +134,12 @@ The public API headers are grouped by topics:
   [BPF]                (@ref rte_bpf.h)
 
 - **containers**:
+  [bitmap]             (@ref rte_bitmap.h),
   [mbuf]               (@ref rte_mbuf.h),
   [mbuf pool ops]      (@ref rte_mbuf_pool_ops.h),
   [ring]               (@ref rte_ring.h),
   [stack]              (@ref rte_stack.h),
-  [tailq]              (@ref rte_tailq.h),
-  [bitmap]             (@ref rte_bitmap.h)
+  [tailq]              (@ref rte_tailq.h)
 
 - **packet framework**:
   * [port]             (@ref rte_port.h):
diff --git a/lib/librte_eal/include/meson.build b/lib/librte_eal/include/meson.build
index 6fd427494..3afb50a5b 100644
--- a/lib/librte_eal/include/meson.build
+++ b/lib/librte_eal/include/meson.build
@@ -6,6 +6,7 @@ includes += include_directories('.')
 headers += files(
 	'rte_alarm.h',
 	'rte_bitmap.h',
+	'rte_bitops.h',
 	'rte_branch_prediction.h',
 	'rte_bus.h',
 	'rte_class.h',
diff --git a/lib/librte_eal/include/rte_bitops.h b/lib/librte_eal/include/rte_bitops.h
new file mode 100644
index 000000000..b942b677c
--- /dev/null
+++ b/lib/librte_eal/include/rte_bitops.h
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Arm Limited
+ */
+
+#ifndef _RTE_BITOPS_H_
+#define _RTE_BITOPS_H_
+
+/**
+ * @file
+ * Bit Operations
+ *
+ * This file defines a family of APIs for bit operations
+ * without enforcing memory ordering.
+ */
+
+#include <stdint.h>
+#include <rte_debug.h>
+#include <rte_compat.h>
+
+/*---------------------------- 32 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 32-bit value without memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_get_bit32_relaxed(unsigned int nr, volatile uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return (*addr) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 32-bit value to 1 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit32_relaxed(unsigned int nr, volatile uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	*addr = (*addr) | mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 32-bit value to 0 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit32_relaxed(unsigned int nr, volatile uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	*addr = (*addr) & (~mask);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then set it to 1 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_set_bit32_relaxed(unsigned int nr, volatile uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	uint32_t val = *addr;
+	*addr = (*addr) | mask;
+	return val & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then clear it to 0 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_clear_bit32_relaxed(unsigned int nr, volatile uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	uint32_t val = *addr;
+	*addr = (*addr) & (~mask);
+	return val & mask;
+}
+
+/*---------------------------- 64 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 64-bit value without memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_get_bit64_relaxed(unsigned int nr, volatile uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return (*addr) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 64-bit value to 1 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit64_relaxed(unsigned int nr, volatile uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	(*addr) = (*addr) | mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 64-bit value to 0 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit64_relaxed(unsigned int nr, volatile uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	*addr = (*addr) & (~mask);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then set it to 1 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set; must be less than 64.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit, masked in place (non-zero if it was set, else 0).
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_set_bit64_relaxed(unsigned int nr, volatile uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	uint64_t val = *addr;
+	*addr = (*addr) | mask;
+	return val & mask; /* only bit nr; other bits of the word are dropped */
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then clear it to 0 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_clear_bit64_relaxed(unsigned int nr, volatile uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	uint64_t val = *addr;
+	*addr = (*addr) & (~mask);
+	return val & mask;
+}
+
+#endif /* _RTE_BITOPS_H_ */
-- 
2.17.1


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v8 2/6] test/bitops: add bit operation test case
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (49 preceding siblings ...)
  2020-04-16  5:38 ` [dpdk-dev] [PATCH v8 1/6] lib/eal: implement the family of " Joyce Kong
@ 2020-04-16  5:38 ` Joyce Kong
  2020-04-16  5:38 ` [dpdk-dev] [PATCH v8 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
                   ` (17 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2020-04-16  5:38 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, gavin.hu, phil.yang
  Cc: nd, dev

Add test cases for setting bit, clearing bit, testing
and setting bit, testing and clearing bit operation.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 MAINTAINERS               |   1 +
 app/test/Makefile         |   1 +
 app/test/autotest_data.py |   6 ++
 app/test/meson.build      |   2 +
 app/test/test_bitops.c    | 137 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 147 insertions(+)
 create mode 100644 app/test/test_bitops.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 1d5fad28b..4b4b80fe6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -244,6 +244,7 @@ F: app/test/test_bitmap.c
 Bitops
 M: Joyce Kong <joyce.kong@arm.com>
 F: lib/librte_eal/include/rte_bitops.h
+F: app/test/test_bitops.c
 
 MCSlock - EXPERIMENTAL
 M: Phil Yang <phil.yang@arm.com>
diff --git a/app/test/Makefile b/app/test/Makefile
index 1f080d162..76014764e 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -70,6 +70,7 @@ SRCS-y += test_ticketlock.c
 SRCS-y += test_memory.c
 SRCS-y += test_memzone.c
 SRCS-y += test_bitmap.c
+SRCS-y += test_bitops.c
 SRCS-y += test_reciprocal_division.c
 SRCS-y += test_reciprocal_division_perf.c
 SRCS-y += test_fbarray.c
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 7b1d01389..fc3fcc159 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -404,6 +404,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Bitops test",
+        "Command": "bitops_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Hash multiwriter autotest",
         "Command": "hash_multiwriter_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index 351d29cb6..da3ca523e 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -12,6 +12,7 @@ test_sources = files('commands.c',
 	'test_alarm.c',
 	'test_atomic.c',
 	'test_barrier.c',
+	'test_bitops.c',
 	'test_bpf.c',
 	'test_byteorder.c',
 	'test_cmdline.c',
@@ -160,6 +161,7 @@ fast_tests = [
         ['acl_autotest', true],
         ['alarm_autotest', false],
         ['atomic_autotest', false],
+        ['bitops_autotest', true],
         ['byteorder_autotest', true],
         ['cmdline_autotest', true],
         ['common_autotest', true],
diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
new file mode 100644
index 000000000..01245b17c
--- /dev/null
+++ b/app/test/test_bitops.c
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#include <rte_launch.h>
+#include <rte_bitops.h>
+#include "test.h"
+
+uint32_t val32;
+uint64_t val64;
+
+#define MAX_BITS_32 32
+#define MAX_BITS_64 64
+/*
+ * Bitops functions
+ * ================
+ *
+ * - The main test function performs several subtests.
+ * - Check bit operations on one core.
+ *   - Initialize valXX to zero, then set each bit of valXX to 1, one
+ *     bit at a time, in "test_bitops_set_relaxed".
+ *
+ *   - Clear each bit of valXX to 0 one by one in "test_bitops_clear_relaxed".
+ *
+ *   - Function "test_bitops_test_set_clear_relaxed" checks whether each bit
+ *     of valXX can do "test and set" and "test and clear" correctly.
+ */
+
+static int
+test_bitops_set_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_set_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_set_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_clear_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_clear_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_clear_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_test_set_clear_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_test_and_set_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_test_and_clear_bit32_relaxed(i, &val32)) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_test_and_set_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_test_and_clear_bit64_relaxed(i, &val64)) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops(void)
+{
+	val32 = 0;
+	val64 = 0;
+
+	if (test_bitops_set_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_test_set_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(bitops_autotest, test_bitops);
-- 
2.17.1


^ permalink raw reply	[flat|nested] 139+ messages in thread

* [dpdk-dev] [PATCH v8 3/6] net/axgbe: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (50 preceding siblings ...)
  2020-04-16  5:38 ` [dpdk-dev] [PATCH v8 2/6] test/bitops: add bit operation test case Joyce Kong
@ 2020-04-16  5:38 ` Joyce Kong
  2020-04-16  5:38 ` [dpdk-dev] [PATCH v8 4/6] net/bnx2x: " Joyce Kong
                   ` (16 subsequent siblings)
  68 siblings, 0 replies; 139+ messages in thread
From: Joyce Kong @ 2020-04-16  5:38 UTC (permalink / raw)
  To: thomas, stephen, david.marchand, mb, jerinj, bruce.richardson,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, honnappa.nagarahalli, gavin.hu, phil.yang
  Cc: nd, dev

Remove the driver's own bit operation APIs and use the common ones;
this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/axgbe/axgbe_common.h | 29 +----------------------------
 drivers/net/axgbe/axgbe_ethdev.c | 14 +++++++-------
 drivers/net/axgbe/axgbe_ethdev.h |  2 +-
 drivers/net/axgbe/axgbe_mdio.c   | 15 ++++++++-------
 4 files changed, 17 insertions(+), 43 deletions(-)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index f48117180..d53b48ce6 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -21,6 +21,7 @@
 #include <inttypes.h>
 #include <pthread.h>
 
+#include <rte_bitops.h>
 #include <rte_byteorder.h>
 #include <rte_memory.h>
 #include <rte_malloc.h>
@@ -1700,34 +1701,6 @@ do {									\
 #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)
 #define time_before_eq(a, b)	time_after_eq(b, a)
 
-/*---bitmap support apis---*/
-static inline int axgbe_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
-static inline void axgbe_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync