DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 0/6] optimization and bugfix for hns3 PMD
@ 2021-04-26  3:34 Min Hu (Connor)
  2021-04-26  3:34 ` [dpdk-dev] [PATCH 1/6] net/hns3: delete some unused capabilities Min Hu (Connor)
                   ` (7 more replies)
  0 siblings, 8 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-26  3:34 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

This patch set contains one performance optimization patch,
'net/hns3: improve IO path data cache usage'.
The others are bugfixes for the hns3 PMD.

Chengwen Feng (6):
  net/hns3: delete some unused capabilities
  net/hns3: modify write reg opt API impl
  net/hns3: use RTE DIM instead of ARRAY SIZE
  net/hns3: improve IO path data cache usage
  net/hns3: log fdir configuration
  net/hns3: fix vector Rx burst default value

 drivers/net/hns3/hns3_cmd.c    |  11 +---
 drivers/net/hns3/hns3_cmd.h    |  15 +++--
 drivers/net/hns3/hns3_ethdev.h |  43 +++++++-------
 drivers/net/hns3/hns3_fdir.c   |  13 +++++
 drivers/net/hns3/hns3_flow.c   |  18 +++---
 drivers/net/hns3/hns3_intr.c   |   4 +-
 drivers/net/hns3/hns3_rxtx.h   | 128 ++++++++++++++++++++++++-----------------
 7 files changed, 129 insertions(+), 103 deletions(-)

-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH 1/6] net/hns3: delete some unused capabilities
  2021-04-26  3:34 [dpdk-dev] [PATCH 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
@ 2021-04-26  3:34 ` Min Hu (Connor)
  2021-04-27 13:37   ` Ferruh Yigit
  2021-04-26  3:34 ` [dpdk-dev] [PATCH 2/6] net/hns3: modify write reg opt API impl Min Hu (Connor)
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-26  3:34 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

This patch deletes some unused capabilities, including:
1. Unused firmware capability definitions.
2. Unused driver capability definitions.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_cmd.c    |  7 -------
 drivers/net/hns3/hns3_cmd.h    | 15 +++++++--------
 drivers/net/hns3/hns3_ethdev.h | 29 ++++++++++++++---------------
 3 files changed, 21 insertions(+), 30 deletions(-)

diff --git a/drivers/net/hns3/hns3_cmd.c b/drivers/net/hns3/hns3_cmd.c
index 62dfc19..4651d5b 100644
--- a/drivers/net/hns3/hns3_cmd.c
+++ b/drivers/net/hns3/hns3_cmd.c
@@ -423,11 +423,8 @@ hns3_get_caps_name(uint32_t caps_id)
 		enum HNS3_CAPS_BITS caps;
 		const char *name;
 	} dev_caps[] = {
-		{ HNS3_CAPS_UDP_GSO_B,         "udp_gso"         },
-		{ HNS3_CAPS_ATR_B,             "atr"             },
 		{ HNS3_CAPS_FD_QUEUE_REGION_B, "fd_queue_region" },
 		{ HNS3_CAPS_PTP_B,             "ptp"             },
-		{ HNS3_CAPS_INT_QL_B,          "int_ql"          },
 		{ HNS3_CAPS_SIMPLE_BD_B,       "simple_bd"       },
 		{ HNS3_CAPS_TX_PUSH_B,         "tx_push"         },
 		{ HNS3_CAPS_PHY_IMP_B,         "phy_imp"         },
@@ -436,8 +433,6 @@ hns3_get_caps_name(uint32_t caps_id)
 		{ HNS3_CAPS_STASH_B,           "stash"           },
 		{ HNS3_CAPS_UDP_TUNNEL_CSUM_B, "udp_tunnel_csum" },
 		{ HNS3_CAPS_RAS_IMP_B,         "ras_imp"         },
-		{ HNS3_CAPS_FEC_B,             "fec"             },
-		{ HNS3_CAPS_PAUSE_B,           "pause"           },
 		{ HNS3_CAPS_RXD_ADV_LAYOUT_B,  "rxd_adv_layout"  }
 	};
 	uint32_t i;
@@ -484,8 +479,6 @@ hns3_parse_capability(struct hns3_hw *hw,
 {
 	uint32_t caps = rte_le_to_cpu_32(cmd->caps[0]);
 
-	if (hns3_get_bit(caps, HNS3_CAPS_UDP_GSO_B))
-		hns3_set_bit(hw->capability, HNS3_DEV_SUPPORT_UDP_GSO_B, 1);
 	if (hns3_get_bit(caps, HNS3_CAPS_FD_QUEUE_REGION_B))
 		hns3_set_bit(hw->capability, HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B,
 			     1);
diff --git a/drivers/net/hns3/hns3_cmd.h b/drivers/net/hns3/hns3_cmd.h
index bf1772d..984f2a9 100644
--- a/drivers/net/hns3/hns3_cmd.h
+++ b/drivers/net/hns3/hns3_cmd.h
@@ -306,12 +306,13 @@ struct hns3_rx_priv_buff_cmd {
 #define HNS3_FW_VERSION_BYTE0_M		GENMASK(7, 0)
 
 enum HNS3_CAPS_BITS {
-	HNS3_CAPS_UDP_GSO_B,
-	HNS3_CAPS_ATR_B,
-	HNS3_CAPS_FD_QUEUE_REGION_B,
+	/*
+	 * The following capability index definitions must be the same as those
+	 * of the firmware.
+	 */
+	HNS3_CAPS_FD_QUEUE_REGION_B = 2,
 	HNS3_CAPS_PTP_B,
-	HNS3_CAPS_INT_QL_B,
-	HNS3_CAPS_SIMPLE_BD_B,
+	HNS3_CAPS_SIMPLE_BD_B = 5,
 	HNS3_CAPS_TX_PUSH_B,
 	HNS3_CAPS_PHY_IMP_B,
 	HNS3_CAPS_TQP_TXRX_INDEP_B,
@@ -319,9 +320,7 @@ enum HNS3_CAPS_BITS {
 	HNS3_CAPS_STASH_B,
 	HNS3_CAPS_UDP_TUNNEL_CSUM_B,
 	HNS3_CAPS_RAS_IMP_B,
-	HNS3_CAPS_FEC_B,
-	HNS3_CAPS_PAUSE_B,
-	HNS3_CAPS_RXD_ADV_LAYOUT_B,
+	HNS3_CAPS_RXD_ADV_LAYOUT_B = 15,
 };
 
 enum HNS3_API_CAP_BITS {
diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h
index d27c725..de17878 100644
--- a/drivers/net/hns3/hns3_ethdev.h
+++ b/drivers/net/hns3/hns3_ethdev.h
@@ -855,17 +855,20 @@ enum {
 
 #define HNS3_DEVARG_DEV_CAPS_MASK	"dev_caps_mask"
 
-#define HNS3_DEV_SUPPORT_DCB_B			0x0
-#define HNS3_DEV_SUPPORT_COPPER_B		0x1
-#define HNS3_DEV_SUPPORT_UDP_GSO_B		0x2
-#define HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B	0x3
-#define HNS3_DEV_SUPPORT_PTP_B			0x4
-#define HNS3_DEV_SUPPORT_TX_PUSH_B		0x5
-#define HNS3_DEV_SUPPORT_INDEP_TXRX_B		0x6
-#define HNS3_DEV_SUPPORT_STASH_B		0x7
-#define HNS3_DEV_SUPPORT_RXD_ADV_LAYOUT_B	0x9
-#define HNS3_DEV_SUPPORT_OUTER_UDP_CKSUM_B	0xA
-#define HNS3_DEV_SUPPORT_RAS_IMP_B		0xB
+enum {
+	HNS3_DEV_SUPPORT_DCB_B,
+	HNS3_DEV_SUPPORT_COPPER_B,
+	HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B,
+	HNS3_DEV_SUPPORT_PTP_B,
+	HNS3_DEV_SUPPORT_TX_PUSH_B,
+	HNS3_DEV_SUPPORT_INDEP_TXRX_B,
+	HNS3_DEV_SUPPORT_STASH_B,
+	HNS3_DEV_SUPPORT_SIMPLE_BD_B,
+	HNS3_DEV_SUPPORT_RXD_ADV_LAYOUT_B,
+	HNS3_DEV_SUPPORT_OUTER_UDP_CKSUM_B,
+	HNS3_DEV_SUPPORT_RAS_IMP_B,
+	HNS3_DEV_SUPPORT_TM_B,
+};
 
 #define hns3_dev_dcb_supported(hw) \
 	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_DCB_B)
@@ -874,10 +877,6 @@ enum {
 #define hns3_dev_copper_supported(hw) \
 	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_COPPER_B)
 
-/* Support UDP GSO offload */
-#define hns3_dev_udp_gso_supported(hw) \
-	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_UDP_GSO_B)
-
 /* Support the queue region action rule of flow directory */
 #define hns3_dev_fd_queue_region_supported(hw) \
 	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH 2/6] net/hns3: modify write reg opt API impl
  2021-04-26  3:34 [dpdk-dev] [PATCH 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
  2021-04-26  3:34 ` [dpdk-dev] [PATCH 1/6] net/hns3: delete some unused capabilities Min Hu (Connor)
@ 2021-04-26  3:34 ` Min Hu (Connor)
  2021-04-26  3:34 ` [dpdk-dev] [PATCH 3/6] net/hns3: use RTE DIM instead of ARRAY SIZE Min Hu (Connor)
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-26  3:34 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

This patch simplifies the hns3_write_reg_opt() implementation to call
rte_write32() directly, because rte_write32() already includes rte_io_wmb().

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_ethdev.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h
index de17878..68541df 100644
--- a/drivers/net/hns3/hns3_ethdev.h
+++ b/drivers/net/hns3/hns3_ethdev.h
@@ -983,13 +983,13 @@ static inline void hns3_write_reg(void *base, uint32_t reg, uint32_t value)
 }
 
 /*
- * The optimized function for writing registers used in the '.rx_pkt_burst' and
- * '.tx_pkt_burst' ops implementation function.
+ * The optimized function for writing registers reduces one address addition
+ * calculation, it was used in the '.rx_pkt_burst' and '.tx_pkt_burst' ops
+ * implementation function.
  */
 static inline void hns3_write_reg_opt(volatile void *addr, uint32_t value)
 {
-	rte_io_wmb();
-	rte_write32_relaxed(rte_cpu_to_le_32(value), addr);
+	rte_write32(rte_cpu_to_le_32(value), addr);
 }
 
 static inline uint32_t hns3_read_reg(void *base, uint32_t reg)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH 3/6] net/hns3: use RTE DIM instead of ARRAY SIZE
  2021-04-26  3:34 [dpdk-dev] [PATCH 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
  2021-04-26  3:34 ` [dpdk-dev] [PATCH 1/6] net/hns3: delete some unused capabilities Min Hu (Connor)
  2021-04-26  3:34 ` [dpdk-dev] [PATCH 2/6] net/hns3: modify write reg opt API impl Min Hu (Connor)
@ 2021-04-26  3:34 ` Min Hu (Connor)
  2021-04-26  3:34 ` [dpdk-dev] [PATCH 4/6] net/hns3: improve IO path data cache usage Min Hu (Connor)
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-26  3:34 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

This patch uses RTE_DIM() instead of ARRAY_SIZE().

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_cmd.c    |  4 ++--
 drivers/net/hns3/hns3_ethdev.h |  2 --
 drivers/net/hns3/hns3_flow.c   | 18 +++++++++---------
 drivers/net/hns3/hns3_intr.c   |  4 ++--
 4 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/net/hns3/hns3_cmd.c b/drivers/net/hns3/hns3_cmd.c
index 4651d5b..2eec895 100644
--- a/drivers/net/hns3/hns3_cmd.c
+++ b/drivers/net/hns3/hns3_cmd.c
@@ -245,7 +245,7 @@ hns3_is_special_opcode(uint16_t opcode)
 				  HNS3_OPC_QUERY_ALL_ERR_INFO,};
 	uint32_t i;
 
-	for (i = 0; i < ARRAY_SIZE(spec_opcode); i++)
+	for (i = 0; i < RTE_DIM(spec_opcode); i++)
 		if (spec_opcode[i] == opcode)
 			return true;
 
@@ -276,7 +276,7 @@ hns3_cmd_convert_err_code(uint16_t desc_ret)
 
 	uint32_t i;
 
-	for (i = 0; i < ARRAY_SIZE(hns3_cmdq_status); i++)
+	for (i = 0; i < RTE_DIM(hns3_cmdq_status); i++)
 		if (hns3_cmdq_status[i].imp_errcode == desc_ret)
 			return hns3_cmdq_status[i].linux_errcode;
 
diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h
index 68541df..258e26d 100644
--- a/drivers/net/hns3/hns3_ethdev.h
+++ b/drivers/net/hns3/hns3_ethdev.h
@@ -1004,8 +1004,6 @@ static inline uint32_t hns3_read_reg(void *base, uint32_t reg)
 #define hns3_read_dev(a, reg) \
 	hns3_read_reg((a)->io_base, (reg))
 
-#define ARRAY_SIZE(x) RTE_DIM(x)
-
 #define NEXT_ITEM_OF_ACTION(act, actions, index)                        \
 	do {								\
 		act = (actions) + (index);				\
diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c
index 4511a49..49d6568 100644
--- a/drivers/net/hns3/hns3_flow.c
+++ b/drivers/net/hns3/hns3_flow.c
@@ -1054,37 +1054,37 @@ hns3_parse_normal(const struct rte_flow_item *item, struct hns3_fdir_rule *rule,
 	case RTE_FLOW_ITEM_TYPE_ETH:
 		ret = hns3_parse_eth(item, rule, error);
 		step_mngr->items = L2_next_items;
-		step_mngr->count = ARRAY_SIZE(L2_next_items);
+		step_mngr->count = RTE_DIM(L2_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_VLAN:
 		ret = hns3_parse_vlan(item, rule, error);
 		step_mngr->items = L2_next_items;
-		step_mngr->count = ARRAY_SIZE(L2_next_items);
+		step_mngr->count = RTE_DIM(L2_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_IPV4:
 		ret = hns3_parse_ipv4(item, rule, error);
 		step_mngr->items = L3_next_items;
-		step_mngr->count = ARRAY_SIZE(L3_next_items);
+		step_mngr->count = RTE_DIM(L3_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_IPV6:
 		ret = hns3_parse_ipv6(item, rule, error);
 		step_mngr->items = L3_next_items;
-		step_mngr->count = ARRAY_SIZE(L3_next_items);
+		step_mngr->count = RTE_DIM(L3_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_TCP:
 		ret = hns3_parse_tcp(item, rule, error);
 		step_mngr->items = L4_next_items;
-		step_mngr->count = ARRAY_SIZE(L4_next_items);
+		step_mngr->count = RTE_DIM(L4_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_UDP:
 		ret = hns3_parse_udp(item, rule, error);
 		step_mngr->items = L4_next_items;
-		step_mngr->count = ARRAY_SIZE(L4_next_items);
+		step_mngr->count = RTE_DIM(L4_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_SCTP:
 		ret = hns3_parse_sctp(item, rule, error);
 		step_mngr->items = L4_next_items;
-		step_mngr->count = ARRAY_SIZE(L4_next_items);
+		step_mngr->count = RTE_DIM(L4_next_items);
 		break;
 	default:
 		return rte_flow_error_set(error, ENOTSUP,
@@ -1188,7 +1188,7 @@ hns3_parse_fdir_filter(struct rte_eth_dev *dev,
 					  "Fdir not supported in VF");
 
 	step_mngr.items = first_items;
-	step_mngr.count = ARRAY_SIZE(first_items);
+	step_mngr.count = RTE_DIM(first_items);
 	for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
 		if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
 			continue;
@@ -1202,7 +1202,7 @@ hns3_parse_fdir_filter(struct rte_eth_dev *dev,
 			if (ret)
 				return ret;
 			step_mngr.items = tunnel_next_items;
-			step_mngr.count = ARRAY_SIZE(tunnel_next_items);
+			step_mngr.count = RTE_DIM(tunnel_next_items);
 		} else {
 			ret = hns3_parse_normal(item, rule, &step_mngr, error);
 			if (ret)
diff --git a/drivers/net/hns3/hns3_intr.c b/drivers/net/hns3/hns3_intr.c
index cc7d7c6..3155d7f 100644
--- a/drivers/net/hns3/hns3_intr.c
+++ b/drivers/net/hns3/hns3_intr.c
@@ -2206,8 +2206,8 @@ hns3_handle_type_reg_error_data(struct hns3_hw *hw,
 	type_id = err_info->type_id & HNS3_ERR_TYPE_MASK;
 	is_ras = err_info->type_id >> HNS3_ERR_TYPE_IS_RAS_OFFSET;
 
-	total_module = ARRAY_SIZE(hns3_hw_module_name);
-	total_type = ARRAY_SIZE(hns3_hw_error_type);
+	total_module = RTE_DIM(hns3_hw_module_name);
+	total_type = RTE_DIM(hns3_hw_error_type);
 
 	hns3_err(hw, "total_module:%u, total_type:%u",
 		 total_module, total_type);
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH 4/6] net/hns3: improve IO path data cache usage
  2021-04-26  3:34 [dpdk-dev] [PATCH 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
                   ` (2 preceding siblings ...)
  2021-04-26  3:34 ` [dpdk-dev] [PATCH 3/6] net/hns3: use RTE DIM instead of ARRAY SIZE Min Hu (Connor)
@ 2021-04-26  3:34 ` Min Hu (Connor)
  2021-04-26  3:34 ` [dpdk-dev] [PATCH 5/6] net/hns3: log fdir configuration Min Hu (Connor)
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-26  3:34 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

This patch improves data cache usage by:
1. Rearranging the frequently accessed rxq fields in the IO path into the
first 128B.
2. Rearranging the frequently accessed txq fields in the IO path into the
first 64B.
3. Making sure the ptype table is aligned to the cache line size, which is
128B, instead of the min cache line size, which is 64B, because the L1/L2
cache line is 64B and the L3 cache line is 128B on the Kunpeng ARM platform.

The performance gains are 1.5% in 64B packet macfwd scenarios.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_ethdev.h |   4 +-
 drivers/net/hns3/hns3_rxtx.h   | 126 ++++++++++++++++++++++++-----------------
 2 files changed, 77 insertions(+), 53 deletions(-)

diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h
index 258e26d..5f860ee 100644
--- a/drivers/net/hns3/hns3_ethdev.h
+++ b/drivers/net/hns3/hns3_ethdev.h
@@ -735,7 +735,7 @@ struct hns3_ptype_table {
 	 * descriptor, it functions only when firmware report the capability of
 	 * HNS3_CAPS_RXD_ADV_LAYOUT_B and driver enabled it.
 	 */
-	uint32_t ptype[HNS3_PTYPE_NUM] __rte_cache_min_aligned;
+	uint32_t ptype[HNS3_PTYPE_NUM] __rte_cache_aligned;
 };
 
 #define HNS3_FIXED_MAX_TQP_NUM_MODE		0
@@ -839,7 +839,7 @@ struct hns3_adapter {
 
 	uint64_t dev_caps_mask;
 
-	struct hns3_ptype_table ptype_tbl __rte_cache_min_aligned;
+	struct hns3_ptype_table ptype_tbl __rte_cache_aligned;
 };
 
 enum {
diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h
index 703c4b7..1e2e994 100644
--- a/drivers/net/hns3/hns3_rxtx.h
+++ b/drivers/net/hns3/hns3_rxtx.h
@@ -289,22 +289,14 @@ struct hns3_rx_bd_errors_stats {
 };
 
 struct hns3_rx_queue {
-	void *io_base;
 	volatile void *io_head_reg;
-	struct hns3_adapter *hns;
 	struct hns3_ptype_table *ptype_tbl;
 	struct rte_mempool *mb_pool;
 	struct hns3_desc *rx_ring;
-	uint64_t rx_ring_phys_addr; /* RX ring DMA address */
-	const struct rte_memzone *mz;
 	struct hns3_entry *sw_ring;
-	struct rte_mbuf *pkt_first_seg;
-	struct rte_mbuf *pkt_last_seg;
 
-	uint16_t queue_id;
 	uint16_t port_id;
 	uint16_t nb_rx_desc;
-	uint16_t rx_buf_len;
 	/*
 	 * threshold for the number of BDs waited to passed to hardware. If the
 	 * number exceeds the threshold, driver will pass these BDs to hardware.
@@ -318,8 +310,6 @@ struct hns3_rx_queue {
 	/* 4 if DEV_RX_OFFLOAD_KEEP_CRC offload set, 0 otherwise */
 	uint8_t crc_len;
 
-	bool rx_deferred_start; /* don't start this queue in dev start */
-	bool configured;        /* indicate if rx queue has been configured */
 	/*
 	 * Indicate whether ignore the outer VLAN field in the Rx BD reported
 	 * by the Hardware. Because the outer VLAN is the PVID if the PVID is
@@ -331,23 +321,45 @@ struct hns3_rx_queue {
 	 * driver does not need to perform PVID-related operation in Rx. At this
 	 * point, the pvid_sw_discard_en will be false.
 	 */
-	bool pvid_sw_discard_en;
-	bool ptype_en;          /* indicate if the ptype field enabled */
-	bool enabled;           /* indicate if Rx queue has been enabled */
+	uint8_t pvid_sw_discard_en:1;
+	uint8_t ptype_en:1;          /* indicate if the ptype field enabled */
+
+	uint64_t mbuf_initializer; /* value to init mbufs used with vector rx */
+	/* offset_table: used for vector, to solve execute re-order problem */
+	uint8_t offset_table[HNS3_VECTOR_RX_OFFSET_TABLE_LEN + 1];
+
+	uint16_t bulk_mbuf_num; /* indicate bulk_mbuf valid nums */
 
 	struct hns3_rx_basic_stats basic_stats;
+
+	struct rte_mbuf *pkt_first_seg;
+	struct rte_mbuf *pkt_last_seg;
+
+	struct rte_mbuf *bulk_mbuf[HNS3_BULK_ALLOC_MBUF_NUM];
+
 	/* DFX statistics that driver does not need to discard packets */
 	struct hns3_rx_dfx_stats dfx_stats;
 	/* Error statistics that driver needs to discard packets */
 	struct hns3_rx_bd_errors_stats err_stats;
 
-	struct rte_mbuf *bulk_mbuf[HNS3_BULK_ALLOC_MBUF_NUM];
-	uint16_t bulk_mbuf_num;
-
-	/* offset_table: used for vector, to solve execute re-order problem */
-	uint8_t offset_table[HNS3_VECTOR_RX_OFFSET_TABLE_LEN + 1];
-	uint64_t mbuf_initializer; /* value to init mbufs used with vector rx */
 	struct rte_mbuf fake_mbuf; /* fake mbuf used with vector rx */
+
+
+	/*
+	 * The following fields are not accessed in the I/O path, so they are
+	 * placed at the end.
+	 */
+	void *io_base;
+	struct hns3_adapter *hns;
+	uint64_t rx_ring_phys_addr; /* RX ring DMA address */
+	const struct rte_memzone *mz;
+
+	uint16_t queue_id;
+	uint16_t rx_buf_len;
+
+	bool configured;        /* indicate if rx queue has been configured */
+	bool rx_deferred_start; /* don't start this queue in dev start */
+	bool enabled;           /* indicate if Rx queue has been enabled */
 };
 
 struct hns3_tx_basic_stats {
@@ -407,16 +419,10 @@ struct hns3_tx_dfx_stats {
 };
 
 struct hns3_tx_queue {
-	void *io_base;
 	volatile void *io_tail_reg;
-	struct hns3_adapter *hns;
 	struct hns3_desc *tx_ring;
-	uint64_t tx_ring_phys_addr; /* TX ring DMA address */
-	const struct rte_memzone *mz;
 	struct hns3_entry *sw_ring;
 
-	uint16_t queue_id;
-	uint16_t port_id;
 	uint16_t nb_tx_desc;
 	/*
 	 * index of next BD whose corresponding rte_mbuf can be released by
@@ -432,21 +438,12 @@ struct hns3_tx_queue {
 	uint16_t tx_free_thresh;
 
 	/*
-	 * For better performance in tx datapath, releasing mbuf in batches is
-	 * required.
-	 * Only checking the VLD bit of the last descriptor in a batch of the
-	 * thresh descriptors does not mean that these descriptors are all sent
-	 * by hardware successfully. So we need to check that the VLD bits of
-	 * all descriptors are cleared. and then free all mbufs in the batch.
-	 * - tx_rs_thresh
-	 *   Number of mbufs released at a time.
-	 *
-	 * - free
-	 *   Tx mbuf free array used for preserving temporarily address of mbuf
-	 *   released back to mempool, when releasing mbuf in batches.
+	 * The minimum length of the packet supported by hardware in the Tx
+	 * direction.
 	 */
-	uint16_t tx_rs_thresh;
-	struct rte_mbuf **free;
+	uint8_t min_tx_pkt_len;
+
+	uint8_t max_non_tso_bd_num; /* max BD number of one non-TSO packet */
 
 	/*
 	 * tso mode.
@@ -464,7 +461,7 @@ struct hns3_tx_queue {
 	 *     checksum of packets that need TSO, so network driver software
 	 *     not need to recalculate it.
 	 */
-	uint8_t tso_mode;
+	uint16_t tso_mode:1;
 	/*
 	 * udp checksum mode.
 	 * value range:
@@ -480,16 +477,10 @@ struct hns3_tx_queue {
 	 *     In this mode, HW does not have the preceding problems and can
 	 *     directly calculate the checksum of these UDP packets.
 	 */
-	uint8_t udp_cksum_mode;
-	/*
-	 * The minimum length of the packet supported by hardware in the Tx
-	 * direction.
-	 */
-	uint32_t min_tx_pkt_len;
+	uint16_t udp_cksum_mode:1;
 
-	uint8_t max_non_tso_bd_num; /* max BD number of one non-TSO packet */
-	bool tx_deferred_start; /* don't start this queue in dev start */
-	bool configured;        /* indicate if tx queue has been configured */
+	uint16_t simple_bd_enable:1;
+	uint16_t tx_push_enable:1;    /* check whether the tx push is enabled */
 	/*
 	 * Indicate whether add the vlan_tci of the mbuf to the inner VLAN field
 	 * of Tx BD. Because the outer VLAN will always be the PVID when the
@@ -502,11 +493,44 @@ struct hns3_tx_queue {
 	 * PVID-related operations in Tx. And pvid_sw_shift_en will be false at
 	 * this point.
 	 */
-	bool pvid_sw_shift_en;
-	bool enabled;           /* indicate if Tx queue has been enabled */
+	uint16_t pvid_sw_shift_en:1;
+
+	/*
+	 * For better performance in tx datapath, releasing mbuf in batches is
+	 * required.
+	 * Only checking the VLD bit of the last descriptor in a batch of the
+	 * thresh descriptors does not mean that these descriptors are all sent
+	 * by hardware successfully. So we need to check that the VLD bits of
+	 * all descriptors are cleared. and then free all mbufs in the batch.
+	 * - tx_rs_thresh
+	 *   Number of mbufs released at a time.
+	 *
+	 * - free
+	 *   Tx mbuf free array used for preserving temporarily address of mbuf
+	 *   released back to mempool, when releasing mbuf in batches.
+	 */
+	uint16_t tx_rs_thresh;
+	struct rte_mbuf **free;
 
 	struct hns3_tx_basic_stats basic_stats;
 	struct hns3_tx_dfx_stats dfx_stats;
+
+
+	/*
+	 * The following fields are not accessed in the I/O path, so they are
+	 * placed at the end.
+	 */
+	void *io_base;
+	struct hns3_adapter *hns;
+	uint64_t tx_ring_phys_addr; /* TX ring DMA address */
+	const struct rte_memzone *mz;
+
+	uint16_t port_id;
+	uint16_t queue_id;
+
+	bool configured;        /* indicate if tx queue has been configured */
+	bool tx_deferred_start; /* don't start this queue in dev start */
+	bool enabled;           /* indicate if Tx queue has been enabled */
 };
 
 #define HNS3_GET_TX_QUEUE_PEND_BD_NUM(txq) \
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH 5/6] net/hns3: log fdir configuration
  2021-04-26  3:34 [dpdk-dev] [PATCH 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
                   ` (3 preceding siblings ...)
  2021-04-26  3:34 ` [dpdk-dev] [PATCH 4/6] net/hns3: improve IO path data cache usage Min Hu (Connor)
@ 2021-04-26  3:34 ` Min Hu (Connor)
  2021-04-27 13:39   ` Ferruh Yigit
  2021-04-26  3:34 ` [dpdk-dev] [PATCH 6/6] net/hns3: fix vector Rx burst default value Min Hu (Connor)
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-26  3:34 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

The rte flow interface does not support the API of the capability
set. Therefore, fdir configuration logs are added to facilitate
debugging.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_fdir.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/net/hns3/hns3_fdir.c b/drivers/net/hns3/hns3_fdir.c
index 87c1aef..8b14219 100644
--- a/drivers/net/hns3/hns3_fdir.c
+++ b/drivers/net/hns3/hns3_fdir.c
@@ -336,6 +336,8 @@ int hns3_init_fd_config(struct hns3_adapter *hns)
 	    BIT(INNER_IP_PROTO) | BIT(INNER_IP_TOS) |
 	    BIT(INNER_SRC_IP) | BIT(INNER_DST_IP) |
 	    BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
+	hns3_info(hw, "fdir tuple: inner<vlan_tag1 eth_type ip_src ip_dst "
+		  "ip_proto ip_tos l4_src_port l4_dst_port>");
 
 	/* If use max 400bit key, we can support tuples for ether type */
 	if (pf->fdir.fd_cfg.max_key_length == MAX_KEY_LENGTH) {
@@ -345,6 +347,9 @@ int hns3_init_fd_config(struct hns3_adapter *hns)
 		    BIT(OUTER_DST_PORT) | BIT(INNER_VLAN_TAG2) |
 		    BIT(OUTER_TUN_VNI) | BIT(OUTER_TUN_FLOW_ID) |
 		    BIT(OUTER_ETH_TYPE) | BIT(OUTER_IP_PROTO);
+		hns3_info(hw, "fdir tuple more: inner<dst_mac src_mac "
+			  "vlan_tag2 sctp_tag> outer<eth_type ip_proto "
+			  "l4_src_port l4_dst_port tun_vni tun_flow_id>");
 	}
 
 	/* roce_type is used to filter roce frames
@@ -352,6 +357,7 @@ int hns3_init_fd_config(struct hns3_adapter *hns)
 	 */
 	key_cfg->meta_data_active = BIT(DST_VPORT) | BIT(TUNNEL_PACKET) |
 	    BIT(VLAN_NUMBER);
+	hns3_info(hw, "fdir meta data: dst_vport tunnel_packet vlan_number");
 
 	ret = hns3_get_fd_allocation(hw,
 				     &pf->fdir.fd_cfg.rule_num[HNS3_FD_STAGE_1],
@@ -361,6 +367,13 @@ int hns3_init_fd_config(struct hns3_adapter *hns)
 	if (ret)
 		return ret;
 
+	hns3_info(hw, "fdir: stage1<rules-%u counters-%u> stage2<rules-%u "
+		  "counters=%u>",
+		  pf->fdir.fd_cfg.rule_num[HNS3_FD_STAGE_1],
+		  pf->fdir.fd_cfg.cnt_num[HNS3_FD_STAGE_1],
+		  pf->fdir.fd_cfg.rule_num[HNS3_FD_STAGE_2],
+		  pf->fdir.fd_cfg.cnt_num[HNS3_FD_STAGE_2]);
+
 	return hns3_set_fd_key_config(hns);
 }
 
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH 6/6] net/hns3: fix vector Rx burst default value
  2021-04-26  3:34 [dpdk-dev] [PATCH 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
                   ` (4 preceding siblings ...)
  2021-04-26  3:34 ` [dpdk-dev] [PATCH 5/6] net/hns3: log fdir configuration Min Hu (Connor)
@ 2021-04-26  3:34 ` Min Hu (Connor)
  2021-04-27 13:46   ` Ferruh Yigit
  2021-04-28  9:53 ` [dpdk-dev] [PATCH v2 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
  2021-04-30  6:28 ` [dpdk-dev] [PATCH v3 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
  7 siblings, 1 reply; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-26  3:34 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

Currently, the driver uses the macro HNS3_DEFAULT_RX_BURST, whose value is
32, to limit the vector Rx burst size. As a result, the burst size the user
configures can't exceed 32.

This patch fixes this problem by defining the macro
HNS3_DEFAULT_RX_BURST as 64.

Fixes: a3d4f4d291d7 ("net/hns3: support NEON Rx")
Cc: stable@dpdk.org

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_rxtx.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h
index 1e2e994..ba24e00 100644
--- a/drivers/net/hns3/hns3_rxtx.h
+++ b/drivers/net/hns3/hns3_rxtx.h
@@ -20,7 +20,7 @@
 #define HNS3_DEFAULT_TX_RS_THRESH	32
 #define HNS3_TX_FAST_FREE_AHEAD		64
 
-#define HNS3_DEFAULT_RX_BURST		32
+#define HNS3_DEFAULT_RX_BURST		64
 #if (HNS3_DEFAULT_RX_BURST > 64)
 #error "PMD HNS3: HNS3_DEFAULT_RX_BURST must <= 64\n"
 #endif
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH 1/6] net/hns3: delete some unused capabilities
  2021-04-26  3:34 ` [dpdk-dev] [PATCH 1/6] net/hns3: delete some unused capabilities Min Hu (Connor)
@ 2021-04-27 13:37   ` Ferruh Yigit
  2021-04-27 14:26     ` Fengchengwen
  2021-04-27 14:30     ` Ferruh Yigit
  0 siblings, 2 replies; 31+ messages in thread
From: Ferruh Yigit @ 2021-04-27 13:37 UTC (permalink / raw)
  To: Min Hu (Connor), dev

On 4/26/2021 4:34 AM, Min Hu (Connor) wrote:
> From: Chengwen Feng <fengchengwen@huawei.com>
> 
> This patch deletes some unused capabilities, include:
> 1. Delete some unused firmware capabilities definition.
> 2. Delete some unused driver capabilities definition.
> 
> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> Signed-off-by: Min Hu (Connor) <humin29@huawei.com>

<...>

> diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h
> index d27c725..de17878 100644
> --- a/drivers/net/hns3/hns3_ethdev.h
> +++ b/drivers/net/hns3/hns3_ethdev.h
> @@ -855,17 +855,20 @@ enum {
>  
>  #define HNS3_DEVARG_DEV_CAPS_MASK	"dev_caps_mask"
>  
> -#define HNS3_DEV_SUPPORT_DCB_B			0x0
> -#define HNS3_DEV_SUPPORT_COPPER_B		0x1
> -#define HNS3_DEV_SUPPORT_UDP_GSO_B		0x2
> -#define HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B	0x3
> -#define HNS3_DEV_SUPPORT_PTP_B			0x4
> -#define HNS3_DEV_SUPPORT_TX_PUSH_B		0x5
> -#define HNS3_DEV_SUPPORT_INDEP_TXRX_B		0x6
> -#define HNS3_DEV_SUPPORT_STASH_B		0x7
> -#define HNS3_DEV_SUPPORT_RXD_ADV_LAYOUT_B	0x9
> -#define HNS3_DEV_SUPPORT_OUTER_UDP_CKSUM_B	0xA
> -#define HNS3_DEV_SUPPORT_RAS_IMP_B		0xB
> +enum {
> +	HNS3_DEV_SUPPORT_DCB_B,
> +	HNS3_DEV_SUPPORT_COPPER_B,
> +	HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B,
> +	HNS3_DEV_SUPPORT_PTP_B,
> +	HNS3_DEV_SUPPORT_TX_PUSH_B,
> +	HNS3_DEV_SUPPORT_INDEP_TXRX_B,
> +	HNS3_DEV_SUPPORT_STASH_B,
> +	HNS3_DEV_SUPPORT_SIMPLE_BD_B,
> +	HNS3_DEV_SUPPORT_RXD_ADV_LAYOUT_B,
> +	HNS3_DEV_SUPPORT_OUTER_UDP_CKSUM_B,
> +	HNS3_DEV_SUPPORT_RAS_IMP_B,
> +	HNS3_DEV_SUPPORT_TM_B,
> +};

This causes some values to change because of the added/removed items; is this
expected?
If so, can you please mention this value change in the commit log, to clarify?


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH 5/6] net/hns3: log fdir configuration
  2021-04-26  3:34 ` [dpdk-dev] [PATCH 5/6] net/hns3: log fdir configuration Min Hu (Connor)
@ 2021-04-27 13:39   ` Ferruh Yigit
  2021-04-27 14:15     ` Fengchengwen
  2021-04-27 14:25     ` Ferruh Yigit
  0 siblings, 2 replies; 31+ messages in thread
From: Ferruh Yigit @ 2021-04-27 13:39 UTC (permalink / raw)
  To: Min Hu (Connor), dev

On 4/26/2021 4:34 AM, Min Hu (Connor) wrote:
> From: Chengwen Feng <fengchengwen@huawei.com>
> 
> The rte flow interface does not support the API of the capability
> set. Therefore, fdir configuration logs are added to facilitate
> debugging.
> 
> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
> ---
>  drivers/net/hns3/hns3_fdir.c | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
> 
> diff --git a/drivers/net/hns3/hns3_fdir.c b/drivers/net/hns3/hns3_fdir.c
> index 87c1aef..8b14219 100644
> --- a/drivers/net/hns3/hns3_fdir.c
> +++ b/drivers/net/hns3/hns3_fdir.c
> @@ -336,6 +336,8 @@ int hns3_init_fd_config(struct hns3_adapter *hns)
>  	    BIT(INNER_IP_PROTO) | BIT(INNER_IP_TOS) |
>  	    BIT(INNER_SRC_IP) | BIT(INNER_DST_IP) |
>  	    BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
> +	hns3_info(hw, "fdir tuple: inner<vlan_tag1 eth_type ip_src ip_dst "
> +		  "ip_proto ip_tos l4_src_port l4_dst_port>");


If the messages are for debugging, what do you think about adding them as
'hns3_dbg'? Isn't info level too verbose for debugging?


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH 6/6] net/hns3: fix vector Rx burst default value
  2021-04-26  3:34 ` [dpdk-dev] [PATCH 6/6] net/hns3: fix vector Rx burst default value Min Hu (Connor)
@ 2021-04-27 13:46   ` Ferruh Yigit
  2021-04-27 14:34     ` Fengchengwen
  0 siblings, 1 reply; 31+ messages in thread
From: Ferruh Yigit @ 2021-04-27 13:46 UTC (permalink / raw)
  To: Min Hu (Connor), dev

On 4/26/2021 4:34 AM, Min Hu (Connor) wrote:
> From: Chengwen Feng <fengchengwen@huawei.com>
> 
> Currently, driver uses the macro HNS3_DEFAULT_RX_BURST whose value is
> 32 to limit the vector Rx burst size, as a result, the burst size user
> configure can't exceed 32.
> 
> This patch fixes this problem by defining the macro
> HNS3_DEFAULT_RX_BURST as 64.
> 
> Fixes: a3d4f4d291d7 ("net/hns3: support NEON Rx")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
> ---
>  drivers/net/hns3/hns3_rxtx.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h
> index 1e2e994..ba24e00 100644
> --- a/drivers/net/hns3/hns3_rxtx.h
> +++ b/drivers/net/hns3/hns3_rxtx.h
> @@ -20,7 +20,7 @@
>  #define HNS3_DEFAULT_TX_RS_THRESH	32
>  #define HNS3_TX_FAST_FREE_AHEAD		64
>  
> -#define HNS3_DEFAULT_RX_BURST		32
> +#define HNS3_DEFAULT_RX_BURST		64

It seems that in the vector Rx path, the number of received packets is silently
limited to 'HNS3_DEFAULT_RX_BURST', independent of what the user requested/configured.

There can be driver/HW limitations to 'HNS3_DEFAULT_RX_BURST', and that is OK, but
instead of limiting the user request, what do you think about doing Rx in
'HNS3_DEFAULT_RX_BURST'-sized blocks until the user request is satisfied?

I mean something like following implementation:
https://git.dpdk.org/dpdk/commit/?id=d96394ea263c

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH 5/6] net/hns3: log fdir configuration
  2021-04-27 13:39   ` Ferruh Yigit
@ 2021-04-27 14:15     ` Fengchengwen
  2021-04-27 14:25     ` Ferruh Yigit
  1 sibling, 0 replies; 31+ messages in thread
From: Fengchengwen @ 2021-04-27 14:15 UTC (permalink / raw)
  To: Ferruh Yigit, humin (Q), dev






From:Ferruh Yigit <ferruh.yigit@intel.com>
To:humin (Q) <humin29@huawei.com>;dev <dev@dpdk.org>
Date:2021-04-27 21:39:45
Subject:Re: [dpdk-dev] [PATCH 5/6] net/hns3: log fdir configuration

On 4/26/2021 4:34 AM, Min Hu (Connor) wrote:
> From: Chengwen Feng < fengchengwen@huawei.com<mailto:fengchengwen@huawei.com>>
>
> The rte flow interface does not support the API of the capability
> set. Therefore, fdir configuration logs are added to facilitate
> debugging.
>
> Signed-off-by: Chengwen Feng < fengchengwen@huawei.com<mailto:fengchengwen@huawei.com>>
> Signed-off-by: Min Hu (Connor) < humin29@huawei.com<mailto:humin29@huawei.com>>
> ---
> drivers/net/hns3/hns3_fdir.c | 13 +++++++++++++
> 1 file changed, 13 insertions(+)
>
> diff --git a/drivers/net/hns3/hns3_fdir.c b/drivers/net/hns3/hns3_fdir.c
> index 87c1aef..8b14219 100644
> --- a/drivers/net/hns3/hns3_fdir.c
> +++ b/drivers/net/hns3/hns3_fdir.c
> @@ -336,6 +336,8 @@ int hns3_init_fd_config(struct hns3_adapter *hns)
>            BIT(INNER_IP_PROTO) | BIT(INNER_IP_TOS) |
>            BIT(INNER_SRC_IP) | BIT(INNER_DST_IP) |
>            BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
> +     hns3_info(hw, "fdir tuple: inner<vlan_tag1 eth_type ip_src ip_dst "
> +               "ip_proto ip_tos l4_src_port l4_dst_port>");


If the messages are for debugging, what do you think about add them as
'hns3_dbg'? Isn't info level too verbose for debugging?

Yes, it is used for debugging and log analysis. We use hns3_info because the default log level may not be debug in product scenarios.
We use a verbose, readable log because it is easier to read.

thanks

BTW: sorry for this reply format because using phone.


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH 5/6] net/hns3: log fdir configuration
  2021-04-27 13:39   ` Ferruh Yigit
  2021-04-27 14:15     ` Fengchengwen
@ 2021-04-27 14:25     ` Ferruh Yigit
  2021-04-27 14:29       ` Fengchengwen
  1 sibling, 1 reply; 31+ messages in thread
From: Ferruh Yigit @ 2021-04-27 14:25 UTC (permalink / raw)
  To: Fengchengwen, humin (Q), dev

On 4/27/2021 3:15 PM, Fengchengwen wrote:
> 
> 
> 
> 
> 
> *From:*Ferruh Yigit <ferruh.yigit@intel.com>
> *To:*humin (Q) <humin29@huawei.com>;dev <dev@dpdk.org>
> *Date:*2021-04-27 21:39:45
> *Subject:*Re: [dpdk-dev] [PATCH 5/6] net/hns3: log fdir configuration
> 
> On 4/26/2021 4:34 AM, Min Hu (Connor) wrote:
>> From: Chengwen Feng < fengchengwen@huawei.com <mailto:fengchengwen@huawei.com>>
>> 
>> The rte flow interface does not support the API of the capability 
>> set. Therefore, fdir configuration logs are added to facilitate 
>> debugging. 
>> 
>> Signed-off-by: Chengwen Feng < fengchengwen@huawei.com <mailto:fengchengwen@huawei.com>>
>> Signed-off-by: Min Hu (Connor) < humin29@huawei.com <mailto:humin29@huawei.com>>
>> --- 
>> drivers/net/hns3/hns3_fdir.c | 13 +++++++++++++ 
>> 1 file changed, 13 insertions(+) 
>> 
>> diff --git a/drivers/net/hns3/hns3_fdir.c b/drivers/net/hns3/hns3_fdir.c 
>> index 87c1aef..8b14219 100644 
>> --- a/drivers/net/hns3/hns3_fdir.c 
>> +++ b/drivers/net/hns3/hns3_fdir.c 
>> @@ -336,6 +336,8 @@ int hns3_init_fd_config(struct hns3_adapter *hns) 
>>            BIT(INNER_IP_PROTO) | BIT(INNER_IP_TOS) | 
>>            BIT(INNER_SRC_IP) | BIT(INNER_DST_IP) | 
>>            BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT); 
>> +     hns3_info(hw, "fdir tuple: inner<vlan_tag1 eth_type ip_src ip_dst " 
>> +               "ip_proto ip_tos l4_src_port l4_dst_port>"); 
> 
> 
> If the messages are for debugging, what do you think about add them as
> 'hns3_dbg'? Isn't info level too verbose for debugging?
> 
> yes, it was used for debugging and log analyzing, we use hns3_info because the
> default level may not debug in product scenario.
> we use verbose readable log because it's useful for reading.
> 

It is possible to dynamically change the log level and get debug logs when
needed; I think this is better than enabling them by default.

> thanks
> 
> BTW: sorry for this reply format because using phone.
> 


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH 1/6] net/hns3: delete some unused capabilities
  2021-04-27 13:37   ` Ferruh Yigit
@ 2021-04-27 14:26     ` Fengchengwen
  2021-04-27 14:30     ` Ferruh Yigit
  1 sibling, 0 replies; 31+ messages in thread
From: Fengchengwen @ 2021-04-27 14:26 UTC (permalink / raw)
  To: Ferruh Yigit, humin (Q), dev



From:Ferruh Yigit <ferruh.yigit@intel.com>
To:humin (Q) <humin29@huawei.com>;dev <dev@dpdk.org>
Date:2021-04-27 21:37:59
Subject:Re: [dpdk-dev] [PATCH 1/6] net/hns3: delete some unused capabilities

On 4/26/2021 4:34 AM, Min Hu (Connor) wrote:
> From: Chengwen Feng < fengchengwen@huawei.com<mailto:fengchengwen@huawei.com>>
>
> This patch deletes some unused capabilities, include:
> 1. Delete some unused firmware capabilities definition.
> 2. Delete some unused driver capabilities definition.
>
> Signed-off-by: Chengwen Feng < fengchengwen@huawei.com<mailto:fengchengwen@huawei.com>>
> Signed-off-by: Min Hu (Connor) < humin29@huawei.com<mailto:humin29@huawei.com>>

<...>

> diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h
> index d27c725..de17878 100644
> --- a/drivers/net/hns3/hns3_ethdev.h
> +++ b/drivers/net/hns3/hns3_ethdev.h
> @@ -855,17 +855,20 @@ enum {
>
> #define HNS3_DEVARG_DEV_CAPS_MASK    "dev_caps_mask"
>
> -#define HNS3_DEV_SUPPORT_DCB_B                       0x0
> -#define HNS3_DEV_SUPPORT_COPPER_B            0x1
> -#define HNS3_DEV_SUPPORT_UDP_GSO_B           0x2
> -#define HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B   0x3
> -#define HNS3_DEV_SUPPORT_PTP_B                       0x4
> -#define HNS3_DEV_SUPPORT_TX_PUSH_B           0x5
> -#define HNS3_DEV_SUPPORT_INDEP_TXRX_B                0x6
> -#define HNS3_DEV_SUPPORT_STASH_B             0x7
> -#define HNS3_DEV_SUPPORT_RXD_ADV_LAYOUT_B    0x9
> -#define HNS3_DEV_SUPPORT_OUTER_UDP_CKSUM_B   0xA
> -#define HNS3_DEV_SUPPORT_RAS_IMP_B           0xB
> +enum {
> +     HNS3_DEV_SUPPORT_DCB_B,
> +     HNS3_DEV_SUPPORT_COPPER_B,
> +     HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B,
> +     HNS3_DEV_SUPPORT_PTP_B,
> +     HNS3_DEV_SUPPORT_TX_PUSH_B,
> +     HNS3_DEV_SUPPORT_INDEP_TXRX_B,
> +     HNS3_DEV_SUPPORT_STASH_B,
> +     HNS3_DEV_SUPPORT_SIMPLE_BD_B,
> +     HNS3_DEV_SUPPORT_RXD_ADV_LAYOUT_B,
> +     HNS3_DEV_SUPPORT_OUTER_UDP_CKSUM_B,
> +     HNS3_DEV_SUPPORT_RAS_IMP_B,
> +     HNS3_DEV_SUPPORT_TM_B,
> +};

This cause some values to be changed, because of added/removed items, is this
expected?
If so can you please mention from this value change in the commit log, to clarify.

These values are used only in the driver, so the order can be adjusted.
The commit log mentions it briefly, as follows:

2. Delete some unused driver capabilities definition

thanks



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH 5/6] net/hns3: log fdir configuration
  2021-04-27 14:25     ` Ferruh Yigit
@ 2021-04-27 14:29       ` Fengchengwen
  0 siblings, 0 replies; 31+ messages in thread
From: Fengchengwen @ 2021-04-27 14:29 UTC (permalink / raw)
  To: Ferruh Yigit, humin (Q), dev



From:Ferruh Yigit <ferruh.yigit@intel.com>
To:Fengchengwen <fengchengwen@huawei.com>;humin (Q) <humin29@huawei.com>;dev <dev@dpdk.org>
Date:2021-04-27 22:26:09
Subject:Re: [dpdk-dev] [PATCH 5/6] net/hns3: log fdir configuration

On 4/27/2021 3:15 PM, Fengchengwen wrote:
>
>
>
>
>
> *From:*Ferruh Yigit < ferruh.yigit@intel.com<mailto:ferruh.yigit@intel.com>>
> *To:*humin (Q) < humin29@huawei.com<mailto:humin29@huawei.com>>;dev < dev@dpdk.org<mailto:dev@dpdk.org>>
> *Date:*2021-04-27 21:39:45
> *Subject:*Re: [dpdk-dev] [PATCH 5/6] net/hns3: log fdir configuration
>
> On 4/26/2021 4:34 AM, Min Hu (Connor) wrote:
>> From: Chengwen Feng < fengchengwen@huawei.com<mailto:fengchengwen@huawei.com> < mailto:fengchengwen@huawei.com>>
>>
>> The rte flow interface does not support the API of the capability
>> set. Therefore, fdir configuration logs are added to facilitate
>> debugging.
>>
>> Signed-off-by: Chengwen Feng < fengchengwen@huawei.com<mailto:fengchengwen@huawei.com> < mailto:fengchengwen@huawei.com>>
>> Signed-off-by: Min Hu (Connor) < humin29@huawei.com<mailto:humin29@huawei.com> < mailto:humin29@huawei.com>>
>> ---
>> drivers/net/hns3/hns3_fdir.c | 13 +++++++++++++
>> 1 file changed, 13 insertions(+)
>>
>> diff --git a/drivers/net/hns3/hns3_fdir.c b/drivers/net/hns3/hns3_fdir.c
>> index 87c1aef..8b14219 100644
>> --- a/drivers/net/hns3/hns3_fdir.c
>> +++ b/drivers/net/hns3/hns3_fdir.c
>> @@ -336,6 +336,8 @@ int hns3_init_fd_config(struct hns3_adapter *hns)
>>            BIT(INNER_IP_PROTO) | BIT(INNER_IP_TOS) |
>>            BIT(INNER_SRC_IP) | BIT(INNER_DST_IP) |
>>            BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
>> +     hns3_info(hw, "fdir tuple: inner<vlan_tag1 eth_type ip_src ip_dst "
>> +               "ip_proto ip_tos l4_src_port l4_dst_port>");
>
>
> If the messages are for debugging, what do you think about add them as
> 'hns3_dbg'? Isn't info level too verbose for debugging?
>
> yes, it was used for debugging and log analyzing, we use hns3_info because the
> default level may not debug in product scenario.
> we use verbose readable log because it's useful for reading.
>

It is possible to dynamically change the log level and get debug logs when
needed, I think this is better than enable it by default.

OK, will fix in v2, thanks

> thanks
>
> BTW: sorry for this reply format because using phone.
>



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH 1/6] net/hns3: delete some unused capabilities
  2021-04-27 13:37   ` Ferruh Yigit
  2021-04-27 14:26     ` Fengchengwen
@ 2021-04-27 14:30     ` Ferruh Yigit
  1 sibling, 0 replies; 31+ messages in thread
From: Ferruh Yigit @ 2021-04-27 14:30 UTC (permalink / raw)
  To: Fengchengwen, humin (Q), dev

On 4/27/2021 3:26 PM, Fengchengwen wrote:
> 
> 
> *From:*Ferruh Yigit <ferruh.yigit@intel.com>
> *To:*humin (Q) <humin29@huawei.com>;dev <dev@dpdk.org>
> *Date:*2021-04-27 21:37:59
> *Subject:*Re: [dpdk-dev] [PATCH 1/6] net/hns3: delete some unused capabilities
> 
> On 4/26/2021 4:34 AM, Min Hu (Connor) wrote:
>> From: Chengwen Feng < fengchengwen@huawei.com <mailto:fengchengwen@huawei.com>>
>> 
>> This patch deletes some unused capabilities, include: 
>> 1. Delete some unused firmware capabilities definition. 
>> 2. Delete some unused driver capabilities definition. 
>> 
>> Signed-off-by: Chengwen Feng < fengchengwen@huawei.com <mailto:fengchengwen@huawei.com>>
>> Signed-off-by: Min Hu (Connor) < humin29@huawei.com <mailto:humin29@huawei.com>>
> 
> <...>
> 
>> diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h 
>> index d27c725..de17878 100644 
>> --- a/drivers/net/hns3/hns3_ethdev.h 
>> +++ b/drivers/net/hns3/hns3_ethdev.h 
>> @@ -855,17 +855,20 @@ enum { 
>> 
>> #define HNS3_DEVARG_DEV_CAPS_MASK    "dev_caps_mask" 
>> 
>> -#define HNS3_DEV_SUPPORT_DCB_B                       0x0 
>> -#define HNS3_DEV_SUPPORT_COPPER_B            0x1 
>> -#define HNS3_DEV_SUPPORT_UDP_GSO_B           0x2 
>> -#define HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B   0x3 
>> -#define HNS3_DEV_SUPPORT_PTP_B                       0x4 
>> -#define HNS3_DEV_SUPPORT_TX_PUSH_B           0x5 
>> -#define HNS3_DEV_SUPPORT_INDEP_TXRX_B                0x6 
>> -#define HNS3_DEV_SUPPORT_STASH_B             0x7 
>> -#define HNS3_DEV_SUPPORT_RXD_ADV_LAYOUT_B    0x9 
>> -#define HNS3_DEV_SUPPORT_OUTER_UDP_CKSUM_B   0xA 
>> -#define HNS3_DEV_SUPPORT_RAS_IMP_B           0xB 
>> +enum { 
>> +     HNS3_DEV_SUPPORT_DCB_B, 
>> +     HNS3_DEV_SUPPORT_COPPER_B, 
>> +     HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B, 
>> +     HNS3_DEV_SUPPORT_PTP_B, 
>> +     HNS3_DEV_SUPPORT_TX_PUSH_B, 
>> +     HNS3_DEV_SUPPORT_INDEP_TXRX_B, 
>> +     HNS3_DEV_SUPPORT_STASH_B, 
>> +     HNS3_DEV_SUPPORT_SIMPLE_BD_B, 
>> +     HNS3_DEV_SUPPORT_RXD_ADV_LAYOUT_B, 
>> +     HNS3_DEV_SUPPORT_OUTER_UDP_CKSUM_B, 
>> +     HNS3_DEV_SUPPORT_RAS_IMP_B, 
>> +     HNS3_DEV_SUPPORT_TM_B, 
>> +}; 
> 
> This cause some values to be changed, because of added/removed items, is this
> expected?
> If so can you please mention from this value change in the commit log, to clarify.
> 
> these values are used only in driver, so the order could adjust.
> the commit log memtion it brifely, which is:
> 
> 2. Delete some unused driver capabilities definition
> 

I am not talking about the deletion of unused macros.

Please mention that the values of some of the macros have been changed, and
that this is known and won't cause a problem.


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH 6/6] net/hns3: fix vector Rx burst default value
  2021-04-27 13:46   ` Ferruh Yigit
@ 2021-04-27 14:34     ` Fengchengwen
  0 siblings, 0 replies; 31+ messages in thread
From: Fengchengwen @ 2021-04-27 14:34 UTC (permalink / raw)
  To: Ferruh Yigit, humin (Q), dev



From:Ferruh Yigit <ferruh.yigit@intel.com>
To:humin (Q) <humin29@huawei.com>;dev <dev@dpdk.org>
Date:2021-04-27 21:47:08
Subject:Re: [dpdk-dev] [PATCH 6/6] net/hns3: fix vector Rx burst default value

On 4/26/2021 4:34 AM, Min Hu (Connor) wrote:
> From: Chengwen Feng < fengchengwen@huawei.com<mailto:fengchengwen@huawei.com>>
>
> Currently, driver uses the macro HNS3_DEFAULT_RX_BURST whose value is
> 32 to limit the vector Rx burst size, as a result, the burst size user
> configure can't exceed 32.
>
> This patch fixes this problem by defining the macro
> HNS3_DEFAULT_RX_BURST as 64.
>
> Fixes: a3d4f4d291d7 ("net/hns3: support NEON Rx")
> Cc: stable@dpdk.org<mailto:stable@dpdk.org>
>
> Signed-off-by: Chengwen Feng < fengchengwen@huawei.com<mailto:fengchengwen@huawei.com>>
> Signed-off-by: Min Hu (Connor) < humin29@huawei.com<mailto:humin29@huawei.com>>
> ---
> drivers/net/hns3/hns3_rxtx.h | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h
> index 1e2e994..ba24e00 100644
> --- a/drivers/net/hns3/hns3_rxtx.h
> +++ b/drivers/net/hns3/hns3_rxtx.h
> @@ -20,7 +20,7 @@
> #define HNS3_DEFAULT_TX_RS_THRESH    32
> #define HNS3_TX_FAST_FREE_AHEAD              64
>
> -#define HNS3_DEFAULT_RX_BURST                32
> +#define HNS3_DEFAULT_RX_BURST                64

It seems in the vector Rx path, the number of receive packets are limited
silently to 'HNS3_DEFAULT_RX_BURST' independent from what user requested/configured.

There can be driver/HW limitations to 'HNS3_DEFAULT_RX_BURST' that is OK, but
instead of limiting user request, what do you think about Rx in the
'HNS3_DEFAULT_RX_BURST' blocks until user request justified?

I mean something like following implementation:
https://git.dpdk.org/dpdk/commit/?id=d96394ea263c


Sounds good, will fix in v2, thanks


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v2 0/6] optimization and bugfix for hns3 PMD
  2021-04-26  3:34 [dpdk-dev] [PATCH 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
                   ` (5 preceding siblings ...)
  2021-04-26  3:34 ` [dpdk-dev] [PATCH 6/6] net/hns3: fix vector Rx burst default value Min Hu (Connor)
@ 2021-04-28  9:53 ` Min Hu (Connor)
  2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 1/6] net/hns3: delete some unused capabilities Min Hu (Connor)
                     ` (5 more replies)
  2021-04-30  6:28 ` [dpdk-dev] [PATCH v3 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
  7 siblings, 6 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-28  9:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

This patch set contains one patch for performance optimization,
this is: 'net/hns3: improve IO path data cache usage'.
The others are bugfixes for hns3 PMD.

Chengwen Feng (6):
  net/hns3: delete some unused capabilities
  net/hns3: modify write reg opt API impl
  net/hns3: use RTE DIM instead of ARRAY SIZE
  net/hns3: improve IO path data cache usage
  net/hns3: log fdir configuration
  net/hns3: fix vector Rx burst can't exceed 32
---
v2:
* fixed commit info.
* changed hns3_info to hns3_dbg.
* deleted tx push, simple bd capa.

 drivers/net/hns3/hns3_cmd.c          |  15 +---
 drivers/net/hns3/hns3_cmd.h          |  17 ++---
 drivers/net/hns3/hns3_ethdev.h       |  43 +++++-------
 drivers/net/hns3/hns3_fdir.c         |  13 ++++
 drivers/net/hns3/hns3_flow.c         |  18 ++---
 drivers/net/hns3/hns3_intr.c         |   4 +-
 drivers/net/hns3/hns3_rxtx.h         | 128 +++++++++++++++++++++--------------
 drivers/net/hns3/hns3_rxtx_vec.c     |  41 ++++++++---
 drivers/net/hns3/hns3_rxtx_vec_sve.c |  37 ++++++++--
 9 files changed, 190 insertions(+), 126 deletions(-)

-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v2 1/6] net/hns3: delete some unused capabilities
  2021-04-28  9:53 ` [dpdk-dev] [PATCH v2 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
@ 2021-04-28  9:53   ` Min Hu (Connor)
  2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 2/6] net/hns3: modify write reg opt API impl Min Hu (Connor)
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-28  9:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

This patch deletes some unused capabilities, including:
1. Delete some unused firmware capability definitions, which are:
UDP_GSO, ATR, INT_QL, SIMPLE_BD, TX_PUSH, FEC and PAUSE.
2. Delete some unused driver capability definitions, which are:
UDP_GSO, TX_PUSH.
3. Also redefine HNS3_DEV_SUPPORT_* as an enum type, and change some of
the values. Note: the HNS3_DEV_SUPPORT_* values are used only inside
the driver, so it's safe to change the values.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_cmd.c    | 11 -----------
 drivers/net/hns3/hns3_cmd.h    | 17 +++++++----------
 drivers/net/hns3/hns3_ethdev.h | 29 +++++++++++------------------
 3 files changed, 18 insertions(+), 39 deletions(-)

diff --git a/drivers/net/hns3/hns3_cmd.c b/drivers/net/hns3/hns3_cmd.c
index 62dfc19..6331380 100644
--- a/drivers/net/hns3/hns3_cmd.c
+++ b/drivers/net/hns3/hns3_cmd.c
@@ -423,21 +423,14 @@ hns3_get_caps_name(uint32_t caps_id)
 		enum HNS3_CAPS_BITS caps;
 		const char *name;
 	} dev_caps[] = {
-		{ HNS3_CAPS_UDP_GSO_B,         "udp_gso"         },
-		{ HNS3_CAPS_ATR_B,             "atr"             },
 		{ HNS3_CAPS_FD_QUEUE_REGION_B, "fd_queue_region" },
 		{ HNS3_CAPS_PTP_B,             "ptp"             },
-		{ HNS3_CAPS_INT_QL_B,          "int_ql"          },
-		{ HNS3_CAPS_SIMPLE_BD_B,       "simple_bd"       },
-		{ HNS3_CAPS_TX_PUSH_B,         "tx_push"         },
 		{ HNS3_CAPS_PHY_IMP_B,         "phy_imp"         },
 		{ HNS3_CAPS_TQP_TXRX_INDEP_B,  "tqp_txrx_indep"  },
 		{ HNS3_CAPS_HW_PAD_B,          "hw_pad"          },
 		{ HNS3_CAPS_STASH_B,           "stash"           },
 		{ HNS3_CAPS_UDP_TUNNEL_CSUM_B, "udp_tunnel_csum" },
 		{ HNS3_CAPS_RAS_IMP_B,         "ras_imp"         },
-		{ HNS3_CAPS_FEC_B,             "fec"             },
-		{ HNS3_CAPS_PAUSE_B,           "pause"           },
 		{ HNS3_CAPS_RXD_ADV_LAYOUT_B,  "rxd_adv_layout"  }
 	};
 	uint32_t i;
@@ -484,8 +477,6 @@ hns3_parse_capability(struct hns3_hw *hw,
 {
 	uint32_t caps = rte_le_to_cpu_32(cmd->caps[0]);
 
-	if (hns3_get_bit(caps, HNS3_CAPS_UDP_GSO_B))
-		hns3_set_bit(hw->capability, HNS3_DEV_SUPPORT_UDP_GSO_B, 1);
 	if (hns3_get_bit(caps, HNS3_CAPS_FD_QUEUE_REGION_B))
 		hns3_set_bit(hw->capability, HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B,
 			     1);
@@ -502,8 +493,6 @@ hns3_parse_capability(struct hns3_hw *hw,
 			hns3_warn(hw, "ignore PTP capability due to lack of "
 				  "rxd advanced layout capability.");
 	}
-	if (hns3_get_bit(caps, HNS3_CAPS_TX_PUSH_B))
-		hns3_set_bit(hw->capability, HNS3_DEV_SUPPORT_TX_PUSH_B, 1);
 	if (hns3_get_bit(caps, HNS3_CAPS_PHY_IMP_B))
 		hns3_set_bit(hw->capability, HNS3_DEV_SUPPORT_COPPER_B, 1);
 	if (hns3_get_bit(caps, HNS3_CAPS_TQP_TXRX_INDEP_B))
diff --git a/drivers/net/hns3/hns3_cmd.h b/drivers/net/hns3/hns3_cmd.h
index bf1772d..944e3d6 100644
--- a/drivers/net/hns3/hns3_cmd.h
+++ b/drivers/net/hns3/hns3_cmd.h
@@ -306,22 +306,19 @@ struct hns3_rx_priv_buff_cmd {
 #define HNS3_FW_VERSION_BYTE0_M		GENMASK(7, 0)
 
 enum HNS3_CAPS_BITS {
-	HNS3_CAPS_UDP_GSO_B,
-	HNS3_CAPS_ATR_B,
-	HNS3_CAPS_FD_QUEUE_REGION_B,
+	/*
+	 * The following capability index definitions must be the same as those
+	 * of the firmware.
+	 */
+	HNS3_CAPS_FD_QUEUE_REGION_B = 2,
 	HNS3_CAPS_PTP_B,
-	HNS3_CAPS_INT_QL_B,
-	HNS3_CAPS_SIMPLE_BD_B,
-	HNS3_CAPS_TX_PUSH_B,
-	HNS3_CAPS_PHY_IMP_B,
+	HNS3_CAPS_PHY_IMP_B = 7,
 	HNS3_CAPS_TQP_TXRX_INDEP_B,
 	HNS3_CAPS_HW_PAD_B,
 	HNS3_CAPS_STASH_B,
 	HNS3_CAPS_UDP_TUNNEL_CSUM_B,
 	HNS3_CAPS_RAS_IMP_B,
-	HNS3_CAPS_FEC_B,
-	HNS3_CAPS_PAUSE_B,
-	HNS3_CAPS_RXD_ADV_LAYOUT_B,
+	HNS3_CAPS_RXD_ADV_LAYOUT_B = 15,
 };
 
 enum HNS3_API_CAP_BITS {
diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h
index d27c725..bedbffd 100644
--- a/drivers/net/hns3/hns3_ethdev.h
+++ b/drivers/net/hns3/hns3_ethdev.h
@@ -855,17 +855,17 @@ enum {
 
 #define HNS3_DEVARG_DEV_CAPS_MASK	"dev_caps_mask"
 
-#define HNS3_DEV_SUPPORT_DCB_B			0x0
-#define HNS3_DEV_SUPPORT_COPPER_B		0x1
-#define HNS3_DEV_SUPPORT_UDP_GSO_B		0x2
-#define HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B	0x3
-#define HNS3_DEV_SUPPORT_PTP_B			0x4
-#define HNS3_DEV_SUPPORT_TX_PUSH_B		0x5
-#define HNS3_DEV_SUPPORT_INDEP_TXRX_B		0x6
-#define HNS3_DEV_SUPPORT_STASH_B		0x7
-#define HNS3_DEV_SUPPORT_RXD_ADV_LAYOUT_B	0x9
-#define HNS3_DEV_SUPPORT_OUTER_UDP_CKSUM_B	0xA
-#define HNS3_DEV_SUPPORT_RAS_IMP_B		0xB
+enum {
+	HNS3_DEV_SUPPORT_DCB_B,
+	HNS3_DEV_SUPPORT_COPPER_B,
+	HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B,
+	HNS3_DEV_SUPPORT_PTP_B,
+	HNS3_DEV_SUPPORT_INDEP_TXRX_B,
+	HNS3_DEV_SUPPORT_STASH_B,
+	HNS3_DEV_SUPPORT_RXD_ADV_LAYOUT_B,
+	HNS3_DEV_SUPPORT_OUTER_UDP_CKSUM_B,
+	HNS3_DEV_SUPPORT_RAS_IMP_B,
+};
 
 #define hns3_dev_dcb_supported(hw) \
 	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_DCB_B)
@@ -874,10 +874,6 @@ enum {
 #define hns3_dev_copper_supported(hw) \
 	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_COPPER_B)
 
-/* Support UDP GSO offload */
-#define hns3_dev_udp_gso_supported(hw) \
-	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_UDP_GSO_B)
-
 /* Support the queue region action rule of flow directory */
 #define hns3_dev_fd_queue_region_supported(hw) \
 	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B)
@@ -886,9 +882,6 @@ enum {
 #define hns3_dev_ptp_supported(hw) \
 	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_PTP_B)
 
-#define hns3_dev_tx_push_supported(hw) \
-	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_TX_PUSH_B)
-
 /* Support to Independently enable/disable/reset Tx or Rx queues */
 #define hns3_dev_indep_txrx_supported(hw) \
 	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_INDEP_TXRX_B)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v2 2/6] net/hns3: modify write reg opt API impl
  2021-04-28  9:53 ` [dpdk-dev] [PATCH v2 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
  2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 1/6] net/hns3: delete some unused capabilities Min Hu (Connor)
@ 2021-04-28  9:53   ` Min Hu (Connor)
  2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 3/6] net/hns3: use RTE DIM instead of ARRAY SIZE Min Hu (Connor)
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-28  9:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

This patch modifies the hns3_write_reg_opt() API implementation to call
rte_write32() directly, because rte_write32() already uses rte_io_wmb().

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_ethdev.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h
index bedbffd..666ef71 100644
--- a/drivers/net/hns3/hns3_ethdev.h
+++ b/drivers/net/hns3/hns3_ethdev.h
@@ -977,13 +977,13 @@ static inline void hns3_write_reg(void *base, uint32_t reg, uint32_t value)
 }
 
 /*
- * The optimized function for writing registers used in the '.rx_pkt_burst' and
- * '.tx_pkt_burst' ops implementation function.
+ * The optimized function for writing registers reduces one address addition
+ * calculation, it was used in the '.rx_pkt_burst' and '.tx_pkt_burst' ops
+ * implementation function.
  */
 static inline void hns3_write_reg_opt(volatile void *addr, uint32_t value)
 {
-	rte_io_wmb();
-	rte_write32_relaxed(rte_cpu_to_le_32(value), addr);
+	rte_write32(rte_cpu_to_le_32(value), addr);
 }
 
 static inline uint32_t hns3_read_reg(void *base, uint32_t reg)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v2 3/6] net/hns3: use RTE DIM instead of ARRAY SIZE
  2021-04-28  9:53 ` [dpdk-dev] [PATCH v2 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
  2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 1/6] net/hns3: delete some unused capabilities Min Hu (Connor)
  2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 2/6] net/hns3: modify write reg opt API impl Min Hu (Connor)
@ 2021-04-28  9:53   ` Min Hu (Connor)
  2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 4/6] net/hns3: improve IO path data cache usage Min Hu (Connor)
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-28  9:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

This patch uses RTE_DIM() instead of ARRAY_SIZE().

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_cmd.c    |  4 ++--
 drivers/net/hns3/hns3_ethdev.h |  2 --
 drivers/net/hns3/hns3_flow.c   | 18 +++++++++---------
 drivers/net/hns3/hns3_intr.c   |  4 ++--
 4 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/net/hns3/hns3_cmd.c b/drivers/net/hns3/hns3_cmd.c
index 6331380..e224a95 100644
--- a/drivers/net/hns3/hns3_cmd.c
+++ b/drivers/net/hns3/hns3_cmd.c
@@ -245,7 +245,7 @@ hns3_is_special_opcode(uint16_t opcode)
 				  HNS3_OPC_QUERY_ALL_ERR_INFO,};
 	uint32_t i;
 
-	for (i = 0; i < ARRAY_SIZE(spec_opcode); i++)
+	for (i = 0; i < RTE_DIM(spec_opcode); i++)
 		if (spec_opcode[i] == opcode)
 			return true;
 
@@ -276,7 +276,7 @@ hns3_cmd_convert_err_code(uint16_t desc_ret)
 
 	uint32_t i;
 
-	for (i = 0; i < ARRAY_SIZE(hns3_cmdq_status); i++)
+	for (i = 0; i < RTE_DIM(hns3_cmdq_status); i++)
 		if (hns3_cmdq_status[i].imp_errcode == desc_ret)
 			return hns3_cmdq_status[i].linux_errcode;
 
diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h
index 666ef71..48f5307 100644
--- a/drivers/net/hns3/hns3_ethdev.h
+++ b/drivers/net/hns3/hns3_ethdev.h
@@ -998,8 +998,6 @@ static inline uint32_t hns3_read_reg(void *base, uint32_t reg)
 #define hns3_read_dev(a, reg) \
 	hns3_read_reg((a)->io_base, (reg))
 
-#define ARRAY_SIZE(x) RTE_DIM(x)
-
 #define NEXT_ITEM_OF_ACTION(act, actions, index)                        \
 	do {								\
 		act = (actions) + (index);				\
diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c
index 4511a49..49d6568 100644
--- a/drivers/net/hns3/hns3_flow.c
+++ b/drivers/net/hns3/hns3_flow.c
@@ -1054,37 +1054,37 @@ hns3_parse_normal(const struct rte_flow_item *item, struct hns3_fdir_rule *rule,
 	case RTE_FLOW_ITEM_TYPE_ETH:
 		ret = hns3_parse_eth(item, rule, error);
 		step_mngr->items = L2_next_items;
-		step_mngr->count = ARRAY_SIZE(L2_next_items);
+		step_mngr->count = RTE_DIM(L2_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_VLAN:
 		ret = hns3_parse_vlan(item, rule, error);
 		step_mngr->items = L2_next_items;
-		step_mngr->count = ARRAY_SIZE(L2_next_items);
+		step_mngr->count = RTE_DIM(L2_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_IPV4:
 		ret = hns3_parse_ipv4(item, rule, error);
 		step_mngr->items = L3_next_items;
-		step_mngr->count = ARRAY_SIZE(L3_next_items);
+		step_mngr->count = RTE_DIM(L3_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_IPV6:
 		ret = hns3_parse_ipv6(item, rule, error);
 		step_mngr->items = L3_next_items;
-		step_mngr->count = ARRAY_SIZE(L3_next_items);
+		step_mngr->count = RTE_DIM(L3_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_TCP:
 		ret = hns3_parse_tcp(item, rule, error);
 		step_mngr->items = L4_next_items;
-		step_mngr->count = ARRAY_SIZE(L4_next_items);
+		step_mngr->count = RTE_DIM(L4_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_UDP:
 		ret = hns3_parse_udp(item, rule, error);
 		step_mngr->items = L4_next_items;
-		step_mngr->count = ARRAY_SIZE(L4_next_items);
+		step_mngr->count = RTE_DIM(L4_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_SCTP:
 		ret = hns3_parse_sctp(item, rule, error);
 		step_mngr->items = L4_next_items;
-		step_mngr->count = ARRAY_SIZE(L4_next_items);
+		step_mngr->count = RTE_DIM(L4_next_items);
 		break;
 	default:
 		return rte_flow_error_set(error, ENOTSUP,
@@ -1188,7 +1188,7 @@ hns3_parse_fdir_filter(struct rte_eth_dev *dev,
 					  "Fdir not supported in VF");
 
 	step_mngr.items = first_items;
-	step_mngr.count = ARRAY_SIZE(first_items);
+	step_mngr.count = RTE_DIM(first_items);
 	for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
 		if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
 			continue;
@@ -1202,7 +1202,7 @@ hns3_parse_fdir_filter(struct rte_eth_dev *dev,
 			if (ret)
 				return ret;
 			step_mngr.items = tunnel_next_items;
-			step_mngr.count = ARRAY_SIZE(tunnel_next_items);
+			step_mngr.count = RTE_DIM(tunnel_next_items);
 		} else {
 			ret = hns3_parse_normal(item, rule, &step_mngr, error);
 			if (ret)
diff --git a/drivers/net/hns3/hns3_intr.c b/drivers/net/hns3/hns3_intr.c
index cc7d7c6..3155d7f 100644
--- a/drivers/net/hns3/hns3_intr.c
+++ b/drivers/net/hns3/hns3_intr.c
@@ -2206,8 +2206,8 @@ hns3_handle_type_reg_error_data(struct hns3_hw *hw,
 	type_id = err_info->type_id & HNS3_ERR_TYPE_MASK;
 	is_ras = err_info->type_id >> HNS3_ERR_TYPE_IS_RAS_OFFSET;
 
-	total_module = ARRAY_SIZE(hns3_hw_module_name);
-	total_type = ARRAY_SIZE(hns3_hw_error_type);
+	total_module = RTE_DIM(hns3_hw_module_name);
+	total_type = RTE_DIM(hns3_hw_error_type);
 
 	hns3_err(hw, "total_module:%u, total_type:%u",
 		 total_module, total_type);
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v2 4/6] net/hns3: improve IO path data cache usage
  2021-04-28  9:53 ` [dpdk-dev] [PATCH v2 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
                     ` (2 preceding siblings ...)
  2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 3/6] net/hns3: use RTE DIM instead of ARRAY SIZE Min Hu (Connor)
@ 2021-04-28  9:53   ` Min Hu (Connor)
  2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 5/6] net/hns3: log FDIR configuration Min Hu (Connor)
  2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 6/6] net/hns3: fix vector Rx burst can't exceed 32 Min Hu (Connor)
  5 siblings, 0 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-28  9:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

This patch improves data cache usage by:
1. Rearranging the frequently accessed rxq fields in the IO path into
the first 128B.
2. Rearranging the frequently accessed txq fields in the IO path into
the first 64B.
3. Aligning the ptype table to the cache line size (128B) instead of
the minimum cache line size (64B), because the L1/L2 cache line is 64B
and the L3 cache line is 128B on the Kunpeng ARM platform.

The performance gain is 1.5% in 64B packet macfwd scenarios.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_ethdev.h |   4 +-
 drivers/net/hns3/hns3_rxtx.h   | 126 ++++++++++++++++++++++++-----------------
 2 files changed, 77 insertions(+), 53 deletions(-)

diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h
index 48f5307..cee78f4 100644
--- a/drivers/net/hns3/hns3_ethdev.h
+++ b/drivers/net/hns3/hns3_ethdev.h
@@ -735,7 +735,7 @@ struct hns3_ptype_table {
 	 * descriptor, it functions only when firmware report the capability of
 	 * HNS3_CAPS_RXD_ADV_LAYOUT_B and driver enabled it.
 	 */
-	uint32_t ptype[HNS3_PTYPE_NUM] __rte_cache_min_aligned;
+	uint32_t ptype[HNS3_PTYPE_NUM] __rte_cache_aligned;
 };
 
 #define HNS3_FIXED_MAX_TQP_NUM_MODE		0
@@ -839,7 +839,7 @@ struct hns3_adapter {
 
 	uint64_t dev_caps_mask;
 
-	struct hns3_ptype_table ptype_tbl __rte_cache_min_aligned;
+	struct hns3_ptype_table ptype_tbl __rte_cache_aligned;
 };
 
 enum {
diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h
index 703c4b7..1e2e994 100644
--- a/drivers/net/hns3/hns3_rxtx.h
+++ b/drivers/net/hns3/hns3_rxtx.h
@@ -289,22 +289,14 @@ struct hns3_rx_bd_errors_stats {
 };
 
 struct hns3_rx_queue {
-	void *io_base;
 	volatile void *io_head_reg;
-	struct hns3_adapter *hns;
 	struct hns3_ptype_table *ptype_tbl;
 	struct rte_mempool *mb_pool;
 	struct hns3_desc *rx_ring;
-	uint64_t rx_ring_phys_addr; /* RX ring DMA address */
-	const struct rte_memzone *mz;
 	struct hns3_entry *sw_ring;
-	struct rte_mbuf *pkt_first_seg;
-	struct rte_mbuf *pkt_last_seg;
 
-	uint16_t queue_id;
 	uint16_t port_id;
 	uint16_t nb_rx_desc;
-	uint16_t rx_buf_len;
 	/*
 	 * threshold for the number of BDs waited to passed to hardware. If the
 	 * number exceeds the threshold, driver will pass these BDs to hardware.
@@ -318,8 +310,6 @@ struct hns3_rx_queue {
 	/* 4 if DEV_RX_OFFLOAD_KEEP_CRC offload set, 0 otherwise */
 	uint8_t crc_len;
 
-	bool rx_deferred_start; /* don't start this queue in dev start */
-	bool configured;        /* indicate if rx queue has been configured */
 	/*
 	 * Indicate whether ignore the outer VLAN field in the Rx BD reported
 	 * by the Hardware. Because the outer VLAN is the PVID if the PVID is
@@ -331,23 +321,45 @@ struct hns3_rx_queue {
 	 * driver does not need to perform PVID-related operation in Rx. At this
 	 * point, the pvid_sw_discard_en will be false.
 	 */
-	bool pvid_sw_discard_en;
-	bool ptype_en;          /* indicate if the ptype field enabled */
-	bool enabled;           /* indicate if Rx queue has been enabled */
+	uint8_t pvid_sw_discard_en:1;
+	uint8_t ptype_en:1;          /* indicate if the ptype field enabled */
+
+	uint64_t mbuf_initializer; /* value to init mbufs used with vector rx */
+	/* offset_table: used for vector, to solve execute re-order problem */
+	uint8_t offset_table[HNS3_VECTOR_RX_OFFSET_TABLE_LEN + 1];
+
+	uint16_t bulk_mbuf_num; /* indicate bulk_mbuf valid nums */
 
 	struct hns3_rx_basic_stats basic_stats;
+
+	struct rte_mbuf *pkt_first_seg;
+	struct rte_mbuf *pkt_last_seg;
+
+	struct rte_mbuf *bulk_mbuf[HNS3_BULK_ALLOC_MBUF_NUM];
+
 	/* DFX statistics that driver does not need to discard packets */
 	struct hns3_rx_dfx_stats dfx_stats;
 	/* Error statistics that driver needs to discard packets */
 	struct hns3_rx_bd_errors_stats err_stats;
 
-	struct rte_mbuf *bulk_mbuf[HNS3_BULK_ALLOC_MBUF_NUM];
-	uint16_t bulk_mbuf_num;
-
-	/* offset_table: used for vector, to solve execute re-order problem */
-	uint8_t offset_table[HNS3_VECTOR_RX_OFFSET_TABLE_LEN + 1];
-	uint64_t mbuf_initializer; /* value to init mbufs used with vector rx */
 	struct rte_mbuf fake_mbuf; /* fake mbuf used with vector rx */
+
+
+	/*
+	 * The following fields are not accessed in the I/O path, so they are
+	 * placed at the end.
+	 */
+	void *io_base;
+	struct hns3_adapter *hns;
+	uint64_t rx_ring_phys_addr; /* RX ring DMA address */
+	const struct rte_memzone *mz;
+
+	uint16_t queue_id;
+	uint16_t rx_buf_len;
+
+	bool configured;        /* indicate if rx queue has been configured */
+	bool rx_deferred_start; /* don't start this queue in dev start */
+	bool enabled;           /* indicate if Rx queue has been enabled */
 };
 
 struct hns3_tx_basic_stats {
@@ -407,16 +419,10 @@ struct hns3_tx_dfx_stats {
 };
 
 struct hns3_tx_queue {
-	void *io_base;
 	volatile void *io_tail_reg;
-	struct hns3_adapter *hns;
 	struct hns3_desc *tx_ring;
-	uint64_t tx_ring_phys_addr; /* TX ring DMA address */
-	const struct rte_memzone *mz;
 	struct hns3_entry *sw_ring;
 
-	uint16_t queue_id;
-	uint16_t port_id;
 	uint16_t nb_tx_desc;
 	/*
 	 * index of next BD whose corresponding rte_mbuf can be released by
@@ -432,21 +438,12 @@ struct hns3_tx_queue {
 	uint16_t tx_free_thresh;
 
 	/*
-	 * For better performance in tx datapath, releasing mbuf in batches is
-	 * required.
-	 * Only checking the VLD bit of the last descriptor in a batch of the
-	 * thresh descriptors does not mean that these descriptors are all sent
-	 * by hardware successfully. So we need to check that the VLD bits of
-	 * all descriptors are cleared. and then free all mbufs in the batch.
-	 * - tx_rs_thresh
-	 *   Number of mbufs released at a time.
-	 *
-	 * - free
-	 *   Tx mbuf free array used for preserving temporarily address of mbuf
-	 *   released back to mempool, when releasing mbuf in batches.
+	 * The minimum length of the packet supported by hardware in the Tx
+	 * direction.
 	 */
-	uint16_t tx_rs_thresh;
-	struct rte_mbuf **free;
+	uint8_t min_tx_pkt_len;
+
+	uint8_t max_non_tso_bd_num; /* max BD number of one non-TSO packet */
 
 	/*
 	 * tso mode.
@@ -464,7 +461,7 @@ struct hns3_tx_queue {
 	 *     checksum of packets that need TSO, so network driver software
 	 *     not need to recalculate it.
 	 */
-	uint8_t tso_mode;
+	uint16_t tso_mode:1;
 	/*
 	 * udp checksum mode.
 	 * value range:
@@ -480,16 +477,10 @@ struct hns3_tx_queue {
 	 *     In this mode, HW does not have the preceding problems and can
 	 *     directly calculate the checksum of these UDP packets.
 	 */
-	uint8_t udp_cksum_mode;
-	/*
-	 * The minimum length of the packet supported by hardware in the Tx
-	 * direction.
-	 */
-	uint32_t min_tx_pkt_len;
+	uint16_t udp_cksum_mode:1;
 
-	uint8_t max_non_tso_bd_num; /* max BD number of one non-TSO packet */
-	bool tx_deferred_start; /* don't start this queue in dev start */
-	bool configured;        /* indicate if tx queue has been configured */
+	uint16_t simple_bd_enable:1;
+	uint16_t tx_push_enable:1;    /* check whether the tx push is enabled */
 	/*
 	 * Indicate whether add the vlan_tci of the mbuf to the inner VLAN field
 	 * of Tx BD. Because the outer VLAN will always be the PVID when the
@@ -502,11 +493,44 @@ struct hns3_tx_queue {
 	 * PVID-related operations in Tx. And pvid_sw_shift_en will be false at
 	 * this point.
 	 */
-	bool pvid_sw_shift_en;
-	bool enabled;           /* indicate if Tx queue has been enabled */
+	uint16_t pvid_sw_shift_en:1;
+
+	/*
+	 * For better performance in tx datapath, releasing mbuf in batches is
+	 * required.
+	 * Only checking the VLD bit of the last descriptor in a batch of the
+	 * thresh descriptors does not mean that these descriptors are all sent
+	 * by hardware successfully. So we need to check that the VLD bits of
+	 * all descriptors are cleared. and then free all mbufs in the batch.
+	 * - tx_rs_thresh
+	 *   Number of mbufs released at a time.
+	 *
+	 * - free
+	 *   Tx mbuf free array used for preserving temporarily address of mbuf
+	 *   released back to mempool, when releasing mbuf in batches.
+	 */
+	uint16_t tx_rs_thresh;
+	struct rte_mbuf **free;
 
 	struct hns3_tx_basic_stats basic_stats;
 	struct hns3_tx_dfx_stats dfx_stats;
+
+
+	/*
+	 * The following fields are not accessed in the I/O path, so they are
+	 * placed at the end.
+	 */
+	void *io_base;
+	struct hns3_adapter *hns;
+	uint64_t tx_ring_phys_addr; /* TX ring DMA address */
+	const struct rte_memzone *mz;
+
+	uint16_t port_id;
+	uint16_t queue_id;
+
+	bool configured;        /* indicate if tx queue has been configured */
+	bool tx_deferred_start; /* don't start this queue in dev start */
+	bool enabled;           /* indicate if Tx queue has been enabled */
 };
 
 #define HNS3_GET_TX_QUEUE_PEND_BD_NUM(txq) \
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v2 5/6] net/hns3: log FDIR configuration
  2021-04-28  9:53 ` [dpdk-dev] [PATCH v2 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
                     ` (3 preceding siblings ...)
  2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 4/6] net/hns3: improve IO path data cache usage Min Hu (Connor)
@ 2021-04-28  9:53   ` Min Hu (Connor)
  2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 6/6] net/hns3: fix vector Rx burst can't exceed 32 Min Hu (Connor)
  5 siblings, 0 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-28  9:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

The rte_flow interface does not provide an API for the capability
set. Therefore, FDIR configuration logs are added to facilitate
debugging.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_fdir.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/net/hns3/hns3_fdir.c b/drivers/net/hns3/hns3_fdir.c
index 87c1aef..0ef0938 100644
--- a/drivers/net/hns3/hns3_fdir.c
+++ b/drivers/net/hns3/hns3_fdir.c
@@ -336,6 +336,8 @@ int hns3_init_fd_config(struct hns3_adapter *hns)
 	    BIT(INNER_IP_PROTO) | BIT(INNER_IP_TOS) |
 	    BIT(INNER_SRC_IP) | BIT(INNER_DST_IP) |
 	    BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
+	hns3_dbg(hw, "fdir tuple: inner<vlan_tag1 eth_type ip_src ip_dst "
+		  "ip_proto ip_tos l4_src_port l4_dst_port>");
 
 	/* If use max 400bit key, we can support tuples for ether type */
 	if (pf->fdir.fd_cfg.max_key_length == MAX_KEY_LENGTH) {
@@ -345,6 +347,9 @@ int hns3_init_fd_config(struct hns3_adapter *hns)
 		    BIT(OUTER_DST_PORT) | BIT(INNER_VLAN_TAG2) |
 		    BIT(OUTER_TUN_VNI) | BIT(OUTER_TUN_FLOW_ID) |
 		    BIT(OUTER_ETH_TYPE) | BIT(OUTER_IP_PROTO);
+		hns3_dbg(hw, "fdir tuple more: inner<dst_mac src_mac "
+			  "vlan_tag2 sctp_tag> outer<eth_type ip_proto "
+			  "l4_src_port l4_dst_port tun_vni tun_flow_id>");
 	}
 
 	/* roce_type is used to filter roce frames
@@ -352,6 +357,7 @@ int hns3_init_fd_config(struct hns3_adapter *hns)
 	 */
 	key_cfg->meta_data_active = BIT(DST_VPORT) | BIT(TUNNEL_PACKET) |
 	    BIT(VLAN_NUMBER);
+	hns3_dbg(hw, "fdir meta data: dst_vport tunnel_packet vlan_number");
 
 	ret = hns3_get_fd_allocation(hw,
 				     &pf->fdir.fd_cfg.rule_num[HNS3_FD_STAGE_1],
@@ -361,6 +367,13 @@ int hns3_init_fd_config(struct hns3_adapter *hns)
 	if (ret)
 		return ret;
 
+	hns3_dbg(hw, "fdir: stage1<rules-%u counters-%u> stage2<rules-%u "
+		  "counters=%u>",
+		  pf->fdir.fd_cfg.rule_num[HNS3_FD_STAGE_1],
+		  pf->fdir.fd_cfg.cnt_num[HNS3_FD_STAGE_1],
+		  pf->fdir.fd_cfg.rule_num[HNS3_FD_STAGE_2],
+		  pf->fdir.fd_cfg.cnt_num[HNS3_FD_STAGE_2]);
+
 	return hns3_set_fd_key_config(hns);
 }
 
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v2 6/6] net/hns3: fix vector Rx burst can't exceed 32
  2021-04-28  9:53 ` [dpdk-dev] [PATCH v2 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
                     ` (4 preceding siblings ...)
  2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 5/6] net/hns3: log FDIR configuration Min Hu (Connor)
@ 2021-04-28  9:53   ` Min Hu (Connor)
  5 siblings, 0 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-28  9:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

Currently, the driver uses the macro HNS3_DEFAULT_RX_BURST, whose value
is 32, to limit the vector Rx burst size. As a result, the burst size
can't exceed 32.

This patch fixes this problem by supporting bigger burst sizes.
It also adjusts HNS3_DEFAULT_RX_BURST to 64, as it performs better than 32.

Fixes: a3d4f4d291d7 ("net/hns3: support NEON Rx")
Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx")
Cc: stable@dpdk.org

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_rxtx.h         |  2 +-
 drivers/net/hns3/hns3_rxtx_vec.c     | 41 +++++++++++++++++++++++++++++-------
 drivers/net/hns3/hns3_rxtx_vec_sve.c | 37 ++++++++++++++++++++++++++------
 3 files changed, 65 insertions(+), 15 deletions(-)

diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h
index 1e2e994..ba24e00 100644
--- a/drivers/net/hns3/hns3_rxtx.h
+++ b/drivers/net/hns3/hns3_rxtx.h
@@ -20,7 +20,7 @@
 #define HNS3_DEFAULT_TX_RS_THRESH	32
 #define HNS3_TX_FAST_FREE_AHEAD		64
 
-#define HNS3_DEFAULT_RX_BURST		32
+#define HNS3_DEFAULT_RX_BURST		64
 #if (HNS3_DEFAULT_RX_BURST > 64)
 #error "PMD HNS3: HNS3_DEFAULT_RX_BURST must <= 64\n"
 #endif
diff --git a/drivers/net/hns3/hns3_rxtx_vec.c b/drivers/net/hns3/hns3_rxtx_vec.c
index dc1e1ae..66d8904 100644
--- a/drivers/net/hns3/hns3_rxtx_vec.c
+++ b/drivers/net/hns3/hns3_rxtx_vec.c
@@ -108,14 +108,13 @@ hns3_recv_pkts_vec(void *__restrict rx_queue,
 {
 	struct hns3_rx_queue *rxq = rx_queue;
 	struct hns3_desc *rxdp = &rxq->rx_ring[rxq->next_to_use];
-	uint64_t bd_err_mask;  /* bit mask indicate whick pkts is error */
+	uint64_t pkt_err_mask;  /* bit mask indicate whick pkts is error */
 	uint16_t nb_rx;
 
-	nb_pkts = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST);
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_DEFAULT_DESCS_PER_LOOP);
-
 	rte_prefetch_non_temporal(rxdp);
 
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_DEFAULT_DESCS_PER_LOOP);
+
 	if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
 		hns3_rxq_rearm_mbuf(rxq);
 
@@ -128,10 +127,36 @@ hns3_recv_pkts_vec(void *__restrict rx_queue,
 	rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 2].mbuf);
 	rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 3].mbuf);
 
-	bd_err_mask = 0;
-	nb_rx = hns3_recv_burst_vec(rxq, rx_pkts, nb_pkts, &bd_err_mask);
-	if (unlikely(bd_err_mask))
-		nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, bd_err_mask);
+	if (likely(nb_pkts <= HNS3_DEFAULT_RX_BURST)) {
+		pkt_err_mask = 0;
+		nb_rx = hns3_recv_burst_vec(rxq, rx_pkts, nb_pkts,
+					    &pkt_err_mask);
+		if (unlikely(pkt_err_mask > 0))
+			nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx,
+							pkt_err_mask);
+		return nb_rx;
+	}
+
+	nb_rx = 0;
+	while (nb_pkts > 0) {
+		uint16_t ret, n;
+
+		n = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST);
+		pkt_err_mask = 0;
+		ret = hns3_recv_burst_vec(rxq, &rx_pkts[nb_rx], n,
+					  &pkt_err_mask);
+		nb_pkts -= ret;
+		if (unlikely(pkt_err_mask > 0))
+			nb_rx += hns3_rx_reassemble_pkts(&rx_pkts[nb_rx], ret,
+							 pkt_err_mask);
+		else
+			nb_rx += ret;
+		if (ret < n)
+			break;
+
+		if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
+			hns3_rxq_rearm_mbuf(rxq);
+	}
 
 	return nb_rx;
 }
diff --git a/drivers/net/hns3/hns3_rxtx_vec_sve.c b/drivers/net/hns3/hns3_rxtx_vec_sve.c
index ef6c875..44e5293 100644
--- a/drivers/net/hns3/hns3_rxtx_vec_sve.c
+++ b/drivers/net/hns3/hns3_rxtx_vec_sve.c
@@ -292,12 +292,11 @@ hns3_recv_pkts_vec_sve(void *__restrict rx_queue,
 {
 	struct hns3_rx_queue *rxq = rx_queue;
 	struct hns3_desc *rxdp = &rxq->rx_ring[rxq->next_to_use];
-	uint64_t bd_err_mask;  /* bit mask indicate whick pkts is error */
+	uint64_t pkt_err_mask;  /* bit mask indicate whick pkts is error */
 	uint16_t nb_rx;
 
 	rte_prefetch_non_temporal(rxdp);
 
-	nb_pkts = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST);
 	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_SVE_DEFAULT_DESCS_PER_LOOP);
 
 	if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
@@ -309,10 +308,36 @@ hns3_recv_pkts_vec_sve(void *__restrict rx_queue,
 
 	hns3_rx_prefetch_mbuf_sve(&rxq->sw_ring[rxq->next_to_use]);
 
-	bd_err_mask = 0;
-	nb_rx = hns3_recv_burst_vec_sve(rxq, rx_pkts, nb_pkts, &bd_err_mask);
-	if (unlikely(bd_err_mask))
-		nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, bd_err_mask);
+	if (likely(nb_pkts <= HNS3_DEFAULT_RX_BURST)) {
+		pkt_err_mask = 0;
+		nb_rx = hns3_recv_burst_vec_sve(rxq, rx_pkts, nb_pkts,
+						&pkt_err_mask);
+		if (unlikely(pkt_err_mask > 0))
+			nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx,
+							pkt_err_mask);
+		return nb_rx;
+	}
+
+	nb_rx = 0;
+	while (nb_pkts > 0) {
+		uint16_t ret, n;
+
+		n = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST);
+		pkt_err_mask = 0;
+		ret = hns3_recv_burst_vec_sve(rxq, &rx_pkts[nb_rx], n,
+					      &pkt_err_mask);
+		nb_pkts -= ret;
+		if (unlikely(pkt_err_mask > 0))
+			nb_rx += hns3_rx_reassemble_pkts(&rx_pkts[nb_rx], ret,
+							 pkt_err_mask);
+		else
+			nb_rx += ret;
+		if (ret < n)
+			break;
+
+		if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
+			hns3_rxq_rearm_mbuf_sve(rxq);
+	}
 
 	return nb_rx;
 }
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v3 0/6] optimization and bugfix for hns3 PMD
  2021-04-26  3:34 [dpdk-dev] [PATCH 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
                   ` (6 preceding siblings ...)
  2021-04-28  9:53 ` [dpdk-dev] [PATCH v2 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
@ 2021-04-30  6:28 ` Min Hu (Connor)
  2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 1/6] net/hns3: delete some unused capabilities Min Hu (Connor)
                     ` (6 more replies)
  7 siblings, 7 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-30  6:28 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

This patch set contains one patch for performance optimization,
namely 'net/hns3: improve IO path data cache usage'.
The others are bugfixes for hns3 PMD.

Chengwen Feng (6):
  net/hns3: delete some unused capabilities
  net/hns3: modify write reg opt API impl
  net/hns3: use RTE DIM instead of ARRAY SIZE
  net/hns3: improve IO path data cache usage
  net/hns3: log FDIR configuration
  net/hns3: fix vector Rx burst can't exceed 32
---
v3:
* moved the packet error judgment into the subfunction.

v2:
* fixed commit info.
* changed hns3_info to hns3_dbg.
* deleted tx push, simple bd capa.

 drivers/net/hns3/hns3_cmd.c          |  15 +---
 drivers/net/hns3/hns3_cmd.h          |  17 ++---
 drivers/net/hns3/hns3_ethdev.h       |  43 +++++-------
 drivers/net/hns3/hns3_fdir.c         |  13 ++++
 drivers/net/hns3/hns3_flow.c         |  18 ++---
 drivers/net/hns3/hns3_intr.c         |   4 +-
 drivers/net/hns3/hns3_rxtx.h         | 128 +++++++++++++++++++++--------------
 drivers/net/hns3/hns3_rxtx_vec.c     |  36 +++++++---
 drivers/net/hns3/hns3_rxtx_vec.h     |   3 +
 drivers/net/hns3/hns3_rxtx_vec_sve.c |  32 +++++++--
 10 files changed, 183 insertions(+), 126 deletions(-)

-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v3 1/6] net/hns3: delete some unused capabilities
  2021-04-30  6:28 ` [dpdk-dev] [PATCH v3 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
@ 2021-04-30  6:28   ` Min Hu (Connor)
  2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 2/6] net/hns3: modify write reg opt API impl Min Hu (Connor)
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-30  6:28 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

This patch deletes some unused capabilities, including:
1. Delete some unused firmware capability definitions, which are:
UDP_GSO, ATR, INT_QL, SIMPLE_BD, TX_PUSH, FEC and PAUSE.
2. Delete some unused driver capability definitions, which are:
UDP_GSO, TX_PUSH.
3. Also redefine HNS3_DEV_SUPPORT_* as an enum type, and change some of
the values. Note: the HNS3_DEV_SUPPORT_* values are used only inside
the driver, so it's safe to change the values.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_cmd.c    | 11 -----------
 drivers/net/hns3/hns3_cmd.h    | 17 +++++++----------
 drivers/net/hns3/hns3_ethdev.h | 29 +++++++++++------------------
 3 files changed, 18 insertions(+), 39 deletions(-)

diff --git a/drivers/net/hns3/hns3_cmd.c b/drivers/net/hns3/hns3_cmd.c
index 5beb3d9..8ebe5e5 100644
--- a/drivers/net/hns3/hns3_cmd.c
+++ b/drivers/net/hns3/hns3_cmd.c
@@ -419,21 +419,14 @@ hns3_get_caps_name(uint32_t caps_id)
 		enum HNS3_CAPS_BITS caps;
 		const char *name;
 	} dev_caps[] = {
-		{ HNS3_CAPS_UDP_GSO_B,         "udp_gso"         },
-		{ HNS3_CAPS_ATR_B,             "atr"             },
 		{ HNS3_CAPS_FD_QUEUE_REGION_B, "fd_queue_region" },
 		{ HNS3_CAPS_PTP_B,             "ptp"             },
-		{ HNS3_CAPS_INT_QL_B,          "int_ql"          },
-		{ HNS3_CAPS_SIMPLE_BD_B,       "simple_bd"       },
-		{ HNS3_CAPS_TX_PUSH_B,         "tx_push"         },
 		{ HNS3_CAPS_PHY_IMP_B,         "phy_imp"         },
 		{ HNS3_CAPS_TQP_TXRX_INDEP_B,  "tqp_txrx_indep"  },
 		{ HNS3_CAPS_HW_PAD_B,          "hw_pad"          },
 		{ HNS3_CAPS_STASH_B,           "stash"           },
 		{ HNS3_CAPS_UDP_TUNNEL_CSUM_B, "udp_tunnel_csum" },
 		{ HNS3_CAPS_RAS_IMP_B,         "ras_imp"         },
-		{ HNS3_CAPS_FEC_B,             "fec"             },
-		{ HNS3_CAPS_PAUSE_B,           "pause"           },
 		{ HNS3_CAPS_RXD_ADV_LAYOUT_B,  "rxd_adv_layout"  }
 	};
 	uint32_t i;
@@ -480,8 +473,6 @@ hns3_parse_capability(struct hns3_hw *hw,
 {
 	uint32_t caps = rte_le_to_cpu_32(cmd->caps[0]);
 
-	if (hns3_get_bit(caps, HNS3_CAPS_UDP_GSO_B))
-		hns3_set_bit(hw->capability, HNS3_DEV_SUPPORT_UDP_GSO_B, 1);
 	if (hns3_get_bit(caps, HNS3_CAPS_FD_QUEUE_REGION_B))
 		hns3_set_bit(hw->capability, HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B,
 			     1);
@@ -498,8 +489,6 @@ hns3_parse_capability(struct hns3_hw *hw,
 			hns3_warn(hw, "ignore PTP capability due to lack of "
 				  "rxd advanced layout capability.");
 	}
-	if (hns3_get_bit(caps, HNS3_CAPS_TX_PUSH_B))
-		hns3_set_bit(hw->capability, HNS3_DEV_SUPPORT_TX_PUSH_B, 1);
 	if (hns3_get_bit(caps, HNS3_CAPS_PHY_IMP_B))
 		hns3_set_bit(hw->capability, HNS3_DEV_SUPPORT_COPPER_B, 1);
 	if (hns3_get_bit(caps, HNS3_CAPS_TQP_TXRX_INDEP_B))
diff --git a/drivers/net/hns3/hns3_cmd.h b/drivers/net/hns3/hns3_cmd.h
index bf1772d..944e3d6 100644
--- a/drivers/net/hns3/hns3_cmd.h
+++ b/drivers/net/hns3/hns3_cmd.h
@@ -306,22 +306,19 @@ struct hns3_rx_priv_buff_cmd {
 #define HNS3_FW_VERSION_BYTE0_M		GENMASK(7, 0)
 
 enum HNS3_CAPS_BITS {
-	HNS3_CAPS_UDP_GSO_B,
-	HNS3_CAPS_ATR_B,
-	HNS3_CAPS_FD_QUEUE_REGION_B,
+	/*
+	 * The following capability index definitions must be the same as those
+	 * of the firmware.
+	 */
+	HNS3_CAPS_FD_QUEUE_REGION_B = 2,
 	HNS3_CAPS_PTP_B,
-	HNS3_CAPS_INT_QL_B,
-	HNS3_CAPS_SIMPLE_BD_B,
-	HNS3_CAPS_TX_PUSH_B,
-	HNS3_CAPS_PHY_IMP_B,
+	HNS3_CAPS_PHY_IMP_B = 7,
 	HNS3_CAPS_TQP_TXRX_INDEP_B,
 	HNS3_CAPS_HW_PAD_B,
 	HNS3_CAPS_STASH_B,
 	HNS3_CAPS_UDP_TUNNEL_CSUM_B,
 	HNS3_CAPS_RAS_IMP_B,
-	HNS3_CAPS_FEC_B,
-	HNS3_CAPS_PAUSE_B,
-	HNS3_CAPS_RXD_ADV_LAYOUT_B,
+	HNS3_CAPS_RXD_ADV_LAYOUT_B = 15,
 };
 
 enum HNS3_API_CAP_BITS {
diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h
index b2dacb9..608fd0c 100644
--- a/drivers/net/hns3/hns3_ethdev.h
+++ b/drivers/net/hns3/hns3_ethdev.h
@@ -858,17 +858,17 @@ enum {
 
 #define HNS3_DEVARG_DEV_CAPS_MASK	"dev_caps_mask"
 
-#define HNS3_DEV_SUPPORT_DCB_B			0x0
-#define HNS3_DEV_SUPPORT_COPPER_B		0x1
-#define HNS3_DEV_SUPPORT_UDP_GSO_B		0x2
-#define HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B	0x3
-#define HNS3_DEV_SUPPORT_PTP_B			0x4
-#define HNS3_DEV_SUPPORT_TX_PUSH_B		0x5
-#define HNS3_DEV_SUPPORT_INDEP_TXRX_B		0x6
-#define HNS3_DEV_SUPPORT_STASH_B		0x7
-#define HNS3_DEV_SUPPORT_RXD_ADV_LAYOUT_B	0x9
-#define HNS3_DEV_SUPPORT_OUTER_UDP_CKSUM_B	0xA
-#define HNS3_DEV_SUPPORT_RAS_IMP_B		0xB
+enum {
+	HNS3_DEV_SUPPORT_DCB_B,
+	HNS3_DEV_SUPPORT_COPPER_B,
+	HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B,
+	HNS3_DEV_SUPPORT_PTP_B,
+	HNS3_DEV_SUPPORT_INDEP_TXRX_B,
+	HNS3_DEV_SUPPORT_STASH_B,
+	HNS3_DEV_SUPPORT_RXD_ADV_LAYOUT_B,
+	HNS3_DEV_SUPPORT_OUTER_UDP_CKSUM_B,
+	HNS3_DEV_SUPPORT_RAS_IMP_B,
+};
 
 #define hns3_dev_dcb_supported(hw) \
 	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_DCB_B)
@@ -877,10 +877,6 @@ enum {
 #define hns3_dev_copper_supported(hw) \
 	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_COPPER_B)
 
-/* Support UDP GSO offload */
-#define hns3_dev_udp_gso_supported(hw) \
-	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_UDP_GSO_B)
-
 /* Support the queue region action rule of flow directory */
 #define hns3_dev_fd_queue_region_supported(hw) \
 	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_FD_QUEUE_REGION_B)
@@ -889,9 +885,6 @@ enum {
 #define hns3_dev_ptp_supported(hw) \
 	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_PTP_B)
 
-#define hns3_dev_tx_push_supported(hw) \
-	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_TX_PUSH_B)
-
 /* Support to Independently enable/disable/reset Tx or Rx queues */
 #define hns3_dev_indep_txrx_supported(hw) \
 	hns3_get_bit((hw)->capability, HNS3_DEV_SUPPORT_INDEP_TXRX_B)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v3 2/6] net/hns3: modify write reg opt API impl
  2021-04-30  6:28 ` [dpdk-dev] [PATCH v3 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
  2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 1/6] net/hns3: delete some unused capabilities Min Hu (Connor)
@ 2021-04-30  6:28   ` Min Hu (Connor)
  2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 3/6] net/hns3: use RTE DIM instead of ARRAY SIZE Min Hu (Connor)
                     ` (4 subsequent siblings)
  6 siblings, 0 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-30  6:28 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

This patch modifies the implementation of hns3_write_reg_opt() to call
rte_write32() directly instead of rte_io_wmb() followed by
rte_write32_relaxed(), because rte_write32() already uses rte_io_wmb().

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_ethdev.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h
index 608fd0c..23f3722 100644
--- a/drivers/net/hns3/hns3_ethdev.h
+++ b/drivers/net/hns3/hns3_ethdev.h
@@ -980,13 +980,13 @@ static inline void hns3_write_reg(void *base, uint32_t reg, uint32_t value)
 }
 
 /*
- * The optimized function for writing registers used in the '.rx_pkt_burst' and
- * '.tx_pkt_burst' ops implementation function.
+ * The optimized function for writing registers reduces one address addition
+ * calculation, it was used in the '.rx_pkt_burst' and '.tx_pkt_burst' ops
+ * implementation function.
  */
 static inline void hns3_write_reg_opt(volatile void *addr, uint32_t value)
 {
-	rte_io_wmb();
-	rte_write32_relaxed(rte_cpu_to_le_32(value), addr);
+	rte_write32(rte_cpu_to_le_32(value), addr);
 }
 
 static inline uint32_t hns3_read_reg(void *base, uint32_t reg)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v3 3/6] net/hns3: use RTE DIM instead of ARRAY SIZE
  2021-04-30  6:28 ` [dpdk-dev] [PATCH v3 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
  2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 1/6] net/hns3: delete some unused capabilities Min Hu (Connor)
  2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 2/6] net/hns3: modify write reg opt API impl Min Hu (Connor)
@ 2021-04-30  6:28   ` Min Hu (Connor)
  2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 4/6] net/hns3: improve IO path data cache usage Min Hu (Connor)
                     ` (3 subsequent siblings)
  6 siblings, 0 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-30  6:28 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

This patch uses RTE_DIM() instead of ARRAY_SIZE().

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_cmd.c    |  4 ++--
 drivers/net/hns3/hns3_ethdev.h |  2 --
 drivers/net/hns3/hns3_flow.c   | 18 +++++++++---------
 drivers/net/hns3/hns3_intr.c   |  4 ++--
 4 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/net/hns3/hns3_cmd.c b/drivers/net/hns3/hns3_cmd.c
index 8ebe5e5..44a4e28 100644
--- a/drivers/net/hns3/hns3_cmd.c
+++ b/drivers/net/hns3/hns3_cmd.c
@@ -241,7 +241,7 @@ hns3_is_special_opcode(uint16_t opcode)
 				  HNS3_OPC_QUERY_ALL_ERR_INFO,};
 	uint32_t i;
 
-	for (i = 0; i < ARRAY_SIZE(spec_opcode); i++)
+	for (i = 0; i < RTE_DIM(spec_opcode); i++)
 		if (spec_opcode[i] == opcode)
 			return true;
 
@@ -272,7 +272,7 @@ hns3_cmd_convert_err_code(uint16_t desc_ret)
 
 	uint32_t i;
 
-	for (i = 0; i < ARRAY_SIZE(hns3_cmdq_status); i++)
+	for (i = 0; i < RTE_DIM(hns3_cmdq_status); i++)
 		if (hns3_cmdq_status[i].imp_errcode == desc_ret)
 			return hns3_cmdq_status[i].linux_errcode;
 
diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h
index 23f3722..a6622a6 100644
--- a/drivers/net/hns3/hns3_ethdev.h
+++ b/drivers/net/hns3/hns3_ethdev.h
@@ -1001,8 +1001,6 @@ static inline uint32_t hns3_read_reg(void *base, uint32_t reg)
 #define hns3_read_dev(a, reg) \
 	hns3_read_reg((a)->io_base, (reg))
 
-#define ARRAY_SIZE(x) RTE_DIM(x)
-
 #define NEXT_ITEM_OF_ACTION(act, actions, index)                        \
 	do {								\
 		act = (actions) + (index);				\
diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c
index 4511a49..49d6568 100644
--- a/drivers/net/hns3/hns3_flow.c
+++ b/drivers/net/hns3/hns3_flow.c
@@ -1054,37 +1054,37 @@ hns3_parse_normal(const struct rte_flow_item *item, struct hns3_fdir_rule *rule,
 	case RTE_FLOW_ITEM_TYPE_ETH:
 		ret = hns3_parse_eth(item, rule, error);
 		step_mngr->items = L2_next_items;
-		step_mngr->count = ARRAY_SIZE(L2_next_items);
+		step_mngr->count = RTE_DIM(L2_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_VLAN:
 		ret = hns3_parse_vlan(item, rule, error);
 		step_mngr->items = L2_next_items;
-		step_mngr->count = ARRAY_SIZE(L2_next_items);
+		step_mngr->count = RTE_DIM(L2_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_IPV4:
 		ret = hns3_parse_ipv4(item, rule, error);
 		step_mngr->items = L3_next_items;
-		step_mngr->count = ARRAY_SIZE(L3_next_items);
+		step_mngr->count = RTE_DIM(L3_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_IPV6:
 		ret = hns3_parse_ipv6(item, rule, error);
 		step_mngr->items = L3_next_items;
-		step_mngr->count = ARRAY_SIZE(L3_next_items);
+		step_mngr->count = RTE_DIM(L3_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_TCP:
 		ret = hns3_parse_tcp(item, rule, error);
 		step_mngr->items = L4_next_items;
-		step_mngr->count = ARRAY_SIZE(L4_next_items);
+		step_mngr->count = RTE_DIM(L4_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_UDP:
 		ret = hns3_parse_udp(item, rule, error);
 		step_mngr->items = L4_next_items;
-		step_mngr->count = ARRAY_SIZE(L4_next_items);
+		step_mngr->count = RTE_DIM(L4_next_items);
 		break;
 	case RTE_FLOW_ITEM_TYPE_SCTP:
 		ret = hns3_parse_sctp(item, rule, error);
 		step_mngr->items = L4_next_items;
-		step_mngr->count = ARRAY_SIZE(L4_next_items);
+		step_mngr->count = RTE_DIM(L4_next_items);
 		break;
 	default:
 		return rte_flow_error_set(error, ENOTSUP,
@@ -1188,7 +1188,7 @@ hns3_parse_fdir_filter(struct rte_eth_dev *dev,
 					  "Fdir not supported in VF");
 
 	step_mngr.items = first_items;
-	step_mngr.count = ARRAY_SIZE(first_items);
+	step_mngr.count = RTE_DIM(first_items);
 	for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
 		if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
 			continue;
@@ -1202,7 +1202,7 @@ hns3_parse_fdir_filter(struct rte_eth_dev *dev,
 			if (ret)
 				return ret;
 			step_mngr.items = tunnel_next_items;
-			step_mngr.count = ARRAY_SIZE(tunnel_next_items);
+			step_mngr.count = RTE_DIM(tunnel_next_items);
 		} else {
 			ret = hns3_parse_normal(item, rule, &step_mngr, error);
 			if (ret)
diff --git a/drivers/net/hns3/hns3_intr.c b/drivers/net/hns3/hns3_intr.c
index ba6a044..b743b41 100644
--- a/drivers/net/hns3/hns3_intr.c
+++ b/drivers/net/hns3/hns3_intr.c
@@ -2206,8 +2206,8 @@ hns3_handle_type_reg_error_data(struct hns3_hw *hw,
 	type_id = err_info->type_id & HNS3_ERR_TYPE_MASK;
 	is_ras = err_info->type_id >> HNS3_ERR_TYPE_IS_RAS_OFFSET;
 
-	total_module = ARRAY_SIZE(hns3_hw_module_name);
-	total_type = ARRAY_SIZE(hns3_hw_error_type);
+	total_module = RTE_DIM(hns3_hw_module_name);
+	total_type = RTE_DIM(hns3_hw_error_type);
 
 	hns3_err(hw, "total_module:%u, total_type:%u",
 		 total_module, total_type);
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v3 4/6] net/hns3: improve IO path data cache usage
  2021-04-30  6:28 ` [dpdk-dev] [PATCH v3 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
                     ` (2 preceding siblings ...)
  2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 3/6] net/hns3: use RTE DIM instead of ARRAY SIZE Min Hu (Connor)
@ 2021-04-30  6:28   ` Min Hu (Connor)
  2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 5/6] net/hns3: log FDIR configuration Min Hu (Connor)
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-30  6:28 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

This patch improves data cache usage by:
1. Rearrange the rxq frequently accessed fields in the IO path to the
first 128B.
2. Rearrange the txq frequently accessed fields in the IO path to the
first 64B.
3. Make sure the ptype table is aligned to the cacheline size, which is
128B, instead of the min cacheline size, which is 64B, because the L1/L2
cacheline is 64B and the L3 cacheline is 128B on the Kunpeng ARM platform.

The performance gains are 1.5% in 64B packet macfwd scenarios.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_ethdev.h |   4 +-
 drivers/net/hns3/hns3_rxtx.h   | 126 ++++++++++++++++++++++++-----------------
 2 files changed, 77 insertions(+), 53 deletions(-)

diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h
index a6622a6..133b484 100644
--- a/drivers/net/hns3/hns3_ethdev.h
+++ b/drivers/net/hns3/hns3_ethdev.h
@@ -738,7 +738,7 @@ struct hns3_ptype_table {
 	 * descriptor, it functions only when firmware report the capability of
 	 * HNS3_CAPS_RXD_ADV_LAYOUT_B and driver enabled it.
 	 */
-	uint32_t ptype[HNS3_PTYPE_NUM] __rte_cache_min_aligned;
+	uint32_t ptype[HNS3_PTYPE_NUM] __rte_cache_aligned;
 };
 
 #define HNS3_FIXED_MAX_TQP_NUM_MODE		0
@@ -842,7 +842,7 @@ struct hns3_adapter {
 
 	uint64_t dev_caps_mask;
 
-	struct hns3_ptype_table ptype_tbl __rte_cache_min_aligned;
+	struct hns3_ptype_table ptype_tbl __rte_cache_aligned;
 };
 
 enum {
diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h
index 703c4b7..1e2e994 100644
--- a/drivers/net/hns3/hns3_rxtx.h
+++ b/drivers/net/hns3/hns3_rxtx.h
@@ -289,22 +289,14 @@ struct hns3_rx_bd_errors_stats {
 };
 
 struct hns3_rx_queue {
-	void *io_base;
 	volatile void *io_head_reg;
-	struct hns3_adapter *hns;
 	struct hns3_ptype_table *ptype_tbl;
 	struct rte_mempool *mb_pool;
 	struct hns3_desc *rx_ring;
-	uint64_t rx_ring_phys_addr; /* RX ring DMA address */
-	const struct rte_memzone *mz;
 	struct hns3_entry *sw_ring;
-	struct rte_mbuf *pkt_first_seg;
-	struct rte_mbuf *pkt_last_seg;
 
-	uint16_t queue_id;
 	uint16_t port_id;
 	uint16_t nb_rx_desc;
-	uint16_t rx_buf_len;
 	/*
 	 * threshold for the number of BDs waited to passed to hardware. If the
 	 * number exceeds the threshold, driver will pass these BDs to hardware.
@@ -318,8 +310,6 @@ struct hns3_rx_queue {
 	/* 4 if DEV_RX_OFFLOAD_KEEP_CRC offload set, 0 otherwise */
 	uint8_t crc_len;
 
-	bool rx_deferred_start; /* don't start this queue in dev start */
-	bool configured;        /* indicate if rx queue has been configured */
 	/*
 	 * Indicate whether ignore the outer VLAN field in the Rx BD reported
 	 * by the Hardware. Because the outer VLAN is the PVID if the PVID is
@@ -331,23 +321,45 @@ struct hns3_rx_queue {
 	 * driver does not need to perform PVID-related operation in Rx. At this
 	 * point, the pvid_sw_discard_en will be false.
 	 */
-	bool pvid_sw_discard_en;
-	bool ptype_en;          /* indicate if the ptype field enabled */
-	bool enabled;           /* indicate if Rx queue has been enabled */
+	uint8_t pvid_sw_discard_en:1;
+	uint8_t ptype_en:1;          /* indicate if the ptype field enabled */
+
+	uint64_t mbuf_initializer; /* value to init mbufs used with vector rx */
+	/* offset_table: used for vector, to solve execute re-order problem */
+	uint8_t offset_table[HNS3_VECTOR_RX_OFFSET_TABLE_LEN + 1];
+
+	uint16_t bulk_mbuf_num; /* indicate bulk_mbuf valid nums */
 
 	struct hns3_rx_basic_stats basic_stats;
+
+	struct rte_mbuf *pkt_first_seg;
+	struct rte_mbuf *pkt_last_seg;
+
+	struct rte_mbuf *bulk_mbuf[HNS3_BULK_ALLOC_MBUF_NUM];
+
 	/* DFX statistics that driver does not need to discard packets */
 	struct hns3_rx_dfx_stats dfx_stats;
 	/* Error statistics that driver needs to discard packets */
 	struct hns3_rx_bd_errors_stats err_stats;
 
-	struct rte_mbuf *bulk_mbuf[HNS3_BULK_ALLOC_MBUF_NUM];
-	uint16_t bulk_mbuf_num;
-
-	/* offset_table: used for vector, to solve execute re-order problem */
-	uint8_t offset_table[HNS3_VECTOR_RX_OFFSET_TABLE_LEN + 1];
-	uint64_t mbuf_initializer; /* value to init mbufs used with vector rx */
 	struct rte_mbuf fake_mbuf; /* fake mbuf used with vector rx */
+
+
+	/*
+	 * The following fields are not accessed in the I/O path, so they are
+	 * placed at the end.
+	 */
+	void *io_base;
+	struct hns3_adapter *hns;
+	uint64_t rx_ring_phys_addr; /* RX ring DMA address */
+	const struct rte_memzone *mz;
+
+	uint16_t queue_id;
+	uint16_t rx_buf_len;
+
+	bool configured;        /* indicate if rx queue has been configured */
+	bool rx_deferred_start; /* don't start this queue in dev start */
+	bool enabled;           /* indicate if Rx queue has been enabled */
 };
 
 struct hns3_tx_basic_stats {
@@ -407,16 +419,10 @@ struct hns3_tx_dfx_stats {
 };
 
 struct hns3_tx_queue {
-	void *io_base;
 	volatile void *io_tail_reg;
-	struct hns3_adapter *hns;
 	struct hns3_desc *tx_ring;
-	uint64_t tx_ring_phys_addr; /* TX ring DMA address */
-	const struct rte_memzone *mz;
 	struct hns3_entry *sw_ring;
 
-	uint16_t queue_id;
-	uint16_t port_id;
 	uint16_t nb_tx_desc;
 	/*
 	 * index of next BD whose corresponding rte_mbuf can be released by
@@ -432,21 +438,12 @@ struct hns3_tx_queue {
 	uint16_t tx_free_thresh;
 
 	/*
-	 * For better performance in tx datapath, releasing mbuf in batches is
-	 * required.
-	 * Only checking the VLD bit of the last descriptor in a batch of the
-	 * thresh descriptors does not mean that these descriptors are all sent
-	 * by hardware successfully. So we need to check that the VLD bits of
-	 * all descriptors are cleared. and then free all mbufs in the batch.
-	 * - tx_rs_thresh
-	 *   Number of mbufs released at a time.
-	 *
-	 * - free
-	 *   Tx mbuf free array used for preserving temporarily address of mbuf
-	 *   released back to mempool, when releasing mbuf in batches.
+	 * The minimum length of the packet supported by hardware in the Tx
+	 * direction.
 	 */
-	uint16_t tx_rs_thresh;
-	struct rte_mbuf **free;
+	uint8_t min_tx_pkt_len;
+
+	uint8_t max_non_tso_bd_num; /* max BD number of one non-TSO packet */
 
 	/*
 	 * tso mode.
@@ -464,7 +461,7 @@ struct hns3_tx_queue {
 	 *     checksum of packets that need TSO, so network driver software
 	 *     not need to recalculate it.
 	 */
-	uint8_t tso_mode;
+	uint16_t tso_mode:1;
 	/*
 	 * udp checksum mode.
 	 * value range:
@@ -480,16 +477,10 @@ struct hns3_tx_queue {
 	 *     In this mode, HW does not have the preceding problems and can
 	 *     directly calculate the checksum of these UDP packets.
 	 */
-	uint8_t udp_cksum_mode;
-	/*
-	 * The minimum length of the packet supported by hardware in the Tx
-	 * direction.
-	 */
-	uint32_t min_tx_pkt_len;
+	uint16_t udp_cksum_mode:1;
 
-	uint8_t max_non_tso_bd_num; /* max BD number of one non-TSO packet */
-	bool tx_deferred_start; /* don't start this queue in dev start */
-	bool configured;        /* indicate if tx queue has been configured */
+	uint16_t simple_bd_enable:1;
+	uint16_t tx_push_enable:1;    /* check whether the tx push is enabled */
 	/*
 	 * Indicate whether add the vlan_tci of the mbuf to the inner VLAN field
 	 * of Tx BD. Because the outer VLAN will always be the PVID when the
@@ -502,11 +493,44 @@ struct hns3_tx_queue {
 	 * PVID-related operations in Tx. And pvid_sw_shift_en will be false at
 	 * this point.
 	 */
-	bool pvid_sw_shift_en;
-	bool enabled;           /* indicate if Tx queue has been enabled */
+	uint16_t pvid_sw_shift_en:1;
+
+	/*
+	 * For better performance in tx datapath, releasing mbuf in batches is
+	 * required.
+	 * Only checking the VLD bit of the last descriptor in a batch of the
+	 * thresh descriptors does not mean that these descriptors are all sent
+	 * by hardware successfully. So we need to check that the VLD bits of
+	 * all descriptors are cleared. and then free all mbufs in the batch.
+	 * - tx_rs_thresh
+	 *   Number of mbufs released at a time.
+	 *
+	 * - free
+	 *   Tx mbuf free array used for preserving temporarily address of mbuf
+	 *   released back to mempool, when releasing mbuf in batches.
+	 */
+	uint16_t tx_rs_thresh;
+	struct rte_mbuf **free;
 
 	struct hns3_tx_basic_stats basic_stats;
 	struct hns3_tx_dfx_stats dfx_stats;
+
+
+	/*
+	 * The following fields are not accessed in the I/O path, so they are
+	 * placed at the end.
+	 */
+	void *io_base;
+	struct hns3_adapter *hns;
+	uint64_t tx_ring_phys_addr; /* TX ring DMA address */
+	const struct rte_memzone *mz;
+
+	uint16_t port_id;
+	uint16_t queue_id;
+
+	bool configured;        /* indicate if tx queue has been configured */
+	bool tx_deferred_start; /* don't start this queue in dev start */
+	bool enabled;           /* indicate if Tx queue has been enabled */
 };
 
 #define HNS3_GET_TX_QUEUE_PEND_BD_NUM(txq) \
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v3 5/6] net/hns3: log FDIR configuration
  2021-04-30  6:28 ` [dpdk-dev] [PATCH v3 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
                     ` (3 preceding siblings ...)
  2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 4/6] net/hns3: improve IO path data cache usage Min Hu (Connor)
@ 2021-04-30  6:28   ` Min Hu (Connor)
  2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 6/6] net/hns3: fix vector Rx burst can't exceed 32 Min Hu (Connor)
  2021-05-04 16:03   ` [dpdk-dev] [PATCH v3 0/6] optimization and bugfix for hns3 PMD Ferruh Yigit
  6 siblings, 0 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-30  6:28 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

The rte flow interface does not support the API of the capability
set. Therefore, fdir configuration logs are added to facilitate
debugging.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_fdir.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/net/hns3/hns3_fdir.c b/drivers/net/hns3/hns3_fdir.c
index 87c1aef..0ef0938 100644
--- a/drivers/net/hns3/hns3_fdir.c
+++ b/drivers/net/hns3/hns3_fdir.c
@@ -336,6 +336,8 @@ int hns3_init_fd_config(struct hns3_adapter *hns)
 	    BIT(INNER_IP_PROTO) | BIT(INNER_IP_TOS) |
 	    BIT(INNER_SRC_IP) | BIT(INNER_DST_IP) |
 	    BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
+	hns3_dbg(hw, "fdir tuple: inner<vlan_tag1 eth_type ip_src ip_dst "
+		  "ip_proto ip_tos l4_src_port l4_dst_port>");
 
 	/* If use max 400bit key, we can support tuples for ether type */
 	if (pf->fdir.fd_cfg.max_key_length == MAX_KEY_LENGTH) {
@@ -345,6 +347,9 @@ int hns3_init_fd_config(struct hns3_adapter *hns)
 		    BIT(OUTER_DST_PORT) | BIT(INNER_VLAN_TAG2) |
 		    BIT(OUTER_TUN_VNI) | BIT(OUTER_TUN_FLOW_ID) |
 		    BIT(OUTER_ETH_TYPE) | BIT(OUTER_IP_PROTO);
+		hns3_dbg(hw, "fdir tuple more: inner<dst_mac src_mac "
+			  "vlan_tag2 sctp_tag> outer<eth_type ip_proto "
+			  "l4_src_port l4_dst_port tun_vni tun_flow_id>");
 	}
 
 	/* roce_type is used to filter roce frames
@@ -352,6 +357,7 @@ int hns3_init_fd_config(struct hns3_adapter *hns)
 	 */
 	key_cfg->meta_data_active = BIT(DST_VPORT) | BIT(TUNNEL_PACKET) |
 	    BIT(VLAN_NUMBER);
+	hns3_dbg(hw, "fdir meta data: dst_vport tunnel_packet vlan_number");
 
 	ret = hns3_get_fd_allocation(hw,
 				     &pf->fdir.fd_cfg.rule_num[HNS3_FD_STAGE_1],
@@ -361,6 +367,13 @@ int hns3_init_fd_config(struct hns3_adapter *hns)
 	if (ret)
 		return ret;
 
+	hns3_dbg(hw, "fdir: stage1<rules-%u counters-%u> stage2<rules-%u "
+		  "counters=%u>",
+		  pf->fdir.fd_cfg.rule_num[HNS3_FD_STAGE_1],
+		  pf->fdir.fd_cfg.cnt_num[HNS3_FD_STAGE_1],
+		  pf->fdir.fd_cfg.rule_num[HNS3_FD_STAGE_2],
+		  pf->fdir.fd_cfg.cnt_num[HNS3_FD_STAGE_2]);
+
 	return hns3_set_fd_key_config(hns);
 }
 
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v3 6/6] net/hns3: fix vector Rx burst can't exceed 32
  2021-04-30  6:28 ` [dpdk-dev] [PATCH v3 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
                     ` (4 preceding siblings ...)
  2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 5/6] net/hns3: log FDIR configuration Min Hu (Connor)
@ 2021-04-30  6:28   ` Min Hu (Connor)
  2021-05-04 16:03   ` [dpdk-dev] [PATCH v3 0/6] optimization and bugfix for hns3 PMD Ferruh Yigit
  6 siblings, 0 replies; 31+ messages in thread
From: Min Hu (Connor) @ 2021-04-30  6:28 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

From: Chengwen Feng <fengchengwen@huawei.com>

Currently, the driver uses the macro HNS3_DEFAULT_RX_BURST, whose value
is 32, to limit the vector Rx burst size; as a result, the burst size
can't exceed 32.

This patch fixes this problem by supporting big burst sizes.
Also adjust HNS3_DEFAULT_RX_BURST to 64 as it performs better than 32.

Fixes: a3d4f4d291d7 ("net/hns3: support NEON Rx")
Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx")
Cc: stable@dpdk.org

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_rxtx.h         |  2 +-
 drivers/net/hns3/hns3_rxtx_vec.c     | 36 ++++++++++++++++++++++++++++--------
 drivers/net/hns3/hns3_rxtx_vec.h     |  3 +++
 drivers/net/hns3/hns3_rxtx_vec_sve.c | 32 ++++++++++++++++++++++++++------
 4 files changed, 58 insertions(+), 15 deletions(-)

diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h
index 1e2e994..ba24e00 100644
--- a/drivers/net/hns3/hns3_rxtx.h
+++ b/drivers/net/hns3/hns3_rxtx.h
@@ -20,7 +20,7 @@
 #define HNS3_DEFAULT_TX_RS_THRESH	32
 #define HNS3_TX_FAST_FREE_AHEAD		64
 
-#define HNS3_DEFAULT_RX_BURST		32
+#define HNS3_DEFAULT_RX_BURST		64
 #if (HNS3_DEFAULT_RX_BURST > 64)
 #error "PMD HNS3: HNS3_DEFAULT_RX_BURST must <= 64\n"
 #endif
diff --git a/drivers/net/hns3/hns3_rxtx_vec.c b/drivers/net/hns3/hns3_rxtx_vec.c
index dc1e1ae..cc8b970 100644
--- a/drivers/net/hns3/hns3_rxtx_vec.c
+++ b/drivers/net/hns3/hns3_rxtx_vec.c
@@ -108,14 +108,13 @@ hns3_recv_pkts_vec(void *__restrict rx_queue,
 {
 	struct hns3_rx_queue *rxq = rx_queue;
 	struct hns3_desc *rxdp = &rxq->rx_ring[rxq->next_to_use];
-	uint64_t bd_err_mask;  /* bit mask indicate whick pkts is error */
+	uint64_t pkt_err_mask;  /* bit mask indicate whick pkts is error */
 	uint16_t nb_rx;
 
-	nb_pkts = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST);
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_DEFAULT_DESCS_PER_LOOP);
-
 	rte_prefetch_non_temporal(rxdp);
 
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_DEFAULT_DESCS_PER_LOOP);
+
 	if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
 		hns3_rxq_rearm_mbuf(rxq);
 
@@ -128,10 +127,31 @@ hns3_recv_pkts_vec(void *__restrict rx_queue,
 	rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 2].mbuf);
 	rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 3].mbuf);
 
-	bd_err_mask = 0;
-	nb_rx = hns3_recv_burst_vec(rxq, rx_pkts, nb_pkts, &bd_err_mask);
-	if (unlikely(bd_err_mask))
-		nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, bd_err_mask);
+	if (likely(nb_pkts <= HNS3_DEFAULT_RX_BURST)) {
+		pkt_err_mask = 0;
+		nb_rx = hns3_recv_burst_vec(rxq, rx_pkts, nb_pkts,
+					    &pkt_err_mask);
+		nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, pkt_err_mask);
+		return nb_rx;
+	}
+
+	nb_rx = 0;
+	while (nb_pkts > 0) {
+		uint16_t ret, n;
+
+		n = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST);
+		pkt_err_mask = 0;
+		ret = hns3_recv_burst_vec(rxq, &rx_pkts[nb_rx], n,
+					  &pkt_err_mask);
+		nb_pkts -= ret;
+		nb_rx += hns3_rx_reassemble_pkts(&rx_pkts[nb_rx], ret,
+						 pkt_err_mask);
+		if (ret < n)
+			break;
+
+		if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
+			hns3_rxq_rearm_mbuf(rxq);
+	}
 
 	return nb_rx;
 }
diff --git a/drivers/net/hns3/hns3_rxtx_vec.h b/drivers/net/hns3/hns3_rxtx_vec.h
index 2baf085..67c75e4 100644
--- a/drivers/net/hns3/hns3_rxtx_vec.h
+++ b/drivers/net/hns3/hns3_rxtx_vec.h
@@ -71,6 +71,9 @@ hns3_rx_reassemble_pkts(struct rte_mbuf **rx_pkts,
 	uint16_t count, i;
 	uint64_t mask;
 
+	if (likely(pkt_err_mask == 0))
+		return nb_pkts;
+
 	count = 0;
 	for (i = 0; i < nb_pkts; i++) {
 		mask = ((uint64_t)1u) << i;
diff --git a/drivers/net/hns3/hns3_rxtx_vec_sve.c b/drivers/net/hns3/hns3_rxtx_vec_sve.c
index ef6c875..bf7f704 100644
--- a/drivers/net/hns3/hns3_rxtx_vec_sve.c
+++ b/drivers/net/hns3/hns3_rxtx_vec_sve.c
@@ -292,12 +292,11 @@ hns3_recv_pkts_vec_sve(void *__restrict rx_queue,
 {
 	struct hns3_rx_queue *rxq = rx_queue;
 	struct hns3_desc *rxdp = &rxq->rx_ring[rxq->next_to_use];
-	uint64_t bd_err_mask;  /* bit mask indicate whick pkts is error */
+	uint64_t pkt_err_mask;  /* bit mask indicate whick pkts is error */
 	uint16_t nb_rx;
 
 	rte_prefetch_non_temporal(rxdp);
 
-	nb_pkts = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST);
 	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_SVE_DEFAULT_DESCS_PER_LOOP);
 
 	if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
@@ -309,10 +308,31 @@ hns3_recv_pkts_vec_sve(void *__restrict rx_queue,
 
 	hns3_rx_prefetch_mbuf_sve(&rxq->sw_ring[rxq->next_to_use]);
 
-	bd_err_mask = 0;
-	nb_rx = hns3_recv_burst_vec_sve(rxq, rx_pkts, nb_pkts, &bd_err_mask);
-	if (unlikely(bd_err_mask))
-		nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, bd_err_mask);
+	if (likely(nb_pkts <= HNS3_DEFAULT_RX_BURST)) {
+		pkt_err_mask = 0;
+		nb_rx = hns3_recv_burst_vec_sve(rxq, rx_pkts, nb_pkts,
+						&pkt_err_mask);
+		nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, pkt_err_mask);
+		return nb_rx;
+	}
+
+	nb_rx = 0;
+	while (nb_pkts > 0) {
+		uint16_t ret, n;
+
+		n = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST);
+		pkt_err_mask = 0;
+		ret = hns3_recv_burst_vec_sve(rxq, &rx_pkts[nb_rx], n,
+					      &pkt_err_mask);
+		nb_pkts -= ret;
+		nb_rx += hns3_rx_reassemble_pkts(&rx_pkts[nb_rx], ret,
+						 pkt_err_mask);
+		if (ret < n)
+			break;
+
+		if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
+			hns3_rxq_rearm_mbuf_sve(rxq);
+	}
 
 	return nb_rx;
 }
-- 
2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/6] optimization and bugfix for hns3 PMD
  2021-04-30  6:28 ` [dpdk-dev] [PATCH v3 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
                     ` (5 preceding siblings ...)
  2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 6/6] net/hns3: fix vector Rx burst can't exceed 32 Min Hu (Connor)
@ 2021-05-04 16:03   ` Ferruh Yigit
  6 siblings, 0 replies; 31+ messages in thread
From: Ferruh Yigit @ 2021-05-04 16:03 UTC (permalink / raw)
  To: Min Hu (Connor), dev

On 4/30/2021 7:28 AM, Min Hu (Connor) wrote:
> This patch set contains one patch for performance optimization,
> this is: 'net/hns3: improve IO path data cache usage'.
> The others are bugfixes for hns3 PMD.
> 
> Chengwen Feng (6):
>   net/hns3: delete some unused capabilities
>   net/hns3: modify write reg opt API impl
>   net/hns3: use RTE DIM instead of ARRAY SIZE
>   net/hns3: improve IO path data cache usage
>   net/hns3: log FDIR configuration
>   net/hns3: fix vector Rx burst can't exceed 32

Series applied to dpdk-next-net/main, thanks.

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2021-05-04 16:04 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-26  3:34 [dpdk-dev] [PATCH 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
2021-04-26  3:34 ` [dpdk-dev] [PATCH 1/6] net/hns3: delete some unused capabilities Min Hu (Connor)
2021-04-27 13:37   ` Ferruh Yigit
2021-04-27 14:26     ` Fengchengwen
2021-04-27 14:30     ` Ferruh Yigit
2021-04-26  3:34 ` [dpdk-dev] [PATCH 2/6] net/hns3: modify write reg opt API impl Min Hu (Connor)
2021-04-26  3:34 ` [dpdk-dev] [PATCH 3/6] net/hns3: use RTE DIM instead of ARRAY SIZE Min Hu (Connor)
2021-04-26  3:34 ` [dpdk-dev] [PATCH 4/6] net/hns3: improve IO path data cache usage Min Hu (Connor)
2021-04-26  3:34 ` [dpdk-dev] [PATCH 5/6] net/hns3: log fdir configuration Min Hu (Connor)
2021-04-27 13:39   ` Ferruh Yigit
2021-04-27 14:15     ` Fengchengwen
2021-04-27 14:25     ` Ferruh Yigit
2021-04-27 14:29       ` Fengchengwen
2021-04-26  3:34 ` [dpdk-dev] [PATCH 6/6] net/hns3: fix vector Rx burst default value Min Hu (Connor)
2021-04-27 13:46   ` Ferruh Yigit
2021-04-27 14:34     ` Fengchengwen
2021-04-28  9:53 ` [dpdk-dev] [PATCH v2 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 1/6] net/hns3: delete some unused capabilities Min Hu (Connor)
2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 2/6] net/hns3: modify write reg opt API impl Min Hu (Connor)
2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 3/6] net/hns3: use RTE DIM instead of ARRAY SIZE Min Hu (Connor)
2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 4/6] net/hns3: improve IO path data cache usage Min Hu (Connor)
2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 5/6] net/hns3: log FDIR configuration Min Hu (Connor)
2021-04-28  9:53   ` [dpdk-dev] [PATCH v2 6/6] net/hns3: fix vector Rx burst can't exceed 32 Min Hu (Connor)
2021-04-30  6:28 ` [dpdk-dev] [PATCH v3 0/6] optimization and bugfix for hns3 PMD Min Hu (Connor)
2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 1/6] net/hns3: delete some unused capabilities Min Hu (Connor)
2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 2/6] net/hns3: modify write reg opt API impl Min Hu (Connor)
2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 3/6] net/hns3: use RTE DIM instead of ARRAY SIZE Min Hu (Connor)
2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 4/6] net/hns3: improve IO path data cache usage Min Hu (Connor)
2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 5/6] net/hns3: log FDIR configuration Min Hu (Connor)
2021-04-30  6:28   ` [dpdk-dev] [PATCH v3 6/6] net/hns3: fix vector Rx burst can't exceed 32 Min Hu (Connor)
2021-05-04 16:03   ` [dpdk-dev] [PATCH v3 0/6] optimization and bugfix for hns3 PMD Ferruh Yigit

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).