DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH v1 2/2] net/zxdh: provided zxdh basic init
@ 2024-08-28  7:25 Junlong Wang
  0 siblings, 0 replies; only message in thread
From: Junlong Wang @ 2024-08-28  7:25 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Junlong Wang


[-- Attachment #1.1.1: Type: text/plain, Size: 85595 bytes --]

v1: zxdh basic init function code.

Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
 drivers/net/zxdh/meson.build   |    3 +
 drivers/net/zxdh/zxdh_common.c |   59 ++
 drivers/net/zxdh/zxdh_common.h |   32 +
 drivers/net/zxdh/zxdh_ethdev.c | 1310 ++++++++++++++++++++++++++++++++
 drivers/net/zxdh/zxdh_ethdev.h |  203 +++++
 drivers/net/zxdh/zxdh_pci.c    |  462 +++++++++++
 drivers/net/zxdh/zxdh_pci.h    |  259 +++++++
 drivers/net/zxdh/zxdh_queue.c  |  138 ++++
 drivers/net/zxdh/zxdh_queue.h  |   85 +++
 drivers/net/zxdh/zxdh_ring.h   |   87 +++
 drivers/net/zxdh/zxdh_rxtx.h   |   48 ++
 11 files changed, 2686 insertions(+)
 create mode 100644 drivers/net/zxdh/zxdh_common.c
 create mode 100644 drivers/net/zxdh/zxdh_common.h
 create mode 100644 drivers/net/zxdh/zxdh_ethdev.h
 create mode 100644 drivers/net/zxdh/zxdh_pci.c
 create mode 100644 drivers/net/zxdh/zxdh_pci.h
 create mode 100644 drivers/net/zxdh/zxdh_queue.c
 create mode 100644 drivers/net/zxdh/zxdh_queue.h
 create mode 100644 drivers/net/zxdh/zxdh_ring.h
 create mode 100644 drivers/net/zxdh/zxdh_rxtx.h

diff --git a/drivers/net/zxdh/meson.build b/drivers/net/zxdh/meson.build
index 217d8920cd..0810073e09 100644
--- a/drivers/net/zxdh/meson.build
+++ b/drivers/net/zxdh/meson.build
@@ -9,6 +9,9 @@ endif
 
 sources = files(
 	'zxdh_ethdev.c',
+	'zxdh_common.c',
+	'zxdh_pci.c',
 	'zxdh_msg.c',
+	'zxdh_queue.c',
 	'zxdh_npsdk.c',
 	)
diff --git a/drivers/net/zxdh/zxdh_common.c b/drivers/net/zxdh/zxdh_common.c
new file mode 100644
index 0000000000..55497f8a24
--- /dev/null
+++ b/drivers/net/zxdh/zxdh_common.c
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 ZTE Corporation
+ */
+
+#include <stdint.h>
+#include <ethdev_driver.h>
+
+#include "zxdh_ethdev.h"
+#include "zxdh_common.h"
+
+uint32_t zxdh_read_bar_reg(struct rte_eth_dev *dev, uint32_t bar, uint32_t reg)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+	uint64_t baseaddr = (uint64_t)(hw->bar_addr[bar]);
+	uint32_t val      = *((volatile uint32_t *)(baseaddr + reg));
+	return val;
+}
+
+void zxdh_write_bar_reg(struct rte_eth_dev *dev, uint32_t bar, uint32_t reg, uint32_t val)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+	uint64_t baseaddr = (uint64_t)(hw->bar_addr[bar]);
+	*((volatile uint32_t *)(baseaddr + reg)) = val;
+}
+
+int32_t zxdh_acquire_lock(struct zxdh_hw *hw)
+{
+	uint32_t var = zxdh_read_comm_reg((uint64_t)hw->common_cfg, ZXDH_VF_LOCK_REG);
+
+	/* check whether lock is used */
+	if (!(var & ZXDH_VF_LOCK_ENABLE_MASK))
+		return -1;
+
+	return 0;
+}
+
+int32_t zxdh_release_lock(struct zxdh_hw *hw)
+{
+	uint32_t var = zxdh_read_comm_reg((uint64_t)hw->common_cfg, ZXDH_VF_LOCK_REG);
+
+	if (var & ZXDH_VF_LOCK_ENABLE_MASK) {
+		var &= ~ZXDH_VF_LOCK_ENABLE_MASK;
+		zxdh_write_comm_reg((uint64_t)hw->common_cfg, ZXDH_VF_LOCK_REG, var);
+		return 0;
+	}
+
+	return -1;
+}
+
+uint32_t zxdh_read_comm_reg(uint64_t pci_comm_cfg_baseaddr, uint32_t reg)
+{
+	uint32_t val = *((volatile uint32_t *)(pci_comm_cfg_baseaddr + reg));
+	return val;
+}
+
+void zxdh_write_comm_reg(uint64_t pci_comm_cfg_baseaddr, uint32_t reg, uint32_t val)
+{
+	*((volatile uint32_t *)(pci_comm_cfg_baseaddr + reg)) = val;
+}
diff --git a/drivers/net/zxdh/zxdh_common.h b/drivers/net/zxdh/zxdh_common.h
new file mode 100644
index 0000000000..912eb9ad42
--- /dev/null
+++ b/drivers/net/zxdh/zxdh_common.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 ZTE Corporation
+ */
+
+#ifndef _ZXDH_COMMON_H_
+#define _ZXDH_COMMON_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <rte_ethdev.h>
+
+#include "zxdh_ethdev.h"
+
+#define ZXDH_VF_LOCK_ENABLE_MASK      0x1
+#define ZXDH_ACQUIRE_CHANNEL_NUM_MAX   10
+#define ZXDH_VF_LOCK_REG             0x90
+
+uint32_t zxdh_read_bar_reg(struct rte_eth_dev *dev, uint32_t bar, uint32_t reg);
+void zxdh_write_bar_reg(struct rte_eth_dev *dev, uint32_t bar, uint32_t reg, uint32_t val);
+int32_t zxdh_release_lock(struct zxdh_hw *hw);
+int32_t zxdh_acquire_lock(struct zxdh_hw *hw);
+uint32_t zxdh_read_comm_reg(uint64_t pci_comm_cfg_baseaddr, uint32_t reg);
+void zxdh_write_comm_reg(uint64_t pci_comm_cfg_baseaddr, uint32_t reg, uint32_t val);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZXDH_COMMON_H_ */
diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index a3c05f9809..425a818109 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -10,6 +10,1316 @@
 #include <rte_kvargs.h>
 #include <rte_hexdump.h>
 
+struct zxdh_hw_internal zxdh_hw_internal[RTE_MAX_ETHPORTS];
+struct zxdh_shared_data *zxdh_shared_data;
+const char *MZ_ZXDH_PMD_SHARED_DATA = "zxdh_pmd_shared_data";
+rte_spinlock_t zxdh_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
+struct zxdh_dtb_shared_data g_dtb_data = {0};
+
+#define ZXDH_PMD_DEFAULT_HOST_FEATURES   \
+	(1ULL << ZXDH_NET_F_MRG_RXBUF | \
+	 1ULL << ZXDH_NET_F_STATUS    | \
+	 1ULL << ZXDH_NET_F_MQ        | \
+	 1ULL << ZXDH_F_ANY_LAYOUT    | \
+	 1ULL << ZXDH_F_VERSION_1   | \
+	 1ULL << ZXDH_F_RING_PACKED | \
+	 1ULL << ZXDH_F_IN_ORDER    | \
+	 1ULL << ZXDH_F_ORDER_PLATFORM | \
+	 1ULL << ZXDH_F_NOTIFICATION_DATA |\
+	 1ULL << ZXDH_NET_F_MAC | \
+	 1ULL << ZXDH_NET_F_CSUM |\
+	 1ULL << ZXDH_NET_F_GUEST_CSUM |\
+	 1ULL << ZXDH_NET_F_GUEST_TSO4 |\
+	 1ULL << ZXDH_NET_F_GUEST_TSO6 |\
+	 1ULL << ZXDH_NET_F_HOST_TSO4 |\
+	 1ULL << ZXDH_NET_F_HOST_TSO6 |\
+	 1ULL << ZXDH_NET_F_GUEST_UFO |\
+	 1ULL << ZXDH_NET_F_HOST_UFO)
+
+#define ZXDH_PMD_DEFAULT_GUEST_FEATURES   \
+	(1ULL << ZXDH_NET_F_MRG_RXBUF | \
+	 1ULL << ZXDH_NET_F_STATUS    | \
+	 1ULL << ZXDH_NET_F_MQ        | \
+	 1ULL << ZXDH_F_ANY_LAYOUT    | \
+	 1ULL << ZXDH_F_VERSION_1     | \
+	 1ULL << ZXDH_F_RING_PACKED   | \
+	 1ULL << ZXDH_F_IN_ORDER      | \
+	 1ULL << ZXDH_F_NOTIFICATION_DATA | \
+	 1ULL << ZXDH_NET_F_MAC)
+
+#define ZXDH_RX_QUEUES_MAX  128U
+#define ZXDH_TX_QUEUES_MAX  128U
+
+static unsigned int
+log2above(unsigned int v)
+{
+	unsigned int l;
+	unsigned int r;
+
+	for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
+		r |= (v & 1);
+	return l + r;
+}
+
+static uint16_t zxdh_queue_desc_pre_setup(uint16_t desc)
+{
+	uint32_t nb_desc = desc;
+
+	if (desc < ZXDH_MIN_QUEUE_DEPTH) {
+		PMD_RX_LOG(WARNING,
+			"nb_desc(%u) increased number of descriptors to the min queue depth (%u)",
+			desc, ZXDH_MIN_QUEUE_DEPTH);
+		return ZXDH_MIN_QUEUE_DEPTH;
+	}
+
+	if (desc > ZXDH_MAX_QUEUE_DEPTH) {
+		PMD_RX_LOG(WARNING,
+			"nb_desc(%u) can't be greater than max_rxds (%d), turn to max queue depth",
+			desc, ZXDH_MAX_QUEUE_DEPTH);
+		return ZXDH_MAX_QUEUE_DEPTH;
+	}
+
+	if (!rte_is_power_of_2(desc)) {
+		nb_desc = 1 << log2above(desc);
+		if (nb_desc > ZXDH_MAX_QUEUE_DEPTH)
+			nb_desc = ZXDH_MAX_QUEUE_DEPTH;
+
+		PMD_RX_LOG(WARNING,
+			"nb_desc(%u) increased number of descriptors to the next power of two (%d)",
+			desc, nb_desc);
+	}
+
+	return nb_desc;
+}
+
+static int32_t hw_q_depth_handler(const char *key __rte_unused,
+				const char *value, void *ret_val)
+{
+	uint16_t val = 0;
+	struct zxdh_hw *hw = ret_val;
+
+	val = strtoul(value, NULL, 0);
+	uint16_t q_depth = zxdh_queue_desc_pre_setup(val);
+
+	hw->q_depth = q_depth;
+	return 0;
+}
+
+static int32_t zxdh_dev_devargs_parse(struct rte_devargs *devargs, struct zxdh_hw *hw)
+{
+	struct rte_kvargs *kvlist = NULL;
+	int32_t ret = 0;
+
+	if (devargs == NULL)
+		return 0;
+
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (kvlist == NULL) {
+		PMD_INIT_LOG(ERR, "error when parsing param");
+		return 0;
+	}
+
+	ret = rte_kvargs_process(kvlist, "q_depth", hw_q_depth_handler, hw);
+	if (ret < 0) {
+		PMD_INIT_LOG(ERR, "Failed to parse q_depth");
+		goto exit;
+	}
+	if (!hw->q_depth)
+		hw->q_depth = ZXDH_MIN_QUEUE_DEPTH;
+
+exit:
+	rte_kvargs_free(kvlist);
+	return ret;
+}
+
+static int zxdh_init_shared_data(void)
+{
+	const struct rte_memzone *mz;
+	int ret = 0;
+
+	rte_spinlock_lock(&zxdh_shared_data_lock);
+	if (zxdh_shared_data == NULL) {
+		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+			/* Allocate shared memory. */
+			mz = rte_memzone_reserve(MZ_ZXDH_PMD_SHARED_DATA,
+					sizeof(*zxdh_shared_data), SOCKET_ID_ANY, 0);
+			if (mz == NULL) {
+				PMD_INIT_LOG(ERR, "Cannot allocate zxdh shared data");
+				ret = -rte_errno;
+				goto error;
+			}
+			zxdh_shared_data = mz->addr;
+			memset(zxdh_shared_data, 0, sizeof(*zxdh_shared_data));
+			rte_spinlock_init(&zxdh_shared_data->lock);
+		} else { /* Lookup allocated shared memory. */
+			mz = rte_memzone_lookup(MZ_ZXDH_PMD_SHARED_DATA);
+			if (mz == NULL) {
+				PMD_INIT_LOG(ERR, "Cannot attach zxdh shared data");
+				ret = -rte_errno;
+				goto error;
+			}
+			zxdh_shared_data = mz->addr;
+		}
+	}
+
+error:
+	rte_spinlock_unlock(&zxdh_shared_data_lock);
+	return ret;
+}
+
+static int zxdh_init_once(struct rte_eth_dev *eth_dev)
+{
+	PMD_INIT_LOG(DEBUG, "port 0x%x init...", eth_dev->data->port_id);
+	if (zxdh_init_shared_data())
+		return -rte_errno;
+
+	struct zxdh_shared_data *sd = zxdh_shared_data;
+	int ret = 0;
+
+	rte_spinlock_lock(&sd->lock);
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+		if (!sd->init_done) {
+			++sd->secondary_cnt;
+			sd->init_done = true;
+		}
+		goto out;
+	}
+
+	sd->dev_refcnt++;
+out:
+	rte_spinlock_unlock(&sd->lock);
+	return ret;
+}
+
+static int32_t zxdh_get_pci_dev_config(struct zxdh_hw *hw)
+{
+	hw->host_features = zxdh_vtpci_get_features(hw);
+	hw->host_features = ZXDH_PMD_DEFAULT_HOST_FEATURES;
+
+	uint64_t guest_features = (uint64_t)ZXDH_PMD_DEFAULT_GUEST_FEATURES;
+	uint64_t nego_features = guest_features & hw->host_features;
+
+	hw->guest_features = nego_features;
+
+	if (hw->guest_features & (1ULL << ZXDH_NET_F_MAC)) {
+		zxdh_vtpci_read_dev_config(hw, offsetof(struct zxdh_net_config, mac),
+				&hw->mac_addr, RTE_ETHER_ADDR_LEN);
+		PMD_INIT_LOG(DEBUG, "get dev mac: %02X:%02X:%02X:%02X:%02X:%02X",
+				hw->mac_addr[0], hw->mac_addr[1],
+				hw->mac_addr[2], hw->mac_addr[3],
+				hw->mac_addr[4], hw->mac_addr[5]);
+	} else {
+		rte_eth_random_addr(&hw->mac_addr[0]);
+		PMD_INIT_LOG(DEBUG, "random dev mac: %02X:%02X:%02X:%02X:%02X:%02X",
+				hw->mac_addr[0], hw->mac_addr[1],
+				hw->mac_addr[2], hw->mac_addr[3],
+				hw->mac_addr[4], hw->mac_addr[5]);
+	}
+	uint32_t max_queue_pairs;
+
+	zxdh_vtpci_read_dev_config(hw, offsetof(struct zxdh_net_config, max_virtqueue_pairs),
+			&max_queue_pairs, sizeof(max_queue_pairs));
+	PMD_INIT_LOG(DEBUG, "get max queue pairs %u", max_queue_pairs);
+	if (max_queue_pairs == 0)
+		hw->max_queue_pairs = ZXDH_RX_QUEUES_MAX;
+	else
+		hw->max_queue_pairs = RTE_MIN(ZXDH_RX_QUEUES_MAX, max_queue_pairs);
+
+	PMD_INIT_LOG(DEBUG, "set max queue pairs %d", hw->max_queue_pairs);
+
+	hw->weak_barriers = !vtpci_with_feature(hw, ZXDH_F_ORDER_PLATFORM);
+	return 0;
+}
+
+static void zxdh_dev_free_mbufs(struct rte_eth_dev *dev)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+	uint16_t nr_vq = hw->queue_num;
+	uint32_t i, mbuf_num = 0;
+
+	const char *type __rte_unused;
+	struct virtqueue *vq = NULL;
+	struct rte_mbuf *buf = NULL;
+	int32_t queue_type = 0;
+
+	if (hw->vqs == NULL)
+		return;
+
+	for (i = 0; i < nr_vq; i++) {
+		vq = hw->vqs[i];
+		if (!vq)
+			continue;
+
+		queue_type = get_queue_type(i);
+		if (queue_type == VTNET_RQ)
+			type = "rxq";
+		else if (queue_type == VTNET_TQ)
+			type = "txq";
+		else
+			continue;
+
+		PMD_INIT_LOG(DEBUG, "Before freeing %s[%d] used and unused buf", type, i);
+
+		while ((buf = zxdh_virtqueue_detach_unused(vq)) != NULL) {
+			rte_pktmbuf_free(buf);
+			mbuf_num++;
+		}
+
+		PMD_INIT_LOG(DEBUG, "After freeing %s[%d] used and unused buf", type, i);
+	}
+
+	PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
+}
+
+static int32_t zxdh_init_device(struct rte_eth_dev *eth_dev)
+{
+	struct zxdh_hw *hw = eth_dev->data->dev_private;
+	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
+	int ret = zxdh_read_pci_caps(pci_dev, hw);
+
+	if (ret) {
+		PMD_INIT_LOG(ERR, "port 0x%x pci caps read failed .", hw->vport.vport);
+		goto err;
+	}
+	zxdh_hw_internal[hw->port_id].vtpci_ops = &zxdh_modern_ops;
+	zxdh_vtpci_reset(hw);
+	zxdh_get_pci_dev_config(hw);
+	if (hw->vqs) { /* not reachable? */
+		zxdh_dev_free_mbufs(eth_dev);
+		ret = zxdh_free_queues(eth_dev);
+		if (ret < 0) {
+			PMD_INIT_LOG(ERR, "port 0x%x free queue failed.", hw->vport.vport);
+			goto err;
+		}
+	}
+	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
+	hw->speed = RTE_ETH_SPEED_NUM_UNKNOWN;
+	hw->duplex = RTE_ETH_LINK_FULL_DUPLEX;
+
+	rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr, &eth_dev->data->mac_addrs[0]);
+	PMD_INIT_LOG(DEBUG, "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
+		eth_dev->data->mac_addrs->addr_bytes[0],
+		eth_dev->data->mac_addrs->addr_bytes[1],
+		eth_dev->data->mac_addrs->addr_bytes[2],
+		eth_dev->data->mac_addrs->addr_bytes[3],
+		eth_dev->data->mac_addrs->addr_bytes[4],
+		eth_dev->data->mac_addrs->addr_bytes[5]);
+	/* If host does not support both status and MSI-X then disable LSC */
+	if (vtpci_with_feature(hw, ZXDH_NET_F_STATUS) && (hw->use_msix != ZXDH_MSIX_NONE)) {
+		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+		PMD_INIT_LOG(DEBUG, "LSC enable");
+	} else {
+		eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
+	}
+	return 0;
+
+err:
+	PMD_INIT_LOG(ERR, "port %d init device failed", eth_dev->data->port_id);
+	return ret;
+}
+
+
+static void zxdh_queues_unbind_intr(struct rte_eth_dev *dev)
+{
+	PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
+	struct zxdh_hw *hw = dev->data->dev_private;
+	int32_t i;
+
+	for (i = 0; i < dev->data->nb_rx_queues; ++i) {
+		VTPCI_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], ZXDH_MSI_NO_VECTOR);
+		VTPCI_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2 + 1], ZXDH_MSI_NO_VECTOR);
+	}
+}
+
+static int32_t zxdh_intr_unmask(struct rte_eth_dev *dev)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+
+	if (rte_intr_ack(dev->intr_handle) < 0)
+		return -1;
+
+	hw->use_msix = zxdh_vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev));
+
+	return 0;
+}
+
+
+static void zxdh_devconf_intr_handler(void *param)
+{
+	struct rte_eth_dev *dev = param;
+	struct zxdh_hw *hw = dev->data->dev_private;
+	uint16_t status = 0;
+	/* Read interrupt status which clears interrupt */
+	uint8_t isr = zxdh_vtpci_isr(hw);
+
+	if (zxdh_intr_unmask(dev) < 0)
+		PMD_DRV_LOG(ERR, "interrupt enable failed");
+	if (isr & ZXDH_PCI_ISR_CONFIG) {
+		/** todo provided later
+		 * if (zxdh_dev_link_update(dev, 0) == 0)
+		 * rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
+		 */
+
+		if (vtpci_with_feature(hw, ZXDH_NET_F_STATUS)) {
+			zxdh_vtpci_read_dev_config(hw, offsetof(struct zxdh_net_config, status),
+					&status, sizeof(status));
+			if (status & ZXDH_NET_S_ANNOUNCE)
+				zxdh_notify_peers(dev);
+		}
+	}
+}
+
+/* Interrupt handler triggered by NIC for handling specific interrupt. */
+static void zxdh_frompfvf_intr_handler(void *param)
+{
+	struct rte_eth_dev *dev = param;
+	struct zxdh_hw *hw = dev->data->dev_private;
+	uint64_t virt_addr = 0;
+
+	virt_addr = (uint64_t)(hw->bar_addr[ZXDH_BAR0_INDEX] + ZXDH_MSG_CHAN_PFVFSHARE_OFFSET);
+	if (hw->is_pf) {
+		PMD_INIT_LOG(INFO, "zxdh_pf2vf_intr_handler  PF ");
+		zxdh_bar_irq_recv(MSG_CHAN_END_VF, MSG_CHAN_END_PF, virt_addr, dev);
+	} else {
+		PMD_INIT_LOG(INFO, "zxdh_pf2vf_intr_handler  VF ");
+		zxdh_bar_irq_recv(MSG_CHAN_END_PF, MSG_CHAN_END_VF, virt_addr, dev);
+	}
+}
+
+/* Interrupt handler triggered by NIC for handling specific interrupt. */
+static void zxdh_fromriscv_intr_handler(void *param)
+{
+	struct rte_eth_dev *dev = param;
+	struct zxdh_hw *hw = dev->data->dev_private;
+	uint64_t virt_addr = 0;
+
+	virt_addr = (uint64_t)(hw->bar_addr[ZXDH_BAR0_INDEX] + ZXDH_CTRLCH_OFFSET);
+	if (hw->is_pf) {
+		PMD_INIT_LOG(INFO, "zxdh_risc2pf_intr_handler  PF ");
+		zxdh_bar_irq_recv(MSG_CHAN_END_RISC, MSG_CHAN_END_PF, virt_addr, dev);
+	} else {
+		PMD_INIT_LOG(INFO, "zxdh_riscvf_intr_handler  VF ");
+		zxdh_bar_irq_recv(MSG_CHAN_END_RISC, MSG_CHAN_END_VF, virt_addr, dev);
+	}
+}
+
+static void zxdh_intr_cb_unreg(struct rte_eth_dev *dev)
+{
+	PMD_INIT_LOG(ERR, "");
+	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
+		rte_intr_callback_unregister(dev->intr_handle, zxdh_devconf_intr_handler, dev);
+
+	struct zxdh_hw *hw = dev->data->dev_private;
+
+	/* register callback to update dev config intr */
+	rte_intr_callback_unregister(dev->intr_handle, zxdh_devconf_intr_handler, dev);
+	/* Register rsic_v to pf interrupt callback */
+	struct rte_intr_handle *tmp = hw->risc_intr +
+			(MSIX_FROM_PFVF - ZXDH_MSIX_INTR_MSG_VEC_BASE);
+
+	rte_intr_callback_unregister(tmp, zxdh_frompfvf_intr_handler, dev);
+	tmp = hw->risc_intr + (MSIX_FROM_RISCV - ZXDH_MSIX_INTR_MSG_VEC_BASE);
+	rte_intr_callback_unregister(tmp, zxdh_fromriscv_intr_handler, dev);
+}
+
+static int32_t zxdh_intr_disable(struct rte_eth_dev *dev)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+
+	if (!hw->intr_enabled)
+		return 0;
+
+	zxdh_intr_cb_unreg(dev);
+	if (rte_intr_disable(dev->intr_handle) < 0)
+		return -1;
+
+	hw->intr_enabled = 0;
+	return 0;
+}
+
+static int32_t zxdh_intr_release(struct rte_eth_dev *dev)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+
+	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
+		VTPCI_OPS(hw)->set_config_irq(hw, ZXDH_MSI_NO_VECTOR);
+
+	zxdh_queues_unbind_intr(dev);
+	zxdh_intr_disable(dev);
+
+	rte_intr_efd_disable(dev->intr_handle);
+	rte_intr_vec_list_free(dev->intr_handle);
+	rte_free(hw->risc_intr);
+	hw->risc_intr = NULL;
+	rte_free(hw->dtb_intr);
+	hw->dtb_intr = NULL;
+	return 0;
+}
+
+static int32_t zxdh_setup_risc_interrupts(struct rte_eth_dev *dev)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+	uint8_t i;
+
+	if (!hw->risc_intr) {
+		PMD_INIT_LOG(ERR, " to allocate risc_intr");
+		hw->risc_intr = rte_zmalloc("risc_intr",
+			ZXDH_MSIX_INTR_MSG_VEC_NUM * sizeof(struct rte_intr_handle), 0);
+		if (hw->risc_intr == NULL) {
+			PMD_INIT_LOG(ERR, "Failed to allocate risc_intr");
+			return -ENOMEM;
+		}
+	}
+
+	for (i = 0; i < ZXDH_MSIX_INTR_MSG_VEC_NUM; i++) {
+		if (dev->intr_handle->efds[i] < 0) {
+			PMD_INIT_LOG(ERR, "[%u]risc interrupt fd is invalid", i);
+			rte_free(hw->risc_intr);
+			hw->risc_intr = NULL;
+			return -1;
+		}
+
+		struct rte_intr_handle *intr_handle = hw->risc_intr + i;
+
+		intr_handle->fd = dev->intr_handle->efds[i];
+		intr_handle->type = dev->intr_handle->type;
+	}
+
+	return 0;
+}
+
+static int32_t zxdh_setup_dtb_interrupts(struct rte_eth_dev *dev)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+
+	if (!hw->dtb_intr) {
+		hw->dtb_intr = rte_zmalloc("dtb_intr", sizeof(struct rte_intr_handle), 0);
+		if (hw->dtb_intr == NULL) {
+			PMD_INIT_LOG(ERR, "Failed to allocate dtb_intr");
+			return -ENOMEM;
+		}
+	}
+
+	if (dev->intr_handle->efds[ZXDH_MSIX_INTR_DTB_VEC - 1] < 0) {
+		PMD_INIT_LOG(ERR, "[%d]dtb interrupt fd is invalid", ZXDH_MSIX_INTR_DTB_VEC - 1);
+		rte_free(hw->dtb_intr);
+		hw->dtb_intr = NULL;
+		return -1;
+	}
+	hw->dtb_intr->fd = dev->intr_handle->efds[ZXDH_MSIX_INTR_DTB_VEC - 1];
+	hw->dtb_intr->type = dev->intr_handle->type;
+	return 0;
+}
+
+static int32_t zxdh_queues_bind_intr(struct rte_eth_dev *dev)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+	int32_t i;
+	uint16_t vec;
+
+	if (!dev->data->dev_conf.intr_conf.rxq) {
+		PMD_INIT_LOG(INFO, "queue/interrupt mask, nb_rx_queues %u",
+				dev->data->nb_rx_queues);
+		for (i = 0; i < dev->data->nb_rx_queues; ++i) {
+			vec = VTPCI_OPS(hw)->set_queue_irq(hw,
+					hw->vqs[i * 2], ZXDH_MSI_NO_VECTOR);
+			PMD_INIT_LOG(INFO, "vq%d irq set 0x%x, get 0x%x",
+					i * 2, ZXDH_MSI_NO_VECTOR, vec);
+		}
+	} else {
+		PMD_INIT_LOG(DEBUG, "queue/interrupt binding, nb_rx_queues %u",
+				dev->data->nb_rx_queues);
+		for (i = 0; i < dev->data->nb_rx_queues; ++i) {
+			vec = VTPCI_OPS(hw)->set_queue_irq(hw,
+					hw->vqs[i * 2], i + ZXDH_QUE_INTR_VEC_BASE);
+			PMD_INIT_LOG(INFO, "vq%d irq set %d, get %d",
+					i * 2, i + ZXDH_QUE_INTR_VEC_BASE, vec);
+		}
+	}
+	/* mask all txq intr */
+	for (i = 0; i < dev->data->nb_tx_queues; ++i) {
+		vec = VTPCI_OPS(hw)->set_queue_irq(hw,
+				hw->vqs[(i * 2) + 1], ZXDH_MSI_NO_VECTOR);
+		PMD_INIT_LOG(INFO, "vq%d irq set 0x%x, get 0x%x",
+				(i * 2) + 1, ZXDH_MSI_NO_VECTOR, vec);
+	}
+	return 0;
+}
+
+int32_t zxdh_dev_pause(struct rte_eth_dev *dev)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+
+	rte_spinlock_lock(&hw->state_lock);
+
+	if (hw->started == 0) {
+		/* Device is just stopped. */
+		rte_spinlock_unlock(&hw->state_lock);
+		return -1;
+	}
+	hw->started = 0;
+	hw->admin_status = 0;
+	/*
+	 * Prevent the worker threads from touching queues to avoid contention,
+	 * 1 ms should be enough for the ongoing Tx function to finish.
+	 */
+	rte_delay_ms(1);
+	return 0;
+}
+
+/*
+ * Recover hw state to let the worker threads continue.
+ */
+void zxdh_dev_resume(struct rte_eth_dev *dev)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+
+	hw->started = 1;
+	hw->admin_status = 1;
+	rte_spinlock_unlock(&hw->state_lock);
+}
+
+/*
+ * Should be called only after device is paused.
+ */
+int32_t zxdh_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts, int32_t nb_pkts)
+{
+	struct zxdh_hw	*hw   = dev->data->dev_private;
+	struct virtnet_tx *txvq = dev->data->tx_queues[0];
+	int32_t ret = 0;
+
+	hw->inject_pkts = tx_pkts;
+	ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
+	hw->inject_pkts = NULL;
+
+	return ret;
+}
+
+void zxdh_notify_peers(struct rte_eth_dev *dev)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+	struct virtnet_rx *rxvq = NULL;
+	struct rte_mbuf *rarp_mbuf = NULL;
+
+	if (!dev->data->rx_queues)
+		return;
+
+	rxvq = dev->data->rx_queues[0];
+	if (!rxvq)
+		return;
+
+	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool, (struct rte_ether_addr *)hw->mac_addr);
+	if (rarp_mbuf == NULL) {
+		PMD_DRV_LOG(ERR, "failed to make RARP packet.");
+		return;
+	}
+
+	/* If virtio port just stopped, no need to send RARP */
+	if (zxdh_dev_pause(dev) < 0) {
+		rte_pktmbuf_free(rarp_mbuf);
+		return;
+	}
+
+	zxdh_inject_pkts(dev, &rarp_mbuf, 1);
+	zxdh_dev_resume(dev);
+}
+
+static void zxdh_intr_cb_reg(struct rte_eth_dev *dev)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+
+	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
+		rte_intr_callback_unregister(dev->intr_handle, zxdh_devconf_intr_handler, dev);
+
+	/* register callback to update dev config intr */
+	rte_intr_callback_register(dev->intr_handle, zxdh_devconf_intr_handler, dev);
+	/* Register rsic_v to pf interrupt callback */
+	struct rte_intr_handle *tmp = hw->risc_intr +
+			(MSIX_FROM_PFVF - ZXDH_MSIX_INTR_MSG_VEC_BASE);
+
+	rte_intr_callback_register(tmp, zxdh_frompfvf_intr_handler, dev);
+
+	tmp = hw->risc_intr + (MSIX_FROM_RISCV - ZXDH_MSIX_INTR_MSG_VEC_BASE);
+	rte_intr_callback_register(tmp, zxdh_fromriscv_intr_handler, dev);
+}
+
+static int32_t zxdh_intr_enable(struct rte_eth_dev *dev)
+{
+	int ret = 0;
+	struct zxdh_hw *hw = dev->data->dev_private;
+
+	if (!hw->intr_enabled) {
+		zxdh_intr_cb_reg(dev);
+		ret = rte_intr_enable(dev->intr_handle);
+		if (unlikely(ret))
+			PMD_INIT_LOG(ERR, "Failed to enable %s intr", dev->data->name);
+
+		hw->intr_enabled = 1;
+	}
+	return ret;
+}
+
+static int32_t zxdh_configure_intr(struct rte_eth_dev *dev)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+	int32_t ret = 0;
+
+	if (!rte_intr_cap_multiple(dev->intr_handle)) {
+		PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
+		return -ENOTSUP;
+	}
+	zxdh_intr_release(dev);
+	uint8_t nb_efd = ZXDH_MSIX_INTR_DTB_VEC_NUM + ZXDH_MSIX_INTR_MSG_VEC_NUM;
+
+	if (dev->data->dev_conf.intr_conf.rxq)
+		nb_efd += dev->data->nb_rx_queues;
+
+	if (rte_intr_efd_enable(dev->intr_handle, nb_efd)) {
+		PMD_INIT_LOG(ERR, "Fail to create eventfd");
+		return -1;
+	}
+
+	if (rte_intr_vec_list_alloc(dev->intr_handle, "intr_vec",
+					hw->max_queue_pairs + ZXDH_INTR_NONQUE_NUM)) {
+		PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
+					hw->max_queue_pairs + ZXDH_INTR_NONQUE_NUM);
+		return -ENOMEM;
+	}
+	PMD_INIT_LOG(INFO, "allocate %u rxq vectors", dev->intr_handle->vec_list_size);
+	if (zxdh_setup_risc_interrupts(dev) != 0) {
+		PMD_INIT_LOG(ERR, "Error setting up rsic_v interrupts!");
+		ret = -1;
+		goto free_intr_vec;
+	}
+	if (zxdh_setup_dtb_interrupts(dev) != 0) {
+		PMD_INIT_LOG(ERR, "Error setting up dtb interrupts!");
+		ret = -1;
+		goto free_intr_vec;
+	}
+
+	if (zxdh_queues_bind_intr(dev) < 0) {
+		PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
+		ret = -1;
+		goto free_intr_vec;
+	}
+	/** DO NOT try to remove this! This function will enable msix,
+	 * or QEMU will encounter SIGSEGV when DRIVER_OK is sent.
+	 * And for legacy devices, this should be done before queue/vec
+	 * binding to change the config size from 20 to 24, or
+	 * ZXDH_MSI_QUEUE_VECTOR (22) will be ignored.
+	 **/
+	if (zxdh_intr_enable(dev) < 0) {
+		PMD_DRV_LOG(ERR, "interrupt enable failed");
+		ret = -1;
+		goto free_intr_vec;
+	}
+	return 0;
+
+free_intr_vec:
+	zxdh_intr_release(dev);
+	return ret;
+}
+
+/* dev_ops for zxdh, bare necessities for basic operation */
+static const struct eth_dev_ops zxdh_eth_dev_ops = {
+	.dev_configure			 = NULL,
+	.dev_start				 = NULL,
+	.dev_stop				 = NULL,
+	.dev_close				 = NULL,
+
+	.rx_queue_setup			 = NULL,
+	.rx_queue_intr_enable	 = NULL,
+	.rx_queue_intr_disable	 = NULL,
+
+	.tx_queue_setup			 = NULL,
+};
+
+
+static int32_t set_rxtx_funcs(struct rte_eth_dev *eth_dev)
+{
+	/** todo later
+	 * eth_dev->tx_pkt_prepare = zxdh_xmit_pkts_prepare;
+	 */
+
+	struct zxdh_hw *hw = eth_dev->data->dev_private;
+
+	if (!vtpci_packed_queue(hw)) {
+		PMD_INIT_LOG(ERR, " port %u not support packed queue", eth_dev->data->port_id);
+		return -1;
+	}
+	if (!vtpci_with_feature(hw, ZXDH_NET_F_MRG_RXBUF)) {
+		PMD_INIT_LOG(ERR, " port %u not support rx mergeable", eth_dev->data->port_id);
+		return -1;
+	}
+	/** todo later provided rx/tx
+	 * eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
+	 * eth_dev->rx_pkt_burst = &zxdh_recv_mergeable_pkts_packed;
+	 */
+
+	return 0;
+}
+
+static void zxdh_msg_cb_reg(struct zxdh_hw *hw)
+{
+	if (hw->is_pf)
+		zxdh_bar_chan_msg_recv_register(MODULE_BAR_MSG_TO_PF, pf_recv_bar_msg);
+	else
+		zxdh_bar_chan_msg_recv_register(MODULE_BAR_MSG_TO_VF, vf_recv_bar_msg);
+}
+
+static void zxdh_priv_res_init(struct zxdh_hw *hw)
+{
+	hw->vlan_fiter = (uint64_t *)rte_malloc("vlan_filter", 64 * sizeof(uint64_t), 1);
+	memset(hw->vlan_fiter, 0, 64 * sizeof(uint64_t));
+	if (hw->is_pf)
+		hw->vfinfo = rte_zmalloc("vfinfo", ZXDH_MAX_VF * sizeof(struct vfinfo), 4);
+	else
+		hw->vfinfo = NULL;
+}
+
+static void set_vfs_pcieid(struct zxdh_hw *hw)
+{
+	if (hw->pfinfo.vf_nums > ZXDH_MAX_VF) {
+		PMD_DRV_LOG(ERR, "vf nums %u out of range", hw->pfinfo.vf_nums);
+		return;
+	}
+	if (hw->vfinfo == NULL) {
+		PMD_DRV_LOG(ERR, " vfinfo uninited");
+		return;
+	}
+
+	PMD_DRV_LOG(INFO, "vf nums %d", hw->pfinfo.vf_nums);
+	int vf_idx;
+
+	for (vf_idx = 0; vf_idx < hw->pfinfo.vf_nums; vf_idx++)
+		hw->vfinfo[vf_idx].pcieid = VF_PCIE_ID(hw->pcie_id, vf_idx);
+}
+
+
+static void zxdh_sriovinfo_init(struct zxdh_hw *hw)
+{
+	hw->pfinfo.pcieid = PF_PCIE_ID(hw->pcie_id);
+
+	if (hw->is_pf)
+		set_vfs_pcieid(hw);
+}
+
+static int zxdh_tbl_entry_offline_destroy(struct zxdh_hw *hw)
+{
+	int ret = 0;
+	uint32_t sdt_no;
+
+	if (!g_dtb_data.init_done)
+		return ret;
+
+	if (hw->is_pf) {
+		sdt_no = MK_SDT_NO(L2_ENTRY, hw->hash_search_index);
+		ret = dpp_dtb_hash_offline_delete(0, g_dtb_data.queueid, sdt_no, 0);
+		PMD_DRV_LOG(DEBUG, "%d dpp_dtb_hash_offline_delete sdt_no %d",
+				hw->port_id, sdt_no);
+		if (ret)
+			PMD_DRV_LOG(ERR, "%d dpp_dtb_hash_offline_delete sdt_no %d failed",
+					hw->port_id, sdt_no);
+
+		sdt_no = MK_SDT_NO(MC, hw->hash_search_index);
+		ret = dpp_dtb_hash_offline_delete(0, g_dtb_data.queueid, sdt_no, 0);
+		PMD_DRV_LOG(DEBUG, "%d dpp_dtb_hash_offline_delete sdt_no %d",
+				hw->port_id, sdt_no);
+		if (ret)
+			PMD_DRV_LOG(ERR, "%d dpp_dtb_hash_offline_delete sdt_no %d failed",
+				hw->port_id, sdt_no);
+	}
+	return ret;
+}
+
+static inline int zxdh_dtb_dump_res_init(struct zxdh_hw *hw __rte_unused,
+			DPP_DEV_INIT_CTRL_T *dpp_ctrl)
+{
+	int ret = 0;
+	int i;
+
+	struct zxdh_dtb_bulk_dump_info dtb_dump_baseres[] = {
+	/* eram */
+	{"zxdh_sdt_vport_att_table", ZXDH_TBL_ERAM_DUMP_SIZE, ZXDH_SDT_VPORT_ATT_TABLE, NULL},
+	{"zxdh_sdt_panel_att_table", ZXDH_TBL_ERAM_DUMP_SIZE, ZXDH_SDT_PANEL_ATT_TABLE, NULL},
+	{"zxdh_sdt_rss_att_table", ZXDH_TBL_ERAM_DUMP_SIZE, ZXDH_SDT_RSS_ATT_TABLE, NULL},
+	{"zxdh_sdt_vlan_att_table", ZXDH_TBL_ERAM_DUMP_SIZE, ZXDH_SDT_VLAN_ATT_TABLE, NULL},
+	/* hash */
+	{"zxdh_sdt_l2_entry_table0", ZXDH_TBL_ZCAM_DUMP_SIZE, ZXDH_SDT_L2_ENTRY_TABLE0, NULL},
+	{"zxdh_sdt_l2_entry_table1", ZXDH_TBL_ZCAM_DUMP_SIZE, ZXDH_SDT_L2_ENTRY_TABLE1, NULL},
+	{"zxdh_sdt_l2_entry_table2", ZXDH_TBL_ZCAM_DUMP_SIZE, ZXDH_SDT_L2_ENTRY_TABLE2, NULL},
+	{"zxdh_sdt_l2_entry_table3", ZXDH_TBL_ZCAM_DUMP_SIZE, ZXDH_SDT_L2_ENTRY_TABLE3, NULL},
+	{"zxdh_sdt_mc_table0", ZXDH_TBL_ZCAM_DUMP_SIZE, ZXDH_SDT_MC_TABLE0, NULL},
+	{"zxdh_sdt_mc_table1", ZXDH_TBL_ZCAM_DUMP_SIZE, ZXDH_SDT_MC_TABLE1, NULL},
+	{"zxdh_sdt_mc_table2", ZXDH_TBL_ZCAM_DUMP_SIZE, ZXDH_SDT_MC_TABLE2, NULL},
+	{"zxdh_sdt_mc_table3", ZXDH_TBL_ZCAM_DUMP_SIZE, ZXDH_SDT_MC_TABLE3, NULL},
+	};
+	for (i = 0; i < (int)RTE_DIM(dtb_dump_baseres); i++) {
+		struct zxdh_dtb_bulk_dump_info *p = dtb_dump_baseres + i;
+		const struct rte_memzone *generic_dump_mz = rte_memzone_reserve_aligned(p->mz_name,
+					p->mz_size, SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
+
+		if (generic_dump_mz == NULL) {
+			PMD_DRV_LOG(ERR,
+				"Cannot alloc mem for dtb tbl bulk dump, mz_name is %s, mz_size is %u",
+				p->mz_name, p->mz_size);
+			ret = -ENOMEM;
+			return ret;
+		}
+		p->mz = generic_dump_mz;
+		dpp_ctrl->dump_addr_info[i].vir_addr = generic_dump_mz->addr_64;
+		dpp_ctrl->dump_addr_info[i].phy_addr = generic_dump_mz->iova;
+		dpp_ctrl->dump_addr_info[i].sdt_no   = p->sdt_no;
+		dpp_ctrl->dump_addr_info[i].size     = p->mz_size;
+
+		g_dtb_data.dtb_table_bulk_dump_mz[dpp_ctrl->dump_sdt_num] = generic_dump_mz;
+		dpp_ctrl->dump_sdt_num++;
+	}
+	return ret;
+}
+
+static void dtb_data_res_free(struct zxdh_hw *hw)
+{
+	struct rte_eth_dev *dev = hw->eth_dev;
+
+	if ((g_dtb_data.init_done) && (g_dtb_data.bind_device == dev))  {
+		PMD_DRV_LOG(INFO, "%s g_dtb_data free queue %d",
+				dev->data->name, g_dtb_data.queueid);
+
+		int ret = 0;
+
+		ret = dpp_np_online_uninstall(0, dev->data->name, g_dtb_data.queueid);
+		if (ret)
+			PMD_DRV_LOG(ERR, "%s dpp_np_online_uninstall failed", dev->data->name);
+
+		if (g_dtb_data.dtb_table_conf_mz) {
+			rte_memzone_free(g_dtb_data.dtb_table_conf_mz);
+			PMD_DRV_LOG(INFO, "%s free  dtb_table_conf_mz  ", dev->data->name);
+			g_dtb_data.dtb_table_conf_mz = NULL;
+		}
+		if (g_dtb_data.dtb_table_dump_mz) {
+			PMD_DRV_LOG(INFO, "%s free  dtb_table_dump_mz  ", dev->data->name);
+			rte_memzone_free(g_dtb_data.dtb_table_dump_mz);
+			g_dtb_data.dtb_table_dump_mz = NULL;
+		}
+		int i;
+
+		for (i = 0; i < ZXDH_MAX_BASE_DTB_TABLE_COUNT; i++) {
+			if (g_dtb_data.dtb_table_bulk_dump_mz[i]) {
+				rte_memzone_free(g_dtb_data.dtb_table_bulk_dump_mz[i]);
+				PMD_DRV_LOG(INFO, "%s free dtb_table_bulk_dump_mz[%d]",
+						dev->data->name, i);
+				g_dtb_data.dtb_table_bulk_dump_mz[i] = NULL;
+			}
+		}
+		g_dtb_data.init_done = 0;
+		g_dtb_data.bind_device = NULL;
+	}
+	if (zxdh_shared_data != NULL)
+		zxdh_shared_data->npsdk_init_done = 0;
+}
+
+static inline int npsdk_dtb_res_init(struct rte_eth_dev *dev)
+{
+	int ret = 0;
+	struct zxdh_hw *hw = dev->data->dev_private;
+
+	if (g_dtb_data.init_done) {
+		PMD_INIT_LOG(DEBUG, "DTB res already init done, dev %s no need init",
+			dev->device->name);
+		return 0;
+	}
+	g_dtb_data.queueid = INVALID_DTBQUE;
+	g_dtb_data.bind_device = dev;
+	g_dtb_data.dev_refcnt++;
+	g_dtb_data.init_done = 1;
+	/* */
+	DPP_DEV_INIT_CTRL_T *dpp_ctrl = malloc(sizeof(*dpp_ctrl) +
+			sizeof(DPP_DTB_ADDR_INFO_T) * 256);
+
+	if (dpp_ctrl == NULL) {
+		PMD_INIT_LOG(ERR, "dev %s annot allocate memory for dpp_ctrl", dev->device->name);
+		ret = -ENOMEM;
+		goto free_res;
+	}
+	memset(dpp_ctrl, 0, sizeof(*dpp_ctrl) + sizeof(DPP_DTB_ADDR_INFO_T) * 256);
+
+	dpp_ctrl->queue_id = 0xff;
+	dpp_ctrl->vport	 = hw->vport.vport;
+	dpp_ctrl->vector = ZXDH_MSIX_INTR_DTB_VEC;
+	strcpy((char *)dpp_ctrl->port_name, dev->device->name);
+	dpp_ctrl->pcie_vir_addr = (uint32_t)hw->bar_addr[0];
+
+	struct bar_offset_params param = {0};
+	struct bar_offset_res  res = {0};
+
+	param.pcie_id = hw->pcie_id;
+	param.virt_addr = hw->bar_addr[0] + ZXDH_CTRLCH_OFFSET;
+	param.type = URI_NP;
+
+	ret = zxdh_get_bar_offset(&param, &res);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "dev %s get npbar offset failed", dev->device->name);
+		goto free_res;
+	}
+	dpp_ctrl->np_bar_len = res.bar_length;
+	dpp_ctrl->np_bar_offset = res.bar_offset;
+	if (!g_dtb_data.dtb_table_conf_mz) {
+		const struct rte_memzone *conf_mz = rte_memzone_reserve_aligned("zxdh_dtb_table_conf_mz",
+				ZXDH_DTB_TABLE_CONF_SIZE, SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
+
+		if (conf_mz == NULL) {
+			PMD_INIT_LOG(ERR,
+				"dev %s annot allocate memory for dtb table conf",
+				dev->device->name);
+			ret = -ENOMEM;
+			goto free_res;
+		}
+		dpp_ctrl->down_vir_addr = conf_mz->addr_64;
+		dpp_ctrl->down_phy_addr = conf_mz->iova;
+		g_dtb_data.dtb_table_conf_mz = conf_mz;
+	}
+	/* */
+	if (!g_dtb_data.dtb_table_dump_mz) {
+		const struct rte_memzone *dump_mz = rte_memzone_reserve_aligned("zxdh_dtb_table_dump_mz",
+				ZXDH_DTB_TABLE_DUMP_SIZE, SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
+
+		if (dump_mz == NULL) {
+			PMD_INIT_LOG(ERR,
+				"dev %s Cannot allocate memory for dtb table dump",
+				dev->device->name);
+			ret = -ENOMEM;
+			goto free_res;
+		}
+		dpp_ctrl->dump_vir_addr = dump_mz->addr_64;
+		dpp_ctrl->dump_phy_addr = dump_mz->iova;
+		g_dtb_data.dtb_table_dump_mz = dump_mz;
+	}
+	/* init bulk dump */
+	zxdh_dtb_dump_res_init(hw, dpp_ctrl);
+
+	ret = dpp_host_np_init(0, dpp_ctrl);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "dev %s dpp host np init failed .ret %d", dev->device->name, ret);
+		goto free_res;
+	}
+
+	PMD_INIT_LOG(INFO, "dev %s dpp host np init ok.dtb queue %d",
+		dev->device->name, dpp_ctrl->queue_id);
+	g_dtb_data.queueid = dpp_ctrl->queue_id;
+	free(dpp_ctrl);
+	return 0;
+
+free_res:
+	dtb_data_res_free(hw);
+	free(dpp_ctrl);
+	return -ret;
+}
+
+static int32_t dpp_res_uni_init(uint32_t type)
+{
+	uint32_t ret = 0;
+	uint32_t dev_id = 0;
+	DPP_APT_HASH_RES_INIT_T HashResInit = {0};
+	DPP_APT_ERAM_RES_INIT_T EramResInit = {0};
+	DPP_APT_STAT_RES_INIT_T StatResInit = {0};
+
+	memset(&HashResInit, 0x0, sizeof(DPP_APT_HASH_RES_INIT_T));
+	memset(&EramResInit, 0x0, sizeof(DPP_APT_ERAM_RES_INIT_T));
+	memset(&StatResInit, 0x0, sizeof(DPP_APT_STAT_RES_INIT_T));
+
+	ret = dpp_apt_hash_res_get(type, &HashResInit);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "%s hash_res_get failed!", __func__);
+		return -1;
+	}
+	ret = dpp_apt_eram_res_get(type, &EramResInit);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "%s eram_res_get failed!", __func__);
+		return -1;
+	}
+	ret = dpp_apt_stat_res_get(type, &StatResInit);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "%s stat_res_get failed!", __func__);
+		return -1;
+	}
+	ret = dpp_apt_hash_global_res_init(dev_id);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "%s hash_global_res_init failed!", __func__);
+		return -1;
+	}
+
+	ret = dpp_apt_hash_func_res_init(dev_id, HashResInit.func_num, HashResInit.func_res);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "%s hash_func_res_init failed!", __func__);
+		return -1;
+	}
+
+	ret = dpp_apt_hash_bulk_res_init(dev_id, HashResInit.bulk_num, HashResInit.bulk_res);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "%s hash_bulk_res_init failed!", __func__);
+		return -1;
+	}
+	ret = dpp_apt_hash_tbl_res_init(dev_id, HashResInit.tbl_num, HashResInit.tbl_res);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "%s hash_tbl_res_init failed!", __func__);
+		return -1;
+	}
+	ret = dpp_apt_eram_res_init(dev_id, EramResInit.tbl_num, EramResInit.eram_res);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "%s eram_res_init failed!", __func__);
+		return -1;
+	}
+	ret = dpp_stat_ppu_eram_baddr_set(dev_id, StatResInit.eram_baddr);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "%s stat_ppu_eram_baddr_set failed!", __func__);
+		return -1;
+	}
+	ret = dpp_stat_ppu_eram_depth_set(dev_id, StatResInit.eram_depth); /* unit: 128bits */
+	if (ret) {
+		PMD_DRV_LOG(ERR, "%s stat_ppu_eram_depth_set failed!", __func__);
+		return -1;
+	}
+	ret = dpp_se_cmmu_smmu1_cfg_set(dev_id, StatResInit.ddr_baddr);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "%s dpp_se_cmmu_smmu1_cfg_set failed!", __func__);
+		return -1;
+	}
+	ret = dpp_stat_ppu_ddr_baddr_set(dev_id, StatResInit.ppu_ddr_offset); /* unit: 128bits */
+	if (ret) {
+		PMD_DRV_LOG(ERR, "%s stat_ppu_ddr_baddr_set failed!", __func__);
+		return -1;
+	}
+
+	return 0;
+}
+
+static inline int npsdk_apt_res_init(struct rte_eth_dev *dev __rte_unused)
+{
+	int32_t ret = 0;
+
+	ret = dpp_res_uni_init(SE_NIC_RES_TYPE);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "init stand dpp res failed");
+		return -1;
+	}
+
+	return ret;
+}
+static int zxdh_np_init(struct rte_eth_dev *eth_dev)
+{
+	uint32_t ret = 0;
+	struct zxdh_hw *hw = eth_dev->data->dev_private;
+
+	if ((zxdh_shared_data != NULL) && zxdh_shared_data->npsdk_init_done) {
+		g_dtb_data.dev_refcnt++;
+		zxdh_tbl_entry_offline_destroy(hw);
+		PMD_DRV_LOG(DEBUG, "no need to init dtb  dtb chanenl %d devref %d",
+				g_dtb_data.queueid, g_dtb_data.dev_refcnt);
+		return 0;
+	}
+
+	if (hw->is_pf) {
+		ret = npsdk_dtb_res_init(eth_dev);
+		if (ret) {
+			PMD_DRV_LOG(ERR, "dpp apt init failed, ret:%d ", ret);
+			return -ret;
+		}
+
+		ret = npsdk_apt_res_init(eth_dev);
+		if (ret) {
+			PMD_DRV_LOG(ERR, "dpp apt init failed, ret:%d ", ret);
+			return -ret;
+		}
+	}
+	if (zxdh_shared_data != NULL)
+		zxdh_shared_data->npsdk_init_done = 1;
+
+	return 0;
+}
+
+static void zxdh_priv_res_free(struct zxdh_hw *priv)
+{
+	rte_free(priv->vlan_fiter);
+	priv->vlan_fiter = NULL;
+	rte_free(priv->vfinfo);
+	priv->vfinfo = NULL;
+}
+
+static int zxdh_tbl_entry_destroy(struct rte_eth_dev *dev)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+	uint32_t sdt_no;
+	int ret = 0;
+
+	if (!g_dtb_data.init_done)
+		return ret;
+
+	if (hw->is_pf) {
+		sdt_no = MK_SDT_NO(L2_ENTRY, hw->hash_search_index);
+		ret = dpp_dtb_hash_online_delete(0, g_dtb_data.queueid, sdt_no);
+		if (ret) {
+			PMD_DRV_LOG(ERR, "%s dpp_dtb_hash_online_delete sdt_no %d failed ",
+				dev->data->name, sdt_no);
+			return -1;
+		}
+
+		sdt_no = MK_SDT_NO(MC, hw->hash_search_index);
+		ret = dpp_dtb_hash_online_delete(0, g_dtb_data.queueid, sdt_no);
+		if (ret) {
+			PMD_DRV_LOG(ERR, "%s dpp_dtb_hash_online_delete sdt_no %d failed ",
+				dev->data->name, sdt_no);
+			return -1;
+		}
+	}
+	return ret;
+}
+
+static void zxdh_np_destroy(struct rte_eth_dev *dev)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+	int ret;
+
+	ret = zxdh_tbl_entry_destroy(dev);
+	if (ret)
+		return;
+
+	if ((!g_dtb_data.init_done) && (!g_dtb_data.dev_refcnt))
+		return;
+
+	if (--g_dtb_data.dev_refcnt == 0)
+		dtb_data_res_free(hw);
+
+	PMD_DRV_LOG(DEBUG, "g_dtb_data dev_refcnt %d", g_dtb_data.dev_refcnt);
+}
+
+static int32_t zxdh_eth_dev_init(struct rte_eth_dev *eth_dev)
+{
+	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
+	int32_t ret;
+
+	eth_dev->dev_ops = &zxdh_eth_dev_ops;
+
+	/**
+	 * Primary process does the whole initialization,
+	 * for secondaryprocesses, we just select the same Rx and Tx function as primary.
+	 */
+	struct zxdh_hw *hw = eth_dev->data->dev_private;
+
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+		VTPCI_OPS(hw) = &zxdh_modern_ops;
+		set_rxtx_funcs(eth_dev);
+		return 0;
+	}
+	/* Allocate memory for storing MAC addresses */
+	eth_dev->data->mac_addrs = rte_zmalloc("zxdh_mac",
+			ZXDH_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN, 0);
+	if (eth_dev->data->mac_addrs == NULL) {
+		PMD_INIT_LOG(ERR, "Failed to allocate %d bytes store MAC addresses",
+				ZXDH_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN);
+		return -ENOMEM;
+	}
+	memset(hw, 0, sizeof(*hw));
+	ret = zxdh_dev_devargs_parse(eth_dev->device->devargs, hw);
+	if (ret < 0) {
+		PMD_INIT_LOG(ERR, "dev args parse failed");
+		return -EINVAL;
+	}
+
+	hw->bar_addr[0] = (uint64_t)pci_dev->mem_resource[0].addr;
+	if (hw->bar_addr[0] == 0) {
+		PMD_INIT_LOG(ERR, "Bad mem resource.");
+		return -EIO;
+	}
+	hw->device_id = pci_dev->id.device_id;
+	hw->port_id = eth_dev->data->port_id;
+	hw->eth_dev = eth_dev;
+	hw->speed = RTE_ETH_SPEED_NUM_UNKNOWN;
+	hw->duplex = RTE_ETH_LINK_FULL_DUPLEX;
+	hw->is_pf = 0;
+
+	rte_spinlock_init(&hw->state_lock);
+	if (pci_dev->id.device_id == ZXDH_E310_PF_DEVICEID ||
+		pci_dev->id.device_id == ZXDH_E312_PF_DEVICEID) {
+		hw->is_pf = 1;
+		hw->pfinfo.vf_nums = pci_dev->max_vfs;
+	}
+
+	/* reset device and get dev config*/
+	ret = zxdh_init_once(eth_dev);
+	if (ret != 0)
+		goto err_zxdh_init;
+
+	ret = zxdh_init_device(eth_dev);
+	if (ret < 0)
+		goto err_zxdh_init;
+
+	ret = zxdh_np_init(eth_dev);
+	if (ret)
+		goto err_zxdh_init;
+
+	zxdh_priv_res_init(hw);
+	zxdh_sriovinfo_init(hw);
+	zxdh_msg_cb_reg(hw);
+	zxdh_configure_intr(eth_dev);
+	return 0;
+
+err_zxdh_init:
+	zxdh_intr_release(eth_dev);
+	zxdh_np_destroy(eth_dev);
+	zxdh_bar_msg_chan_exit();
+	zxdh_priv_res_free(hw);
+	rte_free(eth_dev->data->mac_addrs);
+	eth_dev->data->mac_addrs = NULL;
+	return ret;
+}
+
+int32_t zxdh_eth_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+			struct rte_pci_device *pci_dev)
+{
+	return rte_eth_dev_pci_generic_probe(pci_dev,
+						sizeof(struct zxdh_hw),
+						zxdh_eth_dev_init);
+}
+
+
+static int32_t zxdh_eth_dev_uninit(struct rte_eth_dev *eth_dev __rte_unused)
+{
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+		return 0;
+	/** todo later
+	 * zxdh_dev_close(eth_dev);
+	 */
+	return 0;
+}
+
+int32_t zxdh_eth_pci_remove(struct rte_pci_device *pci_dev)
+{
+	int32_t ret = rte_eth_dev_pci_generic_remove(pci_dev, zxdh_eth_dev_uninit);
+
+	if (ret == -ENODEV) { /* Port has already been released by close. */
+		ret = 0;
+	}
+	return ret;
+}
+
+static const struct rte_pci_id pci_id_zxdh_map[] = {
+	{RTE_PCI_DEVICE(PCI_VENDOR_ID_ZTE, ZXDH_E310_PF_DEVICEID)},
+	{RTE_PCI_DEVICE(PCI_VENDOR_ID_ZTE, ZXDH_E310_VF_DEVICEID)},
+	{RTE_PCI_DEVICE(PCI_VENDOR_ID_ZTE, ZXDH_E312_PF_DEVICEID)},
+	{RTE_PCI_DEVICE(PCI_VENDOR_ID_ZTE, ZXDH_E312_VF_DEVICEID)},
+	{.vendor_id = 0, /* sentinel */ },
+};
+static struct rte_pci_driver zxdh_pmd = {
+	.driver = {.name = "net_zxdh", },
+	.id_table = pci_id_zxdh_map,
+	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+	.probe = zxdh_eth_pci_probe,
+	.remove = zxdh_eth_pci_remove,
+};
+
+RTE_PMD_REGISTER_PCI(net_zxdh, zxdh_pmd);
+RTE_PMD_REGISTER_PCI_TABLE(net_zxdh, pci_id_zxdh_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_zxdh, "* vfio-pci");
 RTE_LOG_REGISTER_SUFFIX(zxdh_logtype_init, init, NOTICE);
 RTE_LOG_REGISTER_SUFFIX(zxdh_logtype_driver, driver, NOTICE);
diff --git a/drivers/net/zxdh/zxdh_ethdev.h b/drivers/net/zxdh/zxdh_ethdev.h
new file mode 100644
index 0000000000..c139d0aa5e
--- /dev/null
+++ b/drivers/net/zxdh/zxdh_ethdev.h
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 ZTE Corporation
+ */
+
+#ifndef _ZXDH_ETHDEV_H_
+#define _ZXDH_ETHDEV_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include "ethdev_pci.h"
+
+extern struct zxdh_dtb_shared_data g_dtb_data;
+#define PF_PCIE_ID(pcie_id)         ((pcie_id & 0xff00) | 1 << 11)
+#define VF_PCIE_ID(pcie_id, vf_idx) ((pcie_id & 0xff00) | (1 << 11) | (vf_idx & 0xff))
+
+#define ZXDH_QUEUES_NUM_MAX          256
+
+/* ZXDH PCI vendor/device ID. */
+#define PCI_VENDOR_ID_ZTE        0x1cf2
+
+#define ZXDH_E310_PF_DEVICEID     0x8061
+#define ZXDH_E310_VF_DEVICEID     0x8062
+#define ZXDH_E312_PF_DEVICEID     0x8049
+#define ZXDH_E312_VF_DEVICEID     0x8060
+
+#define ZXDH_MAX_UC_MAC_ADDRS  32
+#define ZXDH_MAX_MC_MAC_ADDRS  32
+#define ZXDH_MAX_MAC_ADDRS     (ZXDH_MAX_UC_MAC_ADDRS + ZXDH_MAX_MC_MAC_ADDRS)
+
+/* BAR definitions */
+#define ZXDH_NUM_BARS    2
+#define ZXDH_BAR0_INDEX  0
+
+#define ZXDH_MIN_QUEUE_DEPTH 1024
+#define ZXDH_MAX_QUEUE_DEPTH 32768
+
+#define ZXDH_MAX_VF 256
+
+#define ZXDH_TBL_ERAM_DUMP_SIZE  (4 * 1024 * 1024)
+#define ZXDH_TBL_ZCAM_DUMP_SIZE  (5 * 1024 * 1024)
+
+#define INVALID_DTBQUE  0xFFFF
+#define ZXDH_MAX_BASE_DTB_TABLE_COUNT 30
+#define ZXDH_DTB_TABLE_CONF_SIZE  (32 * (16 + 16 * 1024))
+#define ZXDH_DTB_TABLE_DUMP_SIZE  (32 * (16 + 16 * 1024))
+
+/*
+ * Process  dev config changed interrupt. Call the callback
+ * if link state changed, generate gratuitous RARP packet if
+ * the status indicates an ANNOUNCE.
+ */
+#define ZXDH_NET_S_LINK_UP   1 /* Link is up */
+#define ZXDH_NET_S_ANNOUNCE  2 /* Announcement is needed */
+
+struct pfinfo {
+	uint16_t pcieid;
+	uint16_t vf_nums;
+};
+struct vfinfo {
+	uint16_t vf_idx;
+	uint16_t pcieid;
+	uint16_t vport;
+	uint8_t flag;
+	uint8_t state;
+	uint8_t rsv;
+	struct rte_ether_addr mac_addr;
+	struct rte_ether_addr vf_mac[ZXDH_MAX_MAC_ADDRS];
+};
+
+union VPORT {
+	uint16_t vport;
+
+	__extension__
+	struct {
+		uint16_t vfid:8;
+		uint16_t pfid:3;
+		uint16_t vf_flag:1;
+		uint16_t epid:3;
+		uint16_t direct_flag:1;
+	};
+};
+
+struct chnl_context {
+	uint16_t valid;
+	uint16_t ph_chno;
+}; /* 4B */
+
+struct zxdh_hw {
+	uint64_t host_features;
+	uint64_t guest_features;
+	uint32_t max_queue_pairs;
+	uint16_t max_mtu;
+	uint8_t  vtnet_hdr_size;
+	uint8_t  vlan_strip;
+	uint8_t  use_msix;
+	uint8_t  intr_enabled;
+	uint8_t  started;
+	uint8_t  weak_barriers;
+
+	bool has_tx_offload;
+	bool has_rx_offload;
+
+	uint8_t  mac_addr[RTE_ETHER_ADDR_LEN];
+	uint16_t port_id;
+
+	uint32_t  notify_off_multiplier;
+	uint32_t  speed;  /* link speed in MB */
+	uint32_t  speed_mode;  /* link speed in 1x 2x 3x */
+	uint8_t   duplex;
+	uint8_t  *isr;
+	uint16_t *notify_base;
+
+	struct zxdh_pci_common_cfg *common_cfg;
+	struct zxdh_net_config     *dev_cfg;
+
+	uint16_t queue_num;
+	uint16_t device_id;
+
+	uint16_t pcie_id;
+	uint8_t  phyport;
+	bool     msg_chan_init;
+
+	uint8_t panel_id;
+	uint8_t rsv[1];
+
+	/**
+	 * App management thread and virtio interrupt handler
+	 * thread both can change device state,
+	 * this lock is meant to avoid such a contention.
+	 */
+	rte_spinlock_t     state_lock;
+	struct rte_mbuf  **inject_pkts;
+	struct virtqueue **vqs;
+
+	uint64_t bar_addr[ZXDH_NUM_BARS];
+	struct rte_intr_handle *risc_intr;  /* Interrupt handle of rsic_v to host */
+	struct rte_intr_handle *dtb_intr;  /* Interrupt handle of rsic_v to host */
+
+	struct chnl_context channel_context[ZXDH_QUEUES_NUM_MAX];
+	union VPORT vport;
+
+	uint8_t is_pf         : 1,
+			switchoffload : 1;
+	uint8_t hash_search_index;
+	uint8_t admin_status;
+
+	uint16_t vfid;
+	uint16_t q_depth;
+	uint64_t *vlan_fiter;
+	struct pfinfo pfinfo;
+	struct vfinfo *vfinfo;
+	struct rte_eth_dev *eth_dev;
+};
+
+/* Shared data between primary and secondary processes. */
+struct zxdh_shared_data {
+	rte_spinlock_t lock; /* Global spinlock for primary and secondary processes. */
+	int init_done;       /* Whether primary has done initialization. */
+	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
+
+	int npsdk_init_done;
+	uint32_t  dev_refcnt;
+	struct zxdh_dtb_shared_data *dtb_data;
+};
+
+struct zxdh_dtb_shared_data {
+	int init_done;
+	char name[32];
+	uint16_t queueid;
+	uint16_t vport;
+	uint32_t vector;
+	const struct rte_memzone *dtb_table_conf_mz;
+	const struct rte_memzone *dtb_table_dump_mz;
+	const struct rte_memzone *dtb_table_bulk_dump_mz[ZXDH_MAX_BASE_DTB_TABLE_COUNT];
+	struct rte_eth_dev *bind_device;
+	uint32_t dev_refcnt;
+};
+
+struct zxdh_dtb_bulk_dump_info {
+	const char *mz_name;
+	uint32_t mz_size;
+	uint32_t sdt_no;        /** <@brief sdt no 0~255 */
+	const struct rte_memzone *mz;
+};
+
+void zxdh_interrupt_handler(void *param);
+int32_t zxdh_dev_pause(struct rte_eth_dev *dev);
+void zxdh_dev_resume(struct rte_eth_dev *dev);
+int32_t zxdh_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts, int32_t nb_pkts);
+void zxdh_notify_peers(struct rte_eth_dev *dev);
+
+int32_t zxdh_eth_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+			struct rte_pci_device *pci_dev);
+int32_t zxdh_eth_pci_remove(struct rte_pci_device *pci_dev);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZXDH_ETHDEV_H_ */
diff --git a/drivers/net/zxdh/zxdh_pci.c b/drivers/net/zxdh/zxdh_pci.c
new file mode 100644
index 0000000000..b32c2e7955
--- /dev/null
+++ b/drivers/net/zxdh/zxdh_pci.c
@@ -0,0 +1,462 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 ZTE Corporation
+ */
+
+#include <stdint.h>
+#include <unistd.h>
+
+#ifdef RTE_EXEC_ENV_LINUX
+ #include <dirent.h>
+ #include <fcntl.h>
+#endif
+
+#include <rte_io.h>
+#include <rte_bus.h>
+#include <rte_common.h>
+
+#include "zxdh_pci.h"
+#include "zxdh_logs.h"
+#include "zxdh_queue.h"
+
+/*
+ * Following macros are derived from linux/pci_regs.h, however,
+ * we can't simply include that header here, as there is no such
+ * file for non-Linux platform.
+ */
+#define PCI_CAPABILITY_LIST             0x34
+#define PCI_CAP_ID_VNDR                 0x09
+#define PCI_CAP_ID_MSIX                 0x11
+
+/*
+ * The remaining space is defined by each driver as the per-driver
+ * configuration space.
+ */
+#define ZXDH_PCI_CONFIG(hw)  (((hw)->use_msix == ZXDH_MSIX_ENABLED) ? 24 : 20)
+#define PCI_MSIX_ENABLE 0x8000
+
+static inline int32_t check_vq_phys_addr_ok(struct virtqueue *vq)
+{
+	/**
+	 * Virtio PCI device ZXDH_PCI_QUEUE_PF register is 32bit,
+	 * and only accepts 32 bit page frame number.
+	 * Check if the allocated physical memory exceeds 16TB.
+	 */
+	if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >> (ZXDH_PCI_QUEUE_ADDR_SHIFT + 32)) {
+		PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
+		return 0;
+	}
+	return 1;
+}
+static inline void io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
+{
+	rte_write32(val & ((1ULL << 32) - 1), lo);
+	rte_write32(val >> 32, hi);
+}
+
+static void modern_read_dev_config(struct zxdh_hw *hw,
+								   size_t offset,
+								   void *dst,
+								   int32_t length)
+{
+	int32_t i       = 0;
+	uint8_t *p      = NULL;
+	uint8_t old_gen = 0;
+	uint8_t new_gen = 0;
+
+	do {
+		old_gen = rte_read8(&hw->common_cfg->config_generation);
+
+		p = dst;
+		for (i = 0;  i < length; i++)
+			*p++ = rte_read8((uint8_t *)hw->dev_cfg + offset + i);
+
+		new_gen = rte_read8(&hw->common_cfg->config_generation);
+	} while (old_gen != new_gen);
+}
+
+static void modern_write_dev_config(struct zxdh_hw *hw,
+									size_t offset,
+									const void *src,
+									int32_t length)
+{
+	int32_t i = 0;
+	const uint8_t *p = src;
+
+	for (i = 0;  i < length; i++)
+		rte_write8((*p++), (((uint8_t *)hw->dev_cfg) + offset + i));
+}
+
+static uint64_t modern_get_features(struct zxdh_hw *hw)
+{
+	uint32_t features_lo = 0;
+	uint32_t features_hi = 0;
+
+	rte_write32(0, &hw->common_cfg->device_feature_select);
+	features_lo = rte_read32(&hw->common_cfg->device_feature);
+
+	rte_write32(1, &hw->common_cfg->device_feature_select);
+	features_hi = rte_read32(&hw->common_cfg->device_feature);
+
+	return ((uint64_t)features_hi << 32) | features_lo;
+}
+
+static void modern_set_features(struct zxdh_hw *hw, uint64_t features)
+{
+	rte_write32(0, &hw->common_cfg->guest_feature_select);
+	rte_write32(features & ((1ULL << 32) - 1), &hw->common_cfg->guest_feature);
+	rte_write32(1, &hw->common_cfg->guest_feature_select);
+	rte_write32(features >> 32, &hw->common_cfg->guest_feature);
+}
+
+static uint8_t modern_get_status(struct zxdh_hw *hw)
+{
+	return rte_read8(&hw->common_cfg->device_status);
+}
+
+static void modern_set_status(struct zxdh_hw *hw, uint8_t status)
+{
+	rte_write8(status, &hw->common_cfg->device_status);
+}
+
+static uint8_t modern_get_isr(struct zxdh_hw *hw)
+{
+	return rte_read8(hw->isr);
+}
+
+static uint16_t modern_set_config_irq(struct zxdh_hw *hw, uint16_t vec)
+{
+	rte_write16(vec, &hw->common_cfg->msix_config);
+	return rte_read16(&hw->common_cfg->msix_config);
+}
+
+static uint16_t modern_set_queue_irq(struct zxdh_hw *hw, struct virtqueue *vq, uint16_t vec)
+{
+	rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
+	rte_write16(vec, &hw->common_cfg->queue_msix_vector);
+	return rte_read16(&hw->common_cfg->queue_msix_vector);
+}
+
+static uint16_t modern_get_queue_num(struct zxdh_hw *hw, uint16_t queue_id)
+{
+	rte_write16(queue_id, &hw->common_cfg->queue_select);
+	return rte_read16(&hw->common_cfg->queue_size);
+}
+
+static void modern_set_queue_num(struct zxdh_hw *hw, uint16_t queue_id, uint16_t vq_size)
+{
+	rte_write16(queue_id, &hw->common_cfg->queue_select);
+	rte_write16(vq_size, &hw->common_cfg->queue_size);
+}
+
+static int32_t modern_setup_queue(struct zxdh_hw *hw, struct virtqueue *vq)
+{
+	uint64_t desc_addr  = 0;
+	uint64_t avail_addr = 0;
+	uint64_t used_addr  = 0;
+	uint16_t notify_off = 0;
+
+	if (!check_vq_phys_addr_ok(vq))
+		return -1;
+
+	desc_addr = vq->vq_ring_mem;
+	avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
+	if (vtpci_packed_queue(vq->hw)) {
+		used_addr = RTE_ALIGN_CEIL((avail_addr + sizeof(struct vring_packed_desc_event)),
+							ZXDH_PCI_VRING_ALIGN);
+	} else {
+		used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
+						ring[vq->vq_nentries]), ZXDH_PCI_VRING_ALIGN);
+	}
+
+	rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
+
+	io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo,
+					   &hw->common_cfg->queue_desc_hi);
+	io_write64_twopart(avail_addr, &hw->common_cfg->queue_avail_lo,
+					   &hw->common_cfg->queue_avail_hi);
+	io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo,
+					   &hw->common_cfg->queue_used_hi);
+
+	notify_off = rte_read16(&hw->common_cfg->queue_notify_off); /* default 0 */
+	notify_off = 0;
+	vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
+			notify_off * hw->notify_off_multiplier);
+
+	rte_write16(1, &hw->common_cfg->queue_enable);
+
+	return 0;
+}
+
+static void modern_del_queue(struct zxdh_hw *hw, struct virtqueue *vq)
+{
+	rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
+
+	io_write64_twopart(0, &hw->common_cfg->queue_desc_lo,
+					   &hw->common_cfg->queue_desc_hi);
+	io_write64_twopart(0, &hw->common_cfg->queue_avail_lo,
+					   &hw->common_cfg->queue_avail_hi);
+	io_write64_twopart(0, &hw->common_cfg->queue_used_lo,
+					   &hw->common_cfg->queue_used_hi);
+
+	rte_write16(0, &hw->common_cfg->queue_enable);
+}
+
+static void modern_notify_queue(struct zxdh_hw *hw, struct virtqueue *vq)
+{
+	uint32_t notify_data = 0;
+
+	if (!vtpci_with_feature(hw, ZXDH_F_NOTIFICATION_DATA)) {
+		rte_write16(vq->vq_queue_index, vq->notify_addr);
+		return;
+	}
+
+	if (vtpci_with_feature(hw, ZXDH_F_RING_PACKED)) {
+		/*
+		 * Bit[0:15]: vq queue index
+		 * Bit[16:30]: avail index
+		 * Bit[31]: avail wrap counter
+		 */
+		notify_data = ((uint32_t)(!!(vq->vq_packed.cached_flags &
+						VRING_PACKED_DESC_F_AVAIL)) << 31) |
+						((uint32_t)vq->vq_avail_idx << 16) |
+						vq->vq_queue_index;
+	} else {
+		/*
+		 * Bit[0:15]: vq queue index
+		 * Bit[16:31]: avail index
+		 */
+		notify_data = ((uint32_t)vq->vq_avail_idx << 16) | vq->vq_queue_index;
+	}
+	PMD_DRV_LOG(DEBUG, "queue:%d notify_data 0x%x notify_addr 0x%p",
+				 vq->vq_queue_index, notify_data, vq->notify_addr);
+	rte_write32(notify_data, vq->notify_addr);
+}
+
+const struct zxdh_pci_ops zxdh_modern_ops = {
+	.read_dev_cfg   = modern_read_dev_config,
+	.write_dev_cfg  = modern_write_dev_config,
+	.get_status     = modern_get_status,
+	.set_status     = modern_set_status,
+	.get_features   = modern_get_features,
+	.set_features   = modern_set_features,
+	.get_isr        = modern_get_isr,
+	.set_config_irq = modern_set_config_irq,
+	.set_queue_irq  = modern_set_queue_irq,
+	.get_queue_num  = modern_get_queue_num,
+	.set_queue_num  = modern_set_queue_num,
+	.setup_queue    = modern_setup_queue,
+	.del_queue      = modern_del_queue,
+	.notify_queue   = modern_notify_queue,
+};
+
+void zxdh_vtpci_read_dev_config(struct zxdh_hw *hw, size_t offset, void *dst, int32_t length)
+{
+	VTPCI_OPS(hw)->read_dev_cfg(hw, offset, dst, length);
+}
+void zxdh_vtpci_write_dev_config(struct zxdh_hw *hw, size_t offset, const void *src, int32_t length)
+{
+	VTPCI_OPS(hw)->write_dev_cfg(hw, offset, src, length);
+}
+
+uint16_t zxdh_vtpci_get_features(struct zxdh_hw *hw)
+{
+	return VTPCI_OPS(hw)->get_features(hw);
+}
+
+void zxdh_vtpci_reset(struct zxdh_hw *hw)
+{
+	PMD_INIT_LOG(INFO, "port %u device start reset, just wait...", hw->port_id);
+	uint32_t retry = 0;
+
+	VTPCI_OPS(hw)->set_status(hw, ZXDH_CONFIG_STATUS_RESET);
+	/* Flush status write and wait device ready max 3 seconds. */
+	while (VTPCI_OPS(hw)->get_status(hw) != ZXDH_CONFIG_STATUS_RESET) {
+		++retry;
+		usleep(1000L);
+	}
+	PMD_INIT_LOG(INFO, "port %u device reset %u ms done", hw->port_id, retry);
+}
+
+void zxdh_vtpci_reinit_complete(struct zxdh_hw *hw)
+{
+	zxdh_vtpci_set_status(hw, ZXDH_CONFIG_STATUS_DRIVER_OK);
+}
+
+void zxdh_vtpci_set_status(struct zxdh_hw *hw, uint8_t status)
+{
+	if (status != ZXDH_CONFIG_STATUS_RESET)
+		status |= VTPCI_OPS(hw)->get_status(hw);
+
+	VTPCI_OPS(hw)->set_status(hw, status);
+}
+
+uint8_t zxdh_vtpci_get_status(struct zxdh_hw *hw)
+{
+	return VTPCI_OPS(hw)->get_status(hw);
+}
+
+uint8_t zxdh_vtpci_isr(struct zxdh_hw *hw)
+{
+	return VTPCI_OPS(hw)->get_isr(hw);
+}
+
+static void *get_cfg_addr(struct rte_pci_device *dev, struct zxdh_pci_cap *cap)
+{
+	uint8_t  bar    = cap->bar;
+	uint32_t length = cap->length;
+	uint32_t offset = cap->offset;
+
+	if (bar >= PCI_MAX_RESOURCE) {
+		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
+		return NULL;
+	}
+	if (offset + length < offset) {
+		PMD_INIT_LOG(ERR, "offset(%u) + length(%u) overflows", offset, length);
+		return NULL;
+	}
+	if (offset + length > dev->mem_resource[bar].len) {
+		PMD_INIT_LOG(ERR, "invalid cap: overflows bar space: %u > %" PRIu64,
+			offset + length, dev->mem_resource[bar].len);
+		return NULL;
+	}
+	uint8_t *base = dev->mem_resource[bar].addr;
+
+	if (base == NULL) {
+		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
+		return NULL;
+	}
+	return base + offset;
+}
+
+int32_t zxdh_read_pci_caps(struct rte_pci_device *dev, struct zxdh_hw *hw)
+{
+	if (dev->mem_resource[0].addr == NULL) {
+		PMD_INIT_LOG(ERR, "bar0 base addr is NULL");
+		return -1;
+	}
+	uint8_t pos = 0;
+	int32_t ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
+
+	if (ret != 1) {
+		PMD_INIT_LOG(DEBUG, "failed to read pci capability list, ret %d", ret);
+		return -1;
+	}
+	while (pos) {
+		struct zxdh_pci_cap cap;
+
+		ret = rte_pci_read_config(dev, &cap, 2, pos);
+		if (ret != 2) {
+			PMD_INIT_LOG(DEBUG, "failed to read pci cap at pos: %x ret %d", pos, ret);
+			break;
+		}
+		if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
+			/**
+			 * Transitional devices would also have this capability,
+			 * that's why we also check if msix is enabled.
+			 * 1st byte is cap ID; 2nd byte is the position of next cap;
+			 * next two bytes are the flags.
+			 */
+			uint16_t flags = 0;
+
+			ret = rte_pci_read_config(dev, &flags, sizeof(flags), pos + 2);
+			if (ret != sizeof(flags)) {
+				PMD_INIT_LOG(ERR, "failed to read pci cap at pos: %x ret %d",
+					pos + 2, ret);
+				break;
+			}
+			hw->use_msix = (flags & PCI_MSIX_ENABLE) ?
+					ZXDH_MSIX_ENABLED : ZXDH_MSIX_DISABLED;
+		}
+		if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
+			PMD_INIT_LOG(DEBUG, "[%2x] skipping non VNDR cap id: %02x",
+				pos, cap.cap_vndr);
+			goto next;
+		}
+		ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
+		if (ret != sizeof(cap)) {
+			PMD_INIT_LOG(ERR, "failed to read pci cap at pos: %x ret %d", pos, ret);
+			break;
+		}
+		PMD_INIT_LOG(DEBUG, "[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
+			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
+		switch (cap.cfg_type) {
+		case ZXDH_PCI_CAP_COMMON_CFG:
+			hw->common_cfg = get_cfg_addr(dev, &cap);
+			break;
+		case ZXDH_PCI_CAP_NOTIFY_CFG: {
+			ret = rte_pci_read_config(dev, &hw->notify_off_multiplier,
+						4, pos + sizeof(cap));
+			if (ret != 4)
+				PMD_INIT_LOG(ERR,
+					"failed to read notify_off_multiplier, ret %d", ret);
+			else
+				hw->notify_base = get_cfg_addr(dev, &cap);
+			break;
+		}
+		case ZXDH_PCI_CAP_DEVICE_CFG:
+			hw->dev_cfg = get_cfg_addr(dev, &cap);
+			break;
+		case ZXDH_PCI_CAP_ISR_CFG:
+			hw->isr = get_cfg_addr(dev, &cap);
+			break;
+		case ZXDH_PCI_CAP_PCI_CFG: {
+			hw->pcie_id = *(uint16_t *)&cap.padding[1];
+			PMD_INIT_LOG(DEBUG, "get pcie id 0x%x", hw->pcie_id);
+			uint16_t pcie_id = hw->pcie_id;
+
+			if ((pcie_id >> 11) & 0x1) /* PF */ {
+				PMD_INIT_LOG(DEBUG, "EP %u PF %u",
+					pcie_id >> 12, (pcie_id >> 8) & 0x7);
+			} else { /* VF */
+				PMD_INIT_LOG(DEBUG, "EP %u PF %u VF %u",
+					pcie_id >> 12, (pcie_id >> 8) & 0x7, pcie_id & 0xff);
+			}
+			break;
+		}
+		}
+next:
+	pos = cap.cap_next;
+	}
+	if (hw->common_cfg == NULL || hw->notify_base == NULL ||
+		hw->dev_cfg == NULL || hw->isr == NULL) {
+		PMD_INIT_LOG(ERR, "no modern pci device found.");
+		return -1;
+	}
+	return 0;
+}
+
+enum zxdh_msix_status zxdh_vtpci_msix_detect(struct rte_pci_device *dev)
+{
+	uint8_t pos = 0;
+	int32_t ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
+
+	if (ret != 1) {
+		PMD_INIT_LOG(ERR, "failed to read pci capability list, ret %d", ret);
+		return ZXDH_MSIX_NONE;
+	}
+	while (pos) {
+		uint8_t cap[2] = {0};
+
+		ret = rte_pci_read_config(dev, cap, sizeof(cap), pos);
+		if (ret != sizeof(cap)) {
+			PMD_INIT_LOG(ERR, "failed to read pci cap at pos: %x ret %d", pos, ret);
+			break;
+		}
+		if (cap[0] == PCI_CAP_ID_MSIX) {
+			uint16_t flags = 0;
+
+			ret = rte_pci_read_config(dev, &flags, sizeof(flags), pos + sizeof(cap));
+			if (ret != sizeof(flags)) {
+				PMD_INIT_LOG(ERR,
+					"failed to read pci cap at pos: %x ret %d", pos + 2, ret);
+				break;
+			}
+			if (flags & PCI_MSIX_ENABLE)
+				return ZXDH_MSIX_ENABLED;
+			else
+				return ZXDH_MSIX_DISABLED;
+		}
+		pos = cap[1];
+	}
+	return ZXDH_MSIX_NONE;
+	}
diff --git a/drivers/net/zxdh/zxdh_pci.h b/drivers/net/zxdh/zxdh_pci.h
new file mode 100644
index 0000000000..d6f3c552ad
--- /dev/null
+++ b/drivers/net/zxdh/zxdh_pci.h
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 ZTE Corporation
+ */
+
+#ifndef _ZXDH_PCI_H_
+#define _ZXDH_PCI_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <rte_pci.h>
+#include <rte_bus_pci.h>
+#include <bus_pci_driver.h>
+#include <ethdev_driver.h>
+
+#include "zxdh_ethdev.h"
+
+/* The bit of the ISR which indicates a device has an interrupt. */
+#define ZXDH_PCI_ISR_INTR    0x1
+/* The bit of the ISR which indicates a device configuration change. */
+#define ZXDH_PCI_ISR_CONFIG  0x2
+/* Vector value used to disable MSI for queue. */
+#define ZXDH_MSI_NO_VECTOR   0x7F
+
+/* Status byte for guest to report progress. */
+#define ZXDH_CONFIG_STATUS_RESET           0x00
+#define ZXDH_CONFIG_STATUS_ACK             0x01
+#define ZXDH_CONFIG_STATUS_DRIVER          0x02
+#define ZXDH_CONFIG_STATUS_DRIVER_OK       0x04
+#define ZXDH_CONFIG_STATUS_FEATURES_OK     0x08
+#define ZXDH_CONFIG_STATUS_DEV_NEED_RESET  0x40
+#define ZXDH_CONFIG_STATUS_FAILED          0x80
+
+/* The feature bitmap for net */
+#define ZXDH_NET_F_CSUM              0   /* Host handles pkts w/ partial csum */
+#define ZXDH_NET_F_GUEST_CSUM        1   /* Guest handles pkts w/ partial csum */
+#define ZXDH_NET_F_MTU               3   /* Initial MTU advice. */
+#define ZXDH_NET_F_MAC               5   /* Host has given MAC address. */
+#define ZXDH_NET_F_GUEST_TSO4        7   /* Guest can handle TSOv4 in. */
+#define ZXDH_NET_F_GUEST_TSO6        8   /* Guest can handle TSOv6 in. */
+#define ZXDH_NET_F_GUEST_ECN         9   /* Guest can handle TSO[6] w/ ECN in. */
+#define ZXDH_NET_F_GUEST_UFO         10  /* Guest can handle UFO in. */
+#define ZXDH_NET_F_HOST_TSO4         11  /* Host can handle TSOv4 in. */
+#define ZXDH_NET_F_HOST_TSO6         12  /* Host can handle TSOv6 in. */
+#define ZXDH_NET_F_HOST_ECN          13  /* Host can handle TSO[6] w/ ECN in. */
+#define ZXDH_NET_F_HOST_UFO          14  /* Host can handle UFO in. */
+#define ZXDH_NET_F_MRG_RXBUF         15  /* Host can merge receive buffers. */
+#define ZXDH_NET_F_STATUS            16  /* zxdh_net_config.status available */
+#define ZXDH_NET_F_CTRL_VQ           17  /* Control channel available */
+#define ZXDH_NET_F_CTRL_RX           18  /* Control channel RX mode support */
+#define ZXDH_NET_F_CTRL_VLAN         19  /* Control channel VLAN filtering */
+#define ZXDH_NET_F_CTRL_RX_EXTRA     20  /* Extra RX mode control support */
+#define ZXDH_NET_F_GUEST_ANNOUNCE    21  /* Guest can announce device on the network */
+#define ZXDH_NET_F_MQ                22  /* Device supports Receive Flow Steering */
+#define ZXDH_NET_F_CTRL_MAC_ADDR     23  /* Set MAC address */
+/* Do we get callbacks when the ring is completely used, even if we've suppressed them? */
+#define ZXDH_F_NOTIFY_ON_EMPTY       24
+#define ZXDH_F_ANY_LAYOUT            27 /* Can the device handle any descriptor layout? */
+#define VIRTIO_RING_F_INDIRECT_DESC  28 /* We support indirect buffer descriptors */
+#define ZXDH_F_VERSION_1             32
+#define ZXDH_F_IOMMU_PLATFORM        33
+#define ZXDH_F_RING_PACKED           34
+/* Inorder feature indicates that all buffers are used by the device
+ * in the same order in which they have been made available.
+ */
+#define ZXDH_F_IN_ORDER              35
+/** This feature indicates that memory accesses by the driver
+ * and the device are ordered in a way described by the platform.
+ */
+#define ZXDH_F_ORDER_PLATFORM        36
+/**
+ * This feature indicates that the driver passes extra data
+ * (besides identifying the virtqueue) in its device notifications.
+ */
+#define ZXDH_F_NOTIFICATION_DATA     38
+#define ZXDH_NET_F_SPEED_DUPLEX      63 /* Device set linkspeed and duplex */
+
+/* The Guest publishes the used index for which it expects an interrupt
+ * at the end of the avail ring. Host should ignore the avail->flags field.
+ */
+/* The Host publishes the avail index for which it expects a kick
+ * at the end of the used ring. Guest should ignore the used->flags field.
+ */
+#define ZXDH_RING_F_EVENT_IDX                       29
+
+/* Maximum number of virtqueues per device. */
+#define ZXDH_MAX_VIRTQUEUE_PAIRS  8
+#define ZXDH_MAX_VIRTQUEUES       (ZXDH_MAX_VIRTQUEUE_PAIRS * 2 + 1)
+
+
+#define ZXDH_PCI_CAP_COMMON_CFG  1 /* Common configuration */
+#define ZXDH_PCI_CAP_NOTIFY_CFG  2 /* Notifications */
+#define ZXDH_PCI_CAP_ISR_CFG     3 /* ISR Status */
+#define ZXDH_PCI_CAP_DEVICE_CFG  4 /* Device specific configuration */
+#define ZXDH_PCI_CAP_PCI_CFG     5 /* PCI configuration access */
+
+#define VTPCI_OPS(hw)  (zxdh_hw_internal[(hw)->port_id].vtpci_ops)
+#define VTPCI_IO(hw)   (&zxdh_hw_internal[(hw)->port_id].io)
+
+/*
+ * How many bits to shift physical queue address written to QUEUE_PFN.
+ * 12 is historical, and due to x86 page size.
+ */
+#define ZXDH_PCI_QUEUE_ADDR_SHIFT                   12
+
+/* The alignment to use between consumer and producer parts of vring. */
+#define ZXDH_PCI_VRING_ALIGN                        4096
+
+/******BAR0  SPACE********************************************************************/
+#define ZXDH_VQMREG_OFFSET    0x0000
+#define ZXDH_FWCAP_OFFSET     0x1000
+#define ZXDH_CTRLCH_OFFSET    0x2000
+#define ZXDH_MAC_OFFSET       0x24000
+#define ZXDH_SPINLOCK_OFFSET  0x4000
+#define ZXDH_FWSHRD_OFFSET    0x5000
+#define ZXDH_QUERES_SHARE_BASE   (ZXDH_FWSHRD_OFFSET)
+#define ZXDH_QUERES_SHARE_SIZE   512
+
+enum zxdh_msix_status {
+	ZXDH_MSIX_NONE     = 0,
+	ZXDH_MSIX_DISABLED = 1,
+	ZXDH_MSIX_ENABLED  = 2
+};
+
+static inline int32_t vtpci_with_feature(struct zxdh_hw *hw, uint64_t bit)
+{
+	return (hw->guest_features & (1ULL << bit)) != 0;
+}
+
+static inline int32_t vtpci_packed_queue(struct zxdh_hw *hw)
+{
+	return vtpci_with_feature(hw, ZXDH_F_RING_PACKED);
+}
+
+/*
+ * While zxdh_hw is stored in shared memory, this structure stores
+ * some infos that may vary in the multiple process model locally.
+ * For example, the vtpci_ops pointer.
+ */
+struct zxdh_hw_internal {
+	const struct zxdh_pci_ops *vtpci_ops;
+	struct rte_pci_ioport io;
+};
+
+/* Fields in ZXDH_PCI_CAP_COMMON_CFG: */
+struct zxdh_pci_common_cfg {
+	/* About the whole device. */
+	uint32_t device_feature_select; /* read-write */
+	uint32_t device_feature;    /* read-only */
+	uint32_t guest_feature_select;  /* read-write */
+	uint32_t guest_feature;     /* read-write */
+	uint16_t msix_config;       /* read-write */
+	uint16_t num_queues;        /* read-only */
+	uint8_t  device_status;     /* read-write */
+	uint8_t  config_generation; /* read-only */
+
+	/* About a specific virtqueue. */
+	uint16_t queue_select;      /* read-write */
+	uint16_t queue_size;        /* read-write, power of 2. */
+	uint16_t queue_msix_vector; /* read-write */
+	uint16_t queue_enable;      /* read-write */
+	uint16_t queue_notify_off;  /* read-only */
+	uint32_t queue_desc_lo;     /* read-write */
+	uint32_t queue_desc_hi;     /* read-write */
+	uint32_t queue_avail_lo;    /* read-write */
+	uint32_t queue_avail_hi;    /* read-write */
+	uint32_t queue_used_lo;     /* read-write */
+	uint32_t queue_used_hi;     /* read-write */
+};
+
+/*
+ * This structure is just a reference to read
+ * net device specific config space; it just a chodu structure
+ *
+ */
+struct zxdh_net_config {
+	/* The config defining mac address (if ZXDH_NET_F_MAC) */
+	uint8_t    mac[RTE_ETHER_ADDR_LEN];
+	/* See ZXDH_NET_F_STATUS and ZXDH_NET_S_* above */
+	uint16_t   status;
+	uint16_t   max_virtqueue_pairs;
+	uint16_t   mtu;
+	/*
+	 * speed, in units of 1Mb. All values 0 to INT_MAX are legal.
+	 * Any other value stands for unknown.
+	 */
+	uint32_t   speed;
+	/* 0x00 - half duplex
+	 * 0x01 - full duplex
+	 * Any other value stands for unknown.
+	 */
+	uint8_t    duplex;
+} __rte_packed;
+
+/* This is the PCI capability header: */
+struct zxdh_pci_cap {
+	uint8_t  cap_vndr;   /* Generic PCI field: PCI_CAP_ID_VNDR */
+	uint8_t  cap_next;   /* Generic PCI field: next ptr. */
+	uint8_t  cap_len;    /* Generic PCI field: capability length */
+	uint8_t  cfg_type;   /* Identifies the structure. */
+	uint8_t  bar;        /* Where to find it. */
+	uint8_t  padding[3]; /* Pad to full dword. */
+	uint32_t offset;     /* Offset within bar. */
+	uint32_t length;     /* Length of the structure, in bytes. */
+};
+struct zxdh_pci_notify_cap {
+	struct zxdh_pci_cap cap;
+	uint32_t notify_off_multiplier;  /* Multiplier for queue_notify_off. */
+};
+
+struct zxdh_pci_ops {
+	void     (*read_dev_cfg)(struct zxdh_hw *hw, size_t offset, void *dst, int32_t len);
+	void     (*write_dev_cfg)(struct zxdh_hw *hw, size_t offset, const void *src, int32_t len);
+
+	uint8_t  (*get_status)(struct zxdh_hw *hw);
+	void     (*set_status)(struct zxdh_hw *hw, uint8_t status);
+
+	uint64_t (*get_features)(struct zxdh_hw *hw);
+	void     (*set_features)(struct zxdh_hw *hw, uint64_t features);
+
+	uint8_t  (*get_isr)(struct zxdh_hw *hw);
+
+	uint16_t (*set_config_irq)(struct zxdh_hw *hw, uint16_t vec);
+
+	uint16_t (*set_queue_irq)(struct zxdh_hw *hw, struct virtqueue *vq, uint16_t vec);
+
+	uint16_t (*get_queue_num)(struct zxdh_hw *hw, uint16_t queue_id);
+	void     (*set_queue_num)(struct zxdh_hw *hw, uint16_t queue_id, uint16_t vq_size);
+
+	int32_t  (*setup_queue)(struct zxdh_hw *hw, struct virtqueue *vq);
+	void     (*del_queue)(struct zxdh_hw *hw, struct virtqueue *vq);
+	void     (*notify_queue)(struct zxdh_hw *hw, struct virtqueue *vq);
+};
+
+extern struct zxdh_hw_internal zxdh_hw_internal[RTE_MAX_ETHPORTS];
+extern const struct zxdh_pci_ops zxdh_modern_ops;
+
+void zxdh_vtpci_reset(struct zxdh_hw *hw);
+void zxdh_vtpci_reinit_complete(struct zxdh_hw *hw);
+uint8_t zxdh_vtpci_get_status(struct zxdh_hw *hw);
+void zxdh_vtpci_set_status(struct zxdh_hw *hw, uint8_t status);
+uint16_t zxdh_vtpci_get_features(struct zxdh_hw *hw);
+void zxdh_vtpci_write_dev_config(struct zxdh_hw *hw, size_t offset,
+		const void *src, int32_t length);
+void zxdh_vtpci_read_dev_config(struct zxdh_hw *hw, size_t offset,
+		void *dst, int32_t length);
+uint8_t zxdh_vtpci_isr(struct zxdh_hw *hw);
+enum zxdh_msix_status zxdh_vtpci_msix_detect(struct rte_pci_device *dev);
+
+int32_t zxdh_read_pci_caps(struct rte_pci_device *dev, struct zxdh_hw *hw);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZXDH_PCI_H_ */
diff --git a/drivers/net/zxdh/zxdh_queue.c b/drivers/net/zxdh/zxdh_queue.c
new file mode 100644
index 0000000000..b6dd487a9d
--- /dev/null
+++ b/drivers/net/zxdh/zxdh_queue.c
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 ZTE Corporation
+ */
+
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+
+#include "zxdh_queue.h"
+#include "zxdh_logs.h"
+#include "zxdh_pci.h"
+#include "zxdh_common.h"
+
+/**
+ * Two types of mbuf to be cleaned:
+ * 1) mbuf that has been consumed by backend but not used by virtio.
+ * 2) mbuf that hasn't been consued by backend.
+ */
+struct rte_mbuf *zxdh_virtqueue_detach_unused(struct virtqueue *vq)
+{
+	struct rte_mbuf *cookie = NULL;
+	int32_t          idx    = 0;
+
+	if (vq == NULL)
+		return NULL;
+
+	for (idx = 0; idx < vq->vq_nentries; idx++) {
+		cookie = vq->vq_descx[idx].cookie;
+		if (cookie != NULL) {
+			vq->vq_descx[idx].cookie = NULL;
+			return cookie;
+		}
+	}
+
+	return NULL;
+}
+
+static int32_t zxdh_release_channel(struct rte_eth_dev *dev)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+	uint16_t nr_vq = hw->queue_num;
+	uint32_t var  = 0;
+	uint32_t addr = 0;
+	uint32_t widx = 0;
+	uint32_t bidx = 0;
+	uint16_t pch  = 0;
+	uint16_t lch  = 0;
+	uint16_t timeout = 0;
+
+	while ((timeout++) < ZXDH_ACQUIRE_CHANNEL_NUM_MAX) {
+		if (zxdh_acquire_lock(hw) != 0) {
+			PMD_INIT_LOG(ERR,
+				"Could not acquire lock to release channel, timeout %d", timeout);
+			continue;
+		}
+		break;
+	}
+
+	if (timeout >= ZXDH_ACQUIRE_CHANNEL_NUM_MAX) {
+		PMD_INIT_LOG(ERR, "Acquire lock timeout");
+		return -1;
+	}
+
+	for (lch = 0; lch < nr_vq; lch++) {
+		if (hw->channel_context[lch].valid == 0) {
+			PMD_INIT_LOG(DEBUG, "Logic channel %d does not need to release", lch);
+			continue;
+		}
+
+		/* get coi table offset and index */
+		pch  = hw->channel_context[lch].ph_chno;
+		widx = pch / 32;
+		bidx = pch % 32;
+
+		addr = ZXDH_QUERES_SHARE_BASE + (widx * sizeof(uint32_t));
+		var  = zxdh_read_bar_reg(dev, ZXDH_BAR0_INDEX, addr);
+		var &= ~(1 << bidx);
+		zxdh_write_bar_reg(dev, ZXDH_BAR0_INDEX, addr, var);
+
+		hw->channel_context[lch].valid = 0;
+		hw->channel_context[lch].ph_chno = 0;
+	}
+
+	zxdh_release_lock(hw);
+
+	return 0;
+}
+
+int32_t zxdh_free_queues(struct rte_eth_dev *dev)
+{
+	struct zxdh_hw *hw = dev->data->dev_private;
+	uint16_t nr_vq = hw->queue_num;
+	struct virtqueue *vq = NULL;
+	int32_t queue_type = 0;
+	uint16_t i = 0;
+
+	if (hw->vqs == NULL)
+		return 0;
+
+	/* Clear COI table */
+	if (zxdh_release_channel(dev) < 0) {
+		PMD_INIT_LOG(ERR, "Failed to clear coi table");
+		return -1;
+	}
+
+	for (i = 0; i < nr_vq; i++) {
+		vq = hw->vqs[i];
+		if (vq == NULL)
+			continue;
+
+		VTPCI_OPS(hw)->del_queue(hw, vq);
+		queue_type = get_queue_type(i);
+		if (queue_type == VTNET_RQ) {
+			rte_free(vq->sw_ring);
+			rte_memzone_free(vq->rxq.mz);
+		} else if (queue_type == VTNET_TQ) {
+			rte_memzone_free(vq->txq.mz);
+			rte_memzone_free(vq->txq.virtio_net_hdr_mz);
+		}
+
+		rte_free(vq);
+		hw->vqs[i] = NULL;
+		PMD_INIT_LOG(DEBUG, "Release to queue %d success!", i);
+	}
+
+	rte_free(hw->vqs);
+	hw->vqs = NULL;
+
+	return 0;
+}
+
+int32_t get_queue_type(uint16_t vtpci_queue_idx)
+{
+	if (vtpci_queue_idx % 2 == 0)
+		return VTNET_RQ;
+	else
+		return VTNET_TQ;
+}
diff --git a/drivers/net/zxdh/zxdh_queue.h b/drivers/net/zxdh/zxdh_queue.h
new file mode 100644
index 0000000000..c2d7bbe889
--- /dev/null
+++ b/drivers/net/zxdh/zxdh_queue.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 ZTE Corporation
+ */
+
+#ifndef _ZXDH_QUEUE_H_
+#define _ZXDH_QUEUE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#include <rte_atomic.h>
+#include <rte_memory.h>
+#include <rte_mempool.h>
+#include <rte_net.h>
+#include <ethdev_driver.h>
+
+#include "zxdh_pci.h"
+#include "zxdh_ring.h"
+#include "zxdh_rxtx.h"
+
+
+enum {
+	VTNET_RQ = 0,
+	VTNET_TQ = 1
+};
+
+struct vq_desc_extra {
+	void *cookie;
+	uint16_t ndescs;
+	uint16_t next;
+};
+
+struct virtqueue {
+	struct zxdh_hw  *hw; /**< zxdh_hw structure pointer. */
+	struct {
+		/**< vring keeping descs and events */
+		struct vring_packed ring;
+		bool used_wrap_counter;
+		uint8_t rsv;
+		uint16_t cached_flags; /**< cached flags for descs */
+		uint16_t event_flags_shadow;
+		uint16_t rsv1;
+	} __rte_packed vq_packed;
+	uint16_t vq_used_cons_idx; /**< last consumed descriptor */
+	uint16_t vq_nentries;  /**< vring desc numbers */
+	uint16_t vq_free_cnt;  /**< num of desc available */
+	uint16_t vq_avail_idx; /**< sync until needed */
+	uint16_t vq_free_thresh; /**< free threshold */
+	uint16_t rsv2;
+
+	void *vq_ring_virt_mem;  /**< linear address of vring*/
+	uint32_t vq_ring_size;
+
+	union {
+		struct virtnet_rx rxq;
+		struct virtnet_tx txq;
+	};
+
+	/** < physical address of vring,
+	 * or virtual address for virtio_user.
+	 **/
+	rte_iova_t vq_ring_mem;
+
+	/**
+	 * Head of the free chain in the descriptor table. If
+	 * there are no free descriptors, this will be set to
+	 * VQ_RING_DESC_CHAIN_END.
+	 **/
+	uint16_t  vq_desc_head_idx;
+	uint16_t  vq_desc_tail_idx;
+	uint16_t  vq_queue_index;   /**< PCI queue index */
+	uint16_t  offset; /**< relative offset to obtain addr in mbuf */
+	uint16_t *notify_addr;
+	struct rte_mbuf **sw_ring;  /**< RX software ring. */
+	struct vq_desc_extra vq_descx[0];
+};
+
+struct rte_mbuf *zxdh_virtqueue_detach_unused(struct virtqueue *vq);
+int32_t zxdh_free_queues(struct rte_eth_dev *dev);
+int32_t get_queue_type(uint16_t vtpci_queue_idx);
+
+#endif
diff --git a/drivers/net/zxdh/zxdh_ring.h b/drivers/net/zxdh/zxdh_ring.h
new file mode 100644
index 0000000000..bd7c997993
--- /dev/null
+++ b/drivers/net/zxdh/zxdh_ring.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 ZTE Corporation
+ */
+
+#ifndef _ZXDH_RING_H_
+#define _ZXDH_RING_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <rte_common.h>
+
+/* This marks a buffer as continuing via the next field. */
+#define VRING_DESC_F_NEXT                                   1
+
+/* This marks a buffer as write-only (otherwise read-only). */
+#define VRING_DESC_F_WRITE                                  2
+
+/* This means the buffer contains a list of buffer descriptors. */
+#define VRING_DESC_F_INDIRECT                               4
+
+/* This flag means the descriptor was made available by the driver */
+#define VRING_PACKED_DESC_F_AVAIL                           (1 << (7))
+/* This flag means the descriptor was used by the device */
+#define VRING_PACKED_DESC_F_USED                            (1 << (15))
+
+/* Frequently used combinations */
+#define VRING_PACKED_DESC_F_AVAIL_USED \
+			(VRING_PACKED_DESC_F_AVAIL | VRING_PACKED_DESC_F_USED)
+
+/* The Host uses this in used->flags to advise the Guest: don't kick me
+ * when you add a buffer.  It's unreliable, so it's simply an
+ * optimization.  Guest will still kick if it's out of buffers.
+ **/
+#define VRING_USED_F_NO_NOTIFY                              1
+
+/** The Guest uses this in avail->flags to advise the Host: don't
+ * interrupt me when you consume a buffer.  It's unreliable, so it's
+ * simply an optimization.
+ **/
+#define VRING_AVAIL_F_NO_INTERRUPT                          1
+
+#define RING_EVENT_FLAGS_ENABLE                             0x0
+#define RING_EVENT_FLAGS_DISABLE                            0x1
+#define RING_EVENT_FLAGS_DESC                               0x2
+
+/** VirtIO ring descriptors: 16 bytes.
+ * These can chain together via "next".
+ **/
+struct vring_desc {
+	uint64_t addr;  /*  Address (guest-physical). */
+	uint32_t len;   /* Length. */
+	uint16_t flags; /* The flags as indicated above. */
+	uint16_t next;  /* We chain unused descriptors via this. */
+};
+
+struct vring_avail {
+	uint16_t flags;
+	uint16_t idx;
+	uint16_t ring[0];
+};
+
+/** For support of packed virtqueues in Virtio 1.1 the format of descriptors
+ * looks like this.
+ **/
+struct vring_packed_desc {
+	uint64_t addr;
+	uint32_t len;
+	uint16_t id;
+	uint16_t flags;
+};
+
+struct vring_packed_desc_event {
+	uint16_t desc_event_off_wrap;
+	uint16_t desc_event_flags;
+};
+
+struct vring_packed {
+	uint32_t num;
+	struct vring_packed_desc *desc;
+	struct vring_packed_desc_event *driver;
+	struct vring_packed_desc_event *device;
+};
+
+#endif
diff --git a/drivers/net/zxdh/zxdh_rxtx.h b/drivers/net/zxdh/zxdh_rxtx.h
new file mode 100644
index 0000000000..7aedf568fe
--- /dev/null
+++ b/drivers/net/zxdh/zxdh_rxtx.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 ZTE Corporation
+ */
+
+#ifndef _ZXDH_RXTX_H_
+#define _ZXDH_RXTX_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+struct virtnet_stats {
+	uint64_t packets;
+	uint64_t bytes;
+	uint64_t errors;
+	uint64_t multicast;
+	uint64_t broadcast;
+	uint64_t truncated_err;
+	uint64_t size_bins[8]; /* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */
+};
+
+struct virtnet_rx {
+	struct virtqueue         *vq;
+
+	/* dummy mbuf, for wraparound when processing RX ring. */
+	struct rte_mbuf           fake_mbuf;
+
+	uint64_t                  mbuf_initializer; /* value to init mbufs. */
+	struct rte_mempool       *mpool;            /* mempool for mbuf allocation */
+	uint16_t                  queue_id;         /* DPDK queue index. */
+	uint16_t                  port_id;          /* Device port identifier. */
+	struct virtnet_stats      stats;
+	const struct rte_memzone *mz;               /* mem zone to populate RX ring. */
+};
+
+struct virtnet_tx {
+	struct virtqueue         *vq;
+	const struct rte_memzone *virtio_net_hdr_mz;  /* memzone to populate hdr. */
+	rte_iova_t                virtio_net_hdr_mem; /* hdr for each xmit packet */
+	uint16_t                  queue_id;           /* DPDK queue index. */
+	uint16_t                  port_id;            /* Device port identifier. */
+	struct virtnet_stats      stats;
+	const struct rte_memzone *mz;                 /* mem zone to populate TX ring. */
+};
+
+#endif
-- 
2.43.0

[-- Attachment #1.1.2: Type: text/html , Size: 201443 bytes --]

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2024-08-28  7:25 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-08-28  7:25 [PATCH v1 2/2] net/zxdh: provided zxdh basic init Junlong Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).