Update the Rx/Tx datapath to the latest design:

- cache a pre-built download (Tx) net header per port at configure
  time and copy it in the hot path instead of rebuilding it for every
  packet;
- drop the weak_barriers handling and the indirect-descriptor Tx path;
  the queue is always driven in order with plain rte_io_* barriers;
- account for RTE_ETH_RX_OFFLOAD_VLAN_STRIP when deciding whether Rx
  offload parsing is enabled;
- parse the reworked PI/PD uplink header (VLAN strip, QinQ, RSS hash,
  FDIR, checksum and LRO flags) when filling the mbuf;
- reject packets requesting Tx offloads the port has not negotiated
  and add matching error counters.

Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
 drivers/net/zxdh/zxdh_common.c |  19 +
 drivers/net/zxdh/zxdh_common.h |   3 +
 drivers/net/zxdh/zxdh_ethdev.c |  45 ++-
 drivers/net/zxdh/zxdh_ethdev.h |  15 +-
 drivers/net/zxdh/zxdh_queue.c  |   9 +-
 drivers/net/zxdh/zxdh_queue.h  | 118 +++---
 drivers/net/zxdh/zxdh_rxtx.c   | 696 +++++++++++++++++++--------------
 drivers/net/zxdh/zxdh_rxtx.h   |  27 ++
 8 files changed, 567 insertions(+), 365 deletions(-)
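
Note for reviewers (not part of the commit message): the Tx hot path
now reduces to a template copy plus per-packet fixups. A minimal
standalone sketch of that pattern, with hypothetical names and field
sizes (not driver code):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* constant part of a per-port header, plus one per-packet field */
    struct hdr_tmpl {
        uint8_t  port;        /* constant per port */
        uint8_t  pd_len;      /* constant per port, in 2B units */
        uint16_t vlan_insert; /* patched per packet */
    };

    static struct hdr_tmpl g_tmpl; /* built once at configure time */

    static void build_template(void)
    {
        memset(&g_tmpl, 0, sizeof(g_tmpl));
        g_tmpl.port = 2;                     /* e.g. a DTP-style port */
        g_tmpl.pd_len = sizeof(g_tmpl) >> 1; /* length in 2B units */
    }

    static void fill_hdr(struct hdr_tmpl *dst, uint16_t vlan_tci)
    {
        memcpy(dst, &g_tmpl, sizeof(*dst)); /* bulk copy of constants */
        dst->vlan_insert = vlan_tci;        /* per-packet fixup */
    }

    int main(void)
    {
        struct hdr_tmpl h;

        build_template();
        fill_hdr(&h, 100);
        printf("port=%u pd_len=%u vlan=%u\n",
               h.port, h.pd_len, h.vlan_insert);
        return 0;
    }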

diff --git a/drivers/net/zxdh/zxdh_common.c b/drivers/net/zxdh/zxdh_common.c
index 0c9696e7ce..3d67fcc7dd 100644
--- a/drivers/net/zxdh/zxdh_common.c
+++ b/drivers/net/zxdh/zxdh_common.c
@@ -13,6 +13,7 @@
 #include "zxdh_logs.h"
 #include "zxdh_msg.h"
 #include "zxdh_common.h"
+#include "zxdh_pci.h"
 
 #define ZXDH_MSG_RSP_SIZE_MAX         512
 
@@ -427,3 +428,21 @@ zxdh_datach_set(struct rte_eth_dev *dev)
 
     return ret;
 }
+
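+/* Rx offload parsing is needed if any guest csum/TSO feature was
+ * negotiated, or if VLAN stripping was requested at configure time.
+ */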
+bool
+zxdh_rx_offload_enabled(struct zxdh_hw *hw)
+{
+    return zxdh_pci_with_feature(hw, ZXDH_NET_F_GUEST_CSUM) ||
+           zxdh_pci_with_feature(hw, ZXDH_NET_F_GUEST_TSO4) ||
+           zxdh_pci_with_feature(hw, ZXDH_NET_F_GUEST_TSO6) ||
+           (hw->vlan_offload_cfg.vlan_strip == 1);
+}
+
+bool
+zxdh_tx_offload_enabled(struct zxdh_hw *hw)
+{
+    return zxdh_pci_with_feature(hw, ZXDH_NET_F_CSUM) ||
+           zxdh_pci_with_feature(hw, ZXDH_NET_F_HOST_TSO4) ||
+           zxdh_pci_with_feature(hw, ZXDH_NET_F_HOST_TSO6) ||
+           zxdh_pci_with_feature(hw, ZXDH_NET_F_HOST_UFO);
+}
diff --git a/drivers/net/zxdh/zxdh_common.h b/drivers/net/zxdh/zxdh_common.h
index 826f1fb95d..d78a822ebf 100644
--- a/drivers/net/zxdh/zxdh_common.h
+++ b/drivers/net/zxdh/zxdh_common.h
@@ -31,4 +31,7 @@ uint32_t zxdh_read_comm_reg(uint64_t pci_comm_cfg_baseaddr, uint32_t reg);
 void zxdh_write_comm_reg(uint64_t pci_comm_cfg_baseaddr, uint32_t reg, uint32_t val);
 int32_t zxdh_datach_set(struct rte_eth_dev *dev);
 
+bool zxdh_rx_offload_enabled(struct zxdh_hw *hw);
+bool zxdh_tx_offload_enabled(struct zxdh_hw *hw);
+
 #endif /* ZXDH_COMMON_H */
diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index c76818d015..255d4b5b79 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -23,6 +23,7 @@ struct zxdh_shared_data *zxdh_shared_data;
 const char *ZXDH_PMD_SHARED_DATA_MZ = "zxdh_pmd_shared_data";
 rte_spinlock_t zxdh_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
 struct zxdh_dev_shared_data g_dev_sd[ZXDH_SLOT_MAX];
+struct zxdh_net_hdr_dl g_net_hdr_dl[RTE_MAX_ETHPORTS];
 
 #define ZXDH_INVALID_DTBQUE      0xFFFF
 #define ZXDH_INVALID_SLOT_IDX    0xFFFF
@@ -405,6 +406,28 @@ zxdh_configure_intr(struct rte_eth_dev *dev)
     return ret;
 }
 
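+/* Build the per-port DL (Tx) header template once at configure time;
+ * the data path only copies it and patches the per-packet fields.
+ */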
+static void
+zxdh_update_net_hdr_dl(struct zxdh_hw *hw)
+{
+    struct zxdh_net_hdr_dl *net_hdr_dl = &g_net_hdr_dl[hw->port_id];
+    memset(net_hdr_dl, 0, ZXDH_DL_NET_HDR_SIZE);
+
+    if (zxdh_tx_offload_enabled(hw)) {
+        net_hdr_dl->type_hdr.port = ZXDH_PORT_DTP;
+        net_hdr_dl->type_hdr.pd_len = ZXDH_DL_NET_HDR_SIZE >> 1;
+
+        net_hdr_dl->pipd_hdr_dl.pi_hdr.pi_len = (ZXDH_PI_HDR_SIZE >> 4) - 1;
+        net_hdr_dl->pipd_hdr_dl.pi_hdr.pkt_flag_hi8 = ZXDH_PI_FLAG | ZXDH_PI_TYPE_PI;
+        net_hdr_dl->pipd_hdr_dl.pi_hdr.pkt_type = ZXDH_PKT_FORM_CPU;
+        hw->dl_net_hdr_len = ZXDH_DL_NET_HDR_SIZE;
+
+    } else {
+        net_hdr_dl->type_hdr.port = ZXDH_PORT_NP;
+        net_hdr_dl->type_hdr.pd_len = ZXDH_DL_NET_HDR_NOPI_SIZE >> 1;
+        hw->dl_net_hdr_len = ZXDH_DL_NET_HDR_NOPI_SIZE;
+    }
+}
+
 static int32_t
 zxdh_features_update(struct zxdh_hw *hw,
         const struct rte_eth_rxmode *rxmode,
@@ -451,23 +474,6 @@ zxdh_features_update(struct zxdh_hw *hw,
     return 0;
 }
 
-static bool
-zxdh_rx_offload_enabled(struct zxdh_hw *hw)
-{
-    return zxdh_pci_with_feature(hw, ZXDH_NET_F_GUEST_CSUM) ||
-           zxdh_pci_with_feature(hw, ZXDH_NET_F_GUEST_TSO4) ||
-           zxdh_pci_with_feature(hw, ZXDH_NET_F_GUEST_TSO6);
-}
-
-static bool
-zxdh_tx_offload_enabled(struct zxdh_hw *hw)
-{
-    return zxdh_pci_with_feature(hw, ZXDH_NET_F_CSUM) ||
-           zxdh_pci_with_feature(hw, ZXDH_NET_F_HOST_TSO4) ||
-           zxdh_pci_with_feature(hw, ZXDH_NET_F_HOST_TSO6) ||
-           zxdh_pci_with_feature(hw, ZXDH_NET_F_HOST_UFO);
-}
-
 static void
 zxdh_dev_free_mbufs(struct rte_eth_dev *dev)
 {
@@ -892,6 +898,7 @@ zxdh_dev_configure(struct rte_eth_dev *dev)
     const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
     const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
     struct zxdh_hw *hw = dev->data->dev_private;
+    uint64_t rx_offloads = rxmode->offloads;
     int32_t  ret = 0;
 
     if (dev->data->nb_rx_queues > hw->max_queue_pairs ||
@@ -932,6 +939,9 @@ zxdh_dev_configure(struct rte_eth_dev *dev)
         }
     }
 
+    if (rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
+        hw->vlan_offload_cfg.vlan_strip = 1;
+
     hw->has_tx_offload = zxdh_tx_offload_enabled(hw);
     hw->has_rx_offload = zxdh_rx_offload_enabled(hw);
 
@@ -982,6 +992,7 @@ zxdh_dev_configure(struct rte_eth_dev *dev)
 
 end:
     zxdh_dev_conf_offload(dev);
+    zxdh_update_net_hdr_dl(hw);
     return ret;
 }
 
diff --git a/drivers/net/zxdh/zxdh_ethdev.h b/drivers/net/zxdh/zxdh_ethdev.h
index c42f638c8d..7fe561ae24 100644
--- a/drivers/net/zxdh/zxdh_ethdev.h
+++ b/drivers/net/zxdh/zxdh_ethdev.h
@@ -58,6 +58,14 @@ struct zxdh_chnl_context {
     uint16_t ph_chno;
 };
 
+struct zxdh_vlan_offload_cfg {
+    uint8_t vlan_strip:1;
+    uint8_t vlan_filter:1;
+    uint8_t vlan_extend:1;
+    uint8_t qinq_strip:1;
+    uint8_t resv:4;
+};
+
 struct zxdh_hw {
     struct rte_eth_dev *eth_dev;
     struct zxdh_pci_common_cfg *common_cfg;
@@ -89,11 +97,10 @@ struct zxdh_hw {
     uint16_t *notify_base;
     uint8_t *isr;
 
-    uint8_t weak_barriers;
     uint8_t intr_enabled;
     uint8_t mac_addr[RTE_ETHER_ADDR_LEN];
-
     uint8_t use_msix;
+
     uint8_t duplex;
     uint8_t is_pf;
     uint8_t msg_chan_init;
@@ -112,7 +119,9 @@ struct zxdh_hw {
     uint8_t que_set_flag;
     uint16_t queue_pool_count;
     uint16_t queue_pool_start;
-    uint8_t rsv[3];
+    struct zxdh_vlan_offload_cfg vlan_offload_cfg;
+    uint8_t dl_net_hdr_len;
+    uint8_t rsv[2];
 };
 
 struct zxdh_dtb_shared_data {
diff --git a/drivers/net/zxdh/zxdh_queue.c b/drivers/net/zxdh/zxdh_queue.c
index 3e3bac2efc..d92d3bcab9 100644
--- a/drivers/net/zxdh/zxdh_queue.c
+++ b/drivers/net/zxdh/zxdh_queue.c
@@ -291,12 +291,10 @@ zxdh_dev_tx_queue_setup(struct rte_eth_dev *dev,
 int32_t
 zxdh_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
 {
-    struct zxdh_hw *hw = dev->data->dev_private;
     struct zxdh_virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
     struct zxdh_virtqueue *vq = rxvq->vq;
 
     zxdh_queue_enable_intr(vq);
-    zxdh_mb(hw->weak_barriers);
     return 0;
 }
 
@@ -314,7 +312,6 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
             struct rte_mbuf **cookie, uint16_t num)
 {
     struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
-    struct zxdh_hw *hw = vq->hw;
     struct zxdh_vq_desc_extra *dxp;
     uint16_t flags = vq->vq_packed.cached_flags;
     int32_t i;
@@ -328,10 +325,8 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
         /* rx pkt fill in data_off */
         start_dp[idx].addr = rte_mbuf_iova_get(cookie[i]) + RTE_PKTMBUF_HEADROOM;
         start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM;
-        vq->vq_desc_head_idx = dxp->next;
-        if (vq->vq_desc_head_idx == ZXDH_VQ_RING_DESC_CHAIN_END)
-            vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
-        zxdh_queue_store_flags_packed(&start_dp[idx], flags, hw->weak_barriers);
+
+        zxdh_queue_store_flags_packed(&start_dp[idx], flags);
         if (++vq->vq_avail_idx >= vq->vq_nentries) {
             vq->vq_avail_idx -= vq->vq_nentries;
             vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
diff --git a/drivers/net/zxdh/zxdh_queue.h b/drivers/net/zxdh/zxdh_queue.h
index ba946dee29..05f391f080 100644
--- a/drivers/net/zxdh/zxdh_queue.h
+++ b/drivers/net/zxdh/zxdh_queue.h
@@ -49,13 +49,23 @@ enum { ZXDH_VTNET_RQ = 0, ZXDH_VTNET_TQ = 1 };
 
 #define ZXDH_RQ_QUEUE_IDX                 0
 #define ZXDH_TQ_QUEUE_IDX                 1
+#define ZXDH_UL_1588_HDR_SIZE             8
 #define ZXDH_TYPE_HDR_SIZE        sizeof(struct zxdh_type_hdr)
 #define ZXDH_PI_HDR_SIZE          sizeof(struct zxdh_pi_hdr)
 #define ZXDH_DL_NET_HDR_SIZE      sizeof(struct zxdh_net_hdr_dl)
 #define ZXDH_UL_NET_HDR_SIZE      sizeof(struct zxdh_net_hdr_ul)
+#define ZXDH_DL_PD_HDR_SIZE       sizeof(struct zxdh_pd_hdr_dl)
+#define ZXDH_UL_PD_HDR_SIZE       sizeof(struct zxdh_pd_hdr_ul)
+#define ZXDH_DL_NET_HDR_NOPI_SIZE   (ZXDH_TYPE_HDR_SIZE + \
+                                    ZXDH_DL_PD_HDR_SIZE)
+#define ZXDH_UL_NOPI_HDR_SIZE_MAX   (ZXDH_TYPE_HDR_SIZE + \
+                                    ZXDH_UL_PD_HDR_SIZE + \
+                                    ZXDH_UL_1588_HDR_SIZE)
 #define ZXDH_PD_HDR_SIZE_MAX              256
 #define ZXDH_PD_HDR_SIZE_MIN              ZXDH_TYPE_HDR_SIZE
 
+#define rte_packet_prefetch(p)      do {} while (0)
+
 /*
  * ring descriptors: 16 bytes.
  * These can chain together via "next".
@@ -192,18 +202,29 @@ struct __rte_packed_begin zxdh_pi_hdr {
 } __rte_packed_end; /* 32B */
 
 struct __rte_packed_begin zxdh_pd_hdr_dl {
-    uint32_t ol_flag;
+    uint16_t ol_flag;
+    uint8_t rsv;
+    uint8_t panel_id;
+
+    uint16_t svlan_insert;
+    uint16_t cvlan_insert;
+
     uint8_t tag_idx;
     uint8_t tag_data;
     uint16_t dst_vfid;
-    uint32_t svlan_insert;
-    uint32_t cvlan_insert;
-} __rte_packed_end; /* 16B */
+} __rte_packed_end; /* 12B */
 
-struct __rte_packed_begin zxdh_net_hdr_dl {
-    struct zxdh_type_hdr  type_hdr; /* 4B */
+struct __rte_packed_begin zxdh_pipd_hdr_dl {
     struct zxdh_pi_hdr    pi_hdr; /* 32B */
-    struct zxdh_pd_hdr_dl pd_hdr; /* 16B */
+    struct zxdh_pd_hdr_dl pd_hdr; /* 12B */
+} __rte_packed_end; /* 44B */
+
+struct __rte_packed_begin zxdh_net_hdr_dl {
+    struct zxdh_type_hdr type_hdr; /* 4B */
+    union {
+        struct zxdh_pd_hdr_dl pd_hdr; /* 12B */
+        struct zxdh_pipd_hdr_dl pipd_hdr_dl; /* 44B */
+    };
 } __rte_packed_end;
 
 struct __rte_packed_begin zxdh_pd_hdr_ul {
@@ -211,17 +232,27 @@ struct __rte_packed_begin zxdh_pd_hdr_ul {
     uint32_t rss_hash;
     uint32_t fd;
     uint32_t striped_vlan_tci;
+
+    uint16_t pkt_type_out;
+    uint16_t pkt_type_in;
+    uint16_t pkt_len;
+
     uint8_t tag_idx;
     uint8_t tag_data;
     uint16_t src_vfid;
-    uint16_t pkt_type_out;
-    uint16_t pkt_type_in;
-} __rte_packed_end; /* 24B */
+} __rte_packed_end; /* 26B */
 
-struct __rte_packed_begin zxdh_net_hdr_ul {
-    struct zxdh_type_hdr  type_hdr; /* 4B */
+struct __rte_packed_begin zxdh_pipd_hdr_ul {
     struct zxdh_pi_hdr    pi_hdr; /* 32B */
-    struct zxdh_pd_hdr_ul pd_hdr; /* 24B */
+    struct zxdh_pd_hdr_ul pd_hdr; /* 26B */
+} __rte_packed_end;
+
+struct __rte_packed_begin zxdh_net_hdr_ul {
+    struct zxdh_type_hdr type_hdr; /* 4B */
+    union {
+        struct zxdh_pd_hdr_ul   pd_hdr; /* 26B */
+        struct zxdh_pipd_hdr_ul pipd_hdr_ul; /* 58B */
+    };
-} __rte_packed_end; /* 60B */
+} __rte_packed_end; /* 62B */
 
 
@@ -316,6 +347,19 @@ zxdh_mb(uint8_t weak_barriers)
         rte_mb();
 }
 
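+/*
+ * A packed-ring descriptor is used once its AVAIL and USED bits are
+ * equal and match the queue's used wrap counter.
+ */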
+static inline int32_t
+desc_is_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq)
+{
+    uint16_t flags;
+    uint16_t used, avail;
+
+    flags = desc->flags;
+    rte_io_rmb();
+    used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
+    avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
+    return avail == used && used == vq->vq_packed.used_wrap_counter;
+}
+
 static inline int32_t
 zxdh_queue_full(const struct zxdh_virtqueue *vq)
 {
@@ -323,48 +367,22 @@ zxdh_queue_full(const struct zxdh_virtqueue *vq)
 }
 
 static inline void
-zxdh_queue_store_flags_packed(struct zxdh_vring_packed_desc *dp,
-        uint16_t flags, uint8_t weak_barriers)
-    {
-    if (weak_barriers) {
-    #ifdef RTE_ARCH_X86_64
-        rte_io_wmb();
-        dp->flags = flags;
-    #else
-        rte_atomic_store_explicit(&dp->flags, flags, rte_memory_order_release);
-    #endif
-    } else {
-        rte_io_wmb();
-        dp->flags = flags;
-    }
-}
-
-static inline uint16_t
-zxdh_queue_fetch_flags_packed(struct zxdh_vring_packed_desc *dp,
-        uint8_t weak_barriers)
-    {
-    uint16_t flags;
-    if (weak_barriers) {
-    #ifdef RTE_ARCH_X86_64
-        flags = dp->flags;
-        rte_io_rmb();
-    #else
-        flags = rte_atomic_load_explicit(&dp->flags, rte_memory_order_acquire);
-    #endif
-    } else {
-        flags = dp->flags;
-        rte_io_rmb();
-    }
-
-    return flags;
+zxdh_queue_store_flags_packed(struct zxdh_vring_packed_desc *dp, uint16_t flags)
+{
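+    /* Make the descriptor contents visible before the flags handoff. */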
+    rte_io_wmb();
+    dp->flags = flags;
 }
 
 static inline int32_t
 zxdh_desc_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq)
 {
-    uint16_t flags = zxdh_queue_fetch_flags_packed(desc, vq->hw->weak_barriers);
-    uint16_t used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
-    uint16_t avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
+    uint16_t flags;
+    uint16_t used, avail;
+
+    flags = desc->flags;
+    rte_io_rmb();
+    used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
+    avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
     return avail == used && used == vq->vq_packed.used_wrap_counter;
 }
 
@@ -378,12 +396,14 @@ zxdh_queue_kick_prepare_packed(struct zxdh_virtqueue *vq)
 {
     uint16_t flags = 0;
 
-    zxdh_mb(vq->hw->weak_barriers);
+    zxdh_mb(1);
     flags = vq->vq_packed.ring.device->desc_event_flags;
 
     return (flags != ZXDH_RING_EVENT_FLAGS_DISABLE);
 }
 
+extern struct zxdh_net_hdr_dl g_net_hdr_dl[RTE_MAX_ETHPORTS];
+
 struct rte_mbuf *zxdh_queue_detach_unused(struct zxdh_virtqueue *vq);
 int32_t zxdh_free_queues(struct rte_eth_dev *dev);
 int32_t zxdh_get_queue_type(uint16_t vtpci_queue_idx);
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index 6353d496f2..5c1795ce1d 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -9,6 +9,8 @@
 
 #include "zxdh_logs.h"
 #include "zxdh_pci.h"
+#include "zxdh_common.h"
+#include "zxdh_rxtx.h"
 #include "zxdh_queue.h"
 
 #define ZXDH_SVLAN_TPID                       0x88a8
@@ -24,8 +26,12 @@
 #define ZXDH_PI_L3TYPE_RSV                    0xC0
 #define ZXDH_PI_L3TYPE_MASK                   0xC0
 
+#define  ZXDH_PD_OFFLOAD_SPEC_PHYPORT         (1 << 15)
 #define  ZXDH_PD_OFFLOAD_SVLAN_INSERT         (1 << 14)
 #define  ZXDH_PD_OFFLOAD_CVLAN_INSERT         (1 << 13)
+#define  ZXDH_PD_OFFLOAD_OUTER_IPCSUM         (1 << 12)
+#define  ZXDH_PD_OFFLOAD_PRIO_MASK            (0x7 << 8)
+#define  ZXDH_PD_OFFLOAD_DELAY_STAT           (1 << 7)
 
 #define ZXDH_PCODE_MASK                       0x1F
 #define ZXDH_PCODE_IP_PKT_TYPE                0x01
@@ -34,9 +40,78 @@
 #define ZXDH_PCODE_NO_IP_PKT_TYPE             0x09
 #define ZXDH_PCODE_NO_REASSMBLE_TCP_PKT_TYPE  0x0C
 
+/* Uplink pd header byte0~1 */
+#define ZXDH_MBUF_F_RX_OUTER_L4_CKSUM_GOOD               0x00080000
+#define ZXDH_MBUF_F_RX_QINQ                              0x00100000
+#define ZXDH_MBUF_F_RX_SEC_OFFLOAD                       0x00200000
+#define ZXDH_MBUF_F_RX_QINQ_STRIPPED                     0x00400000
+#define FLEX_4BYTE                                       0x00800000
+#define FLEX_8BYTE                                       0x01000000
+#define ZXDH_MBUF_F_RX_FDIR_FLX_MASK                     0x01800000
+#define ZXDH_MBUF_F_RX_FDIR_ID                           0x02000000
+#define ZXDH_MBUF_F_RX_1588_TMST                         0x04000000
+#define ZXDH_MBUF_F_RX_1588_PTP                          0x08000000
+#define ZXDH_MBUF_F_RX_VLAN_STRIPPED                     0x10000000
+#define ZXDH_MBUF_F_RX_OUTER_IP_CKSUM_BAD                0x20000000
+#define ZXDH_MBUF_F_RX_FDIR                              0x40000000
+#define ZXDH_MBUF_F_RX_RSS_HASH                          0x80000000
+
+/* Outer/Inner L2 type */
+#define ZXDH_PD_L2TYPE_MASK                              0xf000
+#define ZXDH_PTYPE_L2_ETHER                              0x1000
+#define ZXDH_PTYPE_L2_ETHER_TIMESYNC                     0x2000
+#define ZXDH_PTYPE_L2_ETHER_ARP                          0x3000
+#define ZXDH_PTYPE_L2_ETHER_LLDP                         0x4000
+#define ZXDH_PTYPE_L2_ETHER_NSH                          0x5000
+#define ZXDH_PTYPE_L2_ETHER_VLAN                         0x6000
+#define ZXDH_PTYPE_L2_ETHER_QINQ                         0x7000
+#define ZXDH_PTYPE_L2_ETHER_PPPOE                        0x8000
+#define ZXDH_PTYPE_L2_ETHER_FCOE                         0x9000
+#define ZXDH_PTYPE_L2_ETHER_MPLS                         0xa000
+
+/* Outer/Inner L3 type */
+#define ZXDH_PD_L3TYPE_MASK                              0x0f00
+#define ZXDH_PTYPE_L3_IPV4                               0x0100
+#define ZXDH_PTYPE_L3_IPV4_EXT                           0x0200
+#define ZXDH_PTYPE_L3_IPV6                               0x0300
+#define ZXDH_PTYPE_L3_IPV4_EXT_UNKNOWN                   0x0400
+#define ZXDH_PTYPE_L3_IPV6_EXT                           0x0500
+#define ZXDH_PTYPE_L3_IPV6_EXT_UNKNOWN                   0x0600
+
+/* Outer/Inner L4 type */
+#define ZXDH_PD_L4TYPE_MASK    0x00f0
+#define ZXDH_PTYPE_L4_TCP      0x0010
+#define ZXDH_PTYPE_L4_UDP      0x0020
+#define ZXDH_PTYPE_L4_FRAG     0x0030
+#define ZXDH_PTYPE_L4_SCTP     0x0040
+#define ZXDH_PTYPE_L4_ICMP     0x0050
+#define ZXDH_PTYPE_L4_NONFRAG  0x0060
+#define ZXDH_PTYPE_L4_IGMP     0x0070
+
 #define ZXDH_TX_MAX_SEGS                      31
 #define ZXDH_RX_MAX_SEGS                      31
 
+#define ZXDH_PI_LRO_FLAG    0x00000001
+
+#define ZXDH_MIN_MSS                                     64
+#define ZXDH_VLAN_ID_MASK                                0xfff
+
+#define ZXDH_MTU_MSS_UNIT_SHIFTBIT                       2
+#define ZXDH_MTU_MSS_MASK                                0xFFF
+#define ZXDH_PD_HDR_SIZE_MAX                             256
+
+/* error code */
+#define ZXDH_UDP_CSUM_ERR  0x0020
+#define ZXDH_TCP_CSUM_ERR  0x0040
+#define ZXDH_IPV4_CSUM_ERR 0x0100
+
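+/* mbuf ol_flags that require Tx offload support on the port */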
+#define ZXDH_DTPOFFLOAD_MASK ( \
+        RTE_MBUF_F_TX_IP_CKSUM |        \
+        RTE_MBUF_F_TX_L4_MASK |         \
+        RTE_MBUF_F_TX_TCP_SEG |         \
+        RTE_MBUF_F_TX_SEC_OFFLOAD |     \
+        RTE_MBUF_F_TX_UDP_SEG)
+
 uint32_t zxdh_outer_l2_type[16] = {
     0,
     RTE_PTYPE_L2_ETHER,
@@ -161,259 +236,196 @@ zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num)
     vq->vq_free_cnt += free_cnt;
 }
 
-static void
-zxdh_ring_free_id_packed(struct zxdh_virtqueue *vq, uint16_t id)
+static inline uint16_t
+zxdh_get_mtu(struct zxdh_virtqueue *vq)
 {
-    struct zxdh_vq_desc_extra *dxp = NULL;
-
-    dxp = &vq->vq_descx[id];
-    vq->vq_free_cnt += dxp->ndescs;
-
-    if (vq->vq_desc_tail_idx == ZXDH_VQ_RING_DESC_CHAIN_END)
-        vq->vq_desc_head_idx = id;
-    else
-        vq->vq_descx[vq->vq_desc_tail_idx].next = id;
+    struct rte_eth_dev *eth_dev = vq->hw->eth_dev;
 
-    vq->vq_desc_tail_idx = id;
-    dxp->next = ZXDH_VQ_RING_DESC_CHAIN_END;
-}
-
-static void
-zxdh_xmit_cleanup_normal_packed(struct zxdh_virtqueue *vq, int32_t num)
-{
-    uint16_t used_idx = 0;
-    uint16_t id = 0;
-    uint16_t size = vq->vq_nentries;
-    struct zxdh_vring_packed_desc *desc = vq->vq_packed.ring.desc;
-    struct zxdh_vq_desc_extra *dxp = NULL;
-
-    used_idx = vq->vq_used_cons_idx;
-    /* desc_is_used has a load-acquire or rte_io_rmb inside
-     * and wait for used desc in virtqueue.
-     */
-    while (num-- && zxdh_desc_used(&desc[used_idx], vq)) {
-        id = desc[used_idx].id;
-        dxp = &vq->vq_descx[id];
-        vq->vq_used_cons_idx += dxp->ndescs;
-        if (vq->vq_used_cons_idx >= size) {
-            vq->vq_used_cons_idx -= size;
-            vq->vq_packed.used_wrap_counter ^= 1;
-        }
-        zxdh_ring_free_id_packed(vq, id);
-        if (dxp->cookie != NULL) {
-            rte_pktmbuf_free(dxp->cookie);
-            dxp->cookie = NULL;
-        }
-        used_idx = vq->vq_used_cons_idx;
-    }
+    return eth_dev->data->mtu;
 }
 
 static void
-zxdh_xmit_cleanup_packed(struct zxdh_virtqueue *vq, int32_t num, int32_t in_order)
-{
-    if (in_order)
-        zxdh_xmit_cleanup_inorder_packed(vq, num);
-    else
-        zxdh_xmit_cleanup_normal_packed(vq, num);
-}
-
-static uint8_t
-zxdh_xmit_get_ptype(struct rte_mbuf *m)
-{
-    uint8_t pcode = ZXDH_PCODE_NO_IP_PKT_TYPE;
-    uint8_t l3_ptype = ZXDH_PI_L3TYPE_NOIP;
-
-    if ((m->packet_type & RTE_PTYPE_INNER_L3_MASK) == RTE_PTYPE_INNER_L3_IPV4 ||
-            ((!(m->packet_type & RTE_PTYPE_TUNNEL_MASK)) &&
-            (m->packet_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4)) {
-        l3_ptype = ZXDH_PI_L3TYPE_IP;
-        pcode = ZXDH_PCODE_IP_PKT_TYPE;
-    } else if ((m->packet_type & RTE_PTYPE_INNER_L3_MASK) == RTE_PTYPE_INNER_L3_IPV6 ||
-            ((!(m->packet_type & RTE_PTYPE_TUNNEL_MASK)) &&
-            (m->packet_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6)) {
-        l3_ptype = ZXDH_PI_L3TYPE_IPV6;
-        pcode = ZXDH_PCODE_IP_PKT_TYPE;
-    } else {
-        goto end;
-    }
-
-    if ((m->packet_type & RTE_PTYPE_INNER_L4_MASK) == RTE_PTYPE_INNER_L4_TCP ||
-            ((!(m->packet_type & RTE_PTYPE_TUNNEL_MASK)) &&
-            (m->packet_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP))
-        pcode = ZXDH_PCODE_TCP_PKT_TYPE;
-    else if ((m->packet_type & RTE_PTYPE_INNER_L4_MASK) == RTE_PTYPE_INNER_L4_UDP ||
-                ((!(m->packet_type & RTE_PTYPE_TUNNEL_MASK)) &&
-                (m->packet_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP))
-        pcode = ZXDH_PCODE_UDP_PKT_TYPE;
-
-end:
-    return  l3_ptype | ZXDH_PKT_FORM_CPU | pcode;
-}
-
-static void zxdh_xmit_fill_net_hdr(struct rte_mbuf *cookie,
+zxdh_xmit_fill_net_hdr(struct zxdh_virtqueue *vq, struct rte_mbuf *cookie,
                 struct zxdh_net_hdr_dl *hdr)
 {
+    uint16_t mtu_or_mss = 0;
     uint16_t pkt_flag_lw16 = ZXDH_NO_IPID_UPDATE;
     uint16_t l3_offset;
+    uint8_t pcode = ZXDH_PCODE_NO_IP_PKT_TYPE;
+    uint8_t l3_ptype = ZXDH_PI_L3TYPE_NOIP;
+    struct zxdh_pi_hdr *pi_hdr = NULL;
+    struct zxdh_pd_hdr_dl *pd_hdr = NULL;
+    struct zxdh_hw *hw = vq->hw;
+    struct zxdh_net_hdr_dl *net_hdr_dl = &g_net_hdr_dl[hw->port_id];
+    uint8_t hdr_len = hw->dl_net_hdr_len;
     uint32_t ol_flag = 0;
 
-    hdr->pi_hdr.pkt_flag_lw16 = rte_be_to_cpu_16(pkt_flag_lw16);
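+    /* Start from the per-port template built at configure time, then
+     * patch only the per-packet fields below.
+     */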
+    rte_memcpy(hdr, net_hdr_dl, hdr_len);
+    if (hw->has_tx_offload) {
+        pi_hdr = &hdr->pipd_hdr_dl.pi_hdr;
+        pd_hdr = &hdr->pipd_hdr_dl.pd_hdr;
 
-    hdr->pi_hdr.pkt_type = zxdh_xmit_get_ptype(cookie);
-    l3_offset = ZXDH_DL_NET_HDR_SIZE + cookie->outer_l2_len +
-                cookie->outer_l3_len + cookie->l2_len;
-    hdr->pi_hdr.l3_offset = rte_be_to_cpu_16(l3_offset);
-    hdr->pi_hdr.l4_offset = rte_be_to_cpu_16(l3_offset + cookie->l3_len);
+        pcode = ZXDH_PCODE_IP_PKT_TYPE;
+        if (cookie->ol_flags & RTE_MBUF_F_TX_IPV6)
+            l3_ptype = ZXDH_PI_L3TYPE_IPV6;
+        else if (cookie->ol_flags & RTE_MBUF_F_TX_IPV4)
+            l3_ptype = ZXDH_PI_L3TYPE_IP;
+        else
+            pcode = ZXDH_PCODE_NO_IP_PKT_TYPE;
+
+        if (cookie->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+            mtu_or_mss = (cookie->tso_segsz >= ZXDH_MIN_MSS)
+                ? cookie->tso_segsz
+                : ZXDH_MIN_MSS;
+            pi_hdr->pkt_flag_hi8  |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+            pkt_flag_lw16 |= ZXDH_NO_IP_FRAGMENT | ZXDH_TX_IP_CKSUM_CAL;
+            pcode = ZXDH_PCODE_TCP_PKT_TYPE;
+        } else if (cookie->ol_flags & RTE_MBUF_F_TX_UDP_SEG) {
+            mtu_or_mss = zxdh_get_mtu(vq);
+            mtu_or_mss = (mtu_or_mss >= ZXDH_MIN_MSS) ? mtu_or_mss : ZXDH_MIN_MSS;
+            pkt_flag_lw16 |= ZXDH_TX_IP_CKSUM_CAL;
+            pi_hdr->pkt_flag_hi8 |= ZXDH_NO_TCP_FRAGMENT | ZXDH_TX_TCPUDP_CKSUM_CAL;
+            pcode = ZXDH_PCODE_UDP_PKT_TYPE;
+        } else {
+            pkt_flag_lw16 |= ZXDH_NO_IP_FRAGMENT;
+            pi_hdr->pkt_flag_hi8 |= ZXDH_NO_TCP_FRAGMENT;
+        }
 
-    if (cookie->ol_flags & RTE_MBUF_F_TX_VLAN) {
-        ol_flag |= ZXDH_PD_OFFLOAD_CVLAN_INSERT;
-        hdr->pi_hdr.vlan_id = rte_be_to_cpu_16(cookie->vlan_tci);
-        hdr->pd_hdr.cvlan_insert =
-            rte_be_to_cpu_32((ZXDH_CVLAN_TPID << 16) | cookie->vlan_tci);
+        if (cookie->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
+            pkt_flag_lw16 |= ZXDH_TX_IP_CKSUM_CAL;
+
+        if ((cookie->ol_flags & RTE_MBUF_F_TX_UDP_CKSUM) ==
+            RTE_MBUF_F_TX_UDP_CKSUM) {
+            pcode = ZXDH_PCODE_UDP_PKT_TYPE;
+            pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+        } else if ((cookie->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) ==
+                 RTE_MBUF_F_TX_TCP_CKSUM) {
+            pcode = ZXDH_PCODE_TCP_PKT_TYPE;
+            pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+        }
+
+        pkt_flag_lw16 |= (mtu_or_mss >> ZXDH_MTU_MSS_UNIT_SHIFTBIT) & ZXDH_MTU_MSS_MASK;
+        pi_hdr->pkt_flag_lw16 = rte_cpu_to_be_16(pkt_flag_lw16);
+        pi_hdr->pkt_type = l3_ptype | ZXDH_PKT_FORM_CPU | pcode;
+
+        l3_offset = hdr_len + cookie->l2_len;
+        l3_offset += (cookie->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) ?
+                    cookie->outer_l2_len + cookie->outer_l3_len : 0;
+        pi_hdr->l3_offset = rte_cpu_to_be_16(l3_offset);
+        pi_hdr->l4_offset = rte_cpu_to_be_16(l3_offset + cookie->l3_len);
+        if (cookie->ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM)
+            ol_flag |= ZXDH_PD_OFFLOAD_OUTER_IPCSUM;
+    } else {
+        pd_hdr = &hdr->pd_hdr;
     }
-    if (cookie->ol_flags & RTE_MBUF_F_TX_QINQ) {
-        ol_flag |= ZXDH_PD_OFFLOAD_SVLAN_INSERT;
-        hdr->pd_hdr.svlan_insert =
-            rte_be_to_cpu_32((ZXDH_SVLAN_TPID << 16) | cookie->vlan_tci_outer);
+
+    if (cookie->ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
+        ol_flag |= ZXDH_PD_OFFLOAD_CVLAN_INSERT;
+        pd_hdr->cvlan_insert = rte_cpu_to_be_16(cookie->vlan_tci);
+        if (unlikely(cookie->ol_flags & RTE_MBUF_F_TX_QINQ)) {
+            ol_flag |= ZXDH_PD_OFFLOAD_SVLAN_INSERT;
+            pd_hdr->svlan_insert = rte_cpu_to_be_16(cookie->vlan_tci_outer);
+        }
     }
 
-    hdr->pd_hdr.ol_flag = rte_be_to_cpu_32(ol_flag);
+    pd_hdr->ol_flag = rte_cpu_to_be_16(ol_flag);
 }
 
-static inline void zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
-                        struct rte_mbuf *cookie, int32_t in_order)
+static inline void
+zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
+                        struct rte_mbuf *cookie)
 {
     struct zxdh_virtqueue *vq = txvq->vq;
-    uint16_t id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
+    uint16_t id = vq->vq_avail_idx;
     struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
     uint16_t flags = vq->vq_packed.cached_flags;
     struct zxdh_net_hdr_dl *hdr = NULL;
+    uint8_t hdr_len = vq->hw->dl_net_hdr_len;
+    struct zxdh_vring_packed_desc *dp = &vq->vq_packed.ring.desc[id];
 
     dxp->ndescs = 1;
     dxp->cookie = cookie;
-    hdr = rte_pktmbuf_mtod_offset(cookie, struct zxdh_net_hdr_dl *, -ZXDH_DL_NET_HDR_SIZE);
-    zxdh_xmit_fill_net_hdr(cookie, hdr);
-
-    uint16_t idx = vq->vq_avail_idx;
-    struct zxdh_vring_packed_desc *dp = &vq->vq_packed.ring.desc[idx];
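+    /* The DL header sits in the mbuf headroom, immediately before the
+     * packet data; the can_push test guarantees the room exists.
+     */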
+    hdr = rte_pktmbuf_mtod_offset(cookie, struct zxdh_net_hdr_dl *, -hdr_len);
+    zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
 
-    dp->addr = rte_pktmbuf_iova(cookie) - ZXDH_DL_NET_HDR_SIZE;
-    dp->len  = cookie->data_len + ZXDH_DL_NET_HDR_SIZE;
+    dp->addr = rte_pktmbuf_iova(cookie) - hdr_len;
+    dp->len  = cookie->data_len + hdr_len;
     dp->id   = id;
     if (++vq->vq_avail_idx >= vq->vq_nentries) {
         vq->vq_avail_idx -= vq->vq_nentries;
         vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
     }
     vq->vq_free_cnt--;
-    if (!in_order) {
-        vq->vq_desc_head_idx = dxp->next;
-        if (vq->vq_desc_head_idx == ZXDH_VQ_RING_DESC_CHAIN_END)
-            vq->vq_desc_tail_idx = ZXDH_VQ_RING_DESC_CHAIN_END;
-        }
-        zxdh_queue_store_flags_packed(dp, flags, vq->hw->weak_barriers);
+    zxdh_queue_store_flags_packed(dp, flags);
 }
 
-static inline void zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
+static inline void
+zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
                         struct rte_mbuf *cookie,
-                        uint16_t needed,
-                        int32_t use_indirect,
-                        int32_t in_order)
+                        uint16_t needed)
 {
     struct zxdh_tx_region *txr = txvq->zxdh_net_hdr_mz->addr;
     struct zxdh_virtqueue *vq = txvq->vq;
-    struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
-    void *hdr = NULL;
+    uint16_t id = vq->vq_avail_idx;
+    struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
     uint16_t head_idx = vq->vq_avail_idx;
     uint16_t idx = head_idx;
-    uint16_t prev = head_idx;
-    uint16_t head_flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
-    uint16_t seg_num = cookie->nb_segs;
-    uint16_t id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
+    struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
     struct zxdh_vring_packed_desc *head_dp = &vq->vq_packed.ring.desc[idx];
-    struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
+    struct zxdh_net_hdr_dl *hdr = NULL;
+
+    uint16_t head_flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
+    uint8_t hdr_len = vq->hw->dl_net_hdr_len;
 
     dxp->ndescs = needed;
     dxp->cookie = cookie;
     head_flags |= vq->vq_packed.cached_flags;
-    /* if offload disabled, it is not zeroed below, do it now */
 
-    if (use_indirect) {
-        /**
-         * setup tx ring slot to point to indirect
-         * descriptor list stored in reserved region.
-         * the first slot in indirect ring is already
-         * preset to point to the header in reserved region
-         **/
-        start_dp[idx].addr =
-            txvq->zxdh_net_hdr_mem + RTE_PTR_DIFF(&txr[idx].tx_packed_indir, txr);
-        start_dp[idx].len  = (seg_num + 1) * sizeof(struct zxdh_vring_packed_desc);
-        /* Packed descriptor id needs to be restored when inorder. */
-        if (in_order)
-            start_dp[idx].id = idx;
-
-        /* reset flags for indirect desc */
-        head_flags = ZXDH_VRING_DESC_F_INDIRECT;
-        head_flags |= vq->vq_packed.cached_flags;
-        hdr = (void *)&txr[idx].tx_hdr;
-        /* loop below will fill in rest of the indirect elements */
-        start_dp = txr[idx].tx_packed_indir;
-        start_dp->len = ZXDH_DL_NET_HDR_SIZE; /* update actual net or type hdr size */
-        idx = 1;
-    } else {
-        /* setup first tx ring slot to point to header stored in reserved region. */
-        start_dp[idx].addr = txvq->zxdh_net_hdr_mem + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
-        start_dp[idx].len  = ZXDH_DL_NET_HDR_SIZE;
-        head_flags |= ZXDH_VRING_DESC_F_NEXT;
-        hdr = (void *)&txr[idx].tx_hdr;
-        idx++;
-        if (idx >= vq->vq_nentries) {
-            idx -= vq->vq_nentries;
-            vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
-        }
+    start_dp[idx].addr = txvq->zxdh_net_hdr_mem + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
+    start_dp[idx].len  = hdr_len;
+    head_flags |= ZXDH_VRING_DESC_F_NEXT;
+    hdr = (void *)&txr[idx].tx_hdr;
+
+    rte_prefetch1(hdr);
+    idx++;
+    if (idx >= vq->vq_nentries) {
+        idx -= vq->vq_nentries;
+        vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
     }
-    zxdh_xmit_fill_net_hdr(cookie, (struct zxdh_net_hdr_dl *)hdr);
+
+    zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
 
     do {
         start_dp[idx].addr = rte_pktmbuf_iova(cookie);
         start_dp[idx].len  = cookie->data_len;
+        start_dp[idx].id = id;
         if (likely(idx != head_idx)) {
             uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
+
             flags |= vq->vq_packed.cached_flags;
             start_dp[idx].flags = flags;
         }
-        prev = idx;
+
         idx++;
         if (idx >= vq->vq_nentries) {
             idx -= vq->vq_nentries;
             vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
         }
     } while ((cookie = cookie->next) != NULL);
-    start_dp[prev].id = id;
-    if (use_indirect) {
-        idx = head_idx;
-        if (++idx >= vq->vq_nentries) {
-            idx -= vq->vq_nentries;
-            vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
-        }
-    }
+
     vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
     vq->vq_avail_idx = idx;
-    if (!in_order) {
-        vq->vq_desc_head_idx = dxp->next;
-        if (vq->vq_desc_head_idx == ZXDH_VQ_RING_DESC_CHAIN_END)
-            vq->vq_desc_tail_idx = ZXDH_VQ_RING_DESC_CHAIN_END;
-    }
-    zxdh_queue_store_flags_packed(head_dp, head_flags, vq->hw->weak_barriers);
+
+    zxdh_queue_store_flags_packed(head_dp, head_flags);
 }
 
 static void
 zxdh_update_packet_stats(struct zxdh_virtnet_stats *stats, struct rte_mbuf *mbuf)
 {
     uint32_t s = mbuf->pkt_len;
-    struct rte_ether_addr *ea = NULL;
 
     stats->bytes += s;
-
+#ifdef QUEUE_XSTAT
     if (s == 64) {
         stats->size_bins[1]++;
     } else if (s > 64 && s < 1024) {
@@ -438,6 +450,45 @@ zxdh_update_packet_stats(struct zxdh_virtnet_stats *stats, struct rte_mbuf *mbuf
         else
             stats->multicast++;
     }
+#endif
+}
+
+static void
+zxdh_xmit_flush(struct zxdh_virtqueue *vq)
+{
+    uint16_t id       = 0;
+    uint16_t curr_id  = 0;
+    uint16_t free_cnt = 0;
+    uint16_t size     = vq->vq_nentries;
+    struct zxdh_vring_packed_desc *desc = vq->vq_packed.ring.desc;
+    struct zxdh_vq_desc_extra     *dxp  = NULL;
+    uint16_t used_idx = vq->vq_used_cons_idx;
+
+    /*
+     * desc_is_used() issues an rte_io_rmb after reading the descriptor
+     * flags; the loop below polls until no further used descriptors
+     * are found.
+     */
+    while (desc_is_used(&desc[used_idx], vq)) {
+        id = desc[used_idx].id;
+        do {
+            curr_id = used_idx;
+            dxp = &vq->vq_descx[used_idx];
+            used_idx += dxp->ndescs;
+            free_cnt += dxp->ndescs;
+            if (used_idx >= size) {
+                used_idx -= size;
+                vq->vq_packed.used_wrap_counter ^= 1;
+            }
+            if (dxp->cookie != NULL) {
+                rte_pktmbuf_free(dxp->cookie);
+                dxp->cookie = NULL;
+            }
+        } while (curr_id != id);
+    }
+    vq->vq_used_cons_idx = used_idx;
+    vq->vq_free_cnt += free_cnt;
 }
 
 uint16_t
@@ -445,33 +496,23 @@ zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkt
 {
     struct zxdh_virtnet_tx *txvq = tx_queue;
     struct zxdh_virtqueue  *vq   = txvq->vq;
-    struct zxdh_hw    *hw   = vq->hw;
     uint16_t nb_tx = 0;
 
-    bool in_order = zxdh_pci_with_feature(hw, ZXDH_F_IN_ORDER);
+    zxdh_xmit_flush(vq);
 
-    if (nb_pkts > vq->vq_free_cnt)
-        zxdh_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt, in_order);
     for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
         struct rte_mbuf *txm = tx_pkts[nb_tx];
         int32_t can_push     = 0;
-        int32_t use_indirect = 0;
         int32_t slots        = 0;
         int32_t need         = 0;
 
+        rte_prefetch0(txm);
         /* optimize ring usage */
-        if ((zxdh_pci_with_feature(hw, ZXDH_F_ANY_LAYOUT) ||
-            zxdh_pci_with_feature(hw, ZXDH_F_VERSION_1)) &&
-            rte_mbuf_refcnt_read(txm) == 1 &&
+        if (rte_mbuf_refcnt_read(txm) == 1 &&
             RTE_MBUF_DIRECT(txm) &&
             txm->nb_segs == 1 &&
-            rte_pktmbuf_headroom(txm) >= ZXDH_DL_NET_HDR_SIZE &&
-            rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
-            alignof(struct zxdh_net_hdr_dl))) {
+            txm->data_off >= ZXDH_DL_NET_HDR_SIZE) {
             can_push = 1;
-        } else if (zxdh_pci_with_feature(hw, ZXDH_RING_F_INDIRECT_DESC) &&
-                    txm->nb_segs < ZXDH_MAX_TX_INDIRECT) {
-            use_indirect = 1;
         }
         /**
          * How many main ring entries are needed to this Tx?
@@ -479,46 +520,50 @@ zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkt
          * any_layout => number of segments
          * default    => number of segments + 1
          **/
-        slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
+        slots = txm->nb_segs + !can_push;
         need = slots - vq->vq_free_cnt;
         /* Positive value indicates it need free vring descriptors */
         if (unlikely(need > 0)) {
-            zxdh_xmit_cleanup_packed(vq, need, in_order);
+            zxdh_xmit_cleanup_inorder_packed(vq, need);
             need = slots - vq->vq_free_cnt;
             if (unlikely(need > 0)) {
-                PMD_TX_LOG(ERR, "port[ep:%d, pf:%d, vf:%d, vfid:%d, pcieid:%d], queue:%d[pch:%d]. No free desc to xmit",
-                    hw->vport.epid, hw->vport.pfid, hw->vport.vfid,
-                    hw->vfid, hw->pcie_id, txvq->queue_id,
-                    hw->channel_context[txvq->queue_id].ph_chno);
+                PMD_TX_LOG(ERR,
+                        "Not enough free Tx descriptors: need %d, free %d",
+                        need, vq->vq_free_cnt);
                 break;
             }
         }
-        if (txm->nb_segs > ZXDH_TX_MAX_SEGS) {
-            PMD_TX_LOG(ERR, "%d segs dropped", txm->nb_segs);
-            txvq->stats.truncated_err += nb_pkts - nb_tx;
-            break;
-        }
+
         /* Enqueue Packet buffers */
         if (can_push)
-            zxdh_enqueue_xmit_packed_fast(txvq, txm, in_order);
+            zxdh_enqueue_xmit_packed_fast(txvq, txm);
         else
-            zxdh_enqueue_xmit_packed(txvq, txm, slots, use_indirect, in_order);
+            zxdh_enqueue_xmit_packed(txvq, txm, slots);
         zxdh_update_packet_stats(&txvq->stats, txm);
     }
     txvq->stats.packets += nb_tx;
-    if (likely(nb_tx)) {
-        if (unlikely(zxdh_queue_kick_prepare_packed(vq))) {
-            zxdh_queue_notify(vq);
-            PMD_TX_LOG(DEBUG, "Notified backend after xmit");
-        }
-    }
+    if (likely(nb_tx))
+        zxdh_queue_notify(vq);
     return nb_tx;
 }
 
+static inline int
+dl_net_hdr_check(struct rte_mbuf *m, struct zxdh_hw *hw)
+{
+    if ((m->ol_flags & ZXDH_DTPOFFLOAD_MASK) && !hw->has_tx_offload) {
+        PMD_TX_LOG(ERR, "port %d vfid %d: Tx offload requested but not enabled",
+                    hw->port_id, hw->vfid);
+        return -EINVAL;
+    }
+    return 0;
+}
+
 uint16_t zxdh_xmit_pkts_prepare(void *tx_queue, struct rte_mbuf **tx_pkts,
                 uint16_t nb_pkts)
 {
     struct zxdh_virtnet_tx *txvq = tx_queue;
+    struct zxdh_hw *hw = txvq->vq->hw;
     uint16_t nb_tx;
 
     for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
@@ -544,11 +589,20 @@ uint16_t zxdh_xmit_pkts_prepare(void *tx_queue, struct rte_mbuf **tx_pkts,
             rte_errno = ENOMEM;
             break;
         }
+
+        error = dl_net_hdr_check(m, hw);
+        if (unlikely(error)) {
+            rte_errno = ENOTSUP;
+            txvq->stats.errors += nb_pkts - nb_tx;
+            txvq->stats.offload_cfg_err += nb_pkts - nb_tx;
+            break;
+        }
     }
     return nb_tx;
 }
 
-static uint16_t zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
+static uint16_t
+zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
                     struct rte_mbuf **rx_pkts,
                     uint32_t *len,
                     uint16_t num)
@@ -575,6 +629,8 @@ static uint16_t zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
                 "vring descriptor with no mbuf cookie at %u", vq->vq_used_cons_idx);
             break;
         }
+        rte_prefetch0(cookie);
+        rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
         rx_pkts[i] = cookie;
         vq->vq_free_cnt++;
         vq->vq_used_cons_idx++;
@@ -586,15 +642,107 @@ static uint16_t zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
     return i;
 }
 
-static int32_t zxdh_rx_update_mbuf(struct rte_mbuf *m, struct zxdh_net_hdr_ul *hdr)
+static inline void
+zxdh_rx_update_mbuf(struct rte_mbuf *m, struct zxdh_net_hdr_ul *hdr)
 {
-    struct zxdh_pd_hdr_ul *pd_hdr = &hdr->pd_hdr;
-    struct zxdh_pi_hdr *pi_hdr = &hdr->pi_hdr;
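+    /* pd_len is in 2B units; a value beyond the PD-only maximum means
+     * a PI header is also present.
+     */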
+    uint8_t has_pi = (hdr->type_hdr.pd_len << 1) > ZXDH_UL_NOPI_HDR_SIZE_MAX;
+    struct zxdh_pd_hdr_ul *pd_hdr = has_pi ? &hdr->pipd_hdr_ul.pd_hdr : &hdr->pd_hdr;
+    uint32_t pkt_flag = rte_be_to_cpu_32(pd_hdr->pkt_flag);
     uint32_t idx = 0;
+    uint32_t striped_vlan_tci = rte_be_to_cpu_32(pd_hdr->striped_vlan_tci);
+    uint16_t pkt_type_outer = rte_be_to_cpu_16(pd_hdr->pkt_type_out);
+    uint16_t pkt_type_inner = rte_be_to_cpu_16(pd_hdr->pkt_type_in);
 
-    m->pkt_len = rte_be_to_cpu_16(pi_hdr->ul.pkt_len);
+    if (unlikely(pkt_flag & (ZXDH_MBUF_F_RX_1588_PTP | ZXDH_MBUF_F_RX_1588_TMST))) {
+        if (pkt_flag & ZXDH_MBUF_F_RX_1588_PTP)
+            m->ol_flags |= RTE_MBUF_F_RX_IEEE1588_PTP;
+        if (pkt_flag & ZXDH_MBUF_F_RX_1588_TMST)
+            m->ol_flags |= RTE_MBUF_F_RX_IEEE1588_TMST;
+    }
 
-    uint16_t pkt_type_outer = rte_be_to_cpu_16(pd_hdr->pkt_type_out);
+    if (pkt_flag & ZXDH_MBUF_F_RX_VLAN_STRIPPED) {
+        m->ol_flags |= (RTE_MBUF_F_RX_VLAN_STRIPPED | RTE_MBUF_F_RX_VLAN);
+        m->vlan_tci = (unlikely(pkt_flag & ZXDH_MBUF_F_RX_QINQ))
+                ? (striped_vlan_tci >> 16) & ZXDH_VLAN_ID_MASK
+                : striped_vlan_tci & ZXDH_VLAN_ID_MASK;
+    }
+
+    if (unlikely(pkt_flag & ZXDH_MBUF_F_RX_QINQ_STRIPPED)) {
+        /*
+         * When RTE_MBUF_F_RX_QINQ_STRIPPED is set and
+         * RTE_MBUF_F_RX_VLAN_STRIPPED is unset:
+         * - Only the outer VLAN is removed from the packet data.
+         * - Both TCI values are saved: the inner TCI in mbuf->vlan_tci and
+         *   the outer TCI in mbuf->vlan_tci_outer.
+         *
+         * When RTE_MBUF_F_RX_QINQ is set, RTE_MBUF_F_RX_VLAN must also be
+         * set, and the inner TCI is saved in mbuf->vlan_tci.
+         */
+        m->ol_flags |= (RTE_MBUF_F_RX_QINQ_STRIPPED | RTE_MBUF_F_RX_QINQ);
+        m->ol_flags |= (RTE_MBUF_F_RX_VLAN_STRIPPED | RTE_MBUF_F_RX_VLAN);
+        m->vlan_tci = striped_vlan_tci & ZXDH_VLAN_ID_MASK;
+        m->vlan_tci_outer = (striped_vlan_tci >> 16) & ZXDH_VLAN_ID_MASK;
+    }
+
+    /* rss hash/fd handle */
+    if (pkt_flag & ZXDH_MBUF_F_RX_RSS_HASH) {
+        m->hash.rss = rte_be_to_cpu_32(pd_hdr->rss_hash);
+        m->ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
+    }
+    if (pkt_flag & ZXDH_MBUF_F_RX_FDIR) {
+        m->ol_flags |= RTE_MBUF_F_RX_FDIR;
+        if (pkt_flag & ZXDH_MBUF_F_RX_FDIR_ID) {
+            m->hash.fdir.hi = rte_be_to_cpu_32(pd_hdr->fd);
+            m->ol_flags |= RTE_MBUF_F_RX_FDIR_ID;
+        } else if ((pkt_flag & ZXDH_MBUF_F_RX_FDIR_FLX_MASK) == FLEX_4BYTE) {
+            m->hash.fdir.hi = rte_be_to_cpu_32(pd_hdr->fd);
+            m->ol_flags |= RTE_MBUF_F_RX_FDIR_FLX;
+        } else if ((pkt_flag & ZXDH_MBUF_F_RX_FDIR_FLX_MASK) == FLEX_8BYTE) {
+            m->hash.fdir.hi = rte_be_to_cpu_32(pd_hdr->rss_hash);
+            m->hash.fdir.lo = rte_be_to_cpu_32(pd_hdr->fd);
+            m->ol_flags |= RTE_MBUF_F_RX_FDIR_FLX;
+        }
+    }
+    /* checksum handle */
+    if (pkt_flag & ZXDH_MBUF_F_RX_OUTER_IP_CKSUM_BAD)
+        m->ol_flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD;
+    if (pkt_flag & ZXDH_MBUF_F_RX_OUTER_L4_CKSUM_GOOD)
+        m->ol_flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD;
+
+    if (has_pi) {
+        struct zxdh_pi_hdr *pi_hdr = &hdr->pipd_hdr_ul.pi_hdr;
+        uint16_t pkt_type_masked = pi_hdr->pkt_type & ZXDH_PCODE_MASK;
+        uint16_t err_code = rte_be_to_cpu_16(pi_hdr->ul.err_code);
+
+        bool is_ip_pkt =
+                (pi_hdr->pkt_type == ZXDH_PCODE_IP_PKT_TYPE) ||
+                ((pi_hdr->pkt_type & ZXDH_PI_L3TYPE_MASK) == ZXDH_PI_L3TYPE_IP);
+
+        bool is_l4_pkt =
+                (pkt_type_masked == ZXDH_PCODE_UDP_PKT_TYPE) ||
+                (pkt_type_masked == ZXDH_PCODE_NO_REASSMBLE_TCP_PKT_TYPE) ||
+                (pkt_type_masked == ZXDH_PCODE_TCP_PKT_TYPE);
+
+        if (is_ip_pkt && (pi_hdr->pkt_flag_hi8 & ZXDH_RX_IP_CKSUM_VERIFY)) {
+            if (err_code & ZXDH_IPV4_CSUM_ERR)
+                m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
+            else
+                m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
+        }
+
+        if (is_l4_pkt && (pi_hdr->pkt_flag_hi8 & ZXDH_RX_TCPUDP_CKSUM_VERIFY)) {
+            if (err_code & (ZXDH_TCP_CSUM_ERR | ZXDH_UDP_CSUM_ERR))
+                m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
+            else
+                m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
+        }
+
+        if (rte_be_to_cpu_32(pi_hdr->ul.lro_flag) & ZXDH_PI_LRO_FLAG)
+            m->ol_flags |= RTE_MBUF_F_RX_LRO;
+
+        m->pkt_len = rte_be_to_cpu_16(pi_hdr->ul.pkt_len);
+    } else {
+        m->pkt_len = rte_be_to_cpu_16(pd_hdr->pkt_len);
+    }
 
     idx = (pkt_type_outer >> 12) & 0xF;
     m->packet_type  = zxdh_outer_l2_type[idx];
@@ -605,8 +753,6 @@ static int32_t zxdh_rx_update_mbuf(struct rte_mbuf *m, struct zxdh_net_hdr_ul *h
     idx = pkt_type_outer         & 0xF;
     m->packet_type |= zxdh_tunnel_type[idx];
 
-    uint16_t pkt_type_inner = rte_be_to_cpu_16(pd_hdr->pkt_type_in);
-
     if (pkt_type_inner) {
         idx = (pkt_type_inner >> 12) & 0xF;
         m->packet_type |= zxdh_inner_l2_type[idx];
@@ -616,7 +762,6 @@ static int32_t zxdh_rx_update_mbuf(struct rte_mbuf *m, struct zxdh_net_hdr_ul *h
         m->packet_type |= zxdh_inner_l4_type[idx];
     }
-
-    return 0;
 }
 
 static void zxdh_discard_rxbuf(struct zxdh_virtqueue *vq, struct rte_mbuf *m)
@@ -633,92 +778,67 @@ static void zxdh_discard_rxbuf(struct zxdh_virtqueue *vq, struct rte_mbuf *m)
     }
 }
 
-uint16_t zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
+uint16_t
+zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
                 uint16_t nb_pkts)
 {
     struct zxdh_virtnet_rx *rxvq = rx_queue;
     struct zxdh_virtqueue *vq = rxvq->vq;
     struct zxdh_hw *hw = vq->hw;
-    struct rte_eth_dev *dev = hw->eth_dev;
     struct rte_mbuf *rxm = NULL;
     struct rte_mbuf *prev = NULL;
-    uint32_t len[ZXDH_MBUF_BURST_SZ] = {0};
+    struct zxdh_net_hdr_ul *header = NULL;
+    uint32_t lens[ZXDH_MBUF_BURST_SZ] = {0};
     struct rte_mbuf *rcv_pkts[ZXDH_MBUF_BURST_SZ] = {NULL};
-    uint32_t nb_enqueued = 0;
+    uint16_t len = 0;
     uint32_t seg_num = 0;
     uint32_t seg_res = 0;
+    uint32_t error = 0;
     uint16_t hdr_size = 0;
-    int32_t error = 0;
     uint16_t nb_rx = 0;
+    uint16_t i;
+    uint16_t rcvd_pkt_len = 0;
     uint16_t num = nb_pkts;
 
     if (unlikely(num > ZXDH_MBUF_BURST_SZ))
         num = ZXDH_MBUF_BURST_SZ;
 
-    num = zxdh_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
-    uint16_t i;
-    uint16_t rcvd_pkt_len = 0;
+    num = zxdh_dequeue_burst_rx_packed(vq, rcv_pkts, lens, num);
+    if (num == 0) {
+        rxvq->stats.idle++;
+        goto refill;
+    }
 
     for (i = 0; i < num; i++) {
         rxm = rcv_pkts[i];
-        if (unlikely(len[i] < ZXDH_UL_NET_HDR_SIZE)) {
-            nb_enqueued++;
-            PMD_RX_LOG(ERR, "RX, len:%u err", len[i]);
-            zxdh_discard_rxbuf(vq, rxm);
-            rxvq->stats.errors++;
-            continue;
-        }
-        struct zxdh_net_hdr_ul *header =
-            (struct zxdh_net_hdr_ul *)((char *)rxm->buf_addr +
-            RTE_PKTMBUF_HEADROOM);
+        rx_pkts[nb_rx] = rxm;
+        prev = rxm;
+        len = lens[i];
+        header = rte_pktmbuf_mtod(rxm, struct zxdh_net_hdr_ul *);
 
         seg_num  = header->type_hdr.num_buffers;
-        if (seg_num == 0) {
-            PMD_RX_LOG(ERR, "dequeue %d pkt, No.%d pkt seg_num is %d", num, i, seg_num);
-            seg_num = 1;
-        }
-        if (seg_num > ZXDH_RX_MAX_SEGS) {
-            PMD_RX_LOG(ERR, "dequeue %d pkt, No.%d pkt seg_num is %d", num, i, seg_num);
-            nb_enqueued++;
-            zxdh_discard_rxbuf(vq, rxm);
-            rxvq->stats.errors++;
-            continue;
-        }
-        /* bit[0:6]-pd_len unit:2B */
-        uint16_t pd_len = header->type_hdr.pd_len << 1;
-        if (pd_len > ZXDH_PD_HDR_SIZE_MAX || pd_len < ZXDH_PD_HDR_SIZE_MIN) {
-            PMD_RX_LOG(ERR, "pd_len:%d is invalid", pd_len);
-            nb_enqueued++;
-            zxdh_discard_rxbuf(vq, rxm);
-            rxvq->stats.errors++;
-            continue;
-        }
+
         /* Private queue only handle type hdr */
-        hdr_size = pd_len;
-        rxm->data_off = RTE_PKTMBUF_HEADROOM + hdr_size;
+        hdr_size = ZXDH_TYPE_HDR_SIZE;
+        rxm->pkt_len = ((header->type_hdr.port & 0x7f) << 8) +
+                            header->type_hdr.pd_len;
+        rxm->data_off += hdr_size;
         rxm->nb_segs = seg_num;
         rxm->ol_flags = 0;
-        rxm->vlan_tci = 0;
-        rcvd_pkt_len = (uint32_t)(len[i] - hdr_size);
-        rxm->data_len = (uint16_t)(len[i] - hdr_size);
+        rcvd_pkt_len = len - hdr_size;
+        rxm->data_len = rcvd_pkt_len;
         rxm->port = rxvq->port_id;
-        rx_pkts[nb_rx] = rxm;
-        prev = rxm;
+
         /* Update rte_mbuf according to pi/pd header */
-        if (zxdh_rx_update_mbuf(rxm, header) < 0) {
-            zxdh_discard_rxbuf(vq, rxm);
-            rxvq->stats.errors++;
-            continue;
-        }
+        zxdh_rx_update_mbuf(rxm, header);
         seg_res = seg_num - 1;
         /* Merge remaining segments */
         while (seg_res != 0 && i < (num - 1)) {
             i++;
+            len = lens[i];
             rxm = rcv_pkts[i];
-            rxm->data_off = RTE_PKTMBUF_HEADROOM;
-            rxm->data_len = (uint16_t)(len[i]);
-
-            rcvd_pkt_len += (uint32_t)(len[i]);
+            rxm->data_len = len;
+            rcvd_pkt_len += len;
             prev->next = rxm;
             prev = rxm;
             rxm->next = NULL;
@@ -743,27 +863,26 @@ uint16_t zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
         uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res, ZXDH_MBUF_BURST_SZ);
         uint16_t extra_idx = 0;
 
-        rcv_cnt = zxdh_dequeue_burst_rx_packed(vq, rcv_pkts, len, rcv_cnt);
+        rcv_cnt = zxdh_dequeue_burst_rx_packed(vq, rcv_pkts, lens, rcv_cnt);
         if (unlikely(rcv_cnt == 0)) {
-            PMD_RX_LOG(ERR, "No enough segments for packet");
+            PMD_RX_LOG(ERR, "Not enough segments for packet");
             rte_pktmbuf_free(rx_pkts[nb_rx]);
             rxvq->stats.errors++;
+            rxvq->stats.no_segs_err++;
             break;
         }
         while (extra_idx < rcv_cnt) {
             rxm = rcv_pkts[extra_idx];
-            rxm->data_off = RTE_PKTMBUF_HEADROOM;
-            rxm->pkt_len = (uint32_t)(len[extra_idx]);
-            rxm->data_len = (uint16_t)(len[extra_idx]);
+            rcvd_pkt_len += (uint16_t)(lens[extra_idx]);
+            rxm->data_len = lens[extra_idx];
             prev->next = rxm;
             prev = rxm;
             rxm->next = NULL;
-            rcvd_pkt_len += len[extra_idx];
             extra_idx += 1;
         }
         seg_res -= rcv_cnt;
         if (!seg_res) {
-            if (rcvd_pkt_len != rx_pkts[nb_rx]->pkt_len) {
+            if (unlikely(rcvd_pkt_len != rx_pkts[nb_rx]->pkt_len)) {
                 PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d",
                     rcvd_pkt_len, rx_pkts[nb_rx]->pkt_len);
                 zxdh_discard_rxbuf(vq, rx_pkts[nb_rx]);
@@ -777,6 +896,7 @@ uint16_t zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
     }
     rxvq->stats.packets += nb_rx;
 
+refill:
     /* Allocate new mbuf for the used descriptor */
     if (likely(!zxdh_queue_full(vq))) {
         /* free_cnt may include mrg descs */
@@ -789,16 +909,14 @@ uint16_t zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
                 for (i = 0; i < free_cnt; i++)
                     rte_pktmbuf_free(new_pkts[i]);
             }
-            nb_enqueued += free_cnt;
+
+            if (unlikely(zxdh_queue_kick_prepare_packed(vq)))
+                zxdh_queue_notify(vq);
         } else {
+            struct rte_eth_dev *dev = hw->eth_dev;
+
             dev->data->rx_mbuf_alloc_failed += free_cnt;
         }
     }
-    if (likely(nb_enqueued)) {
-        if (unlikely(zxdh_queue_kick_prepare_packed(vq))) {
-            zxdh_queue_notify(vq);
-            PMD_RX_LOG(DEBUG, "Notified");
-        }
-    }
     return nb_rx;
 }
diff --git a/drivers/net/zxdh/zxdh_rxtx.h b/drivers/net/zxdh/zxdh_rxtx.h
index 79c2a882c3..16ea506517 100644
--- a/drivers/net/zxdh/zxdh_rxtx.h
+++ b/drivers/net/zxdh/zxdh_rxtx.h
@@ -10,13 +10,40 @@
 #include <rte_common.h>
 #include <rte_mbuf_core.h>
 
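+/* type_hdr.port values carried in the DL net header */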
+#define ZXDH_PORT_NP     0
+#define ZXDH_PORT_DRS    1
+#define ZXDH_PORT_DTP    2
+
+/* PI PKT FLAG */
+#define ZXDH_PKT_FORM_CPU                           0x20
+#define ZXDH_NO_IP_FRAGMENT                         0x2000
+#define ZXDH_NO_IPID_UPDATE                         0x4000
+#define ZXDH_TX_IP_CKSUM_CAL                        0x8000
+#define ZXDH_RX_IP_CKSUM_VERIFY                     0x01
+#define ZXDH_RX_PSEDUO_CKSUM_VALID                  0x02
+#define ZXDH_TX_TCPUDP_CKSUM_CAL                    0x04
+#define ZXDH_RX_TCPUDP_CKSUM_VERIFY                 0x08
+#define ZXDH_NO_TCP_FRAGMENT                        0x10
+#define ZXDH_PI_FLAG                                0x20
+#define ZXDH_PI_TYPE                                0x40
+#define ZXDH_VERSION1                               0x80
+#define ZXDH_PI_TYPE_PI                             0x00
+#define ZXDH_PI_TYPE_VIRTIO95                       0x40
+#define ZXDH_PI_TYPE_VIRTIO11                       0xC0
+
 struct zxdh_virtnet_stats {
     uint64_t packets;
     uint64_t bytes;
     uint64_t errors;
+    uint64_t idle;
+    uint64_t full;
+    uint64_t norefill;
     uint64_t multicast;
     uint64_t broadcast;
     uint64_t truncated_err;
+    uint64_t offload_cfg_err;
+    uint64_t invalid_hdr_len_err;
+    uint64_t no_segs_err;
     uint64_t size_bins[8];
 };
 
-- 
2.27.0