DPDK patches and discussions
* [dpdk-dev] [PATCH] net/af_xdp: enable support for unaligned umem chunks
@ 2019-08-29 15:02 Ciara Loftus
  2019-08-30  7:47 ` Loftus, Ciara
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Ciara Loftus @ 2019-08-29 15:02 UTC (permalink / raw)
  To: dev; +Cc: ciara.loftus, xiaolong.ye, bruce.richardson, Kevin Laatz

This patch enables the unaligned chunks feature for AF_XDP, which allows
chunks to be placed at arbitrary offsets in the umem rather than requiring
them to be aligned to 2k. This allows DPDK application mempools to be
mapped directly into the umem, which in turn enables zero-copy transfer
between the umem and the PMD.

This patch replaces the zero copy via external mbuf mechanism introduced
in commit e9ff8bb71943 ("net/af_xdp: enable zero copy by external mbuf").
The pmd_zero_copy vdev argument is also removed, as the PMD now auto-detects
the presence of the unaligned chunks feature and enables it if available,
falling back to copy mode otherwise.

When enabled, this feature significantly improves single-core performance
of the PMD.

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
---
 doc/guides/nics/af_xdp.rst             |   1 -
 doc/guides/rel_notes/release_19_11.rst |   9 +
 drivers/net/af_xdp/rte_eth_af_xdp.c    | 304 ++++++++++++++++++-------
 3 files changed, 231 insertions(+), 83 deletions(-)
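
In outline, the change maps a DPDK mempool directly onto the umem: the
umem base address is the page-aligned start of the mempool memory, and
each umem frame covers one whole mempool element. A rough sketch of that
mapping, condensed from the xdp_umem_configure() hunk below (the helper
names here are illustrative only, not part of the patch):

#include <stdint.h>
#include <unistd.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

/* Umem base address: the page-aligned start of the mempool's first memory
 * chunk. Every mbuf in the pool then sits at (mbuf - base) inside the umem. */
static uint64_t
mempool_umem_base(struct rte_mempool *mp)
{
	struct rte_mempool_memhdr *memhdr = STAILQ_FIRST(&mp->mem_list);

	return (uint64_t)memhdr->addr & ~(uint64_t)(getpagesize() - 1);
}

/* Umem frame size: one frame per mempool element, i.e. the mbuf header,
 * the private area and the mbuf's buffer (headroom + data room). */
static uint32_t
mempool_umem_frame_size(struct rte_mempool *mp)
{
	return sizeof(struct rte_mbuf) + mp->private_data_size +
	       rte_pktmbuf_data_room_size(mp);
}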

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index ec46f08f0..48dd788ac 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -35,7 +35,6 @@ The following options can be provided to set up an af_xdp port in DPDK.
 *   ``iface`` - name of the Kernel interface to attach to (required);
 *   ``start_queue`` - starting netdev queue id (optional, default 0);
 *   ``queue_count`` - total netdev queue number (optional, default 1);
-*   ``pmd_zero_copy`` - enable zero copy or not (optional, default 0);
 
 Prerequisites
 -------------
diff --git a/doc/guides/rel_notes/release_19_11.rst b/doc/guides/rel_notes/release_19_11.rst
index 8490d897c..28a8e5372 100644
--- a/doc/guides/rel_notes/release_19_11.rst
+++ b/doc/guides/rel_notes/release_19_11.rst
@@ -56,6 +56,13 @@ New Features
      Also, make sure to start the actual text at the margin.
      =========================================================
 
+* **Updated the AF_XDP PMD.**
+
+  Updated the AF_XDP PMD. The new features include:
+
+  * Enabled zero copy between application mempools and UMEM by enabling the
+    XDP_UMEM_UNALIGNED_CHUNKS UMEM flag.
+
 
 Removed Items
 -------------
@@ -69,6 +76,8 @@ Removed Items
    Also, make sure to start the actual text at the margin.
    =========================================================
 
+* Removed the AF_XDP ``pmd_zero_copy`` vdev argument. Support is now auto-detected.
+
 
 API Changes
 -----------
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 41ed5b2af..7956d5778 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -58,7 +58,13 @@ static int af_xdp_logtype;
 
 #define ETH_AF_XDP_FRAME_SIZE		2048
 #define ETH_AF_XDP_NUM_BUFFERS		4096
+#ifdef XDP_UMEM_UNALIGNED_CHUNK_FLAG
+#define ETH_AF_XDP_MBUF_OVERHEAD	128 /* sizeof(struct rte_mbuf) */
+#define ETH_AF_XDP_DATA_HEADROOM \
+	(ETH_AF_XDP_MBUF_OVERHEAD + RTE_PKTMBUF_HEADROOM)
+#else
 #define ETH_AF_XDP_DATA_HEADROOM	0
+#endif
 #define ETH_AF_XDP_DFLT_NUM_DESCS	XSK_RING_CONS__DEFAULT_NUM_DESCS
 #define ETH_AF_XDP_DFLT_START_QUEUE_IDX	0
 #define ETH_AF_XDP_DFLT_QUEUE_COUNT	1
@@ -73,7 +79,8 @@ struct xsk_umem_info {
 	struct xsk_umem *umem;
 	struct rte_ring *buf_ring;
 	const struct rte_memzone *mz;
-	int pmd_zc;
+	struct rte_mempool *mb_pool;
+	void *buffer;
 };
 
 struct rx_stats {
@@ -98,10 +105,12 @@ struct pkt_rx_queue {
 struct tx_stats {
 	uint64_t tx_pkts;
 	uint64_t tx_bytes;
+	uint64_t tx_dropped;
 };
 
 struct pkt_tx_queue {
 	struct xsk_ring_prod tx;
+	struct xsk_umem_info *umem;
 
 	struct tx_stats stats;
 
@@ -117,7 +126,6 @@ struct pmd_internals {
 	int max_queue_cnt;
 	int combined_queue_cnt;
 
-	int pmd_zc;
 	struct rte_ether_addr eth_addr;
 
 	struct pkt_rx_queue *rx_queues;
@@ -127,13 +135,11 @@ struct pmd_internals {
 #define ETH_AF_XDP_IFACE_ARG			"iface"
 #define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
 #define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
-#define ETH_AF_XDP_PMD_ZC_ARG			"pmd_zero_copy"
 
 static const char * const valid_arguments[] = {
 	ETH_AF_XDP_IFACE_ARG,
 	ETH_AF_XDP_START_QUEUE_ARG,
 	ETH_AF_XDP_QUEUE_COUNT_ARG,
-	ETH_AF_XDP_PMD_ZC_ARG,
 	NULL
 };
 
@@ -148,9 +154,30 @@ static inline int
 reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
 {
 	struct xsk_ring_prod *fq = &umem->fq;
-	void *addrs[reserve_size];
 	uint32_t idx;
 	uint16_t i;
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+
+	if (unlikely(!xsk_ring_prod__reserve(fq, reserve_size, &idx))) {
+		AF_XDP_LOG(DEBUG, "Failed to reserve enough fq descs.\n");
+		return -1;
+	}
+
+	for (i = 0; i < reserve_size; i++) {
+		struct rte_mbuf *mbuf;
+		__u64 *fq_addr;
+		uint64_t addr;
+
+		mbuf = rte_pktmbuf_alloc(umem->mb_pool);
+		if (unlikely(mbuf == NULL))
+			break;
+
+		fq_addr = xsk_ring_prod__fill_addr(fq, idx++);
+		addr = (uint64_t)mbuf - (uint64_t)umem->buffer;
+		*fq_addr = addr;
+	}
+#else
+	void *addrs[reserve_size];
 
 	if (rte_ring_dequeue_bulk(umem->buf_ring, addrs, reserve_size, NULL)
 		    != reserve_size) {
@@ -171,21 +198,13 @@ reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
 		fq_addr = xsk_ring_prod__fill_addr(fq, idx++);
 		*fq_addr = (uint64_t)addrs[i];
 	}
+#endif
 
 	xsk_ring_prod__submit(fq, reserve_size);
 
 	return 0;
 }
 
-static void
-umem_buf_release_to_fq(void *addr, void *opaque)
-{
-	struct xsk_umem_info *umem = (struct xsk_umem_info *)opaque;
-	uint64_t umem_addr = (uint64_t)addr - umem->mz->addr_64;
-
-	rte_ring_enqueue(umem->buf_ring, (void *)umem_addr);
-}
-
 static uint16_t
 eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
@@ -194,12 +213,53 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_umem_info *umem = rxq->umem;
 	struct xsk_ring_prod *fq = &umem->fq;
 	uint32_t idx_rx = 0;
-	uint32_t free_thresh = fq->size >> 1;
-	int pmd_zc = umem->pmd_zc;
-	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
 	unsigned long dropped = 0;
 	unsigned long rx_bytes = 0;
 	int rcvd, i;
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+
+	rcvd = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx);
+	if (rcvd == 0) {
+#if defined(XDP_USE_NEED_WAKEUP)
+		if (xsk_ring_prod__needs_wakeup(fq))
+			(void)poll(rxq->fds, 1, 1000);
+#endif
+
+		return rcvd;
+	}
+
+	for (i = 0; i < rcvd; i++) {
+		const struct xdp_desc *desc;
+		uint64_t addr;
+		uint32_t len;
+		uint64_t offset;
+
+		desc = xsk_ring_cons__rx_desc(rx, idx_rx++);
+		addr = desc->addr;
+		len = desc->len;
+
+		offset = xsk_umem__extract_offset(addr);
+		addr = xsk_umem__extract_addr(addr);
+
+		bufs[i] = (struct rte_mbuf *)
+				xsk_umem__get_data(umem->buffer, addr);
+		bufs[i]->data_off = offset - sizeof(struct rte_mbuf);
+
+		rte_pktmbuf_pkt_len(bufs[i]) = len;
+		rte_pktmbuf_data_len(bufs[i]) = len;
+		rx_bytes += len;
+	}
+
+	xsk_ring_cons__release(rx, rcvd);
+
+	/* statistics */
+	rxq->stats.rx_pkts += (rcvd - dropped);
+	rxq->stats.rx_bytes += rx_bytes;
+
+	(void)reserve_fill_queue(umem, rcvd);
+#else
+	uint32_t free_thresh = fq->size >> 1;
+	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
 
 	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_RX_BATCH_SIZE);
 
@@ -224,25 +284,14 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		uint64_t addr;
 		uint32_t len;
 		void *pkt;
-		uint16_t buf_len = ETH_AF_XDP_FRAME_SIZE;
-		struct rte_mbuf_ext_shared_info *shinfo;
 
 		desc = xsk_ring_cons__rx_desc(rx, idx_rx++);
 		addr = desc->addr;
 		len = desc->len;
 		pkt = xsk_umem__get_data(rxq->umem->mz->addr, addr);
 
-		if (pmd_zc) {
-			shinfo = rte_pktmbuf_ext_shinfo_init_helper(pkt,
-					&buf_len, umem_buf_release_to_fq, umem);
-
-			rte_pktmbuf_attach_extbuf(mbufs[i], pkt, 0, buf_len,
-						  shinfo);
-		} else {
-			rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *),
-							pkt, len);
-			rte_ring_enqueue(umem->buf_ring, (void *)addr);
-		}
+		rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len);
+		rte_ring_enqueue(umem->buf_ring, (void *)addr);
 		rte_pktmbuf_pkt_len(mbufs[i]) = len;
 		rte_pktmbuf_data_len(mbufs[i]) = len;
 		rx_bytes += len;
@@ -259,6 +308,7 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	if (rcvd != nb_pkts)
 		rte_mempool_put_bulk(rxq->mb_pool, (void **)&mbufs[rcvd],
 				     nb_pkts - rcvd);
+#endif
 
 	return rcvd;
 }
@@ -275,7 +325,14 @@ pull_umem_cq(struct xsk_umem_info *umem, int size)
 	for (i = 0; i < n; i++) {
 		uint64_t addr;
 		addr = *xsk_ring_cons__comp_addr(cq, idx_cq++);
+
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+		addr = xsk_umem__extract_addr(addr);
+		rte_pktmbuf_free((struct rte_mbuf *)
+					xsk_umem__get_data(umem->buffer, addr));
+#else
 		rte_ring_enqueue(umem->buf_ring, (void *)addr);
+#endif
 	}
 
 	xsk_ring_cons__release(cq, n);
@@ -284,7 +341,7 @@ pull_umem_cq(struct xsk_umem_info *umem, int size)
 static void
 kick_tx(struct pkt_tx_queue *txq)
 {
-	struct xsk_umem_info *umem = txq->pair->umem;
+	struct xsk_umem_info *umem = txq->umem;
 
 #if defined(XDP_USE_NEED_WAKEUP)
 	if (xsk_ring_prod__needs_wakeup(&txq->tx))
@@ -299,28 +356,70 @@ kick_tx(struct pkt_tx_queue *txq)
 			if (errno == EAGAIN)
 				pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
 		}
-	pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
-}
-
-static inline bool
-in_umem_range(struct xsk_umem_info *umem, uint64_t addr)
-{
-	uint64_t mz_base_addr = umem->mz->addr_64;
-
-	return addr >= mz_base_addr && addr < mz_base_addr + umem->mz->len;
 }
 
 static uint16_t
 eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	struct pkt_tx_queue *txq = queue;
-	struct xsk_umem_info *umem = txq->pair->umem;
+	struct xsk_umem_info *umem = txq->umem;
 	struct rte_mbuf *mbuf;
-	int pmd_zc = umem->pmd_zc;
-	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
 	unsigned long tx_bytes = 0;
 	int i;
 	uint32_t idx_tx;
+	uint16_t dropped = 0;
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+
+	pull_umem_cq(umem, nb_pkts);
+
+	nb_pkts = xsk_ring_prod__reserve(&txq->tx, nb_pkts, &idx_tx);
+
+	for (i = 0; i < nb_pkts; i++) {
+		struct xdp_desc *desc;
+		uint64_t addr, offset;
+
+		desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i);
+		mbuf = bufs[i];
+		desc->len = mbuf->pkt_len;
+
+		if (mbuf->pool == umem->mb_pool) {
+			addr = (uint64_t)mbuf - (uint64_t)umem->buffer;
+			offset = rte_pktmbuf_mtod(mbuf, uint64_t) -
+					(uint64_t)mbuf;
+			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+			desc->addr = addr;
+			desc->addr |= offset;
+		} else {
+			struct rte_mbuf *local_mbuf =
+					rte_pktmbuf_alloc(umem->mb_pool);
+			void *pkt;
+
+			if (!local_mbuf) {
+				rte_pktmbuf_free(mbuf);
+				dropped++;
+				continue;
+			}
+
+			addr = (uint64_t)local_mbuf - (uint64_t)umem->buffer;
+			offset = rte_pktmbuf_mtod(local_mbuf, uint64_t) -
+					(uint64_t)local_mbuf;
+			pkt = xsk_umem__get_data(umem->buffer, addr + offset);
+			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+			desc->addr = addr;
+			desc->addr |= offset;
+			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
+					desc->len);
+			rte_pktmbuf_free(mbuf);
+		}
+
+		tx_bytes += mbuf->pkt_len;
+	}
+
+	xsk_ring_prod__submit(&txq->tx, nb_pkts - dropped);
+
+	kick_tx(txq);
+#else
+	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
 
 	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE);
 
@@ -333,6 +432,7 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	if (xsk_ring_prod__reserve(&txq->tx, nb_pkts, &idx_tx) != nb_pkts) {
 		kick_tx(txq);
+		pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
 		rte_ring_enqueue_bulk(umem->buf_ring, addrs, nb_pkts, NULL);
 		return 0;
 	}
@@ -345,35 +445,23 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		mbuf = bufs[i];
 		desc->len = mbuf->pkt_len;
 
-		/*
-		 * We need to make sure the external mbuf address is within
-		 * current port's umem memzone range
-		 */
-		if (pmd_zc && RTE_MBUF_HAS_EXTBUF(mbuf) &&
-				in_umem_range(umem, (uint64_t)mbuf->buf_addr)) {
-			desc->addr = (uint64_t)mbuf->buf_addr -
-				umem->mz->addr_64;
-			mbuf->buf_addr = xsk_umem__get_data(umem->mz->addr,
-					(uint64_t)addrs[i]);
-		} else {
-			desc->addr = (uint64_t)addrs[i];
-			pkt = xsk_umem__get_data(umem->mz->addr,
-					desc->addr);
-			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
-					desc->len);
-		}
+		desc->addr = (uint64_t)addrs[i];
+		pkt = xsk_umem__get_data(umem->mz->addr,
+					 desc->addr);
+		rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), desc->len);
 		tx_bytes += mbuf->pkt_len;
+		rte_pktmbuf_free(mbuf);
 	}
 
 	xsk_ring_prod__submit(&txq->tx, nb_pkts);
 
 	kick_tx(txq);
+	pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
+#endif
 
-	txq->stats.tx_pkts += nb_pkts;
+	txq->stats.tx_pkts += nb_pkts - dropped;
 	txq->stats.tx_bytes += tx_bytes;
-
-	for (i = 0; i < nb_pkts; i++)
-		rte_pktmbuf_free(bufs[i]);
+	txq->stats.tx_dropped += dropped;
 
 	return nb_pkts;
 }
@@ -446,6 +534,7 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 		stats->ipackets += stats->q_ipackets[i];
 		stats->ibytes += stats->q_ibytes[i];
 		stats->imissed += rxq->stats.rx_dropped;
+		stats->oerrors += txq->stats.tx_dropped;
 		ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP,
 				XDP_STATISTICS, &xdp_stats, &optlen);
 		if (ret != 0) {
@@ -492,11 +581,16 @@ remove_xdp_program(struct pmd_internals *internals)
 static void
 xdp_umem_destroy(struct xsk_umem_info *umem)
 {
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+	rte_mempool_free(umem->mb_pool);
+	umem->mb_pool = NULL;
+#else
 	rte_memzone_free(umem->mz);
 	umem->mz = NULL;
 
 	rte_ring_free(umem->buf_ring);
 	umem->buf_ring = NULL;
+#endif
 
 	rte_free(umem);
 	umem = NULL;
@@ -546,6 +640,55 @@ eth_link_update(struct rte_eth_dev *dev __rte_unused,
 	return 0;
 }
 
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+static inline uint64_t get_base_addr(struct rte_mempool *mp)
+{
+	struct rte_mempool_memhdr *memhdr;
+
+	memhdr = STAILQ_FIRST(&mp->mem_list);
+	return (uint64_t)memhdr->addr & ~(getpagesize() - 1);
+}
+
+static struct
+xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals __rte_unused,
+				  struct pkt_rx_queue *rxq)
+{
+	struct xsk_umem_info *umem;
+	int ret;
+	struct xsk_umem_config usr_config = {
+		.fill_size = ETH_AF_XDP_DFLT_NUM_DESCS,
+		.comp_size = ETH_AF_XDP_DFLT_NUM_DESCS,
+		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG};
+	void *base_addr = NULL;
+	struct rte_mempool *mb_pool = rxq->mb_pool;
+
+	usr_config.frame_size = rte_pktmbuf_data_room_size(mb_pool) +
+					ETH_AF_XDP_MBUF_OVERHEAD +
+					mb_pool->private_data_size;
+	usr_config.frame_headroom = ETH_AF_XDP_DATA_HEADROOM +
+					mb_pool->private_data_size;
+
+	umem = rte_zmalloc_socket("umem", sizeof(*umem), 0, rte_socket_id());
+	if (umem == NULL) {
+		AF_XDP_LOG(ERR, "Failed to allocate umem info");
+		return NULL;
+	}
+
+	umem->mb_pool = mb_pool;
+	base_addr = (void *)get_base_addr(mb_pool);
+
+	ret = xsk_umem__create(&umem->umem, base_addr,
+			       mb_pool->populated_size * usr_config.frame_size,
+			       &umem->fq, &umem->cq,
+			       &usr_config);
+
+	if (ret) {
+		AF_XDP_LOG(ERR, "Failed to create umem");
+		goto err;
+	}
+	umem->buffer = base_addr;
+
+#else
 static struct
 xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals,
 				  struct pkt_rx_queue *rxq)
@@ -606,6 +749,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals,
 	}
 	umem->mz = mz;
 
+#endif
 	return umem;
 
 err:
@@ -625,6 +769,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 	rxq->umem = xdp_umem_configure(internals, rxq);
 	if (rxq->umem == NULL)
 		return -ENOMEM;
+	txq->umem = rxq->umem;
 
 	cfg.rx_size = ring_size;
 	cfg.tx_size = ring_size;
@@ -669,7 +814,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 		   struct rte_mempool *mb_pool)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
-	uint32_t buf_size, data_size;
 	struct pkt_rx_queue *rxq;
 	int ret;
 
@@ -677,6 +821,10 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 
 	AF_XDP_LOG(INFO, "Set up rx queue, rx queue id: %d, xsk queue id: %d\n",
 		   rx_queue_id, rxq->xsk_queue_idx);
+
+#ifndef XDP_UMEM_UNALIGNED_CHUNK_FLAG
+	uint32_t buf_size, data_size;
+
 	/* Now get the space available for data in the mbuf */
 	buf_size = rte_pktmbuf_data_room_size(mb_pool) -
 		RTE_PKTMBUF_HEADROOM;
@@ -688,6 +836,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 		ret = -ENOMEM;
 		goto err;
 	}
+#endif
 
 	rxq->mb_pool = mb_pool;
 
@@ -700,8 +849,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 	rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
 	rxq->fds[0].events = POLLIN;
 
-	rxq->umem->pmd_zc = internals->pmd_zc;
-
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	return 0;
 
@@ -877,7 +1024,7 @@ xdp_get_channels_info(const char *if_name, int *max_queues,
 
 static int
 parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
-			int *queue_cnt, int *pmd_zc)
+			int *queue_cnt)
 {
 	int ret;
 
@@ -898,11 +1045,6 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
 		goto free_kvlist;
 	}
 
-	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PMD_ZC_ARG,
-				 &parse_integer_arg, pmd_zc);
-	if (ret < 0)
-		goto free_kvlist;
-
 free_kvlist:
 	rte_kvargs_free(kvlist);
 	return ret;
@@ -940,7 +1082,7 @@ get_iface_info(const char *if_name,
 
 static struct rte_eth_dev *
 init_internals(struct rte_vdev_device *dev, const char *if_name,
-			int start_queue_idx, int queue_cnt, int pmd_zc)
+			int start_queue_idx, int queue_cnt)
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
@@ -955,7 +1097,6 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 
 	internals->start_queue_idx = start_queue_idx;
 	internals->queue_cnt = queue_cnt;
-	internals->pmd_zc = pmd_zc;
 	strlcpy(internals->if_name, if_name, IFNAMSIZ);
 
 	if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
@@ -1011,8 +1152,9 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	/* Let rte_eth_dev_close() release the port resources. */
 	eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
 
-	if (internals->pmd_zc)
-		AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+	AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
+#endif
 
 	return eth_dev;
 
@@ -1034,7 +1176,6 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
 	struct rte_eth_dev *eth_dev = NULL;
 	const char *name;
-	int pmd_zc = 0;
 
 	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
 		rte_vdev_device_name(dev));
@@ -1062,7 +1203,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		dev->device.numa_node = rte_socket_id();
 
 	if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
-			     &xsk_queue_cnt, &pmd_zc) < 0) {
+			     &xsk_queue_cnt) < 0) {
 		AF_XDP_LOG(ERR, "Invalid kvargs value\n");
 		return -EINVAL;
 	}
@@ -1073,7 +1214,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	}
 
 	eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
-					xsk_queue_cnt, pmd_zc);
+					xsk_queue_cnt);
 	if (eth_dev == NULL) {
 		AF_XDP_LOG(ERR, "Failed to init internals\n");
 		return -1;
@@ -1116,8 +1257,7 @@ RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv);
 RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
 			      "iface=<string> "
 			      "start_queue=<int> "
-			      "queue_count=<int> "
-			      "pmd_zero_copy=<0|1>");
+			      "queue_count=<int>");
 
 RTE_INIT(af_xdp_init_log)
 {
-- 
2.17.1



* Re: [dpdk-dev] [PATCH] net/af_xdp: enable support for unaligned umem chunks
  2019-08-29 15:02 [dpdk-dev] [PATCH] net/af_xdp: enable support for unaligned umem chunks Ciara Loftus
@ 2019-08-30  7:47 ` Loftus, Ciara
  2019-08-30 16:07 ` William Tu
  2019-09-03 22:02 ` Ye Xiaolong
  2 siblings, 0 replies; 8+ messages in thread
From: Loftus, Ciara @ 2019-08-30  7:47 UTC (permalink / raw)
  To: dev; +Cc: Ye, Xiaolong, Richardson, Bruce, Laatz, Kevin

> 
> This patch enables the unaligned chunks feature for AF_XDP which allows
> chunks to be placed at arbitrary places in the umem, as opposed to them
> being required to be aligned to 2k. This allows for DPDK application
> mempools to be mapped directly into the umem and in turn enable zero
> copy transfer between umem and the PMD.
> 
> This patch replaces the zero copy via external mbuf mechanism introduced in
> commit e9ff8bb71943 ("net/af_xdp: enable zero copy by external mbuf").
> The pmd_zero copy vdev argument is also removed as now the PMD will
> auto-detect presence of the unaligned chunks feature and enable it if so and
> otherwise fall back to copy mode if not detected.
> 
> When enabled, this feature significantly improves single-core performance
> of the PMD.
> 
> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
> ---

Apologies for omitting this detail from the original mail.
Those wishing to try out this feature need to first apply this series, which is currently under review, to their kernel tree:
https://lore.kernel.org/bpf/20190827022531.15060-1-kevin.laatz@intel.com/T/#u

Thanks,
Ciara


* Re: [dpdk-dev] [PATCH] net/af_xdp: enable support for unaligned umem chunks
  2019-08-29 15:02 [dpdk-dev] [PATCH] net/af_xdp: enable support for unaligned umem chunks Ciara Loftus
  2019-08-30  7:47 ` Loftus, Ciara
@ 2019-08-30 16:07 ` William Tu
  2019-09-02  8:48   ` Loftus, Ciara
  2019-09-03 22:02 ` Ye Xiaolong
  2 siblings, 1 reply; 8+ messages in thread
From: William Tu @ 2019-08-30 16:07 UTC (permalink / raw)
  To: Ciara Loftus; +Cc: dev, Xiaolong Ye, bruce.richardson, Kevin Laatz

Hi Ciara,

I haven't tried this patch but have a question.

On Thu, Aug 29, 2019 at 8:04 AM Ciara Loftus <ciara.loftus@intel.com> wrote:
>
> This patch enables the unaligned chunks feature for AF_XDP which allows
> chunks to be placed at arbitrary places in the umem, as opposed to them
> being required to be aligned to 2k. This allows for DPDK application
> mempools to be mapped directly into the umem and in turn enable zero copy
> transfer between umem and the PMD.
>
> This patch replaces the zero copy via external mbuf mechanism introduced
> in commit e9ff8bb71943 ("net/af_xdp: enable zero copy by external mbuf").
> The pmd_zero copy vdev argument is also removed as now the PMD will
> auto-detect presence of the unaligned chunks feature and enable it if so
> and otherwise fall back to copy mode if not detected.
>
> When enabled, this feature significantly improves single-core performance
> of the PMD.

Why does using the unaligned chunk feature improve performance?
The existing external mbuf mechanism already has zero copy between the umem
and the PMD, and your patch does the same thing. So does the improvement come
from somewhere else?

Thank you
William

>
> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
> ---
>  doc/guides/nics/af_xdp.rst             |   1 -
>  doc/guides/rel_notes/release_19_11.rst |   9 +
>  drivers/net/af_xdp/rte_eth_af_xdp.c    | 304 ++++++++++++++++++-------
>  3 files changed, 231 insertions(+), 83 deletions(-)
>
<snip>


* Re: [dpdk-dev] [PATCH] net/af_xdp: enable support for unaligned umem chunks
  2019-08-30 16:07 ` William Tu
@ 2019-09-02  8:48   ` Loftus, Ciara
  2019-09-02  8:55     ` Loftus, Ciara
  0 siblings, 1 reply; 8+ messages in thread
From: Loftus, Ciara @ 2019-09-02  8:48 UTC (permalink / raw)
  To: William Tu; +Cc: dev, Ye, Xiaolong, Richardson, Bruce, Laatz, Kevin

> Hi Ciara,
> 
> I haven't tried this patch but have a question.
> 
> On Thu, Aug 29, 2019 at 8:04 AM Ciara Loftus <ciara.loftus@intel.com> wrote:
> >
> > This patch enables the unaligned chunks feature for AF_XDP which
> > allows chunks to be placed at arbitrary places in the umem, as opposed
> > to them being required to be aligned to 2k. This allows for DPDK
> > application mempools to be mapped directly into the umem and in turn
> > enable zero copy transfer between umem and the PMD.
> >
> > This patch replaces the zero copy via external mbuf mechanism
> > introduced in commit e9ff8bb71943 ("net/af_xdp: enable zero copy by
> external mbuf").
> > The pmd_zero copy vdev argument is also removed as now the PMD will
> > auto-detect presence of the unaligned chunks feature and enable it if
> > so and otherwise fall back to copy mode if not detected.
> >
> > When enabled, this feature significantly improves single-core
> > performance of the PMD.
> 
> Why using unaligned chunk feature improve performance?
> Existing external mbuf already has zero copy between umem and PMD, and
> your patch also does the same thing. So the improvement is from
> somewhere else?

Hi William,

Good question.
The external mbuf approach does indeed give zero copy; however, there is some additional complexity in that path around the management of the buf_ring.

For example on the fill/rx path, in the ext mbuf solution one must dequeue an addr from the buf_ring and add it to the fill queue, allocate an mbuf for the external mbuf, get a pointer to the data @ addr and attach the external mbuf. With the new solution, we allocate an mbuf from the mempool, derive the addr from the mbuf itself and add it to the fill queue, and then on rx we can simply cast the pointer to the data @ addr to an mbuf and return it to the user.
On tx/complete, instead of dequeuing from the buf_ring to get a valid addr we can again just derive it from the mbuf itself.
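
In code terms it boils down to a little address arithmetic. A rough sketch of
the fill/rx and tx sides (using a cut-down stand-in for the PMD's umem struct;
xsk_umem__extract_addr()/xsk_umem__extract_offset() and
XSK_UNALIGNED_BUF_OFFSET_SHIFT come from the headers shipped with the kernel
series mentioned below):

#include <stdint.h>
#include <linux/if_xdp.h>
#include <bpf/xsk.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

/* Stand-in for the PMD's xsk_umem_info, reduced to the fields used here. */
struct umem_view {
	struct rte_mempool *mb_pool;	/* application mempool mapped as the umem */
	void *buffer;			/* page-aligned base address of the umem */
};

/* Fill: the address handed to the kernel is simply the mbuf's offset from
 * the umem base - no buf_ring dequeue, no separate address pool. */
static int
fill_one(struct umem_view *umem, struct xsk_ring_prod *fq)
{
	struct rte_mbuf *mbuf = rte_pktmbuf_alloc(umem->mb_pool);
	uint32_t idx;

	if (mbuf == NULL)
		return -1;
	if (xsk_ring_prod__reserve(fq, 1, &idx) != 1) {
		rte_pktmbuf_free(mbuf);
		return -1;
	}
	*xsk_ring_prod__fill_addr(fq, idx) =
		(uint64_t)mbuf - (uint64_t)umem->buffer;
	xsk_ring_prod__submit(fq, 1);
	return 0;
}

/* Rx: the completed descriptor points straight back at that mbuf, so it can
 * be handed to the application without a copy or an extbuf attach. */
static struct rte_mbuf *
rx_one(struct umem_view *umem, const struct xdp_desc *desc)
{
	uint64_t addr = xsk_umem__extract_addr(desc->addr);
	uint64_t offset = xsk_umem__extract_offset(desc->addr);
	struct rte_mbuf *mbuf = xsk_umem__get_data(umem->buffer, addr);

	mbuf->data_off = offset - sizeof(struct rte_mbuf);
	rte_pktmbuf_pkt_len(mbuf) = desc->len;
	rte_pktmbuf_data_len(mbuf) = desc->len;
	return mbuf;
}

/* Tx/complete: likewise derived from the mbuf itself - umem offset in the
 * lower bits, data offset packed above XSK_UNALIGNED_BUF_OFFSET_SHIFT. */
static uint64_t
tx_desc_addr(struct umem_view *umem, struct rte_mbuf *mbuf)
{
	uint64_t addr = (uint64_t)mbuf - (uint64_t)umem->buffer;
	uint64_t offset = rte_pktmbuf_mtod(mbuf, uint64_t) - (uint64_t)mbuf;

	return addr | (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
}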

I've performed some testing to compare the old vs new zc and found that for the case where the PMD and IRQs are pinned to separate cores the difference is ~-5%, but for the single-core case where the PMD and IRQs are pinned to the same core (with the need_wakeup feature enabled), or when multiple PMDs are forwarding to one another, the difference is significant. Please see below:

ports      queues/port pinning    Δ old zc
1          1           0          -4.74%
1          1           1          17.99%
2          1           0          -5.62%
2          1           1          71.77%
1          2           0          114.24%
1          2           1          134.88%

FYI the series has been now merged into the bpf-next tree:
https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=bdb15a29cc28f8155e20f7fb58b60ffc452f2d1b

Thanks,
Ciara

> 
> Thank you
> William
> 
> >
> > Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> > Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
> > ---
> >  doc/guides/nics/af_xdp.rst             |   1 -
> >  doc/guides/rel_notes/release_19_11.rst |   9 +
> >  drivers/net/af_xdp/rte_eth_af_xdp.c    | 304 ++++++++++++++++++------
> -
> >  3 files changed, 231 insertions(+), 83 deletions(-)
> >
> <snip>


* Re: [dpdk-dev] [PATCH] net/af_xdp: enable support for unaligned umem chunks
  2019-09-02  8:48   ` Loftus, Ciara
@ 2019-09-02  8:55     ` Loftus, Ciara
  2019-09-02 14:44       ` William Tu
  0 siblings, 1 reply; 8+ messages in thread
From: Loftus, Ciara @ 2019-09-02  8:55 UTC (permalink / raw)
  To: Loftus, Ciara, William Tu
  Cc: dev, Ye, Xiaolong, Richardson, Bruce, Laatz, Kevin

> > Hi Ciara,
> >
> > I haven't tried this patch but have a question.
> >
> > On Thu, Aug 29, 2019 at 8:04 AM Ciara Loftus <ciara.loftus@intel.com>
> wrote:
> > >
> > > This patch enables the unaligned chunks feature for AF_XDP which
> > > allows chunks to be placed at arbitrary places in the umem, as
> > > opposed to them being required to be aligned to 2k. This allows for
> > > DPDK application mempools to be mapped directly into the umem and in
> > > turn enable zero copy transfer between umem and the PMD.
> > >
> > > This patch replaces the zero copy via external mbuf mechanism
> > > introduced in commit e9ff8bb71943 ("net/af_xdp: enable zero copy by
> > external mbuf").
> > > The pmd_zero copy vdev argument is also removed as now the PMD will
> > > auto-detect presence of the unaligned chunks feature and enable it
> > > if so and otherwise fall back to copy mode if not detected.
> > >
> > > When enabled, this feature significantly improves single-core
> > > performance of the PMD.
> >
> > Why using unaligned chunk feature improve performance?
> > Existing external mbuf already has zero copy between umem and PMD,
> and
> > your patch also does the same thing. So the improvement is from
> > somewhere else?
> 
> Hi William,
> 
> Good question.
> The external mbuf way indeed has zero copy however there's some
> additional complexity in that path in the management of the buf_ring.
> 
> For example on the fill/rx path, in the ext mbuf solution one must dequeue
> an addr from the buf_ring and add it to the fill queue, allocate an mbuf for
> the external mbuf, get a pointer to the data @ addr and attach the external
> mbuf. With the new solution, we allocate an mbuf from the mempool, derive
> the addr from the mbuf itself and add it to the fill queue, and then on rx we
> can simply cast the pointer to the data @ addr to an mbuf and return it to the
> user.
> On tx/complete, instead of dequeuing from the buf_ring to get a valid addr
> we can again just derive it from the mbuf itself.
> 
> I've performed some testing to compare the old vs new zc and found that for
> the case where the PMD and IRQs are pinned to separate cores the
> difference is ~-5%, but for single-core case where the PMD and IRQs are
> pinned to the same core (with the need_wakeup feature enabled), or when
> multiple PMDs are forwarding to one another the difference is significant.
> Please see below:
> 
> ports      queues/port pinning    Δ old zc
> 1          1           0          -4.74%
> 1          1           1          17.99%
> 2          1           0          -5.62%
> 2          1           1          71.77%
> 1          2           0          114.24%
> 1          2           1          134.88%

Apologies, the last 4 figures above were comparing old memcpy vs zc. Corrected data set below:

ports      qs/port     pinning    Δ old zc
1          1           0          -4.74%
1          1           1          17.99%
2          1           0          -5.80%
2          1           1          37.24%
1          2           0          104.27%
1          2           1          136.73%

> 
> FYI the series has been now merged into the bpf-next tree:
> https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-
> next.git/commit/?id=bdb15a29cc28f8155e20f7fb58b60ffc452f2d1b
> 
> Thanks,
> Ciara
> 
> >
> > Thank you
> > William
> >
> > >
> > > Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> > > Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
> > > ---
> > >  doc/guides/nics/af_xdp.rst             |   1 -
> > >  doc/guides/rel_notes/release_19_11.rst |   9 +
> > >  drivers/net/af_xdp/rte_eth_af_xdp.c    | 304 ++++++++++++++++++----
> --
> > -
> > >  3 files changed, 231 insertions(+), 83 deletions(-)
> > >
> > <snip>


* Re: [dpdk-dev] [PATCH] net/af_xdp: enable support for unaligned umem chunks
  2019-09-02  8:55     ` Loftus, Ciara
@ 2019-09-02 14:44       ` William Tu
  0 siblings, 0 replies; 8+ messages in thread
From: William Tu @ 2019-09-02 14:44 UTC (permalink / raw)
  To: Loftus, Ciara; +Cc: dev, Ye, Xiaolong, Richardson, Bruce, Laatz, Kevin

On Mon, Sep 2, 2019 at 1:55 AM Loftus, Ciara <ciara.loftus@intel.com> wrote:
>
> > > Hi Ciara,
> > >
> > > I haven't tried this patch but have a question.
> > >
> > > On Thu, Aug 29, 2019 at 8:04 AM Ciara Loftus <ciara.loftus@intel.com>
> > wrote:
> > > >
> > > > This patch enables the unaligned chunks feature for AF_XDP which
> > > > allows chunks to be placed at arbitrary places in the umem, as
> > > > opposed to them being required to be aligned to 2k. This allows for
> > > > DPDK application mempools to be mapped directly into the umem and in
> > > > turn enable zero copy transfer between umem and the PMD.
> > > >
> > > > This patch replaces the zero copy via external mbuf mechanism
> > > > introduced in commit e9ff8bb71943 ("net/af_xdp: enable zero copy by
> > > external mbuf").
> > > > The pmd_zero copy vdev argument is also removed as now the PMD will
> > > > auto-detect presence of the unaligned chunks feature and enable it
> > > > if so and otherwise fall back to copy mode if not detected.
> > > >
> > > > When enabled, this feature significantly improves single-core
> > > > performance of the PMD.
> > >
> > > Why using unaligned chunk feature improve performance?
> > > Existing external mbuf already has zero copy between umem and PMD,
> > and
> > > your patch also does the same thing. So the improvement is from
> > > somewhere else?
> >
> > Hi William,
> >
> > Good question.
> > The external mbuf way indeed has zero copy however there's some
> > additional complexity in that path in the management of the buf_ring.
> >
> > For example on the fill/rx path, in the ext mbuf solution one must dequeue
> > an addr from the buf_ring and add it to the fill queue, allocate an mbuf for
> > the external mbuf, get a pointer to the data @ addr and attach the external
> > mbuf. With the new solution, we allocate an mbuf from the mempool, derive
> > the addr from the mbuf itself and add it to the fill queue, and then on rx we
> > can simply cast the pointer to the data @ addr to an mbuf and return it to the
> > user.
> > On tx/complete, instead of dequeuing from the buf_ring to get a valid addr
> > we can again just derive it from the mbuf itself.
> >
> > I've performed some testing to compare the old vs new zc and found that for
> > the case where the PMD and IRQs are pinned to separate cores the
> > difference is ~-5%, but for single-core case where the PMD and IRQs are
> > pinned to the same core (with the need_wakeup feature enabled), or when
> > multiple PMDs are forwarding to one another the difference is significant.
> > Please see below:
> >
> > ports      queues/port pinning    Δ old zc
> > 1          1           0          -4.74%
> > 1          1           1          17.99%
> > 2          1           0          -5.62%
> > 2          1           1          71.77%
> > 1          2           0          114.24%
> > 1          2           1          134.88%
>
> Apologies, the last 4 figures above were comparing old memcpy vs zc. Corrected data set below:
>
> ports      qs/port     pinning    Δ old zc
> 1          1           0          -4.74%
> 1          1           1          17.99%
> 2          1           0          -5.80%
> 2          1           1          37.24%
> 1          2           0          104.27%
> 1          2           1          136.73%
>
> >
> > FYI the series has been now merged into the bpf-next tree:
> > https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-
> > next.git/commit/?id=bdb15a29cc28f8155e20f7fb58b60ffc452f2d1b
> >

Great, thanks for sharing the number and explanation.
William

> > Thanks,
> > Ciara
> >
> > >
> > > Thank you
> > > William
> > >
> > > >
> > > > Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> > > > Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
> > > > ---
> > > >  doc/guides/nics/af_xdp.rst             |   1 -
> > > >  doc/guides/rel_notes/release_19_11.rst |   9 +
> > > >  drivers/net/af_xdp/rte_eth_af_xdp.c    | 304 ++++++++++++++++++----
> > --
> > > -
> > > >  3 files changed, 231 insertions(+), 83 deletions(-)
> > > >
> > > <snip>


* Re: [dpdk-dev] [PATCH] net/af_xdp: enable support for unaligned umem chunks
  2019-08-29 15:02 [dpdk-dev] [PATCH] net/af_xdp: enable support for unaligned umem chunks Ciara Loftus
  2019-08-30  7:47 ` Loftus, Ciara
  2019-08-30 16:07 ` William Tu
@ 2019-09-03 22:02 ` Ye Xiaolong
  2019-09-04 10:30   ` Loftus, Ciara
  2 siblings, 1 reply; 8+ messages in thread
From: Ye Xiaolong @ 2019-09-03 22:02 UTC (permalink / raw)
  To: Ciara Loftus; +Cc: dev, bruce.richardson, Kevin Laatz

Hi, Ciara

Thanks for the patch. The performance numbers are quite impressive.

On 08/29, Ciara Loftus wrote:
>This patch enables the unaligned chunks feature for AF_XDP which allows
>chunks to be placed at arbitrary places in the umem, as opposed to them
>being required to be aligned to 2k. This allows for DPDK application
>mempools to be mapped directly into the umem and in turn enable zero copy
>transfer between umem and the PMD.
>
>This patch replaces the zero copy via external mbuf mechanism introduced
>in commit e9ff8bb71943 ("net/af_xdp: enable zero copy by external mbuf").
>The pmd_zero copy vdev argument is also removed as now the PMD will
>auto-detect presence of the unaligned chunks feature and enable it if so
>and otherwise fall back to copy mode if not detected.
>
>When enabled, this feature significantly improves single-core performance
>of the PMD.
>
>Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
>Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
>---
> doc/guides/nics/af_xdp.rst             |   1 -
> doc/guides/rel_notes/release_19_11.rst |   9 +
> drivers/net/af_xdp/rte_eth_af_xdp.c    | 304 ++++++++++++++++++-------
> 3 files changed, 231 insertions(+), 83 deletions(-)
>
>diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
>index ec46f08f0..48dd788ac 100644
>--- a/doc/guides/nics/af_xdp.rst
>+++ b/doc/guides/nics/af_xdp.rst
>@@ -35,7 +35,6 @@ The following options can be provided to set up an af_xdp port in DPDK.
> *   ``iface`` - name of the Kernel interface to attach to (required);
> *   ``start_queue`` - starting netdev queue id (optional, default 0);
> *   ``queue_count`` - total netdev queue number (optional, default 1);
>-*   ``pmd_zero_copy`` - enable zero copy or not (optional, default 0);
> 
> Prerequisites
> -------------
>diff --git a/doc/guides/rel_notes/release_19_11.rst b/doc/guides/rel_notes/release_19_11.rst
>index 8490d897c..28a8e5372 100644
>--- a/doc/guides/rel_notes/release_19_11.rst
>+++ b/doc/guides/rel_notes/release_19_11.rst
>@@ -56,6 +56,13 @@ New Features
>      Also, make sure to start the actual text at the margin.
>      =========================================================
> 
>+* **Updated the AF_XDP PMD.**
>+
>+  Updated the AF_XDP PMD. The new features include:
>+
>+  * Enabled zero copy between application mempools and UMEM by enabling the
>+    XDP_UMEM_UNALIGNED_CHUNKS UMEM flag.
>+

Better to document the kernel dependency in the af_xdp.rst.

> 
> Removed Items
> -------------
>@@ -69,6 +76,8 @@ Removed Items
>    Also, make sure to start the actual text at the margin.
>    =========================================================
> 
>+* Removed AF_XDP pmd_zero copy vdev argument. Support is now auto-detected.
>+
> 
> API Changes
> -----------
>diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
>index 41ed5b2af..7956d5778 100644
>--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
>+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
[snip]
> reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
> {
> 	struct xsk_ring_prod *fq = &umem->fq;
>-	void *addrs[reserve_size];
> 	uint32_t idx;
> 	uint16_t i;
>+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
>+
>+	if (unlikely(!xsk_ring_prod__reserve(fq, reserve_size, &idx))) {
>+		AF_XDP_LOG(DEBUG, "Failed to reserve enough fq descs.\n");
>+		return -1;
>+	}
>+
>+	for (i = 0; i < reserve_size; i++) {
>+		struct rte_mbuf *mbuf;
>+		__u64 *fq_addr;
>+		uint64_t addr;
>+
>+		mbuf = rte_pktmbuf_alloc(umem->mb_pool);
>+		if (unlikely(mbuf == NULL))
>+			break;

If this rare case happens, not all of the reserved slots of the fq will be filled
with a proper mbuf addr, yet we still call xsk_ring_prod__submit(fq, reserve_size)
and let the kernel receive packets on those addrs, so something unexpected may happen.
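
One way to avoid that (just a sketch of the idea, not necessarily how the v2
should do it) is to allocate the whole batch up front with
rte_pktmbuf_alloc_bulk() and only touch the fill ring once every buffer is in
hand, so either all reserved slots get valid addresses or nothing is submitted:

#include <stdint.h>
#include <bpf/xsk.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

static int
reserve_fill_queue_all_or_nothing(struct rte_mempool *mb_pool, void *umem_buffer,
				  struct xsk_ring_prod *fq, uint16_t reserve_size)
{
	struct rte_mbuf *mbufs[reserve_size];
	uint32_t idx;
	uint16_t i;

	/* Get every buffer before reserving any fill ring slots. */
	if (rte_pktmbuf_alloc_bulk(mb_pool, mbufs, reserve_size) != 0)
		return -1;

	if (xsk_ring_prod__reserve(fq, reserve_size, &idx) != reserve_size) {
		for (i = 0; i < reserve_size; i++)
			rte_pktmbuf_free(mbufs[i]);
		return -1;
	}

	for (i = 0; i < reserve_size; i++)
		*xsk_ring_prod__fill_addr(fq, idx++) =
			(uint64_t)mbufs[i] - (uint64_t)umem_buffer;

	/* Every submitted slot now carries a valid mbuf address. */
	xsk_ring_prod__submit(fq, reserve_size);
	return 0;
}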

Thanks,
Xiaolong

>+
>+		fq_addr = xsk_ring_prod__fill_addr(fq, idx++);
>+		addr = (uint64_t)mbuf - (uint64_t)umem->buffer;
>+		*fq_addr = addr;
>+	}
>+#else
>+	void *addrs[reserve_size];
> 
> 	if (rte_ring_dequeue_bulk(umem->buf_ring, addrs, reserve_size, NULL)
> 		    != reserve_size) {
>@@ -171,21 +198,13 @@ reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
> 		fq_addr = xsk_ring_prod__fill_addr(fq, idx++);
> 		*fq_addr = (uint64_t)addrs[i];
> 	}
>+#endif
> 
> 	xsk_ring_prod__submit(fq, reserve_size);
> 
> 	return 0;
> }
> 
>-static void
>-umem_buf_release_to_fq(void *addr, void *opaque)
>-{
>-	struct xsk_umem_info *umem = (struct xsk_umem_info *)opaque;
>-	uint64_t umem_addr = (uint64_t)addr - umem->mz->addr_64;
>-
>-	rte_ring_enqueue(umem->buf_ring, (void *)umem_addr);
>-}
>-
> static uint16_t
> eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> {
>@@ -194,12 +213,53 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> 	struct xsk_umem_info *umem = rxq->umem;
> 	struct xsk_ring_prod *fq = &umem->fq;
> 	uint32_t idx_rx = 0;
>-	uint32_t free_thresh = fq->size >> 1;
>-	int pmd_zc = umem->pmd_zc;
>-	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
> 	unsigned long dropped = 0;
> 	unsigned long rx_bytes = 0;
> 	int rcvd, i;
>+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
>+
>+	rcvd = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx);
>+	if (rcvd == 0) {
>+#if defined(XDP_USE_NEED_WAKEUP)
>+		if (xsk_ring_prod__needs_wakeup(fq))
>+			(void)poll(rxq->fds, 1, 1000);
>+#endif
>+
>+		return rcvd;
>+	}
>+
>+	for (i = 0; i < rcvd; i++) {
>+		const struct xdp_desc *desc;
>+		uint64_t addr;
>+		uint32_t len;
>+		uint64_t offset;
>+
>+		desc = xsk_ring_cons__rx_desc(rx, idx_rx++);
>+		addr = desc->addr;
>+		len = desc->len;
>+
>+		offset = xsk_umem__extract_offset(addr);
>+		addr = xsk_umem__extract_addr(addr);
>+
>+		bufs[i] = (struct rte_mbuf *)
>+				xsk_umem__get_data(umem->buffer, addr);
>+		bufs[i]->data_off = offset - sizeof(struct rte_mbuf);
>+
>+		rte_pktmbuf_pkt_len(bufs[i]) = len;
>+		rte_pktmbuf_data_len(bufs[i]) = len;
>+		rx_bytes += len;
>+	}
>+
>+	xsk_ring_cons__release(rx, rcvd);
>+
>+	/* statistics */
>+	rxq->stats.rx_pkts += (rcvd - dropped);
>+	rxq->stats.rx_bytes += rx_bytes;
>+
>+	(void)reserve_fill_queue(umem, rcvd);
>+#else
>+	uint32_t free_thresh = fq->size >> 1;
>+	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
> 
> 	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_RX_BATCH_SIZE);
> 
>@@ -224,25 +284,14 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> 		uint64_t addr;
> 		uint32_t len;
> 		void *pkt;
>-		uint16_t buf_len = ETH_AF_XDP_FRAME_SIZE;
>-		struct rte_mbuf_ext_shared_info *shinfo;
> 
> 		desc = xsk_ring_cons__rx_desc(rx, idx_rx++);
> 		addr = desc->addr;
> 		len = desc->len;
> 		pkt = xsk_umem__get_data(rxq->umem->mz->addr, addr);
> 
>-		if (pmd_zc) {
>-			shinfo = rte_pktmbuf_ext_shinfo_init_helper(pkt,
>-					&buf_len, umem_buf_release_to_fq, umem);
>-
>-			rte_pktmbuf_attach_extbuf(mbufs[i], pkt, 0, buf_len,
>-						  shinfo);
>-		} else {
>-			rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *),
>-							pkt, len);
>-			rte_ring_enqueue(umem->buf_ring, (void *)addr);
>-		}
>+		rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len);
>+		rte_ring_enqueue(umem->buf_ring, (void *)addr);
> 		rte_pktmbuf_pkt_len(mbufs[i]) = len;
> 		rte_pktmbuf_data_len(mbufs[i]) = len;
> 		rx_bytes += len;
>@@ -259,6 +308,7 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> 	if (rcvd != nb_pkts)
> 		rte_mempool_put_bulk(rxq->mb_pool, (void **)&mbufs[rcvd],
> 				     nb_pkts - rcvd);
>+#endif
> 
> 	return rcvd;
> }
>@@ -275,7 +325,14 @@ pull_umem_cq(struct xsk_umem_info *umem, int size)
> 	for (i = 0; i < n; i++) {
> 		uint64_t addr;
> 		addr = *xsk_ring_cons__comp_addr(cq, idx_cq++);
>+
>+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
>+		addr = xsk_umem__extract_addr(addr);
>+		rte_pktmbuf_free((struct rte_mbuf *)
>+					xsk_umem__get_data(umem->buffer, addr));
>+#else
> 		rte_ring_enqueue(umem->buf_ring, (void *)addr);
>+#endif
> 	}
> 
> 	xsk_ring_cons__release(cq, n);
>@@ -284,7 +341,7 @@ pull_umem_cq(struct xsk_umem_info *umem, int size)
> static void
> kick_tx(struct pkt_tx_queue *txq)
> {
>-	struct xsk_umem_info *umem = txq->pair->umem;
>+	struct xsk_umem_info *umem = txq->umem;
> 
> #if defined(XDP_USE_NEED_WAKEUP)
> 	if (xsk_ring_prod__needs_wakeup(&txq->tx))
>@@ -299,28 +356,70 @@ kick_tx(struct pkt_tx_queue *txq)
> 			if (errno == EAGAIN)
> 				pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
> 		}
>-	pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
>-}
>-
>-static inline bool
>-in_umem_range(struct xsk_umem_info *umem, uint64_t addr)
>-{
>-	uint64_t mz_base_addr = umem->mz->addr_64;
>-
>-	return addr >= mz_base_addr && addr < mz_base_addr + umem->mz->len;
> }
> 
> static uint16_t
> eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> {
> 	struct pkt_tx_queue *txq = queue;
>-	struct xsk_umem_info *umem = txq->pair->umem;
>+	struct xsk_umem_info *umem = txq->umem;
> 	struct rte_mbuf *mbuf;
>-	int pmd_zc = umem->pmd_zc;
>-	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
> 	unsigned long tx_bytes = 0;
> 	int i;
> 	uint32_t idx_tx;
>+	uint16_t dropped = 0;
>+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
>+
>+	pull_umem_cq(umem, nb_pkts);
>+
>+	nb_pkts = xsk_ring_prod__reserve(&txq->tx, nb_pkts, &idx_tx);
>+
>+	for (i = 0; i < nb_pkts; i++) {
>+		struct xdp_desc *desc;
>+		uint64_t addr, offset;
>+
>+		desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i);
>+		mbuf = bufs[i];
>+		desc->len = mbuf->pkt_len;
>+
>+		if (mbuf->pool == umem->mb_pool) {
>+			addr = (uint64_t)mbuf - (uint64_t)umem->buffer;
>+			offset = rte_pktmbuf_mtod(mbuf, uint64_t) -
>+					(uint64_t)mbuf;
>+			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
>+			desc->addr = addr;
>+			desc->addr |= offset;
>+		} else {
>+			struct rte_mbuf *local_mbuf =
>+					rte_pktmbuf_alloc(umem->mb_pool);
>+			void *pkt;
>+
>+			if (!local_mbuf) {
>+				rte_pktmbuf_free(mbuf);
>+				dropped++;
>+				continue;
>+			}
>+
>+			addr = (uint64_t)local_mbuf - (uint64_t)umem->buffer;
>+			offset = rte_pktmbuf_mtod(local_mbuf, uint64_t) -
>+					(uint64_t)local_mbuf;
>+			pkt = xsk_umem__get_data(umem->buffer, addr + offset);
>+			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
>+			desc->addr = addr;
>+			desc->addr |= offset;
>+			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
>+					desc->len);
>+			rte_pktmbuf_free(mbuf);
>+		}
>+
>+		tx_bytes += mbuf->pkt_len;
>+	}
>+
>+	xsk_ring_prod__submit(&txq->tx, nb_pkts - dropped);
>+
>+	kick_tx(txq);
>+#else
>+	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
> 
> 	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE);
> 
>@@ -333,6 +432,7 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> 
> 	if (xsk_ring_prod__reserve(&txq->tx, nb_pkts, &idx_tx) != nb_pkts) {
> 		kick_tx(txq);
>+		pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
> 		rte_ring_enqueue_bulk(umem->buf_ring, addrs, nb_pkts, NULL);
> 		return 0;
> 	}
>@@ -345,35 +445,23 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> 		mbuf = bufs[i];
> 		desc->len = mbuf->pkt_len;
> 
>-		/*
>-		 * We need to make sure the external mbuf address is within
>-		 * current port's umem memzone range
>-		 */
>-		if (pmd_zc && RTE_MBUF_HAS_EXTBUF(mbuf) &&
>-				in_umem_range(umem, (uint64_t)mbuf->buf_addr)) {
>-			desc->addr = (uint64_t)mbuf->buf_addr -
>-				umem->mz->addr_64;
>-			mbuf->buf_addr = xsk_umem__get_data(umem->mz->addr,
>-					(uint64_t)addrs[i]);
>-		} else {
>-			desc->addr = (uint64_t)addrs[i];
>-			pkt = xsk_umem__get_data(umem->mz->addr,
>-					desc->addr);
>-			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
>-					desc->len);
>-		}
>+		desc->addr = (uint64_t)addrs[i];
>+		pkt = xsk_umem__get_data(umem->mz->addr,
>+					 desc->addr);
>+		rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), desc->len);
> 		tx_bytes += mbuf->pkt_len;
>+		rte_pktmbuf_free(mbuf);
> 	}
> 
> 	xsk_ring_prod__submit(&txq->tx, nb_pkts);
> 
> 	kick_tx(txq);
>+	pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
>+#endif
> 
>-	txq->stats.tx_pkts += nb_pkts;
>+	txq->stats.tx_pkts += nb_pkts - dropped;
> 	txq->stats.tx_bytes += tx_bytes;
>-
>-	for (i = 0; i < nb_pkts; i++)
>-		rte_pktmbuf_free(bufs[i]);
>+	txq->stats.tx_dropped += dropped;
> 
> 	return nb_pkts;
> }
>@@ -446,6 +534,7 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
> 		stats->ipackets += stats->q_ipackets[i];
> 		stats->ibytes += stats->q_ibytes[i];
> 		stats->imissed += rxq->stats.rx_dropped;
>+		stats->oerrors += txq->stats.tx_dropped;
> 		ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP,
> 				XDP_STATISTICS, &xdp_stats, &optlen);
> 		if (ret != 0) {
>@@ -492,11 +581,16 @@ remove_xdp_program(struct pmd_internals *internals)
> static void
> xdp_umem_destroy(struct xsk_umem_info *umem)
> {
>+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
>+	rte_mempool_free(umem->mb_pool);
>+	umem->mb_pool = NULL;
>+#else
> 	rte_memzone_free(umem->mz);
> 	umem->mz = NULL;
> 
> 	rte_ring_free(umem->buf_ring);
> 	umem->buf_ring = NULL;
>+#endif
> 
> 	rte_free(umem);
> 	umem = NULL;
>@@ -546,6 +640,55 @@ eth_link_update(struct rte_eth_dev *dev __rte_unused,
> 	return 0;
> }
> 
>+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
>+static inline uint64_t get_base_addr(struct rte_mempool *mp)
>+{
>+	struct rte_mempool_memhdr *memhdr;
>+
>+	memhdr = STAILQ_FIRST(&mp->mem_list);
>+	return (uint64_t)memhdr->addr & ~(getpagesize() - 1);
>+}
>+
>+static struct
>+xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals __rte_unused,
>+				  struct pkt_rx_queue *rxq)
>+{
>+	struct xsk_umem_info *umem;
>+	int ret;
>+	struct xsk_umem_config usr_config = {
>+		.fill_size = ETH_AF_XDP_DFLT_NUM_DESCS,
>+		.comp_size = ETH_AF_XDP_DFLT_NUM_DESCS,
>+		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG};
>+	void *base_addr = NULL;
>+	struct rte_mempool *mb_pool = rxq->mb_pool;
>+
>+	usr_config.frame_size = rte_pktmbuf_data_room_size(mb_pool) +
>+					ETH_AF_XDP_MBUF_OVERHEAD +
>+					mb_pool->private_data_size;
>+	usr_config.frame_headroom = ETH_AF_XDP_DATA_HEADROOM +
>+					mb_pool->private_data_size;
>+
>+	umem = rte_zmalloc_socket("umem", sizeof(*umem), 0, rte_socket_id());
>+	if (umem == NULL) {
>+		AF_XDP_LOG(ERR, "Failed to allocate umem info");
>+		return NULL;
>+	}
>+
>+	umem->mb_pool = mb_pool;
>+	base_addr = (void *)get_base_addr(mb_pool);
>+
>+	ret = xsk_umem__create(&umem->umem, base_addr,
>+			       mb_pool->populated_size * usr_config.frame_size,
>+			       &umem->fq, &umem->cq,
>+			       &usr_config);
>+
>+	if (ret) {
>+		AF_XDP_LOG(ERR, "Failed to create umem");
>+		goto err;
>+	}
>+	umem->buffer = base_addr;
>+
>+#else
> static struct
> xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals,
> 				  struct pkt_rx_queue *rxq)
>@@ -606,6 +749,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals,
> 	}
> 	umem->mz = mz;
> 
>+#endif
> 	return umem;
> 
> err:
>@@ -625,6 +769,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
> 	rxq->umem = xdp_umem_configure(internals, rxq);
> 	if (rxq->umem == NULL)
> 		return -ENOMEM;
>+	txq->umem = rxq->umem;
> 
> 	cfg.rx_size = ring_size;
> 	cfg.tx_size = ring_size;
>@@ -669,7 +814,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
> 		   struct rte_mempool *mb_pool)
> {
> 	struct pmd_internals *internals = dev->data->dev_private;
>-	uint32_t buf_size, data_size;
> 	struct pkt_rx_queue *rxq;
> 	int ret;
> 
>@@ -677,6 +821,10 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
> 
> 	AF_XDP_LOG(INFO, "Set up rx queue, rx queue id: %d, xsk queue id: %d\n",
> 		   rx_queue_id, rxq->xsk_queue_idx);
>+
>+#ifndef XDP_UMEM_UNALIGNED_CHUNK_FLAG
>+	uint32_t buf_size, data_size;
>+
> 	/* Now get the space available for data in the mbuf */
> 	buf_size = rte_pktmbuf_data_room_size(mb_pool) -
> 		RTE_PKTMBUF_HEADROOM;
>@@ -688,6 +836,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
> 		ret = -ENOMEM;
> 		goto err;
> 	}
>+#endif
> 
> 	rxq->mb_pool = mb_pool;
> 
>@@ -700,8 +849,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
> 	rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
> 	rxq->fds[0].events = POLLIN;
> 
>-	rxq->umem->pmd_zc = internals->pmd_zc;
>-
> 	dev->data->rx_queues[rx_queue_id] = rxq;
> 	return 0;
> 
>@@ -877,7 +1024,7 @@ xdp_get_channels_info(const char *if_name, int *max_queues,
> 
> static int
> parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
>-			int *queue_cnt, int *pmd_zc)
>+			int *queue_cnt)
> {
> 	int ret;
> 
>@@ -898,11 +1045,6 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
> 		goto free_kvlist;
> 	}
> 
>-	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PMD_ZC_ARG,
>-				 &parse_integer_arg, pmd_zc);
>-	if (ret < 0)
>-		goto free_kvlist;
>-
> free_kvlist:
> 	rte_kvargs_free(kvlist);
> 	return ret;
>@@ -940,7 +1082,7 @@ get_iface_info(const char *if_name,
> 
> static struct rte_eth_dev *
> init_internals(struct rte_vdev_device *dev, const char *if_name,
>-			int start_queue_idx, int queue_cnt, int pmd_zc)
>+			int start_queue_idx, int queue_cnt)
> {
> 	const char *name = rte_vdev_device_name(dev);
> 	const unsigned int numa_node = dev->device.numa_node;
>@@ -955,7 +1097,6 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
> 
> 	internals->start_queue_idx = start_queue_idx;
> 	internals->queue_cnt = queue_cnt;
>-	internals->pmd_zc = pmd_zc;
> 	strlcpy(internals->if_name, if_name, IFNAMSIZ);
> 
> 	if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
>@@ -1011,8 +1152,9 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
> 	/* Let rte_eth_dev_close() release the port resources. */
> 	eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
> 
>-	if (internals->pmd_zc)
>-		AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
>+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
>+	AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
>+#endif
> 
> 	return eth_dev;
> 
>@@ -1034,7 +1176,6 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
> 	int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
> 	struct rte_eth_dev *eth_dev = NULL;
> 	const char *name;
>-	int pmd_zc = 0;
> 
> 	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
> 		rte_vdev_device_name(dev));
>@@ -1062,7 +1203,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
> 		dev->device.numa_node = rte_socket_id();
> 
> 	if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
>-			     &xsk_queue_cnt, &pmd_zc) < 0) {
>+			     &xsk_queue_cnt) < 0) {
> 		AF_XDP_LOG(ERR, "Invalid kvargs value\n");
> 		return -EINVAL;
> 	}
>@@ -1073,7 +1214,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
> 	}
> 
> 	eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
>-					xsk_queue_cnt, pmd_zc);
>+					xsk_queue_cnt);
> 	if (eth_dev == NULL) {
> 		AF_XDP_LOG(ERR, "Failed to init internals\n");
> 		return -1;
>@@ -1116,8 +1257,7 @@ RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv);
> RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
> 			      "iface=<string> "
> 			      "start_queue=<int> "
>-			      "queue_count=<int> "
>-			      "pmd_zero_copy=<0|1>");
>+			      "queue_count=<int>");
> 
> RTE_INIT(af_xdp_init_log)
> {
>-- 
>2.17.1
>


* Re: [dpdk-dev] [PATCH] net/af_xdp: enable support for unaligned umem chunks
  2019-09-03 22:02 ` Ye Xiaolong
@ 2019-09-04 10:30   ` Loftus, Ciara
  0 siblings, 0 replies; 8+ messages in thread
From: Loftus, Ciara @ 2019-09-04 10:30 UTC (permalink / raw)
  To: Ye, Xiaolong; +Cc: dev, Richardson, Bruce, Laatz, Kevin

> 
> Hi, Ciara
> 
> Thanks for the patch, the performance number is quite impressive.
> 
> On 08/29, Ciara Loftus wrote:
> >This patch enables the unaligned chunks feature for AF_XDP which allows
> >chunks to be placed at arbitrary places in the umem, as opposed to them
> >being required to be aligned to 2k. This allows for DPDK application
> >mempools to be mapped directly into the umem and in turn enable zero
> >copy transfer between umem and the PMD.
> >
> >This patch replaces the zero copy via external mbuf mechanism
> >introduced in commit e9ff8bb71943 ("net/af_xdp: enable zero copy by
> external mbuf").
> >The pmd_zero copy vdev argument is also removed as now the PMD will
> >auto-detect presence of the unaligned chunks feature and enable it if
> >so and otherwise fall back to copy mode if not detected.
> >
> >When enabled, this feature significantly improves single-core
> >performance of the PMD.
> >
> >Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> >Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
> >---
> > doc/guides/nics/af_xdp.rst             |   1 -
> > doc/guides/rel_notes/release_19_11.rst |   9 +
> > drivers/net/af_xdp/rte_eth_af_xdp.c    | 304 ++++++++++++++++++-------
> > 3 files changed, 231 insertions(+), 83 deletions(-)
> >
> >diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
> >index ec46f08f0..48dd788ac 100644
> >--- a/doc/guides/nics/af_xdp.rst
> >+++ b/doc/guides/nics/af_xdp.rst
> >@@ -35,7 +35,6 @@ The following options can be provided to set up an
> af_xdp port in DPDK.
> > *   ``iface`` - name of the Kernel interface to attach to (required);
> > *   ``start_queue`` - starting netdev queue id (optional, default 0);
> > *   ``queue_count`` - total netdev queue number (optional, default 1);
> >-*   ``pmd_zero_copy`` - enable zero copy or not (optional, default 0);
> >
> > Prerequisites
> > -------------
> >diff --git a/doc/guides/rel_notes/release_19_11.rst
> >b/doc/guides/rel_notes/release_19_11.rst
> >index 8490d897c..28a8e5372 100644
> >--- a/doc/guides/rel_notes/release_19_11.rst
> >+++ b/doc/guides/rel_notes/release_19_11.rst
> >@@ -56,6 +56,13 @@ New Features
> >      Also, make sure to start the actual text at the margin.
> >
> =========================================================
> >
> >+* **Updated the AF_XDP PMD.**
> >+
> >+  Updated the AF_XDP PMD. The new features include:
> >+
> >+  * Enabled zero copy between application mempools and UMEM by
> enabling the
> >+    XDP_UMEM_UNALIGNED_CHUNKS UMEM flag.
> >+
> 
> Better to document the kernel dependency in the af_xdp.rst.

Will do.

> 
> >
> > Removed Items
> > -------------
> >@@ -69,6 +76,8 @@ Removed Items
> >    Also, make sure to start the actual text at the margin.
> >
> =========================================================
> >
> >+* Removed AF_XDP pmd_zero copy vdev argument. Support is now auto-
> detected.
> >+
> >
> > API Changes
> > -----------
> >diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c
> >b/drivers/net/af_xdp/rte_eth_af_xdp.c
> >index 41ed5b2af..7956d5778 100644
> >--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> >+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
> [snip]
> > reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
> >{
> > 	struct xsk_ring_prod *fq = &umem->fq;
> >-	void *addrs[reserve_size];
> > 	uint32_t idx;
> > 	uint16_t i;
> >+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
> >+
> >+	if (unlikely(!xsk_ring_prod__reserve(fq, reserve_size, &idx))) {
> >+		AF_XDP_LOG(DEBUG, "Failed to reserve enough fq
> descs.\n");
> >+		return -1;
> >+	}
> >+
> >+	for (i = 0; i < reserve_size; i++) {
> >+		struct rte_mbuf *mbuf;
> >+		__u64 *fq_addr;
> >+		uint64_t addr;
> >+
> >+		mbuf = rte_pktmbuf_alloc(umem->mb_pool);
> >+		if (unlikely(mbuf == NULL))
> >+			break;
> 
> If this rare case happens, not all of the reserved slots of fq will be filled with
> proper mbuf addr, then we just call xsk_ring_prod__submit(fq,
> reserve_size) to let kernel receive packets on these addrs, something
> unexpected may happen.

Good catch. I'll fix this in the v2.

Thanks!
Ciara

> 
> Thanks,
> Xiaolong
> 
> >+


