DPDK patches and discussions
 help / color / mirror / Atom feed
From: Qi Zhang <qi.z.zhang@intel.com>
To: dev@dpdk.org
Cc: magnus.karlsson@intel.com, bjorn.topel@intel.com,
	Qi Zhang <qi.z.zhang@intel.com>
Subject: [dpdk-dev] [RFC v2 5/7] net/af_xdp: enable share mempool
Date: Thu,  8 Mar 2018 21:52:47 +0800	[thread overview]
Message-ID: <20180308135249.28187-6-qi.z.zhang@intel.com> (raw)
In-Reply-To: <20180308135249.28187-1-qi.z.zhang@intel.com>

Check whether the external mempool (from rx_queue_setup) is suitable for
af_xdp. If it is, it will be registered to the af_xdp socket directly, and
there will be no packet data copy on Rx and Tx.

Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 193 +++++++++++++++++++++++-------------
 1 file changed, 126 insertions(+), 67 deletions(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 65c4c37bf..7e839f0da 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -56,7 +56,6 @@ struct xdp_umem {
 	unsigned int frame_size;
 	unsigned int frame_size_log2;
 	unsigned int nframes;
-	int mr_fd;
 	struct rte_mempool *mb_pool;
 };
 
@@ -69,6 +68,7 @@ struct pmd_internals {
 	struct xdp_queue tx;
 	struct xdp_umem *umem;
 	struct rte_mempool *ext_mb_pool;
+	uint8_t share_mb_pool;
 
 	unsigned long rx_pkts;
 	unsigned long rx_bytes;
@@ -159,20 +159,30 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		char *pkt;
 		uint32_t idx = descs[i].idx;
 
-		mbuf = rte_pktmbuf_alloc(internals->ext_mb_pool);
-		rte_pktmbuf_pkt_len(mbuf) =
-			rte_pktmbuf_data_len(mbuf) =
-			descs[i].len;
-		if (mbuf) {
-			pkt = get_pkt_data(internals, idx, descs[i].offset);
-			memcpy(rte_pktmbuf_mtod(mbuf, void *),
-			       pkt, descs[i].len);
-			rx_bytes += descs[i].len;
-			bufs[count++] = mbuf;
+		if (!internals->share_mb_pool) {
+			mbuf = rte_pktmbuf_alloc(internals->ext_mb_pool);
+			rte_pktmbuf_pkt_len(mbuf) =
+				rte_pktmbuf_data_len(mbuf) =
+				descs[i].len;
+			if (mbuf) {
+				pkt = get_pkt_data(internals, idx,
+						   descs[i].offset);
+				memcpy(rte_pktmbuf_mtod(mbuf, void *), pkt,
+				       descs[i].len);
+				rx_bytes += descs[i].len;
+				bufs[count++] = mbuf;
+			} else {
+				dropped++;
+			}
+			rte_pktmbuf_free(idx_to_mbuf(internals, idx));
 		} else {
-			dropped++;
+			mbuf = idx_to_mbuf(internals, idx);
+			rte_pktmbuf_pkt_len(mbuf) =
+				rte_pktmbuf_data_len(mbuf) =
+				descs[i].len;
+			bufs[count++] = mbuf;
+			rx_bytes += descs[i].len;
 		}
-		rte_pktmbuf_free(idx_to_mbuf(internals, idx));
 	}
 
 	internals->rx_pkts += (rcvd - dropped);
@@ -206,52 +216,72 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	uint16_t i, valid;
 	unsigned long tx_bytes = 0;
 	int ret;
+	uint8_t share_mempool = 0;
 
 	nb_pkts = nb_pkts < ETH_AF_XDP_TX_BATCH_SIZE ?
 		  nb_pkts : ETH_AF_XDP_TX_BATCH_SIZE;
 
 	if (txq->num_free < ETH_AF_XDP_TX_BATCH_SIZE * 2) {
 		int n = xq_deq(txq, descs, ETH_AF_XDP_TX_BATCH_SIZE);
-
 		for (i = 0; i < n; i++)
 			rte_pktmbuf_free(idx_to_mbuf(internals, descs[i].idx));
 	}
 
 	nb_pkts = nb_pkts > txq->num_free ? txq->num_free : nb_pkts;
-	ret = rte_mempool_get_bulk(internals->umem->mb_pool,
-				   (void *)mbufs,
-				   nb_pkts);
-	if (ret)
+	if (nb_pkts == 0)
 		return 0;
 
+	if (bufs[0]->pool == internals->ext_mb_pool && internals->share_mb_pool)
+		share_mempool = 1;
+
+	if (!share_mempool) {
+		ret = rte_mempool_get_bulk(internals->umem->mb_pool,
+					   (void *)mbufs,
+					   nb_pkts);
+		if (ret)
+			return 0;
+	}
+
 	valid = 0;
 	for (i = 0; i < nb_pkts; i++) {
 		char *pkt;
-		unsigned int buf_len =
-			internals->umem->frame_size - ETH_AF_XDP_DATA_HEADROOM;
 		mbuf = bufs[i];
-		if (mbuf->pkt_len <= buf_len) {
-			descs[valid].idx = mbuf_to_idx(internals, mbufs[i]);
-			descs[valid].offset = ETH_AF_XDP_DATA_HEADROOM;
-			descs[valid].flags = 0;
-			descs[valid].len = mbuf->pkt_len;
-			pkt = get_pkt_data(internals, descs[i].idx,
-					   descs[i].offset);
-			memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
-					   descs[i].len);
-			valid++;
+		if (!share_mempool) {
+			if (mbuf->pkt_len <=
+				(internals->umem->frame_size -
+				 ETH_AF_XDP_DATA_HEADROOM)) {
+				descs[valid].idx =
+					mbuf_to_idx(internals, mbufs[i]);
+				descs[valid].offset = ETH_AF_XDP_DATA_HEADROOM;
+				descs[valid].flags = 0;
+				descs[valid].len = mbuf->pkt_len;
+				pkt = get_pkt_data(internals, descs[i].idx,
+						   descs[i].offset);
+				memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
+				       descs[i].len);
+				valid++;
+				tx_bytes += mbuf->pkt_len;
+			}
+			/* packet will be consumed anyway */
+			rte_pktmbuf_free(mbuf);
+		} else {
+			descs[i].idx = mbuf_to_idx(internals, mbuf);
+			descs[i].offset = ETH_AF_XDP_DATA_HEADROOM;
+			descs[i].flags = 0;
+			descs[i].len = mbuf->pkt_len;
 			tx_bytes += mbuf->pkt_len;
+			valid++;
 		}
-		/* packet will be consumed anyway */
-		rte_pktmbuf_free(mbuf);
 	}
 
 	xq_enq(txq, descs, valid);
 	kick_tx(internals->sfd);
 
-	if (valid < nb_pkts) {
-		for (i = valid; i < nb_pkts; i++)
-			rte_pktmbuf_free(mbufs[i]);
+	if (!share_mempool) {
+		if (valid < nb_pkts) {
+			for (i = valid; i < nb_pkts; i++)
+				rte_pktmbuf_free(mbufs[i]);
+		}
 	}
 
 	internals->err_pkts += (nb_pkts - valid);
@@ -376,46 +406,81 @@ static void *get_base_addr(struct rte_mempool *mb_pool)
 	return NULL;
 }
 
-static struct xdp_umem *xsk_alloc_and_mem_reg_buffers(int sfd,
-						      size_t nbuffers,
-						      const char *pool_name)
+static uint8_t
+check_mempool(struct rte_mempool *mp)
+{
+	RTE_ASSERT(mp);
+
+	/* memory must be contiguous (a single chunk) */
+	if (mp->nb_mem_chunks > 1)
+		return 0;
+
+	/* check header size */
+	if (mp->header_size != RTE_CACHE_LINE_SIZE)
+		return 0;
+
+	/* check base address */
+	if ((uint64_t)get_base_addr(mp) % getpagesize() != 0)
+		return 0;
+
+	/* check chunk size */
+	if ((mp->elt_size + mp->header_size + mp->trailer_size) %
+			ETH_AF_XDP_FRAME_SIZE != 0)
+		return 0;
+
+	return 1;
+}
+
+static struct xdp_umem *
+xsk_alloc_and_mem_reg_buffers(struct pmd_internals *internals)
 {
 	struct xdp_mr_req req = { .frame_size = ETH_AF_XDP_FRAME_SIZE,
 				  .data_headroom = ETH_AF_XDP_DATA_HEADROOM };
+	char pool_name[0x100];
+	int nbuffers;
 	struct xdp_umem *umem = calloc(1, sizeof(*umem));
 
 	if (!umem)
 		return NULL;
 
-	umem->mb_pool =
-		rte_pktmbuf_pool_create_with_flags(
-			pool_name, nbuffers,
-			250, 0,
-			(ETH_AF_XDP_FRAME_SIZE - ETH_AF_XDP_MBUF_OVERHEAD),
-			MEMPOOL_F_NO_SPREAD | MEMPOOL_F_PAGE_ALIGN,
-			SOCKET_ID_ANY);
-
-	if (!umem->mb_pool) {
-		free(umem);
-		return NULL;
-	}
+	internals->share_mb_pool = check_mempool(internals->ext_mb_pool);
+	if (!internals->share_mb_pool) {
+		snprintf(pool_name, 0x100, "%s_%s_%d", "af_xdp_pool",
+			 internals->if_name, internals->queue_idx);
+		umem->mb_pool =
+			rte_pktmbuf_pool_create_with_flags(
+				pool_name,
+				ETH_AF_XDP_NUM_BUFFERS,
+				250, 0,
+				(ETH_AF_XDP_FRAME_SIZE -
+				 ETH_AF_XDP_MBUF_OVERHEAD),
+				MEMPOOL_F_NO_SPREAD | MEMPOOL_F_PAGE_ALIGN,
+				SOCKET_ID_ANY);
+		if (!umem->mb_pool) {
+			free(umem);
+			return NULL;
+		}
 
-	if (umem->mb_pool->nb_mem_chunks > 1) {
-		rte_mempool_free(umem->mb_pool);
-		free(umem);
-		return NULL;
+		if (umem->mb_pool->nb_mem_chunks > 1) {
+			rte_mempool_free(umem->mb_pool);
+			free(umem);
+			return NULL;
+		}
+		nbuffers = ETH_AF_XDP_NUM_BUFFERS;
+	} else {
+		umem->mb_pool = internals->ext_mb_pool;
+		nbuffers = umem->mb_pool->populated_size;
 	}
 
 	req.addr = (uint64_t)get_base_addr(umem->mb_pool);
-	req.len = nbuffers * req.frame_size;
-	setsockopt(sfd, SOL_XDP, XDP_MEM_REG, &req, sizeof(req));
+	req.len = ETH_AF_XDP_NUM_BUFFERS * req.frame_size;
+	setsockopt(internals->sfd, SOL_XDP, XDP_MEM_REG, &req, sizeof(req));
 
 	umem->frame_size = ETH_AF_XDP_FRAME_SIZE;
 	umem->frame_size_log2 = 11;
 	umem->buffer = (char *)req.addr;
 	umem->size = nbuffers * req.frame_size;
 	umem->nframes = nbuffers;
-	umem->mr_fd = sfd;
 
 	return umem;
 }
@@ -425,19 +490,13 @@ xdp_configure(struct pmd_internals *internals)
 {
 	struct sockaddr_xdp sxdp;
 	struct xdp_ring_req req;
-	char pool_name[0x100];
-
 	int ret = 0;
 
-	snprintf(pool_name, 0x100, "%s_%s_%d", "af_xdp_pool",
-		 internals->if_name, internals->queue_idx);
-	internals->umem = xsk_alloc_and_mem_reg_buffers(internals->sfd,
-							ETH_AF_XDP_NUM_BUFFERS,
-							pool_name);
+	internals->umem = xsk_alloc_and_mem_reg_buffers(internals);
 	if (!internals->umem)
 		return -1;
 
-	req.mr_fd = internals->umem->mr_fd;
+	req.mr_fd = internals->sfd;
 	req.desc_nr = internals->ring_size;
 
 	ret = setsockopt(internals->sfd, SOL_XDP, XDP_RX_RING,
@@ -498,7 +557,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 		RTE_PKTMBUF_HEADROOM;
 	data_size = internals->umem->frame_size;
 
-	if (data_size > buf_size) {
+	if (data_size - ETH_AF_XDP_DATA_HEADROOM > buf_size) {
 		RTE_LOG(ERR, PMD,
 			"%s: %d bytes will not fit in mbuf (%d bytes)\n",
 			dev->device->name, data_size, buf_size);
@@ -764,7 +823,7 @@ rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
 
 	internals = eth_dev->data->dev_private;
 	if (internals->umem) {
-		if (internals->umem->mb_pool)
+		if (internals->umem->mb_pool && !internals->share_mb_pool)
 			rte_mempool_free(internals->umem->mb_pool);
 		rte_free(internals->umem);
 	}
-- 
2.13.6

  parent reply	other threads:[~2018-03-08 13:52 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-08 13:52 [dpdk-dev] [RFC v2 0/7] PMD driver for AF_XDP Qi Zhang
2018-03-08 13:52 ` [dpdk-dev] [RFC v2 1/7] net/af_xdp: new PMD driver Qi Zhang
2018-03-08 13:52 ` [dpdk-dev] [RFC v2 2/7] lib/mbuf: enable parse flags when create mempool Qi Zhang
2018-03-08 13:52 ` [dpdk-dev] [RFC v2 3/7] lib/mempool: allow page size aligned mempool Qi Zhang
2018-03-08 13:52 ` [dpdk-dev] [RFC v2 4/7] net/af_xdp: use mbuf mempool for buffer management Qi Zhang
2018-03-08 13:52 ` Qi Zhang [this message]
2018-03-08 13:52 ` [dpdk-dev] [RFC v2 6/7] net/af_xdp: load BPF file Qi Zhang
2018-03-08 14:20   ` Zhang, Qi Z
2018-03-08 23:15   ` Stephen Hemminger
2018-05-09  7:02     ` Björn Töpel
2018-03-08 13:52 ` [dpdk-dev] [RFC v2 7/7] app/testpmd: enable parameter for mempool flags Qi Zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180308135249.28187-6-qi.z.zhang@intel.com \
    --to=qi.z.zhang@intel.com \
    --cc=bjorn.topel@intel.com \
    --cc=dev@dpdk.org \
    --cc=magnus.karlsson@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).