DPDK patches and discussions
 help / color / mirror / Atom feed
* [RFC PATCH] net/af_xdp: reenable secondary process support
@ 2021-12-10 15:32 Ciara Loftus
  2021-12-11 21:49 ` Stephen Hemminger
  2022-01-12  7:54 ` [PATCH] net/af_xdp: re-enable " Ciara Loftus
  0 siblings, 2 replies; 36+ messages in thread
From: Ciara Loftus @ 2021-12-10 15:32 UTC (permalink / raw)
  To: dev; +Cc: stephen, Ciara Loftus

Secondary process support had been disabled for the AF_XDP PMD
because there was no logic in place to share the AF_XDP socket
file descriptors between the processes. This commit introduces
this logic using the IPC APIs.

Since AF_XDP rings are single-producer single-consumer, rx/tx
in the secondary process is disabled. However other operations
including retrieval of stats are permitted.

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
---
 doc/guides/nics/af_xdp.rst             |   9 ++
 doc/guides/nics/features/af_xdp.ini    |   1 +
 doc/guides/rel_notes/release_22_03.rst |   4 +
 drivers/net/af_xdp/rte_eth_af_xdp.c    | 210 +++++++++++++++++++++++--
 4 files changed, 210 insertions(+), 14 deletions(-)

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index c9d0e1ad6c..40ace58de0 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -143,4 +143,13 @@ Limitations
   NAPI context from a watchdog timer instead of from softirqs. More information
   on this feature can be found at [1].
 
+- **Secondary Processes**
+
+  Rx and Tx are not supported for secondary processes due to the single-producer
+  single-consumer nature of the AF_XDP rings. However other operations including
+  statistics retrieval are permitted.
+  The maximum number of queues permitted for PMDs operating in this model is 8
+  as this is the maximum number of fds that can be sent through the IPC APIs as
+  defined by RTE_MP_MAX_FD_NUM.
+
   [1] https://lwn.net/Articles/837010/
\ No newline at end of file
diff --git a/doc/guides/nics/features/af_xdp.ini b/doc/guides/nics/features/af_xdp.ini
index 54b738e616..8e7e075aaf 100644
--- a/doc/guides/nics/features/af_xdp.ini
+++ b/doc/guides/nics/features/af_xdp.ini
@@ -9,4 +9,5 @@ Power mgmt address monitor = Y
 MTU update           = Y
 Promiscuous mode     = Y
 Stats per queue      = Y
+Multiprocess aware   = Y
 x86-64               = Y
diff --git a/doc/guides/rel_notes/release_22_03.rst b/doc/guides/rel_notes/release_22_03.rst
index 6d99d1eaa9..ec719e9738 100644
--- a/doc/guides/rel_notes/release_22_03.rst
+++ b/doc/guides/rel_notes/release_22_03.rst
@@ -55,6 +55,10 @@ New Features
      Also, make sure to start the actual text at the margin.
      =======================================================
 
+* **Updated AF_XDP PMD.**
+
+  * Re-enabled secondary process support. RX/TX is not supported.
+
 
 Removed Items
 -------------
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 96c2c9d939..8608a4f379 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -81,6 +81,18 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE);
 
 #define ETH_AF_XDP_ETH_OVERHEAD		(RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN)
 
+#define ETH_AF_XDP_MP_KEY "afxdp_mp_send_fds"
+
+static int afxdp_dev_count;
+
+/* Message header to synchronize fds via IPC */
+struct ipc_hdr {
+	char port_name[RTE_DEV_NAME_MAX_LEN];
+	/* The file descriptors are in the dedicated part
+	 * of the Unix message to be translated by the kernel.
+	 */
+};
+
 struct xsk_umem_info {
 	struct xsk_umem *umem;
 	struct rte_ring *buf_ring;
@@ -148,6 +160,10 @@ struct pmd_internals {
 	struct pkt_tx_queue *tx_queues;
 };
 
+struct pmd_process_private {
+	int rxq_xsk_fds[RTE_MAX_QUEUES_PER_PORT];
+};
+
 #define ETH_AF_XDP_IFACE_ARG			"iface"
 #define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
 #define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
@@ -857,11 +873,12 @@ static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
+	struct pmd_process_private *process_private = dev->process_private;
 	struct xdp_statistics xdp_stats;
 	struct pkt_rx_queue *rxq;
 	struct pkt_tx_queue *txq;
 	socklen_t optlen;
-	int i, ret;
+	int i, ret, fd;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
 		optlen = sizeof(struct xdp_statistics);
@@ -877,8 +894,9 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 		stats->ibytes += stats->q_ibytes[i];
 		stats->imissed += rxq->stats.rx_dropped;
 		stats->oerrors += txq->stats.tx_dropped;
-		ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP,
-				XDP_STATISTICS, &xdp_stats, &optlen);
+		fd = process_private->rxq_xsk_fds[i];
+		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
+					   &xdp_stats, &optlen) : -1;
 		if (ret != 0) {
 			AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n");
 			return -1;
@@ -945,8 +963,10 @@ eth_dev_close(struct rte_eth_dev *dev)
 	struct pkt_rx_queue *rxq;
 	int i;
 
-	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		rte_free(dev->process_private);
 		return 0;
+	}
 
 	AF_XDP_LOG(INFO, "Closing AF_XDP ethdev on numa socket %u\n",
 		rte_socket_id());
@@ -1351,6 +1371,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 		   struct rte_mempool *mb_pool)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
+	struct pmd_process_private *process_private = dev->process_private;
 	struct pkt_rx_queue *rxq;
 	int ret;
 
@@ -1389,6 +1410,8 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 	rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
 	rxq->fds[0].events = POLLIN;
 
+	process_private->rxq_xsk_fds[rx_queue_id] = rxq->fds[0].fd;
+
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	return 0;
 
@@ -1690,6 +1713,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
+	struct pmd_process_private *process_private;
 	struct pmd_internals *internals;
 	struct rte_eth_dev *eth_dev;
 	int ret;
@@ -1755,9 +1779,17 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	if (ret)
 		goto err_free_tx;
 
+	process_private = (struct pmd_process_private *)
+		rte_zmalloc_socket(name, sizeof(struct pmd_process_private),
+				   RTE_CACHE_LINE_SIZE, numa_node);
+	if (process_private == NULL) {
+		AF_XDP_LOG(ERR, "Failed to alloc memory for process private\n");
+		goto err_free_tx;
+	}
+
 	eth_dev = rte_eth_vdev_allocate(dev, 0);
 	if (eth_dev == NULL)
-		goto err_free_tx;
+		goto err_free_pp;
 
 	eth_dev->data->dev_private = internals;
 	eth_dev->data->dev_link = pmd_link;
@@ -1766,6 +1798,10 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	eth_dev->dev_ops = &ops;
 	eth_dev->rx_pkt_burst = eth_af_xdp_rx;
 	eth_dev->tx_pkt_burst = eth_af_xdp_tx;
+	eth_dev->process_private = process_private;
+
+	for (i = 0; i < queue_cnt; i++)
+		process_private->rxq_xsk_fds[i] = -1;
 
 #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
 	AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
@@ -1773,6 +1809,8 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 
 	return eth_dev;
 
+err_free_pp:
+	rte_free(process_private);
 err_free_tx:
 	rte_free(internals->tx_queues);
 err_free_rx:
@@ -1782,6 +1820,115 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	return NULL;
 }
 
+/* Secondary process requests rxq fds from primary. */
+static int
+afxdp_mp_request_fds(const char *name, struct rte_eth_dev *dev)
+{
+	struct pmd_process_private *process_private = dev->process_private;
+	struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
+	struct rte_mp_msg request, *reply;
+	struct rte_mp_reply replies;
+	struct ipc_hdr *request_param = (struct ipc_hdr *)request.param;
+	int i, ret;
+
+	/* Prepare the request */
+	memset(&request, 0, sizeof(request));
+	strlcpy(request.name, ETH_AF_XDP_MP_KEY, sizeof(request.name));
+	strlcpy(request_param->port_name, name,
+		sizeof(request_param->port_name));
+	request.len_param = sizeof(*request_param);
+
+	/* Send the request and receive the reply */
+	AF_XDP_LOG(DEBUG, "Sending IPC request for %s\n", name);
+	ret = rte_mp_request_sync(&request, &replies, &timeout);
+	if (ret < 0 || replies.nb_received != 1) {
+		AF_XDP_LOG(ERR, "Failed to request fds from primary: %d",
+			   rte_errno);
+		return -1;
+	}
+	reply = replies.msgs;
+	AF_XDP_LOG(DEBUG, "Received IPC reply for %s\n", name);
+	if (dev->data->nb_rx_queues != reply->num_fds) {
+		AF_XDP_LOG(ERR, "Incorrect number of fds received: %d != %d\n",
+			   reply->num_fds, dev->data->nb_rx_queues);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < reply->num_fds; i++)
+		process_private->rxq_xsk_fds[i] = reply->fds[i];
+
+	free(reply);
+	return 0;
+}
+
+/* Primary process sends rxq fds to secondary. */
+static int
+afxdp_mp_send_fds(const struct rte_mp_msg *request, const void *peer)
+{
+	struct rte_eth_dev *dev;
+	struct pmd_process_private *process_private;
+	struct rte_mp_msg reply;
+	const struct ipc_hdr *request_param =
+		(const struct ipc_hdr *)request->param;
+	struct ipc_hdr *reply_param =
+		(struct ipc_hdr *)reply.param;
+	const char *request_name = request_param->port_name;
+	uint16_t port_id;
+	int i, ret;
+
+	AF_XDP_LOG(DEBUG, "Received IPC request for %s\n", request_name);
+
+	/* Find the requested port */
+	ret = rte_eth_dev_get_port_by_name(request_name, &port_id);
+	if (ret) {
+		AF_XDP_LOG(ERR, "Failed to get port id for %s\n", request_name);
+		return -1;
+	}
+	dev = &rte_eth_devices[port_id];
+	process_private = dev->process_private;
+
+	/* Populate the reply with the xsk fd for each queue */
+	reply.num_fds = 0;
+	if (dev->data->nb_rx_queues > RTE_MP_MAX_FD_NUM) {
+		AF_XDP_LOG(ERR, "Number of rx queues (%d) exceeds max number of fds (%d)\n",
+			   dev->data->nb_rx_queues, RTE_MP_MAX_FD_NUM);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		reply.fds[reply.num_fds++] = process_private->rxq_xsk_fds[i];
+
+	/* Send the reply */
+	strlcpy(reply.name, request->name, sizeof(reply.name));
+	strlcpy(reply_param->port_name, request_name,
+		sizeof(reply_param->port_name));
+	reply.len_param = sizeof(*reply_param);
+	AF_XDP_LOG(DEBUG, "Sending IPC reply for %s\n", reply_param->port_name);
+	if (rte_mp_reply(&reply, peer) < 0) {
+		AF_XDP_LOG(ERR, "Failed to reply to IPC request\n");
+		return -1;
+	}
+	return 0;
+}
+
+/* Secondary proccess rx function. RX is disabled because rings are SPSC */
+static uint16_t
+eth_af_xdp_rx_noop(void *queue __rte_unused,
+		struct rte_mbuf **bufs __rte_unused,
+		uint16_t nb_pkts __rte_unused)
+{
+	return 0;
+}
+
+/* Secondary proccess tx function. TX is disabled because rings are SPSC */
+static uint16_t
+eth_af_xdp_tx_noop(void *queue __rte_unused,
+			struct rte_mbuf **bufs __rte_unused,
+			uint16_t nb_pkts __rte_unused)
+{
+	return 0;
+}
+
 static int
 rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 {
@@ -1791,19 +1938,39 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
 	int shared_umem = 0;
 	char prog_path[PATH_MAX] = {'\0'};
-	int busy_budget = -1;
+	int busy_budget = -1, ret;
 	struct rte_eth_dev *eth_dev = NULL;
-	const char *name;
+	const char *name = rte_vdev_device_name(dev);
 
-	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
-		rte_vdev_device_name(dev));
+	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n", name);
 
-	name = rte_vdev_device_name(dev);
 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
-		AF_XDP_LOG(ERR, "Failed to probe %s. "
-				"AF_XDP PMD does not support secondary processes.\n",
-				name);
-		return -ENOTSUP;
+		eth_dev = rte_eth_dev_attach_secondary(name);
+		if (eth_dev == NULL) {
+			AF_XDP_LOG(ERR, "Failed to probe %s\n", name);
+			return -EINVAL;
+		}
+		eth_dev->dev_ops = &ops;
+		eth_dev->device = &dev->device;
+		eth_dev->rx_pkt_burst = eth_af_xdp_rx_noop;
+		eth_dev->tx_pkt_burst = eth_af_xdp_tx_noop;
+		eth_dev->process_private = (struct pmd_process_private *)
+			rte_zmalloc_socket(name,
+					   sizeof(struct pmd_process_private),
+					   RTE_CACHE_LINE_SIZE,
+					   eth_dev->device->numa_node);
+		if (eth_dev->process_private == NULL) {
+			AF_XDP_LOG(ERR,
+				"Failed to alloc memory for process private\n");
+			return -ENOMEM;
+		}
+
+		/* Obtain the xsk fds from the primary process. */
+		if (afxdp_mp_request_fds(name, eth_dev))
+			return -1;
+
+		rte_eth_dev_probing_finish(eth_dev);
+		return 0;
 	}
 
 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
@@ -1838,6 +2005,17 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		return -1;
 	}
 
+	/* Register IPC callback which shares xsk fds from primary to secondary */
+	if (!afxdp_dev_count) {
+		ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY, afxdp_mp_send_fds);
+		if (ret < 0) {
+			AF_XDP_LOG(ERR, "%s: Failed to register IPC callback: %s",
+				   name, strerror(rte_errno));
+			return -1;
+		}
+	}
+	afxdp_dev_count++;
+
 	rte_eth_dev_probing_finish(eth_dev);
 
 	return 0;
@@ -1860,6 +2038,10 @@ rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
 		return 0;
 
 	eth_dev_close(eth_dev);
+	rte_free(eth_dev->process_private);
+	if (afxdp_dev_count == 1)
+		rte_mp_action_unregister(ETH_AF_XDP_MP_KEY);
+	afxdp_dev_count--;
 	rte_eth_dev_release_port(eth_dev);
 
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] net/af_xdp: reenable secondary process support
  2021-12-10 15:32 [RFC PATCH] net/af_xdp: reenable secondary process support Ciara Loftus
@ 2021-12-11 21:49 ` Stephen Hemminger
  2022-01-12  7:54 ` [PATCH] net/af_xdp: re-enable " Ciara Loftus
  1 sibling, 0 replies; 36+ messages in thread
From: Stephen Hemminger @ 2021-12-11 21:49 UTC (permalink / raw)
  To: Ciara Loftus; +Cc: dev

On Fri, 10 Dec 2021 15:32:45 +0000
Ciara Loftus <ciara.loftus@intel.com> wrote:

> Secondary process support had been disabled for the AF_XDP PMD
> because there was no logic in place to share the AF_XDP socket
> file descriptors between the processes. This commit introduces
> this logic using the IPC APIs.
> 
> Since AF_XDP rings are single-producer single-consumer, rx/tx
> in the secondary process is disabled. However other operations
> including retrieval of stats are permitted.
> 
> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> ---
>  doc/guides/nics/af_xdp.rst             |   9 ++
>  doc/guides/nics/features/af_xdp.ini    |   1 +
>  doc/guides/rel_notes/release_22_03.rst |   4 +
>  drivers/net/af_xdp/rte_eth_af_xdp.c    | 210 +++++++++++++++++++++++--
>  4 files changed, 210 insertions(+), 14 deletions(-)

Could also fix (and change your editor settings) the missing newline
at the end of files in XDP.

Specifically:
  drivers/net/af_xdp/meson.build
  doc/guides/nics/af_xdp.rst

Overall in DPDK:

$ find app *tools config doc drivers examples lib -type f | xargs pcregrep -LMr '\n\Z' | grep -v '.svg$' | grep -v '.png$' | grep -v '.data$' | grep -v '.pyc$'
app/test/test_cfgfiles/etc/empty.ini
doc/guides/cryptodevs/features/null.ini
doc/guides/cryptodevs/features/ccp.ini
doc/guides/cryptodevs/features/qat.ini
doc/guides/cryptodevs/features/caam_jr.ini
doc/guides/cryptodevs/features/cn9k.ini
doc/guides/cryptodevs/features/bcmfs.ini
doc/guides/cryptodevs/features/cn10k.ini
doc/guides/cryptodevs/features/aesni_mb.ini
doc/guides/cryptodevs/features/armv8.ini
doc/guides/nics/af_xdp.rst
drivers/net/af_xdp/meson.build
examples/flow_classify/ipv4_rules_file.txt

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH] net/af_xdp: re-enable secondary process support
  2021-12-10 15:32 [RFC PATCH] net/af_xdp: reenable secondary process support Ciara Loftus
  2021-12-11 21:49 ` Stephen Hemminger
@ 2022-01-12  7:54 ` Ciara Loftus
  2022-02-04 12:54   ` [PATCH v2] " Ciara Loftus
  1 sibling, 1 reply; 36+ messages in thread
From: Ciara Loftus @ 2022-01-12  7:54 UTC (permalink / raw)
  To: dev; +Cc: stephen, ferruh.yigit, Ciara Loftus

Secondary process support had been disabled for the AF_XDP PMD
because there was no logic in place to share the AF_XDP socket
file descriptors between the processes. This commit introduces
this logic using the IPC APIs.

Since AF_XDP rings are single-producer single-consumer, rx/tx
in the secondary process is disabled. However other operations
including retrieval of stats are permitted.

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>

---
RFC -> v1:
* Added newline to af_xdp.rst
* Fixed spelling errors
* Fixed potential NULL dereference in init_internals
* Fixed potential free of address-of expression in afxdp_mp_request_fds
---
 doc/guides/nics/af_xdp.rst             |  11 +-
 doc/guides/nics/features/af_xdp.ini    |   1 +
 doc/guides/rel_notes/release_22_03.rst |   4 +
 drivers/net/af_xdp/rte_eth_af_xdp.c    | 210 +++++++++++++++++++++++--
 4 files changed, 211 insertions(+), 15 deletions(-)

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index c9d0e1ad6c..9e3feee8d1 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -143,4 +143,13 @@ Limitations
   NAPI context from a watchdog timer instead of from softirqs. More information
   on this feature can be found at [1].
 
-  [1] https://lwn.net/Articles/837010/
\ No newline at end of file
+- **Secondary Processes**
+
+  Rx and Tx are not supported for secondary processes due to the single-producer
+  single-consumer nature of the AF_XDP rings. However other operations including
+  statistics retrieval are permitted.
+  The maximum number of queues permitted for PMDs operating in this model is 8
+  as this is the maximum number of fds that can be sent through the IPC APIs as
+  defined by RTE_MP_MAX_FD_NUM.
+
+  [1] https://lwn.net/Articles/837010/
diff --git a/doc/guides/nics/features/af_xdp.ini b/doc/guides/nics/features/af_xdp.ini
index 54b738e616..8e7e075aaf 100644
--- a/doc/guides/nics/features/af_xdp.ini
+++ b/doc/guides/nics/features/af_xdp.ini
@@ -9,4 +9,5 @@ Power mgmt address monitor = Y
 MTU update           = Y
 Promiscuous mode     = Y
 Stats per queue      = Y
+Multiprocess aware   = Y
 x86-64               = Y
diff --git a/doc/guides/rel_notes/release_22_03.rst b/doc/guides/rel_notes/release_22_03.rst
index 6d99d1eaa9..ec719e9738 100644
--- a/doc/guides/rel_notes/release_22_03.rst
+++ b/doc/guides/rel_notes/release_22_03.rst
@@ -55,6 +55,10 @@ New Features
      Also, make sure to start the actual text at the margin.
      =======================================================
 
+* **Updated AF_XDP PMD.**
+
+  * Re-enabled secondary process support. RX/TX is not supported.
+
 
 Removed Items
 -------------
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 96c2c9d939..1c8c52ddb6 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -81,6 +81,18 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE);
 
 #define ETH_AF_XDP_ETH_OVERHEAD		(RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN)
 
+#define ETH_AF_XDP_MP_KEY "afxdp_mp_send_fds"
+
+static int afxdp_dev_count;
+
+/* Message header to synchronize fds via IPC */
+struct ipc_hdr {
+	char port_name[RTE_DEV_NAME_MAX_LEN];
+	/* The file descriptors are in the dedicated part
+	 * of the Unix message to be translated by the kernel.
+	 */
+};
+
 struct xsk_umem_info {
 	struct xsk_umem *umem;
 	struct rte_ring *buf_ring;
@@ -148,6 +160,10 @@ struct pmd_internals {
 	struct pkt_tx_queue *tx_queues;
 };
 
+struct pmd_process_private {
+	int rxq_xsk_fds[RTE_MAX_QUEUES_PER_PORT];
+};
+
 #define ETH_AF_XDP_IFACE_ARG			"iface"
 #define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
 #define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
@@ -857,11 +873,12 @@ static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
+	struct pmd_process_private *process_private = dev->process_private;
 	struct xdp_statistics xdp_stats;
 	struct pkt_rx_queue *rxq;
 	struct pkt_tx_queue *txq;
 	socklen_t optlen;
-	int i, ret;
+	int i, ret, fd;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
 		optlen = sizeof(struct xdp_statistics);
@@ -877,8 +894,9 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 		stats->ibytes += stats->q_ibytes[i];
 		stats->imissed += rxq->stats.rx_dropped;
 		stats->oerrors += txq->stats.tx_dropped;
-		ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP,
-				XDP_STATISTICS, &xdp_stats, &optlen);
+		fd = process_private->rxq_xsk_fds[i];
+		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
+					   &xdp_stats, &optlen) : -1;
 		if (ret != 0) {
 			AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n");
 			return -1;
@@ -945,8 +963,10 @@ eth_dev_close(struct rte_eth_dev *dev)
 	struct pkt_rx_queue *rxq;
 	int i;
 
-	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		rte_free(dev->process_private);
 		return 0;
+	}
 
 	AF_XDP_LOG(INFO, "Closing AF_XDP ethdev on numa socket %u\n",
 		rte_socket_id());
@@ -1351,6 +1371,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 		   struct rte_mempool *mb_pool)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
+	struct pmd_process_private *process_private = dev->process_private;
 	struct pkt_rx_queue *rxq;
 	int ret;
 
@@ -1389,6 +1410,8 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 	rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
 	rxq->fds[0].events = POLLIN;
 
+	process_private->rxq_xsk_fds[rx_queue_id] = rxq->fds[0].fd;
+
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	return 0;
 
@@ -1690,6 +1713,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
+	struct pmd_process_private *process_private;
 	struct pmd_internals *internals;
 	struct rte_eth_dev *eth_dev;
 	int ret;
@@ -1755,9 +1779,17 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	if (ret)
 		goto err_free_tx;
 
+	process_private = (struct pmd_process_private *)
+		rte_zmalloc_socket(name, sizeof(struct pmd_process_private),
+				   RTE_CACHE_LINE_SIZE, numa_node);
+	if (process_private == NULL) {
+		AF_XDP_LOG(ERR, "Failed to alloc memory for process private\n");
+		goto err_free_tx;
+	}
+
 	eth_dev = rte_eth_vdev_allocate(dev, 0);
 	if (eth_dev == NULL)
-		goto err_free_tx;
+		goto err_free_pp;
 
 	eth_dev->data->dev_private = internals;
 	eth_dev->data->dev_link = pmd_link;
@@ -1766,6 +1798,10 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	eth_dev->dev_ops = &ops;
 	eth_dev->rx_pkt_burst = eth_af_xdp_rx;
 	eth_dev->tx_pkt_burst = eth_af_xdp_tx;
+	eth_dev->process_private = process_private;
+
+	for (i = 0; i < queue_cnt; i++)
+		process_private->rxq_xsk_fds[i] = -1;
 
 #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
 	AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
@@ -1773,6 +1809,8 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 
 	return eth_dev;
 
+err_free_pp:
+	rte_free(process_private);
 err_free_tx:
 	rte_free(internals->tx_queues);
 err_free_rx:
@@ -1782,6 +1820,115 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	return NULL;
 }
 
+/* Secondary process requests rxq fds from primary. */
+static int
+afxdp_mp_request_fds(const char *name, struct rte_eth_dev *dev)
+{
+	struct pmd_process_private *process_private = dev->process_private;
+	struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
+	struct rte_mp_msg request, *reply;
+	struct rte_mp_reply replies;
+	struct ipc_hdr *request_param = (struct ipc_hdr *)request.param;
+	int i, ret;
+
+	/* Prepare the request */
+	memset(&request, 0, sizeof(request));
+	strlcpy(request.name, ETH_AF_XDP_MP_KEY, sizeof(request.name));
+	strlcpy(request_param->port_name, name,
+		sizeof(request_param->port_name));
+	request.len_param = sizeof(*request_param);
+
+	/* Send the request and receive the reply */
+	AF_XDP_LOG(DEBUG, "Sending IPC request for %s\n", name);
+	ret = rte_mp_request_sync(&request, &replies, &timeout);
+	if (ret < 0 || replies.nb_received != 1) {
+		AF_XDP_LOG(ERR, "Failed to request fds from primary: %d",
+			   rte_errno);
+		return -1;
+	}
+	reply = replies.msgs;
+	AF_XDP_LOG(DEBUG, "Received IPC reply for %s\n", name);
+	if (dev->data->nb_rx_queues != reply->num_fds) {
+		AF_XDP_LOG(ERR, "Incorrect number of fds received: %d != %d\n",
+			   reply->num_fds, dev->data->nb_rx_queues);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < reply->num_fds; i++)
+		process_private->rxq_xsk_fds[i] = reply->fds[i];
+
+	free(reply);
+	return 0;
+}
+
+/* Primary process sends rxq fds to secondary. */
+static int
+afxdp_mp_send_fds(const struct rte_mp_msg *request, const void *peer)
+{
+	struct rte_eth_dev *dev;
+	struct pmd_process_private *process_private;
+	struct rte_mp_msg reply;
+	const struct ipc_hdr *request_param =
+		(const struct ipc_hdr *)request->param;
+	struct ipc_hdr *reply_param =
+		(struct ipc_hdr *)reply.param;
+	const char *request_name = request_param->port_name;
+	uint16_t port_id;
+	int i, ret;
+
+	AF_XDP_LOG(DEBUG, "Received IPC request for %s\n", request_name);
+
+	/* Find the requested port */
+	ret = rte_eth_dev_get_port_by_name(request_name, &port_id);
+	if (ret) {
+		AF_XDP_LOG(ERR, "Failed to get port id for %s\n", request_name);
+		return -1;
+	}
+	dev = &rte_eth_devices[port_id];
+	process_private = dev->process_private;
+
+	/* Populate the reply with the xsk fd for each queue */
+	reply.num_fds = 0;
+	if (dev->data->nb_rx_queues > RTE_MP_MAX_FD_NUM) {
+		AF_XDP_LOG(ERR, "Number of rx queues (%d) exceeds max number of fds (%d)\n",
+			   dev->data->nb_rx_queues, RTE_MP_MAX_FD_NUM);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		reply.fds[reply.num_fds++] = process_private->rxq_xsk_fds[i];
+
+	/* Send the reply */
+	strlcpy(reply.name, request->name, sizeof(reply.name));
+	strlcpy(reply_param->port_name, request_name,
+		sizeof(reply_param->port_name));
+	reply.len_param = sizeof(*reply_param);
+	AF_XDP_LOG(DEBUG, "Sending IPC reply for %s\n", reply_param->port_name);
+	if (rte_mp_reply(&reply, peer) < 0) {
+		AF_XDP_LOG(ERR, "Failed to reply to IPC request\n");
+		return -1;
+	}
+	return 0;
+}
+
+/* Secondary process rx function. RX is disabled because rings are SPSC */
+static uint16_t
+eth_af_xdp_rx_noop(void *queue __rte_unused,
+		struct rte_mbuf **bufs __rte_unused,
+		uint16_t nb_pkts __rte_unused)
+{
+	return 0;
+}
+
+/* Secondary process tx function. TX is disabled because rings are SPSC */
+static uint16_t
+eth_af_xdp_tx_noop(void *queue __rte_unused,
+			struct rte_mbuf **bufs __rte_unused,
+			uint16_t nb_pkts __rte_unused)
+{
+	return 0;
+}
+
 static int
 rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 {
@@ -1791,19 +1938,39 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
 	int shared_umem = 0;
 	char prog_path[PATH_MAX] = {'\0'};
-	int busy_budget = -1;
+	int busy_budget = -1, ret;
 	struct rte_eth_dev *eth_dev = NULL;
-	const char *name;
+	const char *name = rte_vdev_device_name(dev);
 
-	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
-		rte_vdev_device_name(dev));
+	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n", name);
 
-	name = rte_vdev_device_name(dev);
 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
-		AF_XDP_LOG(ERR, "Failed to probe %s. "
-				"AF_XDP PMD does not support secondary processes.\n",
-				name);
-		return -ENOTSUP;
+		eth_dev = rte_eth_dev_attach_secondary(name);
+		if (eth_dev == NULL) {
+			AF_XDP_LOG(ERR, "Failed to probe %s\n", name);
+			return -EINVAL;
+		}
+		eth_dev->dev_ops = &ops;
+		eth_dev->device = &dev->device;
+		eth_dev->rx_pkt_burst = eth_af_xdp_rx_noop;
+		eth_dev->tx_pkt_burst = eth_af_xdp_tx_noop;
+		eth_dev->process_private = (struct pmd_process_private *)
+			rte_zmalloc_socket(name,
+					   sizeof(struct pmd_process_private),
+					   RTE_CACHE_LINE_SIZE,
+					   eth_dev->device->numa_node);
+		if (eth_dev->process_private == NULL) {
+			AF_XDP_LOG(ERR,
+				"Failed to alloc memory for process private\n");
+			return -ENOMEM;
+		}
+
+		/* Obtain the xsk fds from the primary process. */
+		if (afxdp_mp_request_fds(name, eth_dev))
+			return -1;
+
+		rte_eth_dev_probing_finish(eth_dev);
+		return 0;
 	}
 
 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
@@ -1838,6 +2005,17 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		return -1;
 	}
 
+	/* Register IPC callback which shares xsk fds from primary to secondary */
+	if (!afxdp_dev_count) {
+		ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY, afxdp_mp_send_fds);
+		if (ret < 0) {
+			AF_XDP_LOG(ERR, "%s: Failed to register IPC callback: %s",
+				   name, strerror(rte_errno));
+			return -1;
+		}
+	}
+	afxdp_dev_count++;
+
 	rte_eth_dev_probing_finish(eth_dev);
 
 	return 0;
@@ -1860,6 +2038,10 @@ rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
 		return 0;
 
 	eth_dev_close(eth_dev);
+	rte_free(eth_dev->process_private);
+	if (afxdp_dev_count == 1)
+		rte_mp_action_unregister(ETH_AF_XDP_MP_KEY);
+	afxdp_dev_count--;
 	rte_eth_dev_release_port(eth_dev);
 
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH v2] net/af_xdp: re-enable secondary process support
  2022-01-12  7:54 ` [PATCH] net/af_xdp: re-enable " Ciara Loftus
@ 2022-02-04 12:54   ` Ciara Loftus
  2022-02-04 14:18     ` Ferruh Yigit
  2022-02-08 13:48     ` [PATCH v3] " Ciara Loftus
  0 siblings, 2 replies; 36+ messages in thread
From: Ciara Loftus @ 2022-02-04 12:54 UTC (permalink / raw)
  To: dev; +Cc: stephen, ferruh.yigit, Ciara Loftus

Secondary process support had been disabled for the AF_XDP PMD
because there was no logic in place to share the AF_XDP socket
file descriptors between the processes. This commit introduces
this logic using the IPC APIs.

Since AF_XDP rings are single-producer single-consumer, rx/tx
in the secondary process is disabled. However other operations
including retrieval of stats are permitted.

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>

---
v1 -> v2:
* Rebase to next-net

RFC -> v1:
* Added newline to af_xdp.rst
* Fixed spelling errors
* Fixed potential NULL dereference in init_internals
* Fixed potential free of address-of expression in afxdp_mp_request_fds
---
 doc/guides/nics/af_xdp.rst             |   9 ++
 doc/guides/nics/features/af_xdp.ini    |   1 +
 doc/guides/rel_notes/release_22_03.rst |   1 +
 drivers/net/af_xdp/rte_eth_af_xdp.c    | 210 +++++++++++++++++++++++--
 4 files changed, 207 insertions(+), 14 deletions(-)

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index db02ea1984..eb4eab28a8 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -141,4 +141,13 @@ Limitations
   NAPI context from a watchdog timer instead of from softirqs. More information
   on this feature can be found at [1].
 
+- **Secondary Processes**
+
+  Rx and Tx are not supported for secondary processes due to the single-producer
+  single-consumer nature of the AF_XDP rings. However other operations including
+  statistics retrieval are permitted.
+  The maximum number of queues permitted for PMDs operating in this model is 8
+  as this is the maximum number of fds that can be sent through the IPC APIs as
+  defined by RTE_MP_MAX_FD_NUM.
+
   [1] https://lwn.net/Articles/837010/
diff --git a/doc/guides/nics/features/af_xdp.ini b/doc/guides/nics/features/af_xdp.ini
index 54b738e616..8e7e075aaf 100644
--- a/doc/guides/nics/features/af_xdp.ini
+++ b/doc/guides/nics/features/af_xdp.ini
@@ -9,4 +9,5 @@ Power mgmt address monitor = Y
 MTU update           = Y
 Promiscuous mode     = Y
 Stats per queue      = Y
+Multiprocess aware   = Y
 x86-64               = Y
diff --git a/doc/guides/rel_notes/release_22_03.rst b/doc/guides/rel_notes/release_22_03.rst
index bf2e3f78a9..dfd2cbbccf 100644
--- a/doc/guides/rel_notes/release_22_03.rst
+++ b/doc/guides/rel_notes/release_22_03.rst
@@ -58,6 +58,7 @@ New Features
 * **Updated AF_XDP PMD**
 
   * Added support for libxdp >=v1.2.2.
+  * Re-enabled secondary process support. RX/TX is not supported.
 
 * **Updated Cisco enic driver.**
 
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 1b6192fa44..407f6d8dbe 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -80,6 +80,18 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE);
 
 #define ETH_AF_XDP_ETH_OVERHEAD		(RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN)
 
+#define ETH_AF_XDP_MP_KEY "afxdp_mp_send_fds"
+
+static int afxdp_dev_count;
+
+/* Message header to synchronize fds via IPC */
+struct ipc_hdr {
+	char port_name[RTE_DEV_NAME_MAX_LEN];
+	/* The file descriptors are in the dedicated part
+	 * of the Unix message to be translated by the kernel.
+	 */
+};
+
 struct xsk_umem_info {
 	struct xsk_umem *umem;
 	struct rte_ring *buf_ring;
@@ -147,6 +159,10 @@ struct pmd_internals {
 	struct pkt_tx_queue *tx_queues;
 };
 
+struct pmd_process_private {
+	int rxq_xsk_fds[RTE_MAX_QUEUES_PER_PORT];
+};
+
 #define ETH_AF_XDP_IFACE_ARG			"iface"
 #define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
 #define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
@@ -795,11 +811,12 @@ static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
+	struct pmd_process_private *process_private = dev->process_private;
 	struct xdp_statistics xdp_stats;
 	struct pkt_rx_queue *rxq;
 	struct pkt_tx_queue *txq;
 	socklen_t optlen;
-	int i, ret;
+	int i, ret, fd;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
 		optlen = sizeof(struct xdp_statistics);
@@ -815,8 +832,9 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 		stats->ibytes += stats->q_ibytes[i];
 		stats->imissed += rxq->stats.rx_dropped;
 		stats->oerrors += txq->stats.tx_dropped;
-		ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP,
-				XDP_STATISTICS, &xdp_stats, &optlen);
+		fd = process_private->rxq_xsk_fds[i];
+		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
+					   &xdp_stats, &optlen) : -1;
 		if (ret != 0) {
 			AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n");
 			return -1;
@@ -883,8 +901,10 @@ eth_dev_close(struct rte_eth_dev *dev)
 	struct pkt_rx_queue *rxq;
 	int i;
 
-	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		rte_free(dev->process_private);
 		return 0;
+	}
 
 	AF_XDP_LOG(INFO, "Closing AF_XDP ethdev on numa socket %u\n",
 		rte_socket_id());
@@ -1349,6 +1369,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 		   struct rte_mempool *mb_pool)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
+	struct pmd_process_private *process_private = dev->process_private;
 	struct pkt_rx_queue *rxq;
 	int ret;
 
@@ -1387,6 +1408,8 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 	rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
 	rxq->fds[0].events = POLLIN;
 
+	process_private->rxq_xsk_fds[rx_queue_id] = rxq->fds[0].fd;
+
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	return 0;
 
@@ -1688,6 +1711,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
+	struct pmd_process_private *process_private;
 	struct pmd_internals *internals;
 	struct rte_eth_dev *eth_dev;
 	int ret;
@@ -1753,9 +1777,17 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	if (ret)
 		goto err_free_tx;
 
+	process_private = (struct pmd_process_private *)
+		rte_zmalloc_socket(name, sizeof(struct pmd_process_private),
+				   RTE_CACHE_LINE_SIZE, numa_node);
+	if (process_private == NULL) {
+		AF_XDP_LOG(ERR, "Failed to alloc memory for process private\n");
+		goto err_free_tx;
+	}
+
 	eth_dev = rte_eth_vdev_allocate(dev, 0);
 	if (eth_dev == NULL)
-		goto err_free_tx;
+		goto err_free_pp;
 
 	eth_dev->data->dev_private = internals;
 	eth_dev->data->dev_link = pmd_link;
@@ -1764,6 +1796,10 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	eth_dev->dev_ops = &ops;
 	eth_dev->rx_pkt_burst = eth_af_xdp_rx;
 	eth_dev->tx_pkt_burst = eth_af_xdp_tx;
+	eth_dev->process_private = process_private;
+
+	for (i = 0; i < queue_cnt; i++)
+		process_private->rxq_xsk_fds[i] = -1;
 
 #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
 	AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
@@ -1771,6 +1807,8 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 
 	return eth_dev;
 
+err_free_pp:
+	rte_free(process_private);
 err_free_tx:
 	rte_free(internals->tx_queues);
 err_free_rx:
@@ -1780,6 +1818,115 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	return NULL;
 }
 
+/* Secondary process requests rxq fds from primary. */
+static int
+afxdp_mp_request_fds(const char *name, struct rte_eth_dev *dev)
+{
+	struct pmd_process_private *process_private = dev->process_private;
+	struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
+	struct rte_mp_msg request, *reply;
+	struct rte_mp_reply replies;
+	struct ipc_hdr *request_param = (struct ipc_hdr *)request.param;
+	int i, ret;
+
+	/* Prepare the request */
+	memset(&request, 0, sizeof(request));
+	strlcpy(request.name, ETH_AF_XDP_MP_KEY, sizeof(request.name));
+	strlcpy(request_param->port_name, name,
+		sizeof(request_param->port_name));
+	request.len_param = sizeof(*request_param);
+
+	/* Send the request and receive the reply */
+	AF_XDP_LOG(DEBUG, "Sending IPC request for %s\n", name);
+	ret = rte_mp_request_sync(&request, &replies, &timeout);
+	if (ret < 0 || replies.nb_received != 1) {
+		AF_XDP_LOG(ERR, "Failed to request fds from primary: %d",
+			   rte_errno);
+		return -1;
+	}
+	reply = replies.msgs;
+	AF_XDP_LOG(DEBUG, "Received IPC reply for %s\n", name);
+	if (dev->data->nb_rx_queues != reply->num_fds) {
+		AF_XDP_LOG(ERR, "Incorrect number of fds received: %d != %d\n",
+			   reply->num_fds, dev->data->nb_rx_queues);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < reply->num_fds; i++)
+		process_private->rxq_xsk_fds[i] = reply->fds[i];
+
+	free(reply);
+	return 0;
+}
+
+/* Primary process sends rxq fds to secondary. */
+static int
+afxdp_mp_send_fds(const struct rte_mp_msg *request, const void *peer)
+{
+	struct rte_eth_dev *dev;
+	struct pmd_process_private *process_private;
+	struct rte_mp_msg reply;
+	const struct ipc_hdr *request_param =
+		(const struct ipc_hdr *)request->param;
+	struct ipc_hdr *reply_param =
+		(struct ipc_hdr *)reply.param;
+	const char *request_name = request_param->port_name;
+	uint16_t port_id;
+	int i, ret;
+
+	AF_XDP_LOG(DEBUG, "Received IPC request for %s\n", request_name);
+
+	/* Find the requested port */
+	ret = rte_eth_dev_get_port_by_name(request_name, &port_id);
+	if (ret) {
+		AF_XDP_LOG(ERR, "Failed to get port id for %s\n", request_name);
+		return -1;
+	}
+	dev = &rte_eth_devices[port_id];
+	process_private = dev->process_private;
+
+	/* Populate the reply with the xsk fd for each queue */
+	reply.num_fds = 0;
+	if (dev->data->nb_rx_queues > RTE_MP_MAX_FD_NUM) {
+		AF_XDP_LOG(ERR, "Number of rx queues (%d) exceeds max number of fds (%d)\n",
+			   dev->data->nb_rx_queues, RTE_MP_MAX_FD_NUM);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		reply.fds[reply.num_fds++] = process_private->rxq_xsk_fds[i];
+
+	/* Send the reply */
+	strlcpy(reply.name, request->name, sizeof(reply.name));
+	strlcpy(reply_param->port_name, request_name,
+		sizeof(reply_param->port_name));
+	reply.len_param = sizeof(*reply_param);
+	AF_XDP_LOG(DEBUG, "Sending IPC reply for %s\n", reply_param->port_name);
+	if (rte_mp_reply(&reply, peer) < 0) {
+		AF_XDP_LOG(ERR, "Failed to reply to IPC request\n");
+		return -1;
+	}
+	return 0;
+}
+
+/* Secondary process rx function. RX is disabled because rings are SPSC */
+static uint16_t
+eth_af_xdp_rx_noop(void *queue __rte_unused,
+		struct rte_mbuf **bufs __rte_unused,
+		uint16_t nb_pkts __rte_unused)
+{
+	return 0;
+}
+
+/* Secondary process tx function. TX is disabled because rings are SPSC */
+static uint16_t
+eth_af_xdp_tx_noop(void *queue __rte_unused,
+			struct rte_mbuf **bufs __rte_unused,
+			uint16_t nb_pkts __rte_unused)
+{
+	return 0;
+}
+
 static int
 rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 {
@@ -1789,19 +1936,39 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
 	int shared_umem = 0;
 	char prog_path[PATH_MAX] = {'\0'};
-	int busy_budget = -1;
+	int busy_budget = -1, ret;
 	struct rte_eth_dev *eth_dev = NULL;
-	const char *name;
+	const char *name = rte_vdev_device_name(dev);
 
-	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
-		rte_vdev_device_name(dev));
+	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n", name);
 
-	name = rte_vdev_device_name(dev);
 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
-		AF_XDP_LOG(ERR, "Failed to probe %s. "
-				"AF_XDP PMD does not support secondary processes.\n",
-				name);
-		return -ENOTSUP;
+		eth_dev = rte_eth_dev_attach_secondary(name);
+		if (eth_dev == NULL) {
+			AF_XDP_LOG(ERR, "Failed to probe %s\n", name);
+			return -EINVAL;
+		}
+		eth_dev->dev_ops = &ops;
+		eth_dev->device = &dev->device;
+		eth_dev->rx_pkt_burst = eth_af_xdp_rx_noop;
+		eth_dev->tx_pkt_burst = eth_af_xdp_tx_noop;
+		eth_dev->process_private = (struct pmd_process_private *)
+			rte_zmalloc_socket(name,
+					   sizeof(struct pmd_process_private),
+					   RTE_CACHE_LINE_SIZE,
+					   eth_dev->device->numa_node);
+		if (eth_dev->process_private == NULL) {
+			AF_XDP_LOG(ERR,
+				"Failed to alloc memory for process private\n");
+			return -ENOMEM;
+		}
+
+		/* Obtain the xsk fds from the primary process. */
+		if (afxdp_mp_request_fds(name, eth_dev))
+			return -1;
+
+		rte_eth_dev_probing_finish(eth_dev);
+		return 0;
 	}
 
 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
@@ -1836,6 +2003,17 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		return -1;
 	}
 
+	/* Register IPC callback which shares xsk fds from primary to secondary */
+	if (!afxdp_dev_count) {
+		ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY, afxdp_mp_send_fds);
+		if (ret < 0) {
+			AF_XDP_LOG(ERR, "%s: Failed to register IPC callback: %s",
+				   name, strerror(rte_errno));
+			return -1;
+		}
+	}
+	afxdp_dev_count++;
+
 	rte_eth_dev_probing_finish(eth_dev);
 
 	return 0;
@@ -1858,6 +2036,10 @@ rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
 		return 0;
 
 	eth_dev_close(eth_dev);
+	rte_free(eth_dev->process_private);
+	if (afxdp_dev_count == 1)
+		rte_mp_action_unregister(ETH_AF_XDP_MP_KEY);
+	afxdp_dev_count--;
 	rte_eth_dev_release_port(eth_dev);
 
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v2] net/af_xdp: re-enable secondary process support
  2022-02-04 12:54   ` [PATCH v2] " Ciara Loftus
@ 2022-02-04 14:18     ` Ferruh Yigit
  2022-02-07  7:49       ` Loftus, Ciara
  2022-02-08 13:48     ` [PATCH v3] " Ciara Loftus
  1 sibling, 1 reply; 36+ messages in thread
From: Ferruh Yigit @ 2022-02-04 14:18 UTC (permalink / raw)
  To: Ciara Loftus, dev; +Cc: stephen

On 2/4/2022 12:54 PM, Ciara Loftus wrote:
> Secondary process support had been disabled for the AF_XDP PMD
> because there was no logic in place to share the AF_XDP socket
> file descriptors between the processes. This commit introduces
> this logic using the IPC APIs.
> 
> Since AF_XDP rings are single-producer single-consumer, rx/tx
> in the secondary process is disabled. However other operations
> including retrieval of stats are permitted.
> 
> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> 
> ---
> v1 -> v2:
> * Rebase to next-net
> 
> RFC -> v1:
> * Added newline to af_xdp.rst
> * Fixed spelling errors
> * Fixed potential NULL dereference in init_internals
> * Fixed potential free of address-of expression in afxdp_mp_request_fds
> ---
>   doc/guides/nics/af_xdp.rst             |   9 ++
>   doc/guides/nics/features/af_xdp.ini    |   1 +
>   doc/guides/rel_notes/release_22_03.rst |   1 +
>   drivers/net/af_xdp/rte_eth_af_xdp.c    | 210 +++++++++++++++++++++++--
>   4 files changed, 207 insertions(+), 14 deletions(-)
> 
> diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
> index db02ea1984..eb4eab28a8 100644
> --- a/doc/guides/nics/af_xdp.rst
> +++ b/doc/guides/nics/af_xdp.rst
> @@ -141,4 +141,13 @@ Limitations
>     NAPI context from a watchdog timer instead of from softirqs. More information
>     on this feature can be found at [1].
>   
> +- **Secondary Processes**
> +
> +  Rx and Tx are not supported for secondary processes due to the single-producer
> +  single-consumer nature of the AF_XDP rings. However other operations including
> +  statistics retrieval are permitted.

Hi Ciara,

Isn't this limitation same for all PMDs, like not both primary & secondary can Rx/Tx
from same queue at the same time.
But primary can initiallize the PMD and secondary can do the datapath,
or isn't af_xdp supports multiple queue, if so some queues can be used by
primary and some by secondary for datapath.

Is there anyhing special for af_xdp that prevents it?

> +  The maximum number of queues permitted for PMDs operating in this model is 8
> +  as this is the maximum number of fds that can be sent through the IPC APIs as
> +  defined by RTE_MP_MAX_FD_NUM.
> +
>     [1] https://lwn.net/Articles/837010/
> diff --git a/doc/guides/nics/features/af_xdp.ini b/doc/guides/nics/features/af_xdp.ini
> index 54b738e616..8e7e075aaf 100644
> --- a/doc/guides/nics/features/af_xdp.ini
> +++ b/doc/guides/nics/features/af_xdp.ini
> @@ -9,4 +9,5 @@ Power mgmt address monitor = Y
>   MTU update           = Y
>   Promiscuous mode     = Y
>   Stats per queue      = Y
> +Multiprocess aware   = Y
>   x86-64               = Y
> diff --git a/doc/guides/rel_notes/release_22_03.rst b/doc/guides/rel_notes/release_22_03.rst
> index bf2e3f78a9..dfd2cbbccf 100644
> --- a/doc/guides/rel_notes/release_22_03.rst
> +++ b/doc/guides/rel_notes/release_22_03.rst
> @@ -58,6 +58,7 @@ New Features
>   * **Updated AF_XDP PMD**
>   
>     * Added support for libxdp >=v1.2.2.
> +  * Re-enabled secondary process support. RX/TX is not supported.
>   
>   * **Updated Cisco enic driver.**
>   
> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
> index 1b6192fa44..407f6d8dbe 100644
> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
> @@ -80,6 +80,18 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE);
>   
>   #define ETH_AF_XDP_ETH_OVERHEAD		(RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN)
>   
> +#define ETH_AF_XDP_MP_KEY "afxdp_mp_send_fds"
> +
> +static int afxdp_dev_count;
> +
> +/* Message header to synchronize fds via IPC */
> +struct ipc_hdr {
> +	char port_name[RTE_DEV_NAME_MAX_LEN];
> +	/* The file descriptors are in the dedicated part
> +	 * of the Unix message to be translated by the kernel.
> +	 */
> +};
> +
>   struct xsk_umem_info {
>   	struct xsk_umem *umem;
>   	struct rte_ring *buf_ring;
> @@ -147,6 +159,10 @@ struct pmd_internals {
>   	struct pkt_tx_queue *tx_queues;
>   };
>   
> +struct pmd_process_private {
> +	int rxq_xsk_fds[RTE_MAX_QUEUES_PER_PORT];
> +};
> +
>   #define ETH_AF_XDP_IFACE_ARG			"iface"
>   #define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
>   #define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
> @@ -795,11 +811,12 @@ static int
>   eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
>   {
>   	struct pmd_internals *internals = dev->data->dev_private;
> +	struct pmd_process_private *process_private = dev->process_private;
>   	struct xdp_statistics xdp_stats;
>   	struct pkt_rx_queue *rxq;
>   	struct pkt_tx_queue *txq;
>   	socklen_t optlen;
> -	int i, ret;
> +	int i, ret, fd;
>   
>   	for (i = 0; i < dev->data->nb_rx_queues; i++) {
>   		optlen = sizeof(struct xdp_statistics);
> @@ -815,8 +832,9 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
>   		stats->ibytes += stats->q_ibytes[i];
>   		stats->imissed += rxq->stats.rx_dropped;
>   		stats->oerrors += txq->stats.tx_dropped;
> -		ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP,
> -				XDP_STATISTICS, &xdp_stats, &optlen);
> +		fd = process_private->rxq_xsk_fds[i];
> +		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
> +					   &xdp_stats, &optlen) : -1;
>   		if (ret != 0) {
>   			AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n");
>   			return -1;
> @@ -883,8 +901,10 @@ eth_dev_close(struct rte_eth_dev *dev)
>   	struct pkt_rx_queue *rxq;
>   	int i;
>   
> -	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> +		rte_free(dev->process_private);
>   		return 0;
> +	}
>   
>   	AF_XDP_LOG(INFO, "Closing AF_XDP ethdev on numa socket %u\n",
>   		rte_socket_id());
> @@ -1349,6 +1369,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
>   		   struct rte_mempool *mb_pool)
>   {
>   	struct pmd_internals *internals = dev->data->dev_private;
> +	struct pmd_process_private *process_private = dev->process_private;
>   	struct pkt_rx_queue *rxq;
>   	int ret;
>   
> @@ -1387,6 +1408,8 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
>   	rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
>   	rxq->fds[0].events = POLLIN;
>   
> +	process_private->rxq_xsk_fds[rx_queue_id] = rxq->fds[0].fd;
> +
>   	dev->data->rx_queues[rx_queue_id] = rxq;
>   	return 0;
>   
> @@ -1688,6 +1711,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
>   {
>   	const char *name = rte_vdev_device_name(dev);
>   	const unsigned int numa_node = dev->device.numa_node;
> +	struct pmd_process_private *process_private;
>   	struct pmd_internals *internals;
>   	struct rte_eth_dev *eth_dev;
>   	int ret;
> @@ -1753,9 +1777,17 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
>   	if (ret)
>   		goto err_free_tx;
>   
> +	process_private = (struct pmd_process_private *)
> +		rte_zmalloc_socket(name, sizeof(struct pmd_process_private),
> +				   RTE_CACHE_LINE_SIZE, numa_node);
> +	if (process_private == NULL) {
> +		AF_XDP_LOG(ERR, "Failed to alloc memory for process private\n");
> +		goto err_free_tx;
> +	}

Need to free 'process_private' in the PMD, in 'close()' and 'remove()' paths.

> +
>   	eth_dev = rte_eth_vdev_allocate(dev, 0);
>   	if (eth_dev == NULL)
> -		goto err_free_tx;
> +		goto err_free_pp;
>   
>   	eth_dev->data->dev_private = internals;
>   	eth_dev->data->dev_link = pmd_link;
> @@ -1764,6 +1796,10 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
>   	eth_dev->dev_ops = &ops;
>   	eth_dev->rx_pkt_burst = eth_af_xdp_rx;
>   	eth_dev->tx_pkt_burst = eth_af_xdp_tx;
> +	eth_dev->process_private = process_private;
> +
> +	for (i = 0; i < queue_cnt; i++)
> +		process_private->rxq_xsk_fds[i] = -1;
>   
>   #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
>   	AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
> @@ -1771,6 +1807,8 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
>   
>   	return eth_dev;
>   
> +err_free_pp:
> +	rte_free(process_private);
>   err_free_tx:
>   	rte_free(internals->tx_queues);
>   err_free_rx:
> @@ -1780,6 +1818,115 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
>   	return NULL;
>   }
>   
> +/* Secondary process requests rxq fds from primary. */
> +static int
> +afxdp_mp_request_fds(const char *name, struct rte_eth_dev *dev)
> +{
> +	struct pmd_process_private *process_private = dev->process_private;
> +	struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
> +	struct rte_mp_msg request, *reply;
> +	struct rte_mp_reply replies;
> +	struct ipc_hdr *request_param = (struct ipc_hdr *)request.param;
> +	int i, ret;
> +
> +	/* Prepare the request */
> +	memset(&request, 0, sizeof(request));
> +	strlcpy(request.name, ETH_AF_XDP_MP_KEY, sizeof(request.name));
> +	strlcpy(request_param->port_name, name,
> +		sizeof(request_param->port_name));
> +	request.len_param = sizeof(*request_param);
> +
> +	/* Send the request and receive the reply */
> +	AF_XDP_LOG(DEBUG, "Sending IPC request for %s\n", name);
> +	ret = rte_mp_request_sync(&request, &replies, &timeout);
> +	if (ret < 0 || replies.nb_received != 1) {
> +		AF_XDP_LOG(ERR, "Failed to request fds from primary: %d",
> +			   rte_errno);
> +		return -1;
> +	}
> +	reply = replies.msgs;
> +	AF_XDP_LOG(DEBUG, "Received IPC reply for %s\n", name);

I think message can mention "multi-process IPC" for clarification.

> +	if (dev->data->nb_rx_queues != reply->num_fds) {
> +		AF_XDP_LOG(ERR, "Incorrect number of fds received: %d != %d\n",
> +			   reply->num_fds, dev->data->nb_rx_queues);
> +		return -EINVAL;
> +	}
> +
> +	for (i = 0; i < reply->num_fds; i++)
> +		process_private->rxq_xsk_fds[i] = reply->fds[i];
> +
> +	free(reply);
> +	return 0;
> +}
> +
> +/* Primary process sends rxq fds to secondary. */
> +static int
> +afxdp_mp_send_fds(const struct rte_mp_msg *request, const void *peer)
> +{
> +	struct rte_eth_dev *dev;
> +	struct pmd_process_private *process_private;
> +	struct rte_mp_msg reply;
> +	const struct ipc_hdr *request_param =
> +		(const struct ipc_hdr *)request->param;
> +	struct ipc_hdr *reply_param =
> +		(struct ipc_hdr *)reply.param;
> +	const char *request_name = request_param->port_name;
> +	uint16_t port_id;
> +	int i, ret;
> +
> +	AF_XDP_LOG(DEBUG, "Received IPC request for %s\n", request_name);
> +
> +	/* Find the requested port */
> +	ret = rte_eth_dev_get_port_by_name(request_name, &port_id);
> +	if (ret) {
> +		AF_XDP_LOG(ERR, "Failed to get port id for %s\n", request_name);
> +		return -1;
> +	}
> +	dev = &rte_eth_devices[port_id];

Better to not access the global array, there is a new API and a cleanup
already done [1] in other PMDs, can you please apply the same here.

[1]
https://patches.dpdk.org/project/dpdk/patch/20220203082412.79028-1-kumaraparamesh92@gmail.com/

> +	process_private = dev->process_private;
> +
> +	/* Populate the reply with the xsk fd for each queue */
> +	reply.num_fds = 0;
> +	if (dev->data->nb_rx_queues > RTE_MP_MAX_FD_NUM) {
> +		AF_XDP_LOG(ERR, "Number of rx queues (%d) exceeds max number of fds (%d)\n",
> +			   dev->data->nb_rx_queues, RTE_MP_MAX_FD_NUM);
> +		return -EINVAL;
> +	}
> +
> +	for (i = 0; i < dev->data->nb_rx_queues; i++)
> +		reply.fds[reply.num_fds++] = process_private->rxq_xsk_fds[i];
> +
> +	/* Send the reply */
> +	strlcpy(reply.name, request->name, sizeof(reply.name));
> +	strlcpy(reply_param->port_name, request_name,
> +		sizeof(reply_param->port_name));
> +	reply.len_param = sizeof(*reply_param);
> +	AF_XDP_LOG(DEBUG, "Sending IPC reply for %s\n", reply_param->port_name);
> +	if (rte_mp_reply(&reply, peer) < 0) {
> +		AF_XDP_LOG(ERR, "Failed to reply to IPC request\n");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +/* Secondary process rx function. RX is disabled because rings are SPSC */
> +static uint16_t
> +eth_af_xdp_rx_noop(void *queue __rte_unused,
> +		struct rte_mbuf **bufs __rte_unused,
> +		uint16_t nb_pkts __rte_unused)
> +{
> +	return 0;
> +}
> +
> +/* Secondary process tx function. TX is disabled because rings are SPSC */
> +static uint16_t
> +eth_af_xdp_tx_noop(void *queue __rte_unused,
> +			struct rte_mbuf **bufs __rte_unused,
> +			uint16_t nb_pkts __rte_unused)
> +{
> +	return 0;
> +}
> +

Now there are multiple PMDs using noop/dummy Rx/Tx burst functions,
what do you think to add static inline functions in the 'ethdev_driver.h'
(and clean existing drivers to use it) with a separate patch, later in this
patch use those functions?


>   static int
>   rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
>   {
> @@ -1789,19 +1936,39 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
>   	int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
>   	int shared_umem = 0;
>   	char prog_path[PATH_MAX] = {'\0'};
> -	int busy_budget = -1;
> +	int busy_budget = -1, ret;
>   	struct rte_eth_dev *eth_dev = NULL;
> -	const char *name;
> +	const char *name = rte_vdev_device_name(dev);
>   
> -	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
> -		rte_vdev_device_name(dev));
> +	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n", name);
>   
> -	name = rte_vdev_device_name(dev);
>   	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> -		AF_XDP_LOG(ERR, "Failed to probe %s. "
> -				"AF_XDP PMD does not support secondary processes.\n",
> -				name);
> -		return -ENOTSUP;
> +		eth_dev = rte_eth_dev_attach_secondary(name);
> +		if (eth_dev == NULL) {
> +			AF_XDP_LOG(ERR, "Failed to probe %s\n", name);
> +			return -EINVAL;
> +		}
> +		eth_dev->dev_ops = &ops;
> +		eth_dev->device = &dev->device;
> +		eth_dev->rx_pkt_burst = eth_af_xdp_rx_noop;
> +		eth_dev->tx_pkt_burst = eth_af_xdp_tx_noop;
> +		eth_dev->process_private = (struct pmd_process_private *)
> +			rte_zmalloc_socket(name,
> +					   sizeof(struct pmd_process_private),
> +					   RTE_CACHE_LINE_SIZE,
> +					   eth_dev->device->numa_node);
> +		if (eth_dev->process_private == NULL) {
> +			AF_XDP_LOG(ERR,
> +				"Failed to alloc memory for process private\n");
> +			return -ENOMEM;
> +		}
> +
> +		/* Obtain the xsk fds from the primary process. */
> +		if (afxdp_mp_request_fds(name, eth_dev))
> +			return -1;
> +
> +		rte_eth_dev_probing_finish(eth_dev);
> +		return 0;
>   	}
>   
>   	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
> @@ -1836,6 +2003,17 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
>   		return -1;
>   	}
>   
> +	/* Register IPC callback which shares xsk fds from primary to secondary */
> +	if (!afxdp_dev_count) {
> +		ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY, afxdp_mp_send_fds);
> +		if (ret < 0) {
> +			AF_XDP_LOG(ERR, "%s: Failed to register IPC callback: %s",
> +				   name, strerror(rte_errno));
> +			return -1;
> +		}
> +	}
> +	afxdp_dev_count++;
> +
>   	rte_eth_dev_probing_finish(eth_dev);
>   
>   	return 0;
> @@ -1858,6 +2036,10 @@ rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
>   		return 0;
>   
>   	eth_dev_close(eth_dev);
> +	rte_free(eth_dev->process_private);
> +	if (afxdp_dev_count == 1)
> +		rte_mp_action_unregister(ETH_AF_XDP_MP_KEY);
> +	afxdp_dev_count--;
>   	rte_eth_dev_release_port(eth_dev);
>   
>   


^ permalink raw reply	[flat|nested] 36+ messages in thread

* RE: [PATCH v2] net/af_xdp: re-enable secondary process support
  2022-02-04 14:18     ` Ferruh Yigit
@ 2022-02-07  7:49       ` Loftus, Ciara
  2022-02-07 10:27         ` Ferruh Yigit
  0 siblings, 1 reply; 36+ messages in thread
From: Loftus, Ciara @ 2022-02-07  7:49 UTC (permalink / raw)
  To: Yigit, Ferruh, dev; +Cc: stephen

> 
> On 2/4/2022 12:54 PM, Ciara Loftus wrote:
> > Secondary process support had been disabled for the AF_XDP PMD
> > because there was no logic in place to share the AF_XDP socket
> > file descriptors between the processes. This commit introduces
> > this logic using the IPC APIs.
> >
> > Since AF_XDP rings are single-producer single-consumer, rx/tx
> > in the secondary process is disabled. However other operations
> > including retrieval of stats are permitted.
> >
> > Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> >
> > ---
> > v1 -> v2:
> > * Rebase to next-net
> >
> > RFC -> v1:
> > * Added newline to af_xdp.rst
> > * Fixed spelling errors
> > * Fixed potential NULL dereference in init_internals
> > * Fixed potential free of address-of expression in afxdp_mp_request_fds
> > ---
> >   doc/guides/nics/af_xdp.rst             |   9 ++
> >   doc/guides/nics/features/af_xdp.ini    |   1 +
> >   doc/guides/rel_notes/release_22_03.rst |   1 +
> >   drivers/net/af_xdp/rte_eth_af_xdp.c    | 210
> +++++++++++++++++++++++--
> >   4 files changed, 207 insertions(+), 14 deletions(-)
> >
> > diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
> > index db02ea1984..eb4eab28a8 100644
> > --- a/doc/guides/nics/af_xdp.rst
> > +++ b/doc/guides/nics/af_xdp.rst
> > @@ -141,4 +141,13 @@ Limitations
> >     NAPI context from a watchdog timer instead of from softirqs. More
> information
> >     on this feature can be found at [1].
> >
> > +- **Secondary Processes**
> > +
> > +  Rx and Tx are not supported for secondary processes due to the single-
> producer
> > +  single-consumer nature of the AF_XDP rings. However other operations
> including
> > +  statistics retrieval are permitted.
> 
> Hi Ciara,
> 
> Isn't this limitation same for all PMDs, like not both primary & secondary can
> Rx/Tx
> from same queue at the same time.
> But primary can initiallize the PMD and secondary can do the datapath,
> or isn't af_xdp supports multiple queue, if so some queues can be used by
> primary and some by secondary for datapath.
> 
> Is there anyhing special for af_xdp that prevents it?

Hi Ferruh,

Thanks for the review.
Each queue of the PMD corresponds to a new AF_XDP socket.
Each socket has an RX and TX ring that is mmapped from the kernel to userspace and this mapping is only valid for the primary process.
I did not figure out a way to share that mapping with the secondary process successfully. Can you think of anything that might work?

> 
> > +  The maximum number of queues permitted for PMDs operating in this
> model is 8
> > +  as this is the maximum number of fds that can be sent through the IPC
> APIs as
> > +  defined by RTE_MP_MAX_FD_NUM.
> > +
> >     [1] https://lwn.net/Articles/837010/
> > diff --git a/doc/guides/nics/features/af_xdp.ini
> b/doc/guides/nics/features/af_xdp.ini
> > index 54b738e616..8e7e075aaf 100644
> > --- a/doc/guides/nics/features/af_xdp.ini
> > +++ b/doc/guides/nics/features/af_xdp.ini
> > @@ -9,4 +9,5 @@ Power mgmt address monitor = Y
> >   MTU update           = Y
> >   Promiscuous mode     = Y
> >   Stats per queue      = Y
> > +Multiprocess aware   = Y
> >   x86-64               = Y
> > diff --git a/doc/guides/rel_notes/release_22_03.rst
> b/doc/guides/rel_notes/release_22_03.rst
> > index bf2e3f78a9..dfd2cbbccf 100644
> > --- a/doc/guides/rel_notes/release_22_03.rst
> > +++ b/doc/guides/rel_notes/release_22_03.rst
> > @@ -58,6 +58,7 @@ New Features
> >   * **Updated AF_XDP PMD**
> >
> >     * Added support for libxdp >=v1.2.2.
> > +  * Re-enabled secondary process support. RX/TX is not supported.
> >
> >   * **Updated Cisco enic driver.**
> >
> > diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c
> b/drivers/net/af_xdp/rte_eth_af_xdp.c
> > index 1b6192fa44..407f6d8dbe 100644
> > --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> > +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
> > @@ -80,6 +80,18 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype,
> NOTICE);
> >
> >   #define ETH_AF_XDP_ETH_OVERHEAD
> 	(RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN)
> >
> > +#define ETH_AF_XDP_MP_KEY "afxdp_mp_send_fds"
> > +
> > +static int afxdp_dev_count;
> > +
> > +/* Message header to synchronize fds via IPC */
> > +struct ipc_hdr {
> > +	char port_name[RTE_DEV_NAME_MAX_LEN];
> > +	/* The file descriptors are in the dedicated part
> > +	 * of the Unix message to be translated by the kernel.
> > +	 */
> > +};
> > +
> >   struct xsk_umem_info {
> >   	struct xsk_umem *umem;
> >   	struct rte_ring *buf_ring;
> > @@ -147,6 +159,10 @@ struct pmd_internals {
> >   	struct pkt_tx_queue *tx_queues;
> >   };
> >
> > +struct pmd_process_private {
> > +	int rxq_xsk_fds[RTE_MAX_QUEUES_PER_PORT];
> > +};
> > +
> >   #define ETH_AF_XDP_IFACE_ARG			"iface"
> >   #define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
> >   #define ETH_AF_XDP_QUEUE_COUNT_ARG
> 	"queue_count"
> > @@ -795,11 +811,12 @@ static int
> >   eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
> >   {
> >   	struct pmd_internals *internals = dev->data->dev_private;
> > +	struct pmd_process_private *process_private = dev-
> >process_private;
> >   	struct xdp_statistics xdp_stats;
> >   	struct pkt_rx_queue *rxq;
> >   	struct pkt_tx_queue *txq;
> >   	socklen_t optlen;
> > -	int i, ret;
> > +	int i, ret, fd;
> >
> >   	for (i = 0; i < dev->data->nb_rx_queues; i++) {
> >   		optlen = sizeof(struct xdp_statistics);
> > @@ -815,8 +832,9 @@ eth_stats_get(struct rte_eth_dev *dev, struct
> rte_eth_stats *stats)
> >   		stats->ibytes += stats->q_ibytes[i];
> >   		stats->imissed += rxq->stats.rx_dropped;
> >   		stats->oerrors += txq->stats.tx_dropped;
> > -		ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP,
> > -				XDP_STATISTICS, &xdp_stats, &optlen);
> > +		fd = process_private->rxq_xsk_fds[i];
> > +		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
> > +					   &xdp_stats, &optlen) : -1;
> >   		if (ret != 0) {
> >   			AF_XDP_LOG(ERR, "getsockopt() failed for
> XDP_STATISTICS.\n");
> >   			return -1;
> > @@ -883,8 +901,10 @@ eth_dev_close(struct rte_eth_dev *dev)
> >   	struct pkt_rx_queue *rxq;
> >   	int i;
> >
> > -	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> > +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> > +		rte_free(dev->process_private);
> >   		return 0;
> > +	}
> >
> >   	AF_XDP_LOG(INFO, "Closing AF_XDP ethdev on numa socket %u\n",
> >   		rte_socket_id());
> > @@ -1349,6 +1369,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
> >   		   struct rte_mempool *mb_pool)
> >   {
> >   	struct pmd_internals *internals = dev->data->dev_private;
> > +	struct pmd_process_private *process_private = dev-
> >process_private;
> >   	struct pkt_rx_queue *rxq;
> >   	int ret;
> >
> > @@ -1387,6 +1408,8 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
> >   	rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
> >   	rxq->fds[0].events = POLLIN;
> >
> > +	process_private->rxq_xsk_fds[rx_queue_id] = rxq->fds[0].fd;
> > +
> >   	dev->data->rx_queues[rx_queue_id] = rxq;
> >   	return 0;
> >
> > @@ -1688,6 +1711,7 @@ init_internals(struct rte_vdev_device *dev, const
> char *if_name,
> >   {
> >   	const char *name = rte_vdev_device_name(dev);
> >   	const unsigned int numa_node = dev->device.numa_node;
> > +	struct pmd_process_private *process_private;
> >   	struct pmd_internals *internals;
> >   	struct rte_eth_dev *eth_dev;
> >   	int ret;
> > @@ -1753,9 +1777,17 @@ init_internals(struct rte_vdev_device *dev,
> const char *if_name,
> >   	if (ret)
> >   		goto err_free_tx;
> >
> > +	process_private = (struct pmd_process_private *)
> > +		rte_zmalloc_socket(name, sizeof(struct
> pmd_process_private),
> > +				   RTE_CACHE_LINE_SIZE, numa_node);
> > +	if (process_private == NULL) {
> > +		AF_XDP_LOG(ERR, "Failed to alloc memory for process
> private\n");
> > +		goto err_free_tx;
> > +	}
> 
> Need to free 'process_private' in the PMD, in 'close()' and 'remove()' paths.

+1

> 
> > +
> >   	eth_dev = rte_eth_vdev_allocate(dev, 0);
> >   	if (eth_dev == NULL)
> > -		goto err_free_tx;
> > +		goto err_free_pp;
> >
> >   	eth_dev->data->dev_private = internals;
> >   	eth_dev->data->dev_link = pmd_link;
> > @@ -1764,6 +1796,10 @@ init_internals(struct rte_vdev_device *dev,
> const char *if_name,
> >   	eth_dev->dev_ops = &ops;
> >   	eth_dev->rx_pkt_burst = eth_af_xdp_rx;
> >   	eth_dev->tx_pkt_burst = eth_af_xdp_tx;
> > +	eth_dev->process_private = process_private;
> > +
> > +	for (i = 0; i < queue_cnt; i++)
> > +		process_private->rxq_xsk_fds[i] = -1;
> >
> >   #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
> >   	AF_XDP_LOG(INFO, "Zero copy between umem and mbuf
> enabled.\n");
> > @@ -1771,6 +1807,8 @@ init_internals(struct rte_vdev_device *dev, const
> char *if_name,
> >
> >   	return eth_dev;
> >
> > +err_free_pp:
> > +	rte_free(process_private);
> >   err_free_tx:
> >   	rte_free(internals->tx_queues);
> >   err_free_rx:
> > @@ -1780,6 +1818,115 @@ init_internals(struct rte_vdev_device *dev,
> const char *if_name,
> >   	return NULL;
> >   }
> >
> > +/* Secondary process requests rxq fds from primary. */
> > +static int
> > +afxdp_mp_request_fds(const char *name, struct rte_eth_dev *dev)
> > +{
> > +	struct pmd_process_private *process_private = dev-
> >process_private;
> > +	struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
> > +	struct rte_mp_msg request, *reply;
> > +	struct rte_mp_reply replies;
> > +	struct ipc_hdr *request_param = (struct ipc_hdr *)request.param;
> > +	int i, ret;
> > +
> > +	/* Prepare the request */
> > +	memset(&request, 0, sizeof(request));
> > +	strlcpy(request.name, ETH_AF_XDP_MP_KEY,
> sizeof(request.name));
> > +	strlcpy(request_param->port_name, name,
> > +		sizeof(request_param->port_name));
> > +	request.len_param = sizeof(*request_param);
> > +
> > +	/* Send the request and receive the reply */
> > +	AF_XDP_LOG(DEBUG, "Sending IPC request for %s\n", name);
> > +	ret = rte_mp_request_sync(&request, &replies, &timeout);
> > +	if (ret < 0 || replies.nb_received != 1) {
> > +		AF_XDP_LOG(ERR, "Failed to request fds from primary: %d",
> > +			   rte_errno);
> > +		return -1;
> > +	}
> > +	reply = replies.msgs;
> > +	AF_XDP_LOG(DEBUG, "Received IPC reply for %s\n", name);
> 
> I think message can mention "multi-process IPC" for clarification.

+1

> 
> > +	if (dev->data->nb_rx_queues != reply->num_fds) {
> > +		AF_XDP_LOG(ERR, "Incorrect number of fds received: %d !=
> %d\n",
> > +			   reply->num_fds, dev->data->nb_rx_queues);
> > +		return -EINVAL;
> > +	}
> > +
> > +	for (i = 0; i < reply->num_fds; i++)
> > +		process_private->rxq_xsk_fds[i] = reply->fds[i];
> > +
> > +	free(reply);
> > +	return 0;
> > +}
> > +
> > +/* Primary process sends rxq fds to secondary. */
> > +static int
> > +afxdp_mp_send_fds(const struct rte_mp_msg *request, const void
> *peer)
> > +{
> > +	struct rte_eth_dev *dev;
> > +	struct pmd_process_private *process_private;
> > +	struct rte_mp_msg reply;
> > +	const struct ipc_hdr *request_param =
> > +		(const struct ipc_hdr *)request->param;
> > +	struct ipc_hdr *reply_param =
> > +		(struct ipc_hdr *)reply.param;
> > +	const char *request_name = request_param->port_name;
> > +	uint16_t port_id;
> > +	int i, ret;
> > +
> > +	AF_XDP_LOG(DEBUG, "Received IPC request for %s\n",
> request_name);
> > +
> > +	/* Find the requested port */
> > +	ret = rte_eth_dev_get_port_by_name(request_name, &port_id);
> > +	if (ret) {
> > +		AF_XDP_LOG(ERR, "Failed to get port id for %s\n",
> request_name);
> > +		return -1;
> > +	}
> > +	dev = &rte_eth_devices[port_id];
> 
> Better to not access the global array, there is a new API and a cleanup
> already done [1] in other PMDs, can you please apply the same here.
> 
> [1]
> https://patches.dpdk.org/project/dpdk/patch/20220203082412.79028-1-
> kumaraparamesh92@gmail.com/

Thanks for sharing, I wasn't aware of this API. I'll add it in the v3.

> 
> > +	process_private = dev->process_private;
> > +
> > +	/* Populate the reply with the xsk fd for each queue */
> > +	reply.num_fds = 0;
> > +	if (dev->data->nb_rx_queues > RTE_MP_MAX_FD_NUM) {
> > +		AF_XDP_LOG(ERR, "Number of rx queues (%d) exceeds max
> number of fds (%d)\n",
> > +			   dev->data->nb_rx_queues,
> RTE_MP_MAX_FD_NUM);
> > +		return -EINVAL;
> > +	}
> > +
> > +	for (i = 0; i < dev->data->nb_rx_queues; i++)
> > +		reply.fds[reply.num_fds++] = process_private-
> >rxq_xsk_fds[i];
> > +
> > +	/* Send the reply */
> > +	strlcpy(reply.name, request->name, sizeof(reply.name));
> > +	strlcpy(reply_param->port_name, request_name,
> > +		sizeof(reply_param->port_name));
> > +	reply.len_param = sizeof(*reply_param);
> > +	AF_XDP_LOG(DEBUG, "Sending IPC reply for %s\n", reply_param-
> >port_name);
> > +	if (rte_mp_reply(&reply, peer) < 0) {
> > +		AF_XDP_LOG(ERR, "Failed to reply to IPC request\n");
> > +		return -1;
> > +	}
> > +	return 0;
> > +}
> > +
> > +/* Secondary process rx function. RX is disabled because rings are SPSC */
> > +static uint16_t
> > +eth_af_xdp_rx_noop(void *queue __rte_unused,
> > +		struct rte_mbuf **bufs __rte_unused,
> > +		uint16_t nb_pkts __rte_unused)
> > +{
> > +	return 0;
> > +}
> > +
> > +/* Secondary process tx function. TX is disabled because rings are SPSC */
> > +static uint16_t
> > +eth_af_xdp_tx_noop(void *queue __rte_unused,
> > +			struct rte_mbuf **bufs __rte_unused,
> > +			uint16_t nb_pkts __rte_unused)
> > +{
> > +	return 0;
> > +}
> > +
> 
> Now there are multiple PMDs using noop/dummy Rx/Tx burst functions,
> what do you think to add static inline functions in the 'ethdev_driver.h'
> (and clean existing drivers to use it) with a separate patch, later in this
> patch use those functions?

+1

> 
> 
> >   static int
> >   rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
> >   {
> > @@ -1789,19 +1936,39 @@ rte_pmd_af_xdp_probe(struct
> rte_vdev_device *dev)
> >   	int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
> >   	int shared_umem = 0;
> >   	char prog_path[PATH_MAX] = {'\0'};
> > -	int busy_budget = -1;
> > +	int busy_budget = -1, ret;
> >   	struct rte_eth_dev *eth_dev = NULL;
> > -	const char *name;
> > +	const char *name = rte_vdev_device_name(dev);
> >
> > -	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
> > -		rte_vdev_device_name(dev));
> > +	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n", name);
> >
> > -	name = rte_vdev_device_name(dev);
> >   	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> > -		AF_XDP_LOG(ERR, "Failed to probe %s. "
> > -				"AF_XDP PMD does not support secondary
> processes.\n",
> > -				name);
> > -		return -ENOTSUP;
> > +		eth_dev = rte_eth_dev_attach_secondary(name);
> > +		if (eth_dev == NULL) {
> > +			AF_XDP_LOG(ERR, "Failed to probe %s\n", name);
> > +			return -EINVAL;
> > +		}
> > +		eth_dev->dev_ops = &ops;
> > +		eth_dev->device = &dev->device;
> > +		eth_dev->rx_pkt_burst = eth_af_xdp_rx_noop;
> > +		eth_dev->tx_pkt_burst = eth_af_xdp_tx_noop;
> > +		eth_dev->process_private = (struct pmd_process_private *)
> > +			rte_zmalloc_socket(name,
> > +					   sizeof(struct
> pmd_process_private),
> > +					   RTE_CACHE_LINE_SIZE,
> > +					   eth_dev->device->numa_node);
> > +		if (eth_dev->process_private == NULL) {
> > +			AF_XDP_LOG(ERR,
> > +				"Failed to alloc memory for process
> private\n");
> > +			return -ENOMEM;
> > +		}
> > +
> > +		/* Obtain the xsk fds from the primary process. */
> > +		if (afxdp_mp_request_fds(name, eth_dev))
> > +			return -1;
> > +
> > +		rte_eth_dev_probing_finish(eth_dev);
> > +		return 0;
> >   	}
> >
> >   	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
> valid_arguments);
> > @@ -1836,6 +2003,17 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device
> *dev)
> >   		return -1;
> >   	}
> >
> > +	/* Register IPC callback which shares xsk fds from primary to
> secondary */
> > +	if (!afxdp_dev_count) {
> > +		ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY,
> afxdp_mp_send_fds);
> > +		if (ret < 0) {
> > +			AF_XDP_LOG(ERR, "%s: Failed to register IPC
> callback: %s",
> > +				   name, strerror(rte_errno));
> > +			return -1;
> > +		}
> > +	}
> > +	afxdp_dev_count++;
> > +
> >   	rte_eth_dev_probing_finish(eth_dev);
> >
> >   	return 0;
> > @@ -1858,6 +2036,10 @@ rte_pmd_af_xdp_remove(struct
> rte_vdev_device *dev)
> >   		return 0;
> >
> >   	eth_dev_close(eth_dev);
> > +	rte_free(eth_dev->process_private);
> > +	if (afxdp_dev_count == 1)
> > +		rte_mp_action_unregister(ETH_AF_XDP_MP_KEY);
> > +	afxdp_dev_count--;
> >   	rte_eth_dev_release_port(eth_dev);
> >
> >


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v2] net/af_xdp: re-enable secondary process support
  2022-02-07  7:49       ` Loftus, Ciara
@ 2022-02-07 10:27         ` Ferruh Yigit
  2022-02-07 11:39           ` Loftus, Ciara
  0 siblings, 1 reply; 36+ messages in thread
From: Ferruh Yigit @ 2022-02-07 10:27 UTC (permalink / raw)
  To: Loftus, Ciara, dev; +Cc: stephen, Anatoly Burakov

On 2/7/2022 7:49 AM, Loftus, Ciara wrote:
>>
>> On 2/4/2022 12:54 PM, Ciara Loftus wrote:
>>> Secondary process support had been disabled for the AF_XDP PMD
>>> because there was no logic in place to share the AF_XDP socket
>>> file descriptors between the processes. This commit introduces
>>> this logic using the IPC APIs.
>>>
>>> Since AF_XDP rings are single-producer single-consumer, rx/tx
>>> in the secondary process is disabled. However other operations
>>> including retrieval of stats are permitted.
>>>
>>> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
>>>
>>> ---
>>> v1 -> v2:
>>> * Rebase to next-net
>>>
>>> RFC -> v1:
>>> * Added newline to af_xdp.rst
>>> * Fixed spelling errors
>>> * Fixed potential NULL dereference in init_internals
>>> * Fixed potential free of address-of expression in afxdp_mp_request_fds
>>> ---
>>>    doc/guides/nics/af_xdp.rst             |   9 ++
>>>    doc/guides/nics/features/af_xdp.ini    |   1 +
>>>    doc/guides/rel_notes/release_22_03.rst |   1 +
>>>    drivers/net/af_xdp/rte_eth_af_xdp.c    | 210
>> +++++++++++++++++++++++--
>>>    4 files changed, 207 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
>>> index db02ea1984..eb4eab28a8 100644
>>> --- a/doc/guides/nics/af_xdp.rst
>>> +++ b/doc/guides/nics/af_xdp.rst
>>> @@ -141,4 +141,13 @@ Limitations
>>>      NAPI context from a watchdog timer instead of from softirqs. More
>> information
>>>      on this feature can be found at [1].
>>>
>>> +- **Secondary Processes**
>>> +
>>> +  Rx and Tx are not supported for secondary processes due to the single-
>> producer
>>> +  single-consumer nature of the AF_XDP rings. However other operations
>> including
>>> +  statistics retrieval are permitted.
>>
>> Hi Ciara,
>>
>> Isn't this limitation same for all PMDs, like not both primary & secondary can
>> Rx/Tx
>> from same queue at the same time.
>> But primary can initiallize the PMD and secondary can do the datapath,
>> or isn't af_xdp supports multiple queue, if so some queues can be used by
>> primary and some by secondary for datapath.
>>
>> Is there anyhing special for af_xdp that prevents it?
> 
> Hi Ferruh,
> 
> Thanks for the review.
> Each queue of the PMD corresponds to a new AF_XDP socket.
> Each socket has an RX and TX ring that is mmapped from the kernel to userspace and this mapping is only valid for the primary process.
> I did not figure out a way to share that mapping with the secondary process successfully. Can you think of anything that might work?
> 

Does the application knows the buffer address for the Rx/Tx, or is
abstracted to the 'fd'?
If only 'fd' is used, this patch already converts 'fd' between
processes.
cc'ed Anatoly, but what I understand is after MP fd conversion:
Primary process: FD=x
Secondary process: FD=y
And both x & y points to exact same socket in the kernel side.

At least this is how it works for the 'tap' interface, and that is
why 'fs' are in the process_private area and converted between primary
and secondary, I thought it will be same for the xdp socket.

Did you test the secondary Rx/Tx in the secondary after this patch?

^ permalink raw reply	[flat|nested] 36+ messages in thread

* RE: [PATCH v2] net/af_xdp: re-enable secondary process support
  2022-02-07 10:27         ` Ferruh Yigit
@ 2022-02-07 11:39           ` Loftus, Ciara
  2022-02-08 10:58             ` Ferruh Yigit
  0 siblings, 1 reply; 36+ messages in thread
From: Loftus, Ciara @ 2022-02-07 11:39 UTC (permalink / raw)
  To: Yigit, Ferruh, dev; +Cc: stephen, Burakov, Anatoly

> >>
> >> On 2/4/2022 12:54 PM, Ciara Loftus wrote:
> >>> Secondary process support had been disabled for the AF_XDP PMD
> >>> because there was no logic in place to share the AF_XDP socket
> >>> file descriptors between the processes. This commit introduces
> >>> this logic using the IPC APIs.
> >>>
> >>> Since AF_XDP rings are single-producer single-consumer, rx/tx
> >>> in the secondary process is disabled. However other operations
> >>> including retrieval of stats are permitted.
> >>>
> >>> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> >>>
> >>> ---
> >>> v1 -> v2:
> >>> * Rebase to next-net
> >>>
> >>> RFC -> v1:
> >>> * Added newline to af_xdp.rst
> >>> * Fixed spelling errors
> >>> * Fixed potential NULL dereference in init_internals
> >>> * Fixed potential free of address-of expression in
> afxdp_mp_request_fds
> >>> ---
> >>>    doc/guides/nics/af_xdp.rst             |   9 ++
> >>>    doc/guides/nics/features/af_xdp.ini    |   1 +
> >>>    doc/guides/rel_notes/release_22_03.rst |   1 +
> >>>    drivers/net/af_xdp/rte_eth_af_xdp.c    | 210
> >> +++++++++++++++++++++++--
> >>>    4 files changed, 207 insertions(+), 14 deletions(-)
> >>>
> >>> diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
> >>> index db02ea1984..eb4eab28a8 100644
> >>> --- a/doc/guides/nics/af_xdp.rst
> >>> +++ b/doc/guides/nics/af_xdp.rst
> >>> @@ -141,4 +141,13 @@ Limitations
> >>>      NAPI context from a watchdog timer instead of from softirqs. More
> >> information
> >>>      on this feature can be found at [1].
> >>>
> >>> +- **Secondary Processes**
> >>> +
> >>> +  Rx and Tx are not supported for secondary processes due to the
> single-
> >> producer
> >>> +  single-consumer nature of the AF_XDP rings. However other
> operations
> >> including
> >>> +  statistics retrieval are permitted.
> >>
> >> Hi Ciara,
> >>
> >> Isn't this limitation same for all PMDs, like not both primary & secondary
> can
> >> Rx/Tx
> >> from same queue at the same time.
> >> But primary can initiallize the PMD and secondary can do the datapath,
> >> or isn't af_xdp supports multiple queue, if so some queues can be used
> by
> >> primary and some by secondary for datapath.
> >>
> >> Is there anyhing special for af_xdp that prevents it?
> >
> > Hi Ferruh,
> >
> > Thanks for the review.
> > Each queue of the PMD corresponds to a new AF_XDP socket.
> > Each socket has an RX and TX ring that is mmapped from the kernel to
> userspace and this mapping is only valid for the primary process.
> > I did not figure out a way to share that mapping with the secondary process
> successfully. Can you think of anything that might work?
> >
> 
> Does the application knows the buffer address for the Rx/Tx, or is
> abstracted to the 'fd'?

The application knows the buffer address of the Rx/Tx rings.
We pass a pointer to these rings to the libbpf xsk_socket__create API, which sets up the mappings:
http://code.dpdk.org/dpdk/v21.11/source/drivers/net/af_xdp/rte_eth_af_xdp.c#L1291
Then later on in the datapath we operate directly on those rings:
http://code.dpdk.org/dpdk/v21.11/source/drivers/net/af_xdp/rte_eth_af_xdp.c#L268
The fd is used in the datapath, but just for the syscalls (recvfrom/poll/send).

> If only 'fd' is used, this patch already converts 'fd' between
> processes.
> cc'ed Anatoly, but what I understand is after MP fd conversion:
> Primary process: FD=x
> Secondary process: FD=y
> And both x & y points to exact same socket in the kernel side.
> 
> At least this is how it works for the 'tap' interface, and that is
> why 'fs' are in the process_private area and converted between primary
> and secondary, I thought it will be same for the xdp socket.
> 
> Did you test the secondary Rx/Tx in the secondary after this patch?

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v2] net/af_xdp: re-enable secondary process support
  2022-02-07 11:39           ` Loftus, Ciara
@ 2022-02-08 10:58             ` Ferruh Yigit
  0 siblings, 0 replies; 36+ messages in thread
From: Ferruh Yigit @ 2022-02-08 10:58 UTC (permalink / raw)
  To: Loftus, Ciara, Burakov, Anatoly; +Cc: stephen, dev

On 2/7/2022 11:39 AM, Loftus, Ciara wrote:
>>>>
>>>> On 2/4/2022 12:54 PM, Ciara Loftus wrote:
>>>>> Secondary process support had been disabled for the AF_XDP PMD
>>>>> because there was no logic in place to share the AF_XDP socket
>>>>> file descriptors between the processes. This commit introduces
>>>>> this logic using the IPC APIs.
>>>>>
>>>>> Since AF_XDP rings are single-producer single-consumer, rx/tx
>>>>> in the secondary process is disabled. However other operations
>>>>> including retrieval of stats are permitted.
>>>>>
>>>>> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
>>>>>
>>>>> ---
>>>>> v1 -> v2:
>>>>> * Rebase to next-net
>>>>>
>>>>> RFC -> v1:
>>>>> * Added newline to af_xdp.rst
>>>>> * Fixed spelling errors
>>>>> * Fixed potential NULL dereference in init_internals
>>>>> * Fixed potential free of address-of expression in
>> afxdp_mp_request_fds
>>>>> ---
>>>>>     doc/guides/nics/af_xdp.rst             |   9 ++
>>>>>     doc/guides/nics/features/af_xdp.ini    |   1 +
>>>>>     doc/guides/rel_notes/release_22_03.rst |   1 +
>>>>>     drivers/net/af_xdp/rte_eth_af_xdp.c    | 210
>>>> +++++++++++++++++++++++--
>>>>>     4 files changed, 207 insertions(+), 14 deletions(-)
>>>>>
>>>>> diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
>>>>> index db02ea1984..eb4eab28a8 100644
>>>>> --- a/doc/guides/nics/af_xdp.rst
>>>>> +++ b/doc/guides/nics/af_xdp.rst
>>>>> @@ -141,4 +141,13 @@ Limitations
>>>>>       NAPI context from a watchdog timer instead of from softirqs. More
>>>> information
>>>>>       on this feature can be found at [1].
>>>>>
>>>>> +- **Secondary Processes**
>>>>> +
>>>>> +  Rx and Tx are not supported for secondary processes due to the
>> single-
>>>> producer
>>>>> +  single-consumer nature of the AF_XDP rings. However other
>> operations
>>>> including
>>>>> +  statistics retrieval are permitted.
>>>>
>>>> Hi Ciara,
>>>>
>>>> Isn't this limitation same for all PMDs, like not both primary & secondary
>> can
>>>> Rx/Tx
>>>> from same queue at the same time.
>>>> But primary can initiallize the PMD and secondary can do the datapath,
>>>> or isn't af_xdp supports multiple queue, if so some queues can be used
>> by
>>>> primary and some by secondary for datapath.
>>>>
>>>> Is there anyhing special for af_xdp that prevents it?
>>>
>>> Hi Ferruh,
>>>
>>> Thanks for the review.
>>> Each queue of the PMD corresponds to a new AF_XDP socket.
>>> Each socket has an RX and TX ring that is mmapped from the kernel to
>> userspace and this mapping is only valid for the primary process.
>>> I did not figure out a way to share that mapping with the secondary process
>> successfully. Can you think of anything that might work?
>>>
>>
>> Does the application knows the buffer address for the Rx/Tx, or is
>> abstracted to the 'fd'?
> 
> The application knows the buffer address of the Rx/Tx rings.
> We pass a pointer to these rings to the libbpf xsk_socket__create API, which sets up the mappings:
> http://code.dpdk.org/dpdk/v21.11/source/drivers/net/af_xdp/rte_eth_af_xdp.c#L1291
> Then later on in the datapath we operate directly on those rings:
> http://code.dpdk.org/dpdk/v21.11/source/drivers/net/af_xdp/rte_eth_af_xdp.c#L268
> The fd is used in the datapath, but just for the syscalls (recvfrom/poll/send).
> 

Got it, if the buffer address is explicitly required for datapath, fd conversion
is not enough.

Primary/secondary process works by mapping memory to same virtual address
on two different process. The same method can be used for af_xdp multi
process support, again @Anatoly can comment better.
But this method is fragile, not sure if we should implement it in more
places...

Anyway, agree to continue this patch without datapath support in secondary.

>> If only 'fd' is used, this patch already converts 'fd' between
>> processes.
>> cc'ed Anatoly, but what I understand is after MP fd conversion:
>> Primary process: FD=x
>> Secondary process: FD=y
>> And both x & y points to exact same socket in the kernel side.
>>
>> At least this is how it works for the 'tap' interface, and that is
>> why 'fs' are in the process_private area and converted between primary
>> and secondary, I thought it will be same for the xdp socket.
>>
>> Did you test the secondary Rx/Tx in the secondary after this patch?


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH v3] net/af_xdp: re-enable secondary process support
  2022-02-04 12:54   ` [PATCH v2] " Ciara Loftus
  2022-02-04 14:18     ` Ferruh Yigit
@ 2022-02-08 13:48     ` Ciara Loftus
  2022-02-08 17:45       ` Stephen Hemminger
  2022-02-09  9:48       ` [PATCH v4] " Ciara Loftus
  1 sibling, 2 replies; 36+ messages in thread
From: Ciara Loftus @ 2022-02-08 13:48 UTC (permalink / raw)
  To: dev; +Cc: stephen, ferruh.yigit, anatoly.burakov, Ciara Loftus

Secondary process support had been disabled for the AF_XDP PMD
because there was no logic in place to share the AF_XDP socket
file descriptors between the processes. This commit introduces
this logic using the IPC APIs.

Since AF_XDP rings are single-producer single-consumer, rx/tx
in the secondary process is disabled. However other operations
including retrieval of stats are permitted.

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>

---
v2 -> v3:
* Rebase to next-net
* Updated logs to be more specific about multi-process IPC
* Free process_private in _close and _remove functions
* Use rte_eth_dev_get_by_name instead of global array

v1 -> v2:
* Rebase to next-net

RFC -> v1:
* Added newline to af_xdp.rst
* Fixed spelling errors
* Fixed potential NULL dereference in init_internals
* Fixed potential free of address-of expression in afxdp_mp_request_fds
---
 doc/guides/nics/af_xdp.rst             |   9 ++
 doc/guides/nics/features/af_xdp.ini    |   1 +
 doc/guides/rel_notes/release_22_03.rst |   1 +
 drivers/net/af_xdp/rte_eth_af_xdp.c    | 211 +++++++++++++++++++++++--
 4 files changed, 207 insertions(+), 15 deletions(-)

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index db02ea1984..eb4eab28a8 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -141,4 +141,13 @@ Limitations
   NAPI context from a watchdog timer instead of from softirqs. More information
   on this feature can be found at [1].
 
+- **Secondary Processes**
+
+  Rx and Tx are not supported for secondary processes due to the single-producer
+  single-consumer nature of the AF_XDP rings. However other operations including
+  statistics retrieval are permitted.
+  The maximum number of queues permitted for PMDs operating in this model is 8
+  as this is the maximum number of fds that can be sent through the IPC APIs as
+  defined by RTE_MP_MAX_FD_NUM.
+
   [1] https://lwn.net/Articles/837010/
diff --git a/doc/guides/nics/features/af_xdp.ini b/doc/guides/nics/features/af_xdp.ini
index 54b738e616..8e7e075aaf 100644
--- a/doc/guides/nics/features/af_xdp.ini
+++ b/doc/guides/nics/features/af_xdp.ini
@@ -9,4 +9,5 @@ Power mgmt address monitor = Y
 MTU update           = Y
 Promiscuous mode     = Y
 Stats per queue      = Y
+Multiprocess aware   = Y
 x86-64               = Y
diff --git a/doc/guides/rel_notes/release_22_03.rst b/doc/guides/rel_notes/release_22_03.rst
index f03183ee86..a9bf3cc7a6 100644
--- a/doc/guides/rel_notes/release_22_03.rst
+++ b/doc/guides/rel_notes/release_22_03.rst
@@ -72,6 +72,7 @@ New Features
 * **Updated AF_XDP PMD**
 
   * Added support for libxdp >=v1.2.2.
+  * Re-enabled secondary process support. RX/TX is not supported.
 
 * **Updated Cisco enic driver.**
 
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 1b6192fa44..ce638b9049 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -80,6 +80,18 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE);
 
 #define ETH_AF_XDP_ETH_OVERHEAD		(RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN)
 
+#define ETH_AF_XDP_MP_KEY "afxdp_mp_send_fds"
+
+static int afxdp_dev_count;
+
+/* Message header to synchronize fds via IPC */
+struct ipc_hdr {
+	char port_name[RTE_DEV_NAME_MAX_LEN];
+	/* The file descriptors are in the dedicated part
+	 * of the Unix message to be translated by the kernel.
+	 */
+};
+
 struct xsk_umem_info {
 	struct xsk_umem *umem;
 	struct rte_ring *buf_ring;
@@ -147,6 +159,10 @@ struct pmd_internals {
 	struct pkt_tx_queue *tx_queues;
 };
 
+struct pmd_process_private {
+	int rxq_xsk_fds[RTE_MAX_QUEUES_PER_PORT];
+};
+
 #define ETH_AF_XDP_IFACE_ARG			"iface"
 #define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
 #define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
@@ -795,11 +811,12 @@ static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
+	struct pmd_process_private *process_private = dev->process_private;
 	struct xdp_statistics xdp_stats;
 	struct pkt_rx_queue *rxq;
 	struct pkt_tx_queue *txq;
 	socklen_t optlen;
-	int i, ret;
+	int i, ret, fd;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
 		optlen = sizeof(struct xdp_statistics);
@@ -815,8 +832,9 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 		stats->ibytes += stats->q_ibytes[i];
 		stats->imissed += rxq->stats.rx_dropped;
 		stats->oerrors += txq->stats.tx_dropped;
-		ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP,
-				XDP_STATISTICS, &xdp_stats, &optlen);
+		fd = process_private->rxq_xsk_fds[i];
+		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
+					   &xdp_stats, &optlen) : -1;
 		if (ret != 0) {
 			AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n");
 			return -1;
@@ -884,7 +902,7 @@ eth_dev_close(struct rte_eth_dev *dev)
 	int i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
-		return 0;
+		goto out;
 
 	AF_XDP_LOG(INFO, "Closing AF_XDP ethdev on numa socket %u\n",
 		rte_socket_id());
@@ -927,6 +945,9 @@ eth_dev_close(struct rte_eth_dev *dev)
 		}
 	}
 
+out:
+	rte_free(dev->process_private);
+
 	return 0;
 }
 
@@ -1349,6 +1370,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 		   struct rte_mempool *mb_pool)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
+	struct pmd_process_private *process_private = dev->process_private;
 	struct pkt_rx_queue *rxq;
 	int ret;
 
@@ -1387,6 +1409,8 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 	rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
 	rxq->fds[0].events = POLLIN;
 
+	process_private->rxq_xsk_fds[rx_queue_id] = rxq->fds[0].fd;
+
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	return 0;
 
@@ -1688,6 +1712,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
+	struct pmd_process_private *process_private;
 	struct pmd_internals *internals;
 	struct rte_eth_dev *eth_dev;
 	int ret;
@@ -1753,9 +1778,17 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	if (ret)
 		goto err_free_tx;
 
+	process_private = (struct pmd_process_private *)
+		rte_zmalloc_socket(name, sizeof(struct pmd_process_private),
+				   RTE_CACHE_LINE_SIZE, numa_node);
+	if (process_private == NULL) {
+		AF_XDP_LOG(ERR, "Failed to alloc memory for process private\n");
+		goto err_free_tx;
+	}
+
 	eth_dev = rte_eth_vdev_allocate(dev, 0);
 	if (eth_dev == NULL)
-		goto err_free_tx;
+		goto err_free_pp;
 
 	eth_dev->data->dev_private = internals;
 	eth_dev->data->dev_link = pmd_link;
@@ -1764,6 +1797,10 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	eth_dev->dev_ops = &ops;
 	eth_dev->rx_pkt_burst = eth_af_xdp_rx;
 	eth_dev->tx_pkt_burst = eth_af_xdp_tx;
+	eth_dev->process_private = process_private;
+
+	for (i = 0; i < queue_cnt; i++)
+		process_private->rxq_xsk_fds[i] = -1;
 
 #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
 	AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
@@ -1771,6 +1808,8 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 
 	return eth_dev;
 
+err_free_pp:
+	rte_free(process_private);
 err_free_tx:
 	rte_free(internals->tx_queues);
 err_free_rx:
@@ -1780,6 +1819,115 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	return NULL;
 }
 
+/* Secondary process requests rxq fds from primary. */
+static int
+afxdp_mp_request_fds(const char *name, struct rte_eth_dev *dev)
+{
+	struct pmd_process_private *process_private = dev->process_private;
+	struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
+	struct rte_mp_msg request, *reply;
+	struct rte_mp_reply replies;
+	struct ipc_hdr *request_param = (struct ipc_hdr *)request.param;
+	int i, ret;
+
+	/* Prepare the request */
+	memset(&request, 0, sizeof(request));
+	strlcpy(request.name, ETH_AF_XDP_MP_KEY, sizeof(request.name));
+	strlcpy(request_param->port_name, name,
+		sizeof(request_param->port_name));
+	request.len_param = sizeof(*request_param);
+
+	/* Send the request and receive the reply */
+	AF_XDP_LOG(DEBUG, "Sending multi-process IPC request for %s\n", name);
+	ret = rte_mp_request_sync(&request, &replies, &timeout);
+	if (ret < 0 || replies.nb_received != 1) {
+		AF_XDP_LOG(ERR, "Failed to request fds from primary: %d",
+			   rte_errno);
+		return -1;
+	}
+	reply = replies.msgs;
+	AF_XDP_LOG(DEBUG, "Received multi-process IPC reply for %s\n", name);
+	if (dev->data->nb_rx_queues != reply->num_fds) {
+		AF_XDP_LOG(ERR, "Incorrect number of fds received: %d != %d\n",
+			   reply->num_fds, dev->data->nb_rx_queues);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < reply->num_fds; i++)
+		process_private->rxq_xsk_fds[i] = reply->fds[i];
+
+	free(reply);
+	return 0;
+}
+
+/* Primary process sends rxq fds to secondary. */
+static int
+afxdp_mp_send_fds(const struct rte_mp_msg *request, const void *peer)
+{
+	struct rte_eth_dev *dev;
+	struct pmd_process_private *process_private;
+	struct rte_mp_msg reply;
+	const struct ipc_hdr *request_param =
+		(const struct ipc_hdr *)request->param;
+	struct ipc_hdr *reply_param =
+		(struct ipc_hdr *)reply.param;
+	const char *request_name = request_param->port_name;
+	int i;
+
+	AF_XDP_LOG(DEBUG, "Received multi-process IPC request for %s\n",
+		   request_name);
+
+	/* Find the requested port */
+	dev = rte_eth_dev_get_by_name(request_name);
+	if (!dev) {
+		AF_XDP_LOG(ERR, "Failed to get port id for %s\n", request_name);
+		return -1;
+	}
+	process_private = dev->process_private;
+
+	/* Populate the reply with the xsk fd for each queue */
+	reply.num_fds = 0;
+	if (dev->data->nb_rx_queues > RTE_MP_MAX_FD_NUM) {
+		AF_XDP_LOG(ERR, "Number of rx queues (%d) exceeds max number of fds (%d)\n",
+			   dev->data->nb_rx_queues, RTE_MP_MAX_FD_NUM);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		reply.fds[reply.num_fds++] = process_private->rxq_xsk_fds[i];
+
+	/* Send the reply */
+	strlcpy(reply.name, request->name, sizeof(reply.name));
+	strlcpy(reply_param->port_name, request_name,
+		sizeof(reply_param->port_name));
+	reply.len_param = sizeof(*reply_param);
+	AF_XDP_LOG(DEBUG, "Sending multi-process IPC reply for %s\n",
+		   reply_param->port_name);
+	if (rte_mp_reply(&reply, peer) < 0) {
+		AF_XDP_LOG(ERR, "Failed to reply to multi-process IPC request\n");
+		return -1;
+	}
+	return 0;
+}
+
+/* Secondary process rx function. RX is disabled because rings are SPSC */
+static uint16_t
+eth_af_xdp_rx_noop(void *queue __rte_unused,
+		struct rte_mbuf **bufs __rte_unused,
+		uint16_t nb_pkts __rte_unused)
+{
+	return 0;
+}
+
+/* Secondary process tx function. TX is disabled because rings are SPSC */
+static uint16_t
+eth_af_xdp_tx_noop(void *queue __rte_unused,
+			struct rte_mbuf **bufs __rte_unused,
+			uint16_t nb_pkts __rte_unused)
+{
+	return 0;
+}
+
 static int
 rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 {
@@ -1789,19 +1937,39 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
 	int shared_umem = 0;
 	char prog_path[PATH_MAX] = {'\0'};
-	int busy_budget = -1;
+	int busy_budget = -1, ret;
 	struct rte_eth_dev *eth_dev = NULL;
-	const char *name;
+	const char *name = rte_vdev_device_name(dev);
 
-	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
-		rte_vdev_device_name(dev));
+	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n", name);
 
-	name = rte_vdev_device_name(dev);
 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
-		AF_XDP_LOG(ERR, "Failed to probe %s. "
-				"AF_XDP PMD does not support secondary processes.\n",
-				name);
-		return -ENOTSUP;
+		eth_dev = rte_eth_dev_attach_secondary(name);
+		if (eth_dev == NULL) {
+			AF_XDP_LOG(ERR, "Failed to probe %s\n", name);
+			return -EINVAL;
+		}
+		eth_dev->dev_ops = &ops;
+		eth_dev->device = &dev->device;
+		eth_dev->rx_pkt_burst = eth_af_xdp_rx_noop;
+		eth_dev->tx_pkt_burst = eth_af_xdp_tx_noop;
+		eth_dev->process_private = (struct pmd_process_private *)
+			rte_zmalloc_socket(name,
+					   sizeof(struct pmd_process_private),
+					   RTE_CACHE_LINE_SIZE,
+					   eth_dev->device->numa_node);
+		if (eth_dev->process_private == NULL) {
+			AF_XDP_LOG(ERR,
+				"Failed to alloc memory for process private\n");
+			return -ENOMEM;
+		}
+
+		/* Obtain the xsk fds from the primary process. */
+		if (afxdp_mp_request_fds(name, eth_dev))
+			return -1;
+
+		rte_eth_dev_probing_finish(eth_dev);
+		return 0;
 	}
 
 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
@@ -1836,6 +2004,17 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		return -1;
 	}
 
+	/* Register IPC callback which shares xsk fds from primary to secondary */
+	if (!afxdp_dev_count) {
+		ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY, afxdp_mp_send_fds);
+		if (ret < 0) {
+			AF_XDP_LOG(ERR, "%s: Failed to register multi-process IPC callback: %s",
+				   name, strerror(rte_errno));
+			return -1;
+		}
+	}
+	afxdp_dev_count++;
+
 	rte_eth_dev_probing_finish(eth_dev);
 
 	return 0;
@@ -1858,9 +2037,11 @@ rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
 		return 0;
 
 	eth_dev_close(eth_dev);
+	if (afxdp_dev_count == 1)
+		rte_mp_action_unregister(ETH_AF_XDP_MP_KEY);
+	afxdp_dev_count--;
 	rte_eth_dev_release_port(eth_dev);
 
-
 	return 0;
 }
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v3] net/af_xdp: re-enable secondary process support
  2022-02-08 13:48     ` [PATCH v3] " Ciara Loftus
@ 2022-02-08 17:45       ` Stephen Hemminger
  2022-02-08 18:00         ` Ferruh Yigit
  2022-02-09  9:48       ` [PATCH v4] " Ciara Loftus
  1 sibling, 1 reply; 36+ messages in thread
From: Stephen Hemminger @ 2022-02-08 17:45 UTC (permalink / raw)
  To: Ciara Loftus; +Cc: dev, ferruh.yigit, anatoly.burakov

On Tue,  8 Feb 2022 13:48:00 +0000
Ciara Loftus <ciara.loftus@intel.com> wrote:

> +- **Secondary Processes**
> +
> +  Rx and Tx are not supported for secondary processes due to the single-producer
> +  single-consumer nature of the AF_XDP rings. However other operations including
> +  statistics retrieval are permitted.
> +  The maximum number of queues permitted for PMDs operating in this model is 8
> +  as this is the maximum number of fds that can be sent through the IPC APIs as
> +  defined by RTE_MP_MAX_FD_NUM.
> +

This seems like a restriction that is true for most devices in DPDK.
Most other devices also have restriction that on queues;
the hardware descriptor ring can only be used by one thread at a time.
Is this different with AF_XDP?

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v3] net/af_xdp: re-enable secondary process support
  2022-02-08 17:45       ` Stephen Hemminger
@ 2022-02-08 18:00         ` Ferruh Yigit
  2022-02-08 18:42           ` Stephen Hemminger
  0 siblings, 1 reply; 36+ messages in thread
From: Ferruh Yigit @ 2022-02-08 18:00 UTC (permalink / raw)
  To: Stephen Hemminger, Ciara Loftus; +Cc: dev, anatoly.burakov

On 2/8/2022 5:45 PM, Stephen Hemminger wrote:
> On Tue,  8 Feb 2022 13:48:00 +0000
> Ciara Loftus <ciara.loftus@intel.com> wrote:
> 
>> +- **Secondary Processes**
>> +
>> +  Rx and Tx are not supported for secondary processes due to the single-producer
>> +  single-consumer nature of the AF_XDP rings. However other operations including
>> +  statistics retrieval are permitted.
>> +  The maximum number of queues permitted for PMDs operating in this model is 8
>> +  as this is the maximum number of fds that can be sent through the IPC APIs as
>> +  defined by RTE_MP_MAX_FD_NUM.
>> +
> 
> This seems like a restriction that is true for most devices in DPDK.
> Most other devices also have restriction that on queues;
> the hardware descriptor ring can only be used by one thread at a time.
> Is this different with AF_XDP?

I asked the same on v2 :) and Ciara explained the reason, it is on v2 discussion thread.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v3] net/af_xdp: re-enable secondary process support
  2022-02-08 18:00         ` Ferruh Yigit
@ 2022-02-08 18:42           ` Stephen Hemminger
  2022-02-08 18:56             ` Ferruh Yigit
  0 siblings, 1 reply; 36+ messages in thread
From: Stephen Hemminger @ 2022-02-08 18:42 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: Ciara Loftus, dev, anatoly.burakov

On Tue, 8 Feb 2022 18:00:27 +0000
Ferruh Yigit <ferruh.yigit@intel.com> wrote:

> On 2/8/2022 5:45 PM, Stephen Hemminger wrote:
> > On Tue,  8 Feb 2022 13:48:00 +0000
> > Ciara Loftus <ciara.loftus@intel.com> wrote:
> >   
> >> +- **Secondary Processes**
> >> +
> >> +  Rx and Tx are not supported for secondary processes due to the single-producer
> >> +  single-consumer nature of the AF_XDP rings. However other operations including
> >> +  statistics retrieval are permitted.
> >> +  The maximum number of queues permitted for PMDs operating in this model is 8
> >> +  as this is the maximum number of fds that can be sent through the IPC APIs as
> >> +  defined by RTE_MP_MAX_FD_NUM.
> >> +  
> > 
> > This seems like a restriction that is true for most devices in DPDK.
> > Most other devices also have restriction that on queues;
> > the hardware descriptor ring can only be used by one thread at a time.
> > Is this different with AF_XDP?  
> 
> I asked the same on v2 :) and Ciara explained the reason, it is on v2 discussion thread.

The wording of the message is what confused me.
It would be better to change:
    due to the single-producer single-consumer nature of the AF_XDP rings
to
    due to memory mapping of the AF_XDP rings being assigned by the kernel
    in the primary process only.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v3] net/af_xdp: re-enable secondary process support
  2022-02-08 18:42           ` Stephen Hemminger
@ 2022-02-08 18:56             ` Ferruh Yigit
  2022-02-09  7:41               ` Loftus, Ciara
  0 siblings, 1 reply; 36+ messages in thread
From: Ferruh Yigit @ 2022-02-08 18:56 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Ciara Loftus, dev, anatoly.burakov

On 2/8/2022 6:42 PM, Stephen Hemminger wrote:
> On Tue, 8 Feb 2022 18:00:27 +0000
> Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> 
>> On 2/8/2022 5:45 PM, Stephen Hemminger wrote:
>>> On Tue,  8 Feb 2022 13:48:00 +0000
>>> Ciara Loftus <ciara.loftus@intel.com> wrote:
>>>    
>>>> +- **Secondary Processes**
>>>> +
>>>> +  Rx and Tx are not supported for secondary processes due to the single-producer
>>>> +  single-consumer nature of the AF_XDP rings. However other operations including
>>>> +  statistics retrieval are permitted.
>>>> +  The maximum number of queues permitted for PMDs operating in this model is 8
>>>> +  as this is the maximum number of fds that can be sent through the IPC APIs as
>>>> +  defined by RTE_MP_MAX_FD_NUM.
>>>> +
>>>
>>> This seems like a restriction that is true for most devices in DPDK.
>>> Most other devices also have restriction that on queues;
>>> the hardware descriptor ring can only be used by one thread at a time.
>>> Is this different with AF_XDP?
>>
>> I asked the same on v2 :) and Ciara explained the reason, it is on v2 discussion thread.
> 
> The wording of the message is what confused me.
> It would be better to change:
>      due to the single-producer single-consumer nature of the AF_XDP rings
> to
>      due to memory mapping of the AF_XDP rings being assigned by the kernel
>      in the primary process only.

+1

^ permalink raw reply	[flat|nested] 36+ messages in thread

* RE: [PATCH v3] net/af_xdp: re-enable secondary process support
  2022-02-08 18:56             ` Ferruh Yigit
@ 2022-02-09  7:41               ` Loftus, Ciara
  0 siblings, 0 replies; 36+ messages in thread
From: Loftus, Ciara @ 2022-02-09  7:41 UTC (permalink / raw)
  To: Yigit, Ferruh, Stephen Hemminger; +Cc: dev, Burakov, Anatoly

> 
> On 2/8/2022 6:42 PM, Stephen Hemminger wrote:
> > On Tue, 8 Feb 2022 18:00:27 +0000
> > Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> >
> >> On 2/8/2022 5:45 PM, Stephen Hemminger wrote:
> >>> On Tue,  8 Feb 2022 13:48:00 +0000
> >>> Ciara Loftus <ciara.loftus@intel.com> wrote:
> >>>
> >>>> +- **Secondary Processes**
> >>>> +
> >>>> +  Rx and Tx are not supported for secondary processes due to the
> single-producer
> >>>> +  single-consumer nature of the AF_XDP rings. However other
> operations including
> >>>> +  statistics retrieval are permitted.
> >>>> +  The maximum number of queues permitted for PMDs operating in
> this model is 8
> >>>> +  as this is the maximum number of fds that can be sent through the
> IPC APIs as
> >>>> +  defined by RTE_MP_MAX_FD_NUM.
> >>>> +
> >>>
> >>> This seems like a restriction that is true for most devices in DPDK.
> >>> Most other devices also have restriction that on queues;
> >>> the hardware descriptor ring can only be used by one thread at a time.
> >>> Is this different with AF_XDP?
> >>
> >> I asked the same on v2 :) and Ciara explained the reason, it is on v2
> discussion thread.
> >
> > The wording of the message is what confused me.
> > It would be better to change:
> >      due to the single-producer single-consumer nature of the AF_XDP rings
> > to
> >      due to memory mapping of the AF_XDP rings being assigned by the
> kernel
> >      in the primary process only.
> 
> +1

Agree, I worded this poorly! Will submit a v4 with a more accurate explanation of the limitation.

Thanks,
Ciara

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-08 13:48     ` [PATCH v3] " Ciara Loftus
  2022-02-08 17:45       ` Stephen Hemminger
@ 2022-02-09  9:48       ` Ciara Loftus
  2022-02-09 15:29         ` Stephen Hemminger
                           ` (3 more replies)
  1 sibling, 4 replies; 36+ messages in thread
From: Ciara Loftus @ 2022-02-09  9:48 UTC (permalink / raw)
  To: dev; +Cc: stephen, ferruh.yigit, anatoly.burakov, Ciara Loftus

Secondary process support had been disabled for the AF_XDP PMD because
there was no logic in place to share the AF_XDP socket file descriptors
between the processes. This commit introduces this logic using the IPC
APIs.

Rx and Tx are disabled in the secondary process due to memory mapping of
the AF_XDP rings being assigned by the kernel in the primary process only.
However other operations including retrieval of stats are permitted.

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>

---
v3 -> v4:
* Rebase to next-net
* Reword limitation which requires Rx and Tx to be disabled.

v2 -> v3:
* Rebase to next-net
* Updated logs to be more specific about multi-process IPC
* Free process_private in _close and _remove functions
* Use rte_eth_dev_get_by_name instead of global array

v1 -> v2:
* Rebase to next-net

RFC -> v1:
* Added newline to af_xdp.rst
* Fixed spelling errors
* Fixed potential NULL dereference in init_internals
* Fixed potential free of address-of expression in afxdp_mp_request_fds
---
 doc/guides/nics/af_xdp.rst             |   9 ++
 doc/guides/nics/features/af_xdp.ini    |   1 +
 doc/guides/rel_notes/release_22_03.rst |   1 +
 drivers/net/af_xdp/rte_eth_af_xdp.c    | 215 +++++++++++++++++++++++--
 4 files changed, 211 insertions(+), 15 deletions(-)

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index db02ea1984..3d8b70e3f8 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -141,4 +141,13 @@ Limitations
   NAPI context from a watchdog timer instead of from softirqs. More information
   on this feature can be found at [1].
 
+- **Secondary Processes**
+
+  Rx and Tx are not supported for secondary processes due to memory mapping of
+  the AF_XDP rings being assigned by the kernel in the primary process only.
+  However other operations including statistics retrieval are permitted.
+  The maximum number of queues permitted for PMDs operating in this model is 8
+  as this is the maximum number of fds that can be sent through the IPC APIs as
+  defined by RTE_MP_MAX_FD_NUM.
+
   [1] https://lwn.net/Articles/837010/
diff --git a/doc/guides/nics/features/af_xdp.ini b/doc/guides/nics/features/af_xdp.ini
index 54b738e616..8e7e075aaf 100644
--- a/doc/guides/nics/features/af_xdp.ini
+++ b/doc/guides/nics/features/af_xdp.ini
@@ -9,4 +9,5 @@ Power mgmt address monitor = Y
 MTU update           = Y
 Promiscuous mode     = Y
 Stats per queue      = Y
+Multiprocess aware   = Y
 x86-64               = Y
diff --git a/doc/guides/rel_notes/release_22_03.rst b/doc/guides/rel_notes/release_22_03.rst
index f03183ee86..a9bf3cc7a6 100644
--- a/doc/guides/rel_notes/release_22_03.rst
+++ b/doc/guides/rel_notes/release_22_03.rst
@@ -72,6 +72,7 @@ New Features
 * **Updated AF_XDP PMD**
 
   * Added support for libxdp >=v1.2.2.
+  * Re-enabled secondary process support. RX/TX is not supported.
 
 * **Updated Cisco enic driver.**
 
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 1b6192fa44..bfa5d2a038 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -80,6 +80,18 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE);
 
 #define ETH_AF_XDP_ETH_OVERHEAD		(RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN)
 
+#define ETH_AF_XDP_MP_KEY "afxdp_mp_send_fds"
+
+static int afxdp_dev_count;
+
+/* Message header to synchronize fds via IPC */
+struct ipc_hdr {
+	char port_name[RTE_DEV_NAME_MAX_LEN];
+	/* The file descriptors are in the dedicated part
+	 * of the Unix message to be translated by the kernel.
+	 */
+};
+
 struct xsk_umem_info {
 	struct xsk_umem *umem;
 	struct rte_ring *buf_ring;
@@ -147,6 +159,10 @@ struct pmd_internals {
 	struct pkt_tx_queue *tx_queues;
 };
 
+struct pmd_process_private {
+	int rxq_xsk_fds[RTE_MAX_QUEUES_PER_PORT];
+};
+
 #define ETH_AF_XDP_IFACE_ARG			"iface"
 #define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
 #define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
@@ -795,11 +811,12 @@ static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
+	struct pmd_process_private *process_private = dev->process_private;
 	struct xdp_statistics xdp_stats;
 	struct pkt_rx_queue *rxq;
 	struct pkt_tx_queue *txq;
 	socklen_t optlen;
-	int i, ret;
+	int i, ret, fd;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
 		optlen = sizeof(struct xdp_statistics);
@@ -815,8 +832,9 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 		stats->ibytes += stats->q_ibytes[i];
 		stats->imissed += rxq->stats.rx_dropped;
 		stats->oerrors += txq->stats.tx_dropped;
-		ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP,
-				XDP_STATISTICS, &xdp_stats, &optlen);
+		fd = process_private->rxq_xsk_fds[i];
+		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
+					   &xdp_stats, &optlen) : -1;
 		if (ret != 0) {
 			AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n");
 			return -1;
@@ -884,7 +902,7 @@ eth_dev_close(struct rte_eth_dev *dev)
 	int i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
-		return 0;
+		goto out;
 
 	AF_XDP_LOG(INFO, "Closing AF_XDP ethdev on numa socket %u\n",
 		rte_socket_id());
@@ -927,6 +945,9 @@ eth_dev_close(struct rte_eth_dev *dev)
 		}
 	}
 
+out:
+	rte_free(dev->process_private);
+
 	return 0;
 }
 
@@ -1349,6 +1370,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 		   struct rte_mempool *mb_pool)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
+	struct pmd_process_private *process_private = dev->process_private;
 	struct pkt_rx_queue *rxq;
 	int ret;
 
@@ -1387,6 +1409,8 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 	rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
 	rxq->fds[0].events = POLLIN;
 
+	process_private->rxq_xsk_fds[rx_queue_id] = rxq->fds[0].fd;
+
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	return 0;
 
@@ -1688,6 +1712,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
+	struct pmd_process_private *process_private;
 	struct pmd_internals *internals;
 	struct rte_eth_dev *eth_dev;
 	int ret;
@@ -1753,9 +1778,17 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	if (ret)
 		goto err_free_tx;
 
+	process_private = (struct pmd_process_private *)
+		rte_zmalloc_socket(name, sizeof(struct pmd_process_private),
+				   RTE_CACHE_LINE_SIZE, numa_node);
+	if (process_private == NULL) {
+		AF_XDP_LOG(ERR, "Failed to alloc memory for process private\n");
+		goto err_free_tx;
+	}
+
 	eth_dev = rte_eth_vdev_allocate(dev, 0);
 	if (eth_dev == NULL)
-		goto err_free_tx;
+		goto err_free_pp;
 
 	eth_dev->data->dev_private = internals;
 	eth_dev->data->dev_link = pmd_link;
@@ -1764,6 +1797,10 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	eth_dev->dev_ops = &ops;
 	eth_dev->rx_pkt_burst = eth_af_xdp_rx;
 	eth_dev->tx_pkt_burst = eth_af_xdp_tx;
+	eth_dev->process_private = process_private;
+
+	for (i = 0; i < queue_cnt; i++)
+		process_private->rxq_xsk_fds[i] = -1;
 
 #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
 	AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
@@ -1771,6 +1808,8 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 
 	return eth_dev;
 
+err_free_pp:
+	rte_free(process_private);
 err_free_tx:
 	rte_free(internals->tx_queues);
 err_free_rx:
@@ -1780,6 +1819,119 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	return NULL;
 }
 
+/* Secondary process requests rxq fds from primary. */
+static int
+afxdp_mp_request_fds(const char *name, struct rte_eth_dev *dev)
+{
+	struct pmd_process_private *process_private = dev->process_private;
+	struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
+	struct rte_mp_msg request, *reply;
+	struct rte_mp_reply replies;
+	struct ipc_hdr *request_param = (struct ipc_hdr *)request.param;
+	int i, ret;
+
+	/* Prepare the request */
+	memset(&request, 0, sizeof(request));
+	strlcpy(request.name, ETH_AF_XDP_MP_KEY, sizeof(request.name));
+	strlcpy(request_param->port_name, name,
+		sizeof(request_param->port_name));
+	request.len_param = sizeof(*request_param);
+
+	/* Send the request and receive the reply */
+	AF_XDP_LOG(DEBUG, "Sending multi-process IPC request for %s\n", name);
+	ret = rte_mp_request_sync(&request, &replies, &timeout);
+	if (ret < 0 || replies.nb_received != 1) {
+		AF_XDP_LOG(ERR, "Failed to request fds from primary: %d",
+			   rte_errno);
+		return -1;
+	}
+	reply = replies.msgs;
+	AF_XDP_LOG(DEBUG, "Received multi-process IPC reply for %s\n", name);
+	if (dev->data->nb_rx_queues != reply->num_fds) {
+		AF_XDP_LOG(ERR, "Incorrect number of fds received: %d != %d\n",
+			   reply->num_fds, dev->data->nb_rx_queues);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < reply->num_fds; i++)
+		process_private->rxq_xsk_fds[i] = reply->fds[i];
+
+	free(reply);
+	return 0;
+}
+
+/* Primary process sends rxq fds to secondary. */
+static int
+afxdp_mp_send_fds(const struct rte_mp_msg *request, const void *peer)
+{
+	struct rte_eth_dev *dev;
+	struct pmd_process_private *process_private;
+	struct rte_mp_msg reply;
+	const struct ipc_hdr *request_param =
+		(const struct ipc_hdr *)request->param;
+	struct ipc_hdr *reply_param =
+		(struct ipc_hdr *)reply.param;
+	const char *request_name = request_param->port_name;
+	int i;
+
+	AF_XDP_LOG(DEBUG, "Received multi-process IPC request for %s\n",
+		   request_name);
+
+	/* Find the requested port */
+	dev = rte_eth_dev_get_by_name(request_name);
+	if (!dev) {
+		AF_XDP_LOG(ERR, "Failed to get port id for %s\n", request_name);
+		return -1;
+	}
+	process_private = dev->process_private;
+
+	/* Populate the reply with the xsk fd for each queue */
+	reply.num_fds = 0;
+	if (dev->data->nb_rx_queues > RTE_MP_MAX_FD_NUM) {
+		AF_XDP_LOG(ERR, "Number of rx queues (%d) exceeds max number of fds (%d)\n",
+			   dev->data->nb_rx_queues, RTE_MP_MAX_FD_NUM);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		reply.fds[reply.num_fds++] = process_private->rxq_xsk_fds[i];
+
+	/* Send the reply */
+	strlcpy(reply.name, request->name, sizeof(reply.name));
+	strlcpy(reply_param->port_name, request_name,
+		sizeof(reply_param->port_name));
+	reply.len_param = sizeof(*reply_param);
+	AF_XDP_LOG(DEBUG, "Sending multi-process IPC reply for %s\n",
+		   reply_param->port_name);
+	if (rte_mp_reply(&reply, peer) < 0) {
+		AF_XDP_LOG(ERR, "Failed to reply to multi-process IPC request\n");
+		return -1;
+	}
+	return 0;
+}
+
+/* Secondary process rx function. RX is disabled because memory mapping of the
+ * rings being assigned by the kernel in the primary process only.
+ */
+static uint16_t
+eth_af_xdp_rx_noop(void *queue __rte_unused,
+		struct rte_mbuf **bufs __rte_unused,
+		uint16_t nb_pkts __rte_unused)
+{
+	return 0;
+}
+
+/* Secondary process tx function. TX is disabled because memory mapping of the
+ * rings being assigned by the kernel in the primary process only.
+ */
+static uint16_t
+eth_af_xdp_tx_noop(void *queue __rte_unused,
+			struct rte_mbuf **bufs __rte_unused,
+			uint16_t nb_pkts __rte_unused)
+{
+	return 0;
+}
+
 static int
 rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 {
@@ -1789,19 +1941,39 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
 	int shared_umem = 0;
 	char prog_path[PATH_MAX] = {'\0'};
-	int busy_budget = -1;
+	int busy_budget = -1, ret;
 	struct rte_eth_dev *eth_dev = NULL;
-	const char *name;
+	const char *name = rte_vdev_device_name(dev);
 
-	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
-		rte_vdev_device_name(dev));
+	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n", name);
 
-	name = rte_vdev_device_name(dev);
 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
-		AF_XDP_LOG(ERR, "Failed to probe %s. "
-				"AF_XDP PMD does not support secondary processes.\n",
-				name);
-		return -ENOTSUP;
+		eth_dev = rte_eth_dev_attach_secondary(name);
+		if (eth_dev == NULL) {
+			AF_XDP_LOG(ERR, "Failed to probe %s\n", name);
+			return -EINVAL;
+		}
+		eth_dev->dev_ops = &ops;
+		eth_dev->device = &dev->device;
+		eth_dev->rx_pkt_burst = eth_af_xdp_rx_noop;
+		eth_dev->tx_pkt_burst = eth_af_xdp_tx_noop;
+		eth_dev->process_private = (struct pmd_process_private *)
+			rte_zmalloc_socket(name,
+					   sizeof(struct pmd_process_private),
+					   RTE_CACHE_LINE_SIZE,
+					   eth_dev->device->numa_node);
+		if (eth_dev->process_private == NULL) {
+			AF_XDP_LOG(ERR,
+				"Failed to alloc memory for process private\n");
+			return -ENOMEM;
+		}
+
+		/* Obtain the xsk fds from the primary process. */
+		if (afxdp_mp_request_fds(name, eth_dev))
+			return -1;
+
+		rte_eth_dev_probing_finish(eth_dev);
+		return 0;
 	}
 
 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
@@ -1836,6 +2008,17 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		return -1;
 	}
 
+	/* Register IPC callback which shares xsk fds from primary to secondary */
+	if (!afxdp_dev_count) {
+		ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY, afxdp_mp_send_fds);
+		if (ret < 0) {
+			AF_XDP_LOG(ERR, "%s: Failed to register multi-process IPC callback: %s",
+				   name, strerror(rte_errno));
+			return -1;
+		}
+	}
+	afxdp_dev_count++;
+
 	rte_eth_dev_probing_finish(eth_dev);
 
 	return 0;
@@ -1858,9 +2041,11 @@ rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
 		return 0;
 
 	eth_dev_close(eth_dev);
+	if (afxdp_dev_count == 1)
+		rte_mp_action_unregister(ETH_AF_XDP_MP_KEY);
+	afxdp_dev_count--;
 	rte_eth_dev_release_port(eth_dev);
 
-
 	return 0;
 }
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-09  9:48       ` [PATCH v4] " Ciara Loftus
@ 2022-02-09 15:29         ` Stephen Hemminger
  2022-02-11 13:32           ` Ferruh Yigit
  2022-02-09 17:55         ` Ferruh Yigit
                           ` (2 subsequent siblings)
  3 siblings, 1 reply; 36+ messages in thread
From: Stephen Hemminger @ 2022-02-09 15:29 UTC (permalink / raw)
  To: Ciara Loftus; +Cc: dev, ferruh.yigit, anatoly.burakov

On Wed,  9 Feb 2022 09:48:08 +0000
Ciara Loftus <ciara.loftus@intel.com> wrote:

> Secondary process support had been disabled for the AF_XDP PMD because
> there was no logic in place to share the AF_XDP socket file descriptors
> between the processes. This commit introduces this logic using the IPC
> APIs.
> 
> Rx and Tx are disabled in the secondary process due to memory mapping of
> the AF_XDP rings being assigned by the kernel in the primary process only.
> However other operations including retrieval of stats are permitted.
> 
> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>

Acked-by: Stephen Hemminger <stephen@networkplumber.org>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-09  9:48       ` [PATCH v4] " Ciara Loftus
  2022-02-09 15:29         ` Stephen Hemminger
@ 2022-02-09 17:55         ` Ferruh Yigit
  2022-02-10 15:08           ` Ferruh Yigit
  2022-02-10 15:19         ` Ferruh Yigit
  2022-02-17 12:44         ` David Marchand
  3 siblings, 1 reply; 36+ messages in thread
From: Ferruh Yigit @ 2022-02-09 17:55 UTC (permalink / raw)
  To: Ciara Loftus, dev; +Cc: stephen, anatoly.burakov

On 2/9/2022 9:48 AM, Ciara Loftus wrote:
> Secondary process support had been disabled for the AF_XDP PMD because
> there was no logic in place to share the AF_XDP socket file descriptors
> between the processes. This commit introduces this logic using the IPC
> APIs.
> 
> Rx and Tx are disabled in the secondary process due to memory mapping of
> the AF_XDP rings being assigned by the kernel in the primary process only.
> However other operations including retrieval of stats are permitted.
> 
> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> 
> ---
> v3 -> v4:
> * Rebase to next-net
> * Reword limitation which requires Rx and Tx to be disabled.
> 
> v2 -> v3:
> * Rebase to next-net
> * Updated logs to be more specific about multi-process IPC
> * Free process_private in _close and _remove functions
> * Use rte_eth_dev_get_by_name instead of global array
> 
> v1 -> v2:
> * Rebase to next-net
> 
> RFC -> v1:
> * Added newline to af_xdp.rst
> * Fixed spelling errors
> * Fixed potential NULL dereference in init_internals
> * Fixed potential free of address-of expression in afxdp_mp_request_fds
> ---
>   doc/guides/nics/af_xdp.rst             |   9 ++
>   doc/guides/nics/features/af_xdp.ini    |   1 +
>   doc/guides/rel_notes/release_22_03.rst |   1 +
>   drivers/net/af_xdp/rte_eth_af_xdp.c    | 215 +++++++++++++++++++++++--
>   4 files changed, 211 insertions(+), 15 deletions(-)
> 
> diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
> index db02ea1984..3d8b70e3f8 100644
> --- a/doc/guides/nics/af_xdp.rst
> +++ b/doc/guides/nics/af_xdp.rst
> @@ -141,4 +141,13 @@ Limitations
>     NAPI context from a watchdog timer instead of from softirqs. More information
>     on this feature can be found at [1].
>   
> +- **Secondary Processes**
> +
> +  Rx and Tx are not supported for secondary processes due to memory mapping of
> +  the AF_XDP rings being assigned by the kernel in the primary process only.
> +  However other operations including statistics retrieval are permitted.
> +  The maximum number of queues permitted for PMDs operating in this model is 8
> +  as this is the maximum number of fds that can be sent through the IPC APIs as
> +  defined by RTE_MP_MAX_FD_NUM.
> +
>     [1] https://lwn.net/Articles/837010/
> diff --git a/doc/guides/nics/features/af_xdp.ini b/doc/guides/nics/features/af_xdp.ini
> index 54b738e616..8e7e075aaf 100644
> --- a/doc/guides/nics/features/af_xdp.ini
> +++ b/doc/guides/nics/features/af_xdp.ini
> @@ -9,4 +9,5 @@ Power mgmt address monitor = Y
>   MTU update           = Y
>   Promiscuous mode     = Y
>   Stats per queue      = Y
> +Multiprocess aware   = Y
>   x86-64               = Y
> diff --git a/doc/guides/rel_notes/release_22_03.rst b/doc/guides/rel_notes/release_22_03.rst
> index f03183ee86..a9bf3cc7a6 100644
> --- a/doc/guides/rel_notes/release_22_03.rst
> +++ b/doc/guides/rel_notes/release_22_03.rst
> @@ -72,6 +72,7 @@ New Features
>   * **Updated AF_XDP PMD**
>   
>     * Added support for libxdp >=v1.2.2.
> +  * Re-enabled secondary process support. RX/TX is not supported.
>   
>   * **Updated Cisco enic driver.**
>   
> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
> index 1b6192fa44..bfa5d2a038 100644
> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
> @@ -80,6 +80,18 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE);
>   
>   #define ETH_AF_XDP_ETH_OVERHEAD		(RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN)
>   
> +#define ETH_AF_XDP_MP_KEY "afxdp_mp_send_fds"
> +
> +static int afxdp_dev_count;
> +
> +/* Message header to synchronize fds via IPC */
> +struct ipc_hdr {
> +	char port_name[RTE_DEV_NAME_MAX_LEN];
> +	/* The file descriptors are in the dedicated part
> +	 * of the Unix message to be translated by the kernel.
> +	 */
> +};
> +
>   struct xsk_umem_info {
>   	struct xsk_umem *umem;
>   	struct rte_ring *buf_ring;
> @@ -147,6 +159,10 @@ struct pmd_internals {
>   	struct pkt_tx_queue *tx_queues;
>   };
>   
> +struct pmd_process_private {
> +	int rxq_xsk_fds[RTE_MAX_QUEUES_PER_PORT];
> +};
> +
>   #define ETH_AF_XDP_IFACE_ARG			"iface"
>   #define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
>   #define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
> @@ -795,11 +811,12 @@ static int
>   eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
>   {
>   	struct pmd_internals *internals = dev->data->dev_private;
> +	struct pmd_process_private *process_private = dev->process_private;
>   	struct xdp_statistics xdp_stats;
>   	struct pkt_rx_queue *rxq;
>   	struct pkt_tx_queue *txq;
>   	socklen_t optlen;
> -	int i, ret;
> +	int i, ret, fd;
>   
>   	for (i = 0; i < dev->data->nb_rx_queues; i++) {
>   		optlen = sizeof(struct xdp_statistics);
> @@ -815,8 +832,9 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
>   		stats->ibytes += stats->q_ibytes[i];
>   		stats->imissed += rxq->stats.rx_dropped;
>   		stats->oerrors += txq->stats.tx_dropped;
> -		ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP,
> -				XDP_STATISTICS, &xdp_stats, &optlen);
> +		fd = process_private->rxq_xsk_fds[i];
> +		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
> +					   &xdp_stats, &optlen) : -1;
>   		if (ret != 0) {
>   			AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n");
>   			return -1;
> @@ -884,7 +902,7 @@ eth_dev_close(struct rte_eth_dev *dev)
>   	int i;
>   
>   	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> -		return 0;
> +		goto out;
>   
>   	AF_XDP_LOG(INFO, "Closing AF_XDP ethdev on numa socket %u\n",
>   		rte_socket_id());
> @@ -927,6 +945,9 @@ eth_dev_close(struct rte_eth_dev *dev)
>   		}
>   	}
>   
> +out:
> +	rte_free(dev->process_private);
> +
>   	return 0;
>   }
>   
> @@ -1349,6 +1370,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
>   		   struct rte_mempool *mb_pool)
>   {
>   	struct pmd_internals *internals = dev->data->dev_private;
> +	struct pmd_process_private *process_private = dev->process_private;
>   	struct pkt_rx_queue *rxq;
>   	int ret;
>   
> @@ -1387,6 +1409,8 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
>   	rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
>   	rxq->fds[0].events = POLLIN;
>   
> +	process_private->rxq_xsk_fds[rx_queue_id] = rxq->fds[0].fd;
> +
>   	dev->data->rx_queues[rx_queue_id] = rxq;
>   	return 0;
>   
> @@ -1688,6 +1712,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
>   {
>   	const char *name = rte_vdev_device_name(dev);
>   	const unsigned int numa_node = dev->device.numa_node;
> +	struct pmd_process_private *process_private;
>   	struct pmd_internals *internals;
>   	struct rte_eth_dev *eth_dev;
>   	int ret;
> @@ -1753,9 +1778,17 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
>   	if (ret)
>   		goto err_free_tx;
>   
> +	process_private = (struct pmd_process_private *)
> +		rte_zmalloc_socket(name, sizeof(struct pmd_process_private),
> +				   RTE_CACHE_LINE_SIZE, numa_node);
> +	if (process_private == NULL) {
> +		AF_XDP_LOG(ERR, "Failed to alloc memory for process private\n");
> +		goto err_free_tx;
> +	}
> +
>   	eth_dev = rte_eth_vdev_allocate(dev, 0);
>   	if (eth_dev == NULL)
> -		goto err_free_tx;
> +		goto err_free_pp;
>   
>   	eth_dev->data->dev_private = internals;
>   	eth_dev->data->dev_link = pmd_link;
> @@ -1764,6 +1797,10 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
>   	eth_dev->dev_ops = &ops;
>   	eth_dev->rx_pkt_burst = eth_af_xdp_rx;
>   	eth_dev->tx_pkt_burst = eth_af_xdp_tx;
> +	eth_dev->process_private = process_private;
> +
> +	for (i = 0; i < queue_cnt; i++)
> +		process_private->rxq_xsk_fds[i] = -1;
>   
>   #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
>   	AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
> @@ -1771,6 +1808,8 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
>   
>   	return eth_dev;
>   
> +err_free_pp:
> +	rte_free(process_private);
>   err_free_tx:
>   	rte_free(internals->tx_queues);
>   err_free_rx:
> @@ -1780,6 +1819,119 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
>   	return NULL;
>   }
>   
> +/* Secondary process requests rxq fds from primary. */
> +static int
> +afxdp_mp_request_fds(const char *name, struct rte_eth_dev *dev)
> +{
> +	struct pmd_process_private *process_private = dev->process_private;
> +	struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
> +	struct rte_mp_msg request, *reply;
> +	struct rte_mp_reply replies;
> +	struct ipc_hdr *request_param = (struct ipc_hdr *)request.param;
> +	int i, ret;
> +
> +	/* Prepare the request */
> +	memset(&request, 0, sizeof(request));
> +	strlcpy(request.name, ETH_AF_XDP_MP_KEY, sizeof(request.name));
> +	strlcpy(request_param->port_name, name,
> +		sizeof(request_param->port_name));
> +	request.len_param = sizeof(*request_param);
> +
> +	/* Send the request and receive the reply */
> +	AF_XDP_LOG(DEBUG, "Sending multi-process IPC request for %s\n", name);
> +	ret = rte_mp_request_sync(&request, &replies, &timeout);
> +	if (ret < 0 || replies.nb_received != 1) {
> +		AF_XDP_LOG(ERR, "Failed to request fds from primary: %d",
> +			   rte_errno);
> +		return -1;
> +	}
> +	reply = replies.msgs;
> +	AF_XDP_LOG(DEBUG, "Received multi-process IPC reply for %s\n", name);
> +	if (dev->data->nb_rx_queues != reply->num_fds) {
> +		AF_XDP_LOG(ERR, "Incorrect number of fds received: %d != %d\n",
> +			   reply->num_fds, dev->data->nb_rx_queues);
> +		return -EINVAL;
> +	}
> +
> +	for (i = 0; i < reply->num_fds; i++)
> +		process_private->rxq_xsk_fds[i] = reply->fds[i];
> +
> +	free(reply);
> +	return 0;
> +}
> +
> +/* Primary process sends rxq fds to secondary. */
> +static int
> +afxdp_mp_send_fds(const struct rte_mp_msg *request, const void *peer)
> +{
> +	struct rte_eth_dev *dev;
> +	struct pmd_process_private *process_private;
> +	struct rte_mp_msg reply;
> +	const struct ipc_hdr *request_param =
> +		(const struct ipc_hdr *)request->param;
> +	struct ipc_hdr *reply_param =
> +		(struct ipc_hdr *)reply.param;
> +	const char *request_name = request_param->port_name;
> +	int i;
> +
> +	AF_XDP_LOG(DEBUG, "Received multi-process IPC request for %s\n",
> +		   request_name);
> +
> +	/* Find the requested port */
> +	dev = rte_eth_dev_get_by_name(request_name);
> +	if (!dev) {
> +		AF_XDP_LOG(ERR, "Failed to get port id for %s\n", request_name);
> +		return -1;
> +	}
> +	process_private = dev->process_private;
> +
> +	/* Populate the reply with the xsk fd for each queue */
> +	reply.num_fds = 0;
> +	if (dev->data->nb_rx_queues > RTE_MP_MAX_FD_NUM) {
> +		AF_XDP_LOG(ERR, "Number of rx queues (%d) exceeds max number of fds (%d)\n",
> +			   dev->data->nb_rx_queues, RTE_MP_MAX_FD_NUM);
> +		return -EINVAL;
> +	}
> +
> +	for (i = 0; i < dev->data->nb_rx_queues; i++)
> +		reply.fds[reply.num_fds++] = process_private->rxq_xsk_fds[i];
> +
> +	/* Send the reply */
> +	strlcpy(reply.name, request->name, sizeof(reply.name));
> +	strlcpy(reply_param->port_name, request_name,
> +		sizeof(reply_param->port_name));
> +	reply.len_param = sizeof(*reply_param);
> +	AF_XDP_LOG(DEBUG, "Sending multi-process IPC reply for %s\n",
> +		   reply_param->port_name);
> +	if (rte_mp_reply(&reply, peer) < 0) {
> +		AF_XDP_LOG(ERR, "Failed to reply to multi-process IPC request\n");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +/* Secondary process rx function. RX is disabled because memory mapping of the
> + * rings being assigned by the kernel in the primary process only.
> + */
> +static uint16_t
> +eth_af_xdp_rx_noop(void *queue __rte_unused,
> +		struct rte_mbuf **bufs __rte_unused,
> +		uint16_t nb_pkts __rte_unused)
> +{
> +	return 0;
> +}
> +
> +/* Secondary process tx function. TX is disabled because memory mapping of the
> + * rings being assigned by the kernel in the primary process only.
> + */
> +static uint16_t
> +eth_af_xdp_tx_noop(void *queue __rte_unused,
> +			struct rte_mbuf **bufs __rte_unused,
> +			uint16_t nb_pkts __rte_unused)
> +{
> +	return 0;
> +}
> +

Hi Ciara,

Can you please review following patch, if it is good we can merge it first
and rebase this patch on top of it to use generic dummy burst functions.

"ethdev: introduce generic dummy packet burst function"
https://patches.dpdk.org/project/dpdk/patch/20220208194437.426143-1-ferruh.yigit@intel.com/


>   static int
>   rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
>   {
> @@ -1789,19 +1941,39 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
>   	int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
>   	int shared_umem = 0;
>   	char prog_path[PATH_MAX] = {'\0'};
> -	int busy_budget = -1;
> +	int busy_budget = -1, ret;
>   	struct rte_eth_dev *eth_dev = NULL;
> -	const char *name;
> +	const char *name = rte_vdev_device_name(dev);
>   
> -	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
> -		rte_vdev_device_name(dev));
> +	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n", name);
>   
> -	name = rte_vdev_device_name(dev);
>   	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> -		AF_XDP_LOG(ERR, "Failed to probe %s. "
> -				"AF_XDP PMD does not support secondary processes.\n",
> -				name);
> -		return -ENOTSUP;
> +		eth_dev = rte_eth_dev_attach_secondary(name);
> +		if (eth_dev == NULL) {
> +			AF_XDP_LOG(ERR, "Failed to probe %s\n", name);
> +			return -EINVAL;
> +		}
> +		eth_dev->dev_ops = &ops;
> +		eth_dev->device = &dev->device;
> +		eth_dev->rx_pkt_burst = eth_af_xdp_rx_noop;
> +		eth_dev->tx_pkt_burst = eth_af_xdp_tx_noop;
> +		eth_dev->process_private = (struct pmd_process_private *)
> +			rte_zmalloc_socket(name,
> +					   sizeof(struct pmd_process_private),
> +					   RTE_CACHE_LINE_SIZE,
> +					   eth_dev->device->numa_node);
> +		if (eth_dev->process_private == NULL) {
> +			AF_XDP_LOG(ERR,
> +				"Failed to alloc memory for process private\n");
> +			return -ENOMEM;
> +		}
> +
> +		/* Obtain the xsk fds from the primary process. */
> +		if (afxdp_mp_request_fds(name, eth_dev))
> +			return -1;
> +
> +		rte_eth_dev_probing_finish(eth_dev);
> +		return 0;
>   	}
>   
>   	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
> @@ -1836,6 +2008,17 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
>   		return -1;
>   	}
>   
> +	/* Register IPC callback which shares xsk fds from primary to secondary */
> +	if (!afxdp_dev_count) {
> +		ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY, afxdp_mp_send_fds);
> +		if (ret < 0) {
> +			AF_XDP_LOG(ERR, "%s: Failed to register multi-process IPC callback: %s",
> +				   name, strerror(rte_errno));
> +			return -1;
> +		}
> +	}
> +	afxdp_dev_count++;
> +
>   	rte_eth_dev_probing_finish(eth_dev);
>   
>   	return 0;
> @@ -1858,9 +2041,11 @@ rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
>   		return 0;
>   
>   	eth_dev_close(eth_dev);
> +	if (afxdp_dev_count == 1)
> +		rte_mp_action_unregister(ETH_AF_XDP_MP_KEY);
> +	afxdp_dev_count--;
>   	rte_eth_dev_release_port(eth_dev);
>   
> -
>   	return 0;
>   }
>   


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-09 17:55         ` Ferruh Yigit
@ 2022-02-10 15:08           ` Ferruh Yigit
  0 siblings, 0 replies; 36+ messages in thread
From: Ferruh Yigit @ 2022-02-10 15:08 UTC (permalink / raw)
  To: Ciara Loftus, dev; +Cc: stephen, anatoly.burakov

On 2/9/2022 5:55 PM, Ferruh Yigit wrote:
> On 2/9/2022 9:48 AM, Ciara Loftus wrote:
>> Secondary process support had been disabled for the AF_XDP PMD because
>> there was no logic in place to share the AF_XDP socket file descriptors
>> between the processes. This commit introduces this logic using the IPC
>> APIs.
>>
>> Rx and Tx are disabled in the secondary process due to memory mapping of
>> the AF_XDP rings being assigned by the kernel in the primary process only.
>> However other operations including retrieval of stats are permitted.
>>
>> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
>>
>> ---
>> v3 -> v4:
>> * Rebase to next-net
>> * Reword limitation which requires Rx and Tx to be disabled.
>>
>> v2 -> v3:
>> * Rebase to next-net
>> * Updated logs to be more specific about multi-process IPC
>> * Free process_private in _close and _remove functions
>> * Use rte_eth_dev_get_by_name instead of global array
>>
>> v1 -> v2:
>> * Rebase to next-net
>>
>> RFC -> v1:
>> * Added newline to af_xdp.rst
>> * Fixed spelling errors
>> * Fixed potential NULL dereference in init_internals
>> * Fixed potential free of address-of expression in afxdp_mp_request_fds
>> ---
>>   doc/guides/nics/af_xdp.rst             |   9 ++
>>   doc/guides/nics/features/af_xdp.ini    |   1 +
>>   doc/guides/rel_notes/release_22_03.rst |   1 +
>>   drivers/net/af_xdp/rte_eth_af_xdp.c    | 215 +++++++++++++++++++++++--
>>   4 files changed, 211 insertions(+), 15 deletions(-)
>>

<...>

>> +
>> +/* Secondary process rx function. RX is disabled because memory mapping of the
>> + * rings being assigned by the kernel in the primary process only.
>> + */
>> +static uint16_t
>> +eth_af_xdp_rx_noop(void *queue __rte_unused,
>> +        struct rte_mbuf **bufs __rte_unused,
>> +        uint16_t nb_pkts __rte_unused)
>> +{
>> +    return 0;
>> +}
>> +
>> +/* Secondary process tx function. TX is disabled because memory mapping of the
>> + * rings being assigned by the kernel in the primary process only.
>> + */
>> +static uint16_t
>> +eth_af_xdp_tx_noop(void *queue __rte_unused,
>> +            struct rte_mbuf **bufs __rte_unused,
>> +            uint16_t nb_pkts __rte_unused)
>> +{
>> +    return 0;
>> +}
>> +
> 
> Hi Ciara,
> 
> Can you please review following patch, if it is good we can merge it first
> and rebase this patch on top of it to use generic dummy burst functions.
> 
> "ethdev: introduce generic dummy packet burst function"
> https://patches.dpdk.org/project/dpdk/patch/20220208194437.426143-1-ferruh.yigit@intel.com/
> 

That ethdev patch can be delayed, so I will proceed with this patch.
noop burst functions in the PMD can be updated later.


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-09  9:48       ` [PATCH v4] " Ciara Loftus
  2022-02-09 15:29         ` Stephen Hemminger
  2022-02-09 17:55         ` Ferruh Yigit
@ 2022-02-10 15:19         ` Ferruh Yigit
  2022-02-10 15:40           ` Loftus, Ciara
  2022-02-17 12:44         ` David Marchand
  3 siblings, 1 reply; 36+ messages in thread
From: Ferruh Yigit @ 2022-02-10 15:19 UTC (permalink / raw)
  To: Ciara Loftus, dev; +Cc: stephen, anatoly.burakov, Singh, Aman Deep

On 2/9/2022 9:48 AM, Ciara Loftus wrote:
> Secondary process support had been disabled for the AF_XDP PMD because
> there was no logic in place to share the AF_XDP socket file descriptors
> between the processes. This commit introduces this logic using the IPC
> APIs.
> 
> Rx and Tx are disabled in the secondary process due to memory mapping of
> the AF_XDP rings being assigned by the kernel in the primary process only.
> However other operations including retrieval of stats are permitted.
> 
> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> 

Hi Ciara,

When I tried to test the patch getting following error [1], it doesn't look
related to this patch but can you help to fix the issue, thanks.

[1]
libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
xsk_configure(): Failed to create xsk socket.
eth_rx_queue_setup(): Failed to configure xdp socket
Fail to configure port 2 rx queues
EAL: Error - exiting with code: 1

^ permalink raw reply	[flat|nested] 36+ messages in thread

* RE: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-10 15:19         ` Ferruh Yigit
@ 2022-02-10 15:40           ` Loftus, Ciara
  2022-02-10 16:06             ` Ferruh Yigit
  0 siblings, 1 reply; 36+ messages in thread
From: Loftus, Ciara @ 2022-02-10 15:40 UTC (permalink / raw)
  To: Yigit, Ferruh, dev; +Cc: stephen, Burakov, Anatoly, Singh, Aman Deep

> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process support
> 
> On 2/9/2022 9:48 AM, Ciara Loftus wrote:
> > Secondary process support had been disabled for the AF_XDP PMD
> because
> > there was no logic in place to share the AF_XDP socket file descriptors
> > between the processes. This commit introduces this logic using the IPC
> > APIs.
> >
> > Rx and Tx are disabled in the secondary process due to memory mapping of
> > the AF_XDP rings being assigned by the kernel in the primary process only.
> > However other operations including retrieval of stats are permitted.
> >
> > Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> >
> 
> Hi Ciara,
> 
> When I tried to test the patch getting following error [1], it doesn't look
> related to this patch but can you help to fix the issue, thanks.
> 
> [1]
> libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
> xsk_configure(): Failed to create xsk socket.
> eth_rx_queue_setup(): Failed to configure xdp socket
> Fail to configure port 2 rx queues
> EAL: Error - exiting with code: 1


Hi Ferruh,

This file should be generated when libxdp is compiled.
Mine is located @ /usr/local/lib/bpf/xsk_def_xdp_prog.o
Can you check if that file is there for you? It could be in /usr/local/lib64/bpf/ on your machine.
What kernel are you running on?

Thanks,
Ciara

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-10 15:40           ` Loftus, Ciara
@ 2022-02-10 16:06             ` Ferruh Yigit
  2022-02-10 17:47               ` Loftus, Ciara
  0 siblings, 1 reply; 36+ messages in thread
From: Ferruh Yigit @ 2022-02-10 16:06 UTC (permalink / raw)
  To: Loftus, Ciara, dev; +Cc: stephen, Burakov, Anatoly, Singh, Aman Deep

On 2/10/2022 3:40 PM, Loftus, Ciara wrote:
>> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process support
>>
>> On 2/9/2022 9:48 AM, Ciara Loftus wrote:
>>> Secondary process support had been disabled for the AF_XDP PMD
>> because
>>> there was no logic in place to share the AF_XDP socket file descriptors
>>> between the processes. This commit introduces this logic using the IPC
>>> APIs.
>>>
>>> Rx and Tx are disabled in the secondary process due to memory mapping of
>>> the AF_XDP rings being assigned by the kernel in the primary process only.
>>> However other operations including retrieval of stats are permitted.
>>>
>>> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
>>>
>>
>> Hi Ciara,
>>
>> When I tried to test the patch getting following error [1], it doesn't look
>> related to this patch but can you help to fix the issue, thanks.
>>
>> [1]
>> libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
>> xsk_configure(): Failed to create xsk socket.
>> eth_rx_queue_setup(): Failed to configure xdp socket
>> Fail to configure port 2 rx queues
>> EAL: Error - exiting with code: 1
> 
> 
> Hi Ferruh,
> 
> This file should be generated when libxdp is compiled.
> Mine is located @ /usr/local/lib/bpf/xsk_def_xdp_prog.o
> Can you check if that file is there for you? It could be in /usr/local/lib64/bpf/ on your machine.
> What kernel are you running on?
> 

It is in: /usr/local/lib64/bpf/xsk_def_xdp_prog.o

I had to compile libxdp from source because OS package version was old
to work with af_xdp.
Is something required to point location of this file to af_xdp PMD?

I run kernel:
5.15.16-200.fc35.x86_64



^ permalink raw reply	[flat|nested] 36+ messages in thread

* RE: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-10 16:06             ` Ferruh Yigit
@ 2022-02-10 17:47               ` Loftus, Ciara
  2022-02-10 20:12                 ` Ferruh Yigit
  0 siblings, 1 reply; 36+ messages in thread
From: Loftus, Ciara @ 2022-02-10 17:47 UTC (permalink / raw)
  To: Yigit, Ferruh, dev; +Cc: stephen, Burakov, Anatoly, Singh, Aman Deep

> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process support
> 
> On 2/10/2022 3:40 PM, Loftus, Ciara wrote:
> >> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process support
> >>
> >> On 2/9/2022 9:48 AM, Ciara Loftus wrote:
> >>> Secondary process support had been disabled for the AF_XDP PMD
> >> because
> >>> there was no logic in place to share the AF_XDP socket file descriptors
> >>> between the processes. This commit introduces this logic using the IPC
> >>> APIs.
> >>>
> >>> Rx and Tx are disabled in the secondary process due to memory mapping
> of
> >>> the AF_XDP rings being assigned by the kernel in the primary process
> only.
> >>> However other operations including retrieval of stats are permitted.
> >>>
> >>> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> >>>
> >>
> >> Hi Ciara,
> >>
> >> When I tried to test the patch getting following error [1], it doesn't look
> >> related to this patch but can you help to fix the issue, thanks.
> >>
> >> [1]
> >> libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
> >> xsk_configure(): Failed to create xsk socket.
> >> eth_rx_queue_setup(): Failed to configure xdp socket
> >> Fail to configure port 2 rx queues
> >> EAL: Error - exiting with code: 1
> >
> >
> > Hi Ferruh,
> >
> > This file should be generated when libxdp is compiled.
> > Mine is located @ /usr/local/lib/bpf/xsk_def_xdp_prog.o
> > Can you check if that file is there for you? It could be in
> /usr/local/lib64/bpf/ on your machine.
> > What kernel are you running on?
> >
> 
> It is in: /usr/local/lib64/bpf/xsk_def_xdp_prog.o
> 
> I had to compile libxdp from source because OS package version was old
> to work with af_xdp.
> Is something required to point location of this file to af_xdp PMD?
> 
> I run kernel:
> 5.15.16-200.fc35.x86_64

I read through the libxdp code to figure out what happens when searching for the file:
https://github.com/xdp-project/xdp-tools/blob/v1.2.2/lib/libxdp/libxdp.c#L1055

secure_getenv(XDP_OBJECT_ENVVAR) is called which according to the README "defaults to /usr/lib/bpf (or /usr/lib64/bpf on systems using a split library path)". 
If that fails, BPF_OBJECT_PATH will be searched, which points to /usr/lib/bpf

I discovered that on my system the getenv() call fails, but the file is eventually found because luckily BPF_OBJECT_PATH points to the appropriate place for me (lib):
https://github.com/xdp-project/xdp-tools/blob/v1.2.2/lib/util/util.h#L24
I suspect the same failure is happening for you, but since BPF_OBJECT_PATH points to lib and not lib64, the file is not found.
As a temporary measure can you create a symlink in /usr/local/lib/bpf/ to point to /usr/local/lib/bpf/xsk_def_xdp_prog.o
I will investigate the libxdp issue further. Maybe a change is needed in the library. If a change or setup recommendation is needed in DPDK I will create a patch.

Thanks,
Ciara

> 


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-10 17:47               ` Loftus, Ciara
@ 2022-02-10 20:12                 ` Ferruh Yigit
  2022-02-11  7:28                   ` Loftus, Ciara
  0 siblings, 1 reply; 36+ messages in thread
From: Ferruh Yigit @ 2022-02-10 20:12 UTC (permalink / raw)
  To: Loftus, Ciara, dev; +Cc: stephen, Burakov, Anatoly, Singh, Aman Deep

On 2/10/2022 5:47 PM, Loftus, Ciara wrote:
>> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process support
>>
>> On 2/10/2022 3:40 PM, Loftus, Ciara wrote:
>>>> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process support
>>>>
>>>> On 2/9/2022 9:48 AM, Ciara Loftus wrote:
>>>>> Secondary process support had been disabled for the AF_XDP PMD
>>>> because
>>>>> there was no logic in place to share the AF_XDP socket file descriptors
>>>>> between the processes. This commit introduces this logic using the IPC
>>>>> APIs.
>>>>>
>>>>> Rx and Tx are disabled in the secondary process due to memory mapping
>> of
>>>>> the AF_XDP rings being assigned by the kernel in the primary process
>> only.
>>>>> However other operations including retrieval of stats are permitted.
>>>>>
>>>>> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
>>>>>
>>>>
>>>> Hi Ciara,
>>>>
>>>> When I tried to test the patch getting following error [1], it doesn't look
>>>> related to this patch but can you help to fix the issue, thanks.
>>>>
>>>> [1]
>>>> libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
>>>> xsk_configure(): Failed to create xsk socket.
>>>> eth_rx_queue_setup(): Failed to configure xdp socket
>>>> Fail to configure port 2 rx queues
>>>> EAL: Error - exiting with code: 1
>>>
>>>
>>> Hi Ferruh,
>>>
>>> This file should be generated when libxdp is compiled.
>>> Mine is located @ /usr/local/lib/bpf/xsk_def_xdp_prog.o
>>> Can you check if that file is there for you? It could be in
>> /usr/local/lib64/bpf/ on your machine.
>>> What kernel are you running on?
>>>
>>
>> It is in: /usr/local/lib64/bpf/xsk_def_xdp_prog.o
>>
>> I had to compile libxdp from source because OS package version was old
>> to work with af_xdp.
>> Is something required to point location of this file to af_xdp PMD?
>>
>> I run kernel:
>> 5.15.16-200.fc35.x86_64
> 
> I read through the libxdp code to figure out what happens when searching for the file:
> https://github.com/xdp-project/xdp-tools/blob/v1.2.2/lib/libxdp/libxdp.c#L1055
> 
> secure_getenv(XDP_OBJECT_ENVVAR) is called which according to the README "defaults to /usr/lib/bpf (or /usr/lib64/bpf on systems using a split library path)".
> If that fails, BPF_OBJECT_PATH will be searched, which points to /usr/lib/bpf
> 
> I discovered that on my system the getenv() call fails, but the file is eventually found because luckily BPF_OBJECT_PATH points to the appropriate place for me (lib):
> https://github.com/xdp-project/xdp-tools/blob/v1.2.2/lib/util/util.h#L24
> I suspect the same failure is happening for you, but since BPF_OBJECT_PATH points to lib and not lib64, the file is not found.
> As a temporary measure can you create a symlink in /usr/local/lib/bpf/ to point to /usr/local/lib/bpf/xsk_def_xdp_prog.o
> I will investigate the libxdp issue further. Maybe a change is needed in the library. If a change or setup recommendation is needed in DPDK I will create a patch.
> 


I don't have XDP_OBJECT_ENVVAR or BPF_OBJECT_PATH environment variables set,
if they should be we should document them.

When I created '/usr/local/lib/bpf/' link, the BPF file found.
This should be clarified/documented for users.


And still observing following two:

1) I don't know what following log means:
Configuring Port 2 (socket 0)
libbpf: elf: skipping unrecognized data section(7) .xdp_run_config
libbpf: elf: skipping unrecognized data section(8) xdp_metadata
libxdp: XDP flag not supported by libxdp.
libbpf: elf: skipping unrecognized data section(8) xdp_metadata
libbpf: elf: skipping unrecognized data section(8) xdp_metadata

2) When I try to create two af_xdp interface, I only got one:
"--vdev net_af_xdp,iface=enp24s0f1 --vdev net_af_xdp,iface=enp24s0f0"


Thanks,
ferruh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* RE: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-10 20:12                 ` Ferruh Yigit
@ 2022-02-11  7:28                   ` Loftus, Ciara
  2022-02-11  9:26                     ` Loftus, Ciara
  2022-02-11 11:31                     ` Ferruh Yigit
  0 siblings, 2 replies; 36+ messages in thread
From: Loftus, Ciara @ 2022-02-11  7:28 UTC (permalink / raw)
  To: Yigit, Ferruh, dev; +Cc: stephen, Burakov, Anatoly, Singh, Aman Deep

> 
> On 2/10/2022 5:47 PM, Loftus, Ciara wrote:
> >> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process support
> >>
> >> On 2/10/2022 3:40 PM, Loftus, Ciara wrote:
> >>>> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process
> support
> >>>>
> >>>> On 2/9/2022 9:48 AM, Ciara Loftus wrote:
> >>>>> Secondary process support had been disabled for the AF_XDP PMD
> >>>> because
> >>>>> there was no logic in place to share the AF_XDP socket file descriptors
> >>>>> between the processes. This commit introduces this logic using the
> IPC
> >>>>> APIs.
> >>>>>
> >>>>> Rx and Tx are disabled in the secondary process due to memory
> mapping
> >> of
> >>>>> the AF_XDP rings being assigned by the kernel in the primary process
> >> only.
> >>>>> However other operations including retrieval of stats are permitted.
> >>>>>
> >>>>> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> >>>>>
> >>>>
> >>>> Hi Ciara,
> >>>>
> >>>> When I tried to test the patch getting following error [1], it doesn't look
> >>>> related to this patch but can you help to fix the issue, thanks.
> >>>>
> >>>> [1]
> >>>> libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
> >>>> xsk_configure(): Failed to create xsk socket.
> >>>> eth_rx_queue_setup(): Failed to configure xdp socket
> >>>> Fail to configure port 2 rx queues
> >>>> EAL: Error - exiting with code: 1
> >>>
> >>>
> >>> Hi Ferruh,
> >>>
> >>> This file should be generated when libxdp is compiled.
> >>> Mine is located @ /usr/local/lib/bpf/xsk_def_xdp_prog.o
> >>> Can you check if that file is there for you? It could be in
> >> /usr/local/lib64/bpf/ on your machine.
> >>> What kernel are you running on?
> >>>
> >>
> >> It is in: /usr/local/lib64/bpf/xsk_def_xdp_prog.o
> >>
> >> I had to compile libxdp from source because OS package version was old
> >> to work with af_xdp.
> >> Is something required to point location of this file to af_xdp PMD?
> >>
> >> I run kernel:
> >> 5.15.16-200.fc35.x86_64
> >
> > I read through the libxdp code to figure out what happens when searching
> for the file:
> > https://github.com/xdp-project/xdp-
> tools/blob/v1.2.2/lib/libxdp/libxdp.c#L1055
> >
> > secure_getenv(XDP_OBJECT_ENVVAR) is called which according to the
> README "defaults to /usr/lib/bpf (or /usr/lib64/bpf on systems using a split
> library path)".
> > If that fails, BPF_OBJECT_PATH will be searched, which points to
> /usr/lib/bpf
> >
> > I discovered that on my system the getenv() call fails, but the file is
> eventually found because luckily BPF_OBJECT_PATH points to the
> appropriate place for me (lib):
> > https://github.com/xdp-project/xdp-tools/blob/v1.2.2/lib/util/util.h#L24
> > I suspect the same failure is happening for you, but since
> BPF_OBJECT_PATH points to lib and not lib64, the file is not found.
> > As a temporary measure can you create a symlink in /usr/local/lib/bpf/ to
> point to /usr/local/lib/bpf/xsk_def_xdp_prog.o
> > I will investigate the libxdp issue further. Maybe a change is needed in the
> library. If a change or setup recommendation is needed in DPDK I will create a
> patch.
> >
> 
> 
> I don't have XDP_OBJECT_ENVVAR or BPF_OBJECT_PATH environment
> variables set,
> if they should be we should document them.
> 
> When I created '/usr/local/lib/bpf/' link, the BPF file found.
> This should be clarified/documented for users.

Ok. Ideally we shouldn't have to create the symlink. I will look for a better solution and submit a patch.
The symlink might be a temporary solution if another solution is not found.

> 
> 
> And still observing following two:
> 
> 1) I don't know what following log means:
> Configuring Port 2 (socket 0)
> libbpf: elf: skipping unrecognized data section(7) .xdp_run_config
> libbpf: elf: skipping unrecognized data section(8) xdp_metadata
> libxdp: XDP flag not supported by libxdp.
> libbpf: elf: skipping unrecognized data section(8) xdp_metadata
> libbpf: elf: skipping unrecognized data section(8) xdp_metadata

I reported this and a patch was submitted to libbpf to demote those logs:
https://www.spinics.net/lists/bpf/msg49140.html
It looks like the patch never made it. I'll chase it up.
Anyway, the logs can be ignored as they are not errors.

> 
> 2) When I try to create two af_xdp interface, I only got one:
> "--vdev net_af_xdp,iface=enp24s0f1 --vdev net_af_xdp,iface=enp24s0f0"

This is also expected as you haven't given each vdev a unique name. Try:
"--vdev net_af_xdp0,iface=enp24s0f1 --vdev net_af_xdp1,iface=enp24s0f0"

Thank you for the testing.

Ciara

> 
> 
> Thanks,
> ferruh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* RE: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-11  7:28                   ` Loftus, Ciara
@ 2022-02-11  9:26                     ` Loftus, Ciara
  2022-02-11 12:29                       ` Ferruh Yigit
  2022-02-11 11:31                     ` Ferruh Yigit
  1 sibling, 1 reply; 36+ messages in thread
From: Loftus, Ciara @ 2022-02-11  9:26 UTC (permalink / raw)
  To: Loftus, Ciara, Yigit, Ferruh, dev
  Cc: stephen, Burakov, Anatoly, Singh, Aman Deep

> >
> > On 2/10/2022 5:47 PM, Loftus, Ciara wrote:
> > >> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process
> support
> > >>
> > >> On 2/10/2022 3:40 PM, Loftus, Ciara wrote:
> > >>>> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process
> > support
> > >>>>
> > >>>> On 2/9/2022 9:48 AM, Ciara Loftus wrote:
> > >>>>> Secondary process support had been disabled for the AF_XDP PMD
> > >>>> because
> > >>>>> there was no logic in place to share the AF_XDP socket file
> descriptors
> > >>>>> between the processes. This commit introduces this logic using the
> > IPC
> > >>>>> APIs.
> > >>>>>
> > >>>>> Rx and Tx are disabled in the secondary process due to memory
> > mapping
> > >> of
> > >>>>> the AF_XDP rings being assigned by the kernel in the primary
> process
> > >> only.
> > >>>>> However other operations including retrieval of stats are permitted.
> > >>>>>
> > >>>>> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> > >>>>>
> > >>>>
> > >>>> Hi Ciara,
> > >>>>
> > >>>> When I tried to test the patch getting following error [1], it doesn't
> look
> > >>>> related to this patch but can you help to fix the issue, thanks.
> > >>>>
> > >>>> [1]
> > >>>> libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
> > >>>> xsk_configure(): Failed to create xsk socket.
> > >>>> eth_rx_queue_setup(): Failed to configure xdp socket
> > >>>> Fail to configure port 2 rx queues
> > >>>> EAL: Error - exiting with code: 1
> > >>>
> > >>>
> > >>> Hi Ferruh,
> > >>>
> > >>> This file should be generated when libxdp is compiled.
> > >>> Mine is located @ /usr/local/lib/bpf/xsk_def_xdp_prog.o
> > >>> Can you check if that file is there for you? It could be in
> > >> /usr/local/lib64/bpf/ on your machine.
> > >>> What kernel are you running on?
> > >>>
> > >>
> > >> It is in: /usr/local/lib64/bpf/xsk_def_xdp_prog.o
> > >>
> > >> I had to compile libxdp from source because OS package version was old
> > >> to work with af_xdp.
> > >> Is something required to point location of this file to af_xdp PMD?
> > >>
> > >> I run kernel:
> > >> 5.15.16-200.fc35.x86_64
> > >
> > > I read through the libxdp code to figure out what happens when
> searching
> > for the file:
> > > https://github.com/xdp-project/xdp-
> > tools/blob/v1.2.2/lib/libxdp/libxdp.c#L1055
> > >
> > > secure_getenv(XDP_OBJECT_ENVVAR) is called which according to the
> > README "defaults to /usr/lib/bpf (or /usr/lib64/bpf on systems using a split
> > library path)".
> > > If that fails, BPF_OBJECT_PATH will be searched, which points to
> > /usr/lib/bpf
> > >
> > > I discovered that on my system the getenv() call fails, but the file is
> > eventually found because luckily BPF_OBJECT_PATH points to the
> > appropriate place for me (lib):
> > > https://github.com/xdp-project/xdp-tools/blob/v1.2.2/lib/util/util.h#L24
> > > I suspect the same failure is happening for you, but since
> > BPF_OBJECT_PATH points to lib and not lib64, the file is not found.
> > > As a temporary measure can you create a symlink in /usr/local/lib/bpf/ to
> > point to /usr/local/lib/bpf/xsk_def_xdp_prog.o
> > > I will investigate the libxdp issue further. Maybe a change is needed in
> the
> > library. If a change or setup recommendation is needed in DPDK I will
> create a
> > patch.
> > >
> >
> >
> > I don't have XDP_OBJECT_ENVVAR or BPF_OBJECT_PATH environment
> > variables set,
> > if they should be we should document them.
> >
> > When I created '/usr/local/lib/bpf/' link, the BPF file found.
> > This should be clarified/documented for users.
> 
> Ok. Ideally we shouldn't have to create the symlink. I will look for a better
> solution and submit a patch.
> The symlink might be a temporary solution if another solution is not found.

Can you please try setting the environment variable LIBXDP_OBJECT_PATH=/usr/local/lib64/bpf/
And see if your test works without the symlink?
This worked for me and the getenv succeeded.
If it works for you too, I'll create a patch for the docs instructing users to do the same.

Thanks,
Ciara

> 
> >
> >
> > And still observing following two:
> >
> > 1) I don't know what following log means:
> > Configuring Port 2 (socket 0)
> > libbpf: elf: skipping unrecognized data section(7) .xdp_run_config
> > libbpf: elf: skipping unrecognized data section(8) xdp_metadata
> > libxdp: XDP flag not supported by libxdp.
> > libbpf: elf: skipping unrecognized data section(8) xdp_metadata
> > libbpf: elf: skipping unrecognized data section(8) xdp_metadata
> 
> I reported this and a patch was submitted to libbpf to demote those logs:
> https://www.spinics.net/lists/bpf/msg49140.html
> It looks like the patch never made it. I'll chase it up.
> Anyway, the logs can be ignored as they are not errors.
> 
> >
> > 2) When I try to create two af_xdp interface, I only got one:
> > "--vdev net_af_xdp,iface=enp24s0f1 --vdev net_af_xdp,iface=enp24s0f0"
> 
> This is also expected as you haven't given each vdev a unique name. Try:
> "--vdev net_af_xdp0,iface=enp24s0f1 --vdev net_af_xdp1,iface=enp24s0f0"
> 
> Thank you for the testing.
> 
> Ciara
> 
> >
> >
> > Thanks,
> > ferruh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-11  7:28                   ` Loftus, Ciara
  2022-02-11  9:26                     ` Loftus, Ciara
@ 2022-02-11 11:31                     ` Ferruh Yigit
  1 sibling, 0 replies; 36+ messages in thread
From: Ferruh Yigit @ 2022-02-11 11:31 UTC (permalink / raw)
  To: Loftus, Ciara, dev; +Cc: stephen, Burakov, Anatoly, Singh, Aman Deep

On 2/11/2022 7:28 AM, Loftus, Ciara wrote:
>>
>> On 2/10/2022 5:47 PM, Loftus, Ciara wrote:
>>>> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process support
>>>>
>>>> On 2/10/2022 3:40 PM, Loftus, Ciara wrote:
>>>>>> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process
>> support
>>>>>>
>>>>>> On 2/9/2022 9:48 AM, Ciara Loftus wrote:
>>>>>>> Secondary process support had been disabled for the AF_XDP PMD
>>>>>> because
>>>>>>> there was no logic in place to share the AF_XDP socket file descriptors
>>>>>>> between the processes. This commit introduces this logic using the
>> IPC
>>>>>>> APIs.
>>>>>>>
>>>>>>> Rx and Tx are disabled in the secondary process due to memory
>> mapping
>>>> of
>>>>>>> the AF_XDP rings being assigned by the kernel in the primary process
>>>> only.
>>>>>>> However other operations including retrieval of stats are permitted.
>>>>>>>
>>>>>>> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
>>>>>>>
>>>>>>
>>>>>> Hi Ciara,
>>>>>>
>>>>>> When I tried to test the patch getting following error [1], it doesn't look
>>>>>> related to this patch but can you help to fix the issue, thanks.
>>>>>>
>>>>>> [1]
>>>>>> libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
>>>>>> xsk_configure(): Failed to create xsk socket.
>>>>>> eth_rx_queue_setup(): Failed to configure xdp socket
>>>>>> Fail to configure port 2 rx queues
>>>>>> EAL: Error - exiting with code: 1
>>>>>
>>>>>
>>>>> Hi Ferruh,
>>>>>
>>>>> This file should be generated when libxdp is compiled.
>>>>> Mine is located @ /usr/local/lib/bpf/xsk_def_xdp_prog.o
>>>>> Can you check if that file is there for you? It could be in
>>>> /usr/local/lib64/bpf/ on your machine.
>>>>> What kernel are you running on?
>>>>>
>>>>
>>>> It is in: /usr/local/lib64/bpf/xsk_def_xdp_prog.o
>>>>
>>>> I had to compile libxdp from source because OS package version was old
>>>> to work with af_xdp.
>>>> Is something required to point location of this file to af_xdp PMD?
>>>>
>>>> I run kernel:
>>>> 5.15.16-200.fc35.x86_64
>>>
>>> I read through the libxdp code to figure out what happens when searching
>> for the file:
>>> https://github.com/xdp-project/xdp-
>> tools/blob/v1.2.2/lib/libxdp/libxdp.c#L1055
>>>
>>> secure_getenv(XDP_OBJECT_ENVVAR) is called which according to the
>> README "defaults to /usr/lib/bpf (or /usr/lib64/bpf on systems using a split
>> library path)".
>>> If that fails, BPF_OBJECT_PATH will be searched, which points to
>> /usr/lib/bpf
>>>
>>> I discovered that on my system the getenv() call fails, but the file is
>> eventually found because luckily BPF_OBJECT_PATH points to the
>> appropriate place for me (lib):
>>> https://github.com/xdp-project/xdp-tools/blob/v1.2.2/lib/util/util.h#L24
>>> I suspect the same failure is happening for you, but since
>> BPF_OBJECT_PATH points to lib and not lib64, the file is not found.
>>> As a temporary measure can you create a symlink in /usr/local/lib/bpf/ to
>> point to /usr/local/lib/bpf/xsk_def_xdp_prog.o
>>> I will investigate the libxdp issue further. Maybe a change is needed in the
>> library. If a change or setup recommendation is needed in DPDK I will create a
>> patch.
>>>
>>
>>
>> I don't have XDP_OBJECT_ENVVAR or BPF_OBJECT_PATH environment
>> variables set,
>> if they should be we should document them.
>>
>> When I created '/usr/local/lib/bpf/' link, the BPF file found.
>> This should be clarified/documented for users.
> 
> Ok. Ideally we shouldn't have to create the symlink. I will look for a better solution and submit a patch.
> The symlink might be a temporary solution if another solution is not found.
> 

ack

>>
>>
>> And still observing following two:
>>
>> 1) I don't know what following log means:
>> Configuring Port 2 (socket 0)
>> libbpf: elf: skipping unrecognized data section(7) .xdp_run_config
>> libbpf: elf: skipping unrecognized data section(8) xdp_metadata
>> libxdp: XDP flag not supported by libxdp.
>> libbpf: elf: skipping unrecognized data section(8) xdp_metadata
>> libbpf: elf: skipping unrecognized data section(8) xdp_metadata
> 
> I reported this and a patch was submitted to libbpf to demote those logs:
> https://www.spinics.net/lists/bpf/msg49140.html
> It looks like the patch never made it. I'll chase it up.

thanks

> Anyway, the logs can be ignored as they are not errors.
> 

Should we document this?

>>
>> 2) When I try to create two af_xdp interface, I only got one:
>> "--vdev net_af_xdp,iface=enp24s0f1 --vdev net_af_xdp,iface=enp24s0f0"
> 
> This is also expected as you haven't given each vdev a unique name. Try:
> "--vdev net_af_xdp0,iface=enp24s0f1 --vdev net_af_xdp1,iface=enp24s0f0"
> 

Yes of course :(, it works now, and I did verify the proc-info stats works.

> Thank you for the testing.
> 
> Ciara
> 
>>
>>
>> Thanks,
>> ferruh


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-11  9:26                     ` Loftus, Ciara
@ 2022-02-11 12:29                       ` Ferruh Yigit
  2022-02-11 13:01                         ` Loftus, Ciara
  0 siblings, 1 reply; 36+ messages in thread
From: Ferruh Yigit @ 2022-02-11 12:29 UTC (permalink / raw)
  To: Loftus, Ciara, dev; +Cc: stephen, Burakov, Anatoly, Singh, Aman Deep

On 2/11/2022 9:26 AM, Loftus, Ciara wrote:
>>>
>>> On 2/10/2022 5:47 PM, Loftus, Ciara wrote:
>>>>> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process
>> support
>>>>>
>>>>> On 2/10/2022 3:40 PM, Loftus, Ciara wrote:
>>>>>>> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process
>>> support
>>>>>>>
>>>>>>> On 2/9/2022 9:48 AM, Ciara Loftus wrote:
>>>>>>>> Secondary process support had been disabled for the AF_XDP PMD
>>>>>>> because
>>>>>>>> there was no logic in place to share the AF_XDP socket file
>> descriptors
>>>>>>>> between the processes. This commit introduces this logic using the
>>> IPC
>>>>>>>> APIs.
>>>>>>>>
>>>>>>>> Rx and Tx are disabled in the secondary process due to memory
>>> mapping
>>>>> of
>>>>>>>> the AF_XDP rings being assigned by the kernel in the primary
>> process
>>>>> only.
>>>>>>>> However other operations including retrieval of stats are permitted.
>>>>>>>>
>>>>>>>> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
>>>>>>>>
>>>>>>>
>>>>>>> Hi Ciara,
>>>>>>>
>>>>>>> When I tried to test the patch getting following error [1], it doesn't
>> look
>>>>>>> related to this patch but can you help to fix the issue, thanks.
>>>>>>>
>>>>>>> [1]
>>>>>>> libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
>>>>>>> xsk_configure(): Failed to create xsk socket.
>>>>>>> eth_rx_queue_setup(): Failed to configure xdp socket
>>>>>>> Fail to configure port 2 rx queues
>>>>>>> EAL: Error - exiting with code: 1
>>>>>>
>>>>>>
>>>>>> Hi Ferruh,
>>>>>>
>>>>>> This file should be generated when libxdp is compiled.
>>>>>> Mine is located @ /usr/local/lib/bpf/xsk_def_xdp_prog.o
>>>>>> Can you check if that file is there for you? It could be in
>>>>> /usr/local/lib64/bpf/ on your machine.
>>>>>> What kernel are you running on?
>>>>>>
>>>>>
>>>>> It is in: /usr/local/lib64/bpf/xsk_def_xdp_prog.o
>>>>>
>>>>> I had to compile libxdp from source because OS package version was old
>>>>> to work with af_xdp.
>>>>> Is something required to point location of this file to af_xdp PMD?
>>>>>
>>>>> I run kernel:
>>>>> 5.15.16-200.fc35.x86_64
>>>>
>>>> I read through the libxdp code to figure out what happens when
>> searching
>>> for the file:
>>>> https://github.com/xdp-project/xdp-
>>> tools/blob/v1.2.2/lib/libxdp/libxdp.c#L1055
>>>>
>>>> secure_getenv(XDP_OBJECT_ENVVAR) is called which according to the
>>> README "defaults to /usr/lib/bpf (or /usr/lib64/bpf on systems using a split
>>> library path)".
>>>> If that fails, BPF_OBJECT_PATH will be searched, which points to
>>> /usr/lib/bpf
>>>>
>>>> I discovered that on my system the getenv() call fails, but the file is
>>> eventually found because luckily BPF_OBJECT_PATH points to the
>>> appropriate place for me (lib):
>>>> https://github.com/xdp-project/xdp-tools/blob/v1.2.2/lib/util/util.h#L24
>>>> I suspect the same failure is happening for you, but since
>>> BPF_OBJECT_PATH points to lib and not lib64, the file is not found.
>>>> As a temporary measure can you create a symlink in /usr/local/lib/bpf/ to
>>> point to /usr/local/lib/bpf/xsk_def_xdp_prog.o
>>>> I will investigate the libxdp issue further. Maybe a change is needed in
>> the
>>> library. If a change or setup recommendation is needed in DPDK I will
>> create a
>>> patch.
>>>>
>>>
>>>
>>> I don't have XDP_OBJECT_ENVVAR or BPF_OBJECT_PATH environment
>>> variables set,
>>> if they should be we should document them.
>>>
>>> When I created '/usr/local/lib/bpf/' link, the BPF file found.
>>> This should be clarified/documented for users.
>>
>> Ok. Ideally we shouldn't have to create the symlink. I will look for a better
>> solution and submit a patch.
>> The symlink might be a temporary solution if another solution is not found.
> 
> Can you please try setting the environment variable LIBXDP_OBJECT_PATH=/usr/local/lib64/bpf/
> And see if your test works without the symlink?
> This worked for me and the getenv succeeded.
> If it works for you too, I'll create a patch for the docs instructing users to do the same.
> 

I confirm it works, and +1 to document it.


btw, when this environment variable is not set (and no symlink), af_xdp fails
and testpmd crashes. I think af_xdp failure shouldn't cause a crash in testpmd,
most probably some error checks are needed in the af_xdp driver.

> Thanks,
> Ciara
> 
>>
>>>
>>>
>>> And still observing following two:
>>>
>>> 1) I don't know what following log means:
>>> Configuring Port 2 (socket 0)
>>> libbpf: elf: skipping unrecognized data section(7) .xdp_run_config
>>> libbpf: elf: skipping unrecognized data section(8) xdp_metadata
>>> libxdp: XDP flag not supported by libxdp.
>>> libbpf: elf: skipping unrecognized data section(8) xdp_metadata
>>> libbpf: elf: skipping unrecognized data section(8) xdp_metadata
>>
>> I reported this and a patch was submitted to libbpf to demote those logs:
>> https://www.spinics.net/lists/bpf/msg49140.html
>> It looks like the patch never made it. I'll chase it up.
>> Anyway, the logs can be ignored as they are not errors.
>>
>>>
>>> 2) When I try to create two af_xdp interface, I only got one:
>>> "--vdev net_af_xdp,iface=enp24s0f1 --vdev net_af_xdp,iface=enp24s0f0"
>>
>> This is also expected as you haven't given each vdev a unique name. Try:
>> "--vdev net_af_xdp0,iface=enp24s0f1 --vdev net_af_xdp1,iface=enp24s0f0"
>>
>> Thank you for the testing.
>>
>> Ciara
>>
>>>
>>>
>>> Thanks,
>>> ferruh


^ permalink raw reply	[flat|nested] 36+ messages in thread

* RE: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-11 12:29                       ` Ferruh Yigit
@ 2022-02-11 13:01                         ` Loftus, Ciara
  2022-02-11 13:07                           ` Ferruh Yigit
  0 siblings, 1 reply; 36+ messages in thread
From: Loftus, Ciara @ 2022-02-11 13:01 UTC (permalink / raw)
  To: Yigit, Ferruh, dev; +Cc: stephen, Burakov, Anatoly, Singh, Aman Deep

> 
> On 2/11/2022 9:26 AM, Loftus, Ciara wrote:
> >>>
> >>> On 2/10/2022 5:47 PM, Loftus, Ciara wrote:
> >>>>> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process
> >> support
> >>>>>
> >>>>> On 2/10/2022 3:40 PM, Loftus, Ciara wrote:
> >>>>>>> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process
> >>> support
> >>>>>>>
> >>>>>>> On 2/9/2022 9:48 AM, Ciara Loftus wrote:
> >>>>>>>> Secondary process support had been disabled for the AF_XDP
> PMD
> >>>>>>> because
> >>>>>>>> there was no logic in place to share the AF_XDP socket file
> >> descriptors
> >>>>>>>> between the processes. This commit introduces this logic using
> the
> >>> IPC
> >>>>>>>> APIs.
> >>>>>>>>
> >>>>>>>> Rx and Tx are disabled in the secondary process due to memory
> >>> mapping
> >>>>> of
> >>>>>>>> the AF_XDP rings being assigned by the kernel in the primary
> >> process
> >>>>> only.
> >>>>>>>> However other operations including retrieval of stats are
> permitted.
> >>>>>>>>
> >>>>>>>> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> >>>>>>>>
> >>>>>>>
> >>>>>>> Hi Ciara,
> >>>>>>>
> >>>>>>> When I tried to test the patch getting following error [1], it doesn't
> >> look
> >>>>>>> related to this patch but can you help to fix the issue, thanks.
> >>>>>>>
> >>>>>>> [1]
> >>>>>>> libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
> >>>>>>> xsk_configure(): Failed to create xsk socket.
> >>>>>>> eth_rx_queue_setup(): Failed to configure xdp socket
> >>>>>>> Fail to configure port 2 rx queues
> >>>>>>> EAL: Error - exiting with code: 1
> >>>>>>
> >>>>>>
> >>>>>> Hi Ferruh,
> >>>>>>
> >>>>>> This file should be generated when libxdp is compiled.
> >>>>>> Mine is located @ /usr/local/lib/bpf/xsk_def_xdp_prog.o
> >>>>>> Can you check if that file is there for you? It could be in
> >>>>> /usr/local/lib64/bpf/ on your machine.
> >>>>>> What kernel are you running on?
> >>>>>>
> >>>>>
> >>>>> It is in: /usr/local/lib64/bpf/xsk_def_xdp_prog.o
> >>>>>
> >>>>> I had to compile libxdp from source because OS package version was
> old
> >>>>> to work with af_xdp.
> >>>>> Is something required to point location of this file to af_xdp PMD?
> >>>>>
> >>>>> I run kernel:
> >>>>> 5.15.16-200.fc35.x86_64
> >>>>
> >>>> I read through the libxdp code to figure out what happens when
> >> searching
> >>> for the file:
> >>>> https://github.com/xdp-project/xdp-
> >>> tools/blob/v1.2.2/lib/libxdp/libxdp.c#L1055
> >>>>
> >>>> secure_getenv(XDP_OBJECT_ENVVAR) is called which according to the
> >>> README "defaults to /usr/lib/bpf (or /usr/lib64/bpf on systems using a
> split
> >>> library path)".
> >>>> If that fails, BPF_OBJECT_PATH will be searched, which points to
> >>> /usr/lib/bpf
> >>>>
> >>>> I discovered that on my system the getenv() call fails, but the file is
> >>> eventually found because luckily BPF_OBJECT_PATH points to the
> >>> appropriate place for me (lib):
> >>>> https://github.com/xdp-project/xdp-
> tools/blob/v1.2.2/lib/util/util.h#L24
> >>>> I suspect the same failure is happening for you, but since
> >>> BPF_OBJECT_PATH points to lib and not lib64, the file is not found.
> >>>> As a temporary measure can you create a symlink in /usr/local/lib/bpf/
> to
> >>> point to /usr/local/lib/bpf/xsk_def_xdp_prog.o
> >>>> I will investigate the libxdp issue further. Maybe a change is needed in
> >> the
> >>> library. If a change or setup recommendation is needed in DPDK I will
> >> create a
> >>> patch.
> >>>>
> >>>
> >>>
> >>> I don't have XDP_OBJECT_ENVVAR or BPF_OBJECT_PATH environment
> >>> variables set,
> >>> if they should be we should document them.
> >>>
> >>> When I created '/usr/local/lib/bpf/' link, the BPF file found.
> >>> This should be clarified/documented for users.
> >>
> >> Ok. Ideally we shouldn't have to create the symlink. I will look for a better
> >> solution and submit a patch.
> >> The symlink might be a temporary solution if another solution is not
> found.
> >
> > Can you please try setting the environment variable
> LIBXDP_OBJECT_PATH=/usr/local/lib64/bpf/
> > And see if your test works without the symlink?
> > This worked for me and the getenv succeeded.
> > If it works for you too, I'll create a patch for the docs instructing users to do
> the same.
> >
> 
> I confirm it works, and +1 to document it.
> 
> 
> btw, when this environment variable is not set (and no symlink), af_xdp fails
> and testpmd crashes. I think af_xdp failure shouldn't cause a crash in
> testpmd,
> most probably some error checks are needed in the af_xdp driver.

When I trigger the error case in my environment I get a graceful exit:

libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
xsk_configure(): Failed to create xsk socket.
eth_rx_queue_setup(): Failed to configure xdp socket
Fail to configure port 0 rx queues
EAL: Error - exiting with code: 1
  Cause: Start ports failed

Can you please provide more info on your crash?

> 
> > Thanks,
> > Ciara
> >
> >>
> >>>
> >>>
> >>> And still observing following two:
> >>>
> >>> 1) I don't know what following log means:
> >>> Configuring Port 2 (socket 0)
> >>> libbpf: elf: skipping unrecognized data section(7) .xdp_run_config
> >>> libbpf: elf: skipping unrecognized data section(8) xdp_metadata
> >>> libxdp: XDP flag not supported by libxdp.
> >>> libbpf: elf: skipping unrecognized data section(8) xdp_metadata
> >>> libbpf: elf: skipping unrecognized data section(8) xdp_metadata
> >>
> >> I reported this and a patch was submitted to libbpf to demote those logs:
> >> https://www.spinics.net/lists/bpf/msg49140.html
> >> It looks like the patch never made it. I'll chase it up.
> >> Anyway, the logs can be ignored as they are not errors.
> >>
> >>>
> >>> 2) When I try to create two af_xdp interface, I only got one:
> >>> "--vdev net_af_xdp,iface=enp24s0f1 --vdev
> net_af_xdp,iface=enp24s0f0"
> >>
> >> This is also expected as you haven't given each vdev a unique name. Try:
> >> "--vdev net_af_xdp0,iface=enp24s0f1 --vdev
> net_af_xdp1,iface=enp24s0f0"
> >>
> >> Thank you for the testing.
> >>
> >> Ciara
> >>
> >>>
> >>>
> >>> Thanks,
> >>> ferruh


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-11 13:01                         ` Loftus, Ciara
@ 2022-02-11 13:07                           ` Ferruh Yigit
  2022-02-11 15:32                             ` Loftus, Ciara
  0 siblings, 1 reply; 36+ messages in thread
From: Ferruh Yigit @ 2022-02-11 13:07 UTC (permalink / raw)
  To: Loftus, Ciara, dev; +Cc: stephen, Burakov, Anatoly, Singh, Aman Deep

On 2/11/2022 1:01 PM, Loftus, Ciara wrote:
>>
>> On 2/11/2022 9:26 AM, Loftus, Ciara wrote:
>>>>>
>>>>> On 2/10/2022 5:47 PM, Loftus, Ciara wrote:
>>>>>>> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process
>>>> support
>>>>>>>
>>>>>>> On 2/10/2022 3:40 PM, Loftus, Ciara wrote:
>>>>>>>>> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process
>>>>> support
>>>>>>>>>
>>>>>>>>> On 2/9/2022 9:48 AM, Ciara Loftus wrote:
>>>>>>>>>> Secondary process support had been disabled for the AF_XDP
>> PMD
>>>>>>>>> because
>>>>>>>>>> there was no logic in place to share the AF_XDP socket file
>>>> descriptors
>>>>>>>>>> between the processes. This commit introduces this logic using
>> the
>>>>> IPC
>>>>>>>>>> APIs.
>>>>>>>>>>
>>>>>>>>>> Rx and Tx are disabled in the secondary process due to memory
>>>>> mapping
>>>>>>> of
>>>>>>>>>> the AF_XDP rings being assigned by the kernel in the primary
>>>> process
>>>>>>> only.
>>>>>>>>>> However other operations including retrieval of stats are
>> permitted.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Hi Ciara,
>>>>>>>>>
>>>>>>>>> When I tried to test the patch getting following error [1], it doesn't
>>>> look
>>>>>>>>> related to this patch but can you help to fix the issue, thanks.
>>>>>>>>>
>>>>>>>>> [1]
>>>>>>>>> libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
>>>>>>>>> xsk_configure(): Failed to create xsk socket.
>>>>>>>>> eth_rx_queue_setup(): Failed to configure xdp socket
>>>>>>>>> Fail to configure port 2 rx queues
>>>>>>>>> EAL: Error - exiting with code: 1
>>>>>>>>
>>>>>>>>
>>>>>>>> Hi Ferruh,
>>>>>>>>
>>>>>>>> This file should be generated when libxdp is compiled.
>>>>>>>> Mine is located @ /usr/local/lib/bpf/xsk_def_xdp_prog.o
>>>>>>>> Can you check if that file is there for you? It could be in
>>>>>>> /usr/local/lib64/bpf/ on your machine.
>>>>>>>> What kernel are you running on?
>>>>>>>>
>>>>>>>
>>>>>>> It is in: /usr/local/lib64/bpf/xsk_def_xdp_prog.o
>>>>>>>
>>>>>>> I had to compile libxdp from source because OS package version was
>> old
>>>>>>> to work with af_xdp.
>>>>>>> Is something required to point location of this file to af_xdp PMD?
>>>>>>>
>>>>>>> I run kernel:
>>>>>>> 5.15.16-200.fc35.x86_64
>>>>>>
>>>>>> I read through the libxdp code to figure out what happens when
>>>> searching
>>>>> for the file:
>>>>>> https://github.com/xdp-project/xdp-
>>>>> tools/blob/v1.2.2/lib/libxdp/libxdp.c#L1055
>>>>>>
>>>>>> secure_getenv(XDP_OBJECT_ENVVAR) is called which according to the
>>>>> README "defaults to /usr/lib/bpf (or /usr/lib64/bpf on systems using a
>> split
>>>>> library path)".
>>>>>> If that fails, BPF_OBJECT_PATH will be searched, which points to
>>>>> /usr/lib/bpf
>>>>>>
>>>>>> I discovered that on my system the getenv() call fails, but the file is
>>>>> eventually found because luckily BPF_OBJECT_PATH points to the
>>>>> appropriate place for me (lib):
>>>>>> https://github.com/xdp-project/xdp-
>> tools/blob/v1.2.2/lib/util/util.h#L24
>>>>>> I suspect the same failure is happening for you, but since
>>>>> BPF_OBJECT_PATH points to lib and not lib64, the file is not found.
>>>>>> As a temporary measure can you create a symlink in /usr/local/lib/bpf/
>> to
>>>>> point to /usr/local/lib/bpf/xsk_def_xdp_prog.o
>>>>>> I will investigate the libxdp issue further. Maybe a change is needed in
>>>> the
>>>>> library. If a change or setup recommendation is needed in DPDK I will
>>>> create a
>>>>> patch.
>>>>>>
>>>>>
>>>>>
>>>>> I don't have XDP_OBJECT_ENVVAR or BPF_OBJECT_PATH environment
>>>>> variables set,
>>>>> if they should be we should document them.
>>>>>
>>>>> When I created '/usr/local/lib/bpf/' link, the BPF file found.
>>>>> This should be clarified/documented for users.
>>>>
>>>> Ok. Ideally we shouldn't have to create the symlink. I will look for a better
>>>> solution and submit a patch.
>>>> The symlink might be a temporary solution if another solution is not
>> found.
>>>
>>> Can you please try setting the environment variable
>> LIBXDP_OBJECT_PATH=/usr/local/lib64/bpf/
>>> And see if your test works without the symlink?
>>> This worked for me and the getenv succeeded.
>>> If it works for you too, I'll create a patch for the docs instructing users to do
>> the same.
>>>
>>
>> I confirm it works, and +1 to document it.
>>
>>
>> btw, when this environment variable is not set (and no symlink), af_xdp fails
>> and testpmd crashes. I think af_xdp failure shouldn't cause a crash in
>> testpmd,
>> most probably some error checks are needed in the af_xdp driver.
> 
> When I trigger the error case in my environment I get a graceful exit:
> 
> libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
> xsk_configure(): Failed to create xsk socket.
> eth_rx_queue_setup(): Failed to configure xdp socket
> Fail to configure port 0 rx queues
> EAL: Error - exiting with code: 1
>    Cause: Start ports failed
> 
> Can you please provide more info on your crash?
> 

$ ./build/app/dpdk-testpmd --vdev net_af_xdp0,iface=enp24s0f1 --vdev net_af_xdp1,iface=enp94s0f1 -- -i
...
Configuring Port 2 (socket 0)
libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
xsk_configure(): Failed to create xsk socket.
eth_rx_queue_setup(): Failed to configure xdp socket
Fail to configure port 2 rx queues
EAL: Error - exiting with code: 1
   Cause: Start ports failed
Segmentation fault (core dumped)

(I have two physical interfaces too, af_xdp ports are port 2 & 3)

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-09 15:29         ` Stephen Hemminger
@ 2022-02-11 13:32           ` Ferruh Yigit
  0 siblings, 0 replies; 36+ messages in thread
From: Ferruh Yigit @ 2022-02-11 13:32 UTC (permalink / raw)
  To: Stephen Hemminger, Ciara Loftus; +Cc: dev, anatoly.burakov

On 2/9/2022 3:29 PM, Stephen Hemminger wrote:
> On Wed,  9 Feb 2022 09:48:08 +0000
> Ciara Loftus <ciara.loftus@intel.com> wrote:
> 
>> Secondary process support had been disabled for the AF_XDP PMD because
>> there was no logic in place to share the AF_XDP socket file descriptors
>> between the processes. This commit introduces this logic using the IPC
>> APIs.
>>
>> Rx and Tx are disabled in the secondary process due to memory mapping of
>> the AF_XDP rings being assigned by the kernel in the primary process only.
>> However other operations including retrieval of stats are permitted.
>>
>> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> 
> Acked-by: Stephen Hemminger <stephen@networkplumber.org>

Applied to dpdk-next-net/main, thanks.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* RE: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-11 13:07                           ` Ferruh Yigit
@ 2022-02-11 15:32                             ` Loftus, Ciara
  2022-02-16 11:23                               ` Loftus, Ciara
  0 siblings, 1 reply; 36+ messages in thread
From: Loftus, Ciara @ 2022-02-11 15:32 UTC (permalink / raw)
  To: Yigit, Ferruh, dev; +Cc: stephen, Burakov, Anatoly, Singh, Aman Deep

> 
> On 2/11/2022 1:01 PM, Loftus, Ciara wrote:
> >>
> >> On 2/11/2022 9:26 AM, Loftus, Ciara wrote:
> >>>>>
> >>>>> On 2/10/2022 5:47 PM, Loftus, Ciara wrote:
> >>>>>>> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process
> >>>> support
> >>>>>>>
> >>>>>>> On 2/10/2022 3:40 PM, Loftus, Ciara wrote:
> >>>>>>>>> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary
> process
> >>>>> support
> >>>>>>>>>
> >>>>>>>>> On 2/9/2022 9:48 AM, Ciara Loftus wrote:
> >>>>>>>>>> Secondary process support had been disabled for the AF_XDP
> >> PMD
> >>>>>>>>> because
> >>>>>>>>>> there was no logic in place to share the AF_XDP socket file
> >>>> descriptors
> >>>>>>>>>> between the processes. This commit introduces this logic using
> >> the
> >>>>> IPC
> >>>>>>>>>> APIs.
> >>>>>>>>>>
> >>>>>>>>>> Rx and Tx are disabled in the secondary process due to memory
> >>>>> mapping
> >>>>>>> of
> >>>>>>>>>> the AF_XDP rings being assigned by the kernel in the primary
> >>>> process
> >>>>>>> only.
> >>>>>>>>>> However other operations including retrieval of stats are
> >> permitted.
> >>>>>>>>>>
> >>>>>>>>>> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> >>>>>>>>>>
> >>>>>>>>>
> >>>>>>>>> Hi Ciara,
> >>>>>>>>>
> >>>>>>>>> When I tried to test the patch getting following error [1], it
> doesn't
> >>>> look
> >>>>>>>>> related to this patch but can you help to fix the issue, thanks.
> >>>>>>>>>
> >>>>>>>>> [1]
> >>>>>>>>> libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
> >>>>>>>>> xsk_configure(): Failed to create xsk socket.
> >>>>>>>>> eth_rx_queue_setup(): Failed to configure xdp socket
> >>>>>>>>> Fail to configure port 2 rx queues
> >>>>>>>>> EAL: Error - exiting with code: 1
> >>>>>>>>
> >>>>>>>>
> >>>>>>>> Hi Ferruh,
> >>>>>>>>
> >>>>>>>> This file should be generated when libxdp is compiled.
> >>>>>>>> Mine is located @ /usr/local/lib/bpf/xsk_def_xdp_prog.o
> >>>>>>>> Can you check if that file is there for you? It could be in
> >>>>>>> /usr/local/lib64/bpf/ on your machine.
> >>>>>>>> What kernel are you running on?
> >>>>>>>>
> >>>>>>>
> >>>>>>> It is in: /usr/local/lib64/bpf/xsk_def_xdp_prog.o
> >>>>>>>
> >>>>>>> I had to compile libxdp from source because OS package version
> was
> >> old
> >>>>>>> to work with af_xdp.
> >>>>>>> Is something required to point location of this file to af_xdp PMD?
> >>>>>>>
> >>>>>>> I run kernel:
> >>>>>>> 5.15.16-200.fc35.x86_64
> >>>>>>
> >>>>>> I read through the libxdp code to figure out what happens when
> >>>> searching
> >>>>> for the file:
> >>>>>> https://github.com/xdp-project/xdp-
> >>>>> tools/blob/v1.2.2/lib/libxdp/libxdp.c#L1055
> >>>>>>
> >>>>>> secure_getenv(XDP_OBJECT_ENVVAR) is called which according to
> the
> >>>>> README "defaults to /usr/lib/bpf (or /usr/lib64/bpf on systems using
> a
> >> split
> >>>>> library path)".
> >>>>>> If that fails, BPF_OBJECT_PATH will be searched, which points to
> >>>>> /usr/lib/bpf
> >>>>>>
> >>>>>> I discovered that on my system the getenv() call fails, but the file is
> >>>>> eventually found because luckily BPF_OBJECT_PATH points to the
> >>>>> appropriate place for me (lib):
> >>>>>> https://github.com/xdp-project/xdp-
> >> tools/blob/v1.2.2/lib/util/util.h#L24
> >>>>>> I suspect the same failure is happening for you, but since
> >>>>> BPF_OBJECT_PATH points to lib and not lib64, the file is not found.
> >>>>>> As a temporary measure can you create a symlink in
> /usr/local/lib/bpf/
> >> to
> >>>>> point to /usr/local/lib/bpf/xsk_def_xdp_prog.o
> >>>>>> I will investigate the libxdp issue further. Maybe a change is needed
> in
> >>>> the
> >>>>> library. If a change or setup recommendation is needed in DPDK I will
> >>>> create a
> >>>>> patch.
> >>>>>>
> >>>>>
> >>>>>
> >>>>> I don't have XDP_OBJECT_ENVVAR or BPF_OBJECT_PATH
> environment
> >>>>> variables set,
> >>>>> if they should be we should document them.
> >>>>>
> >>>>> When I created '/usr/local/lib/bpf/' link, the BPF file found.
> >>>>> This should be clarified/documented for users.
> >>>>
> >>>> Ok. Ideally we shouldn't have to create the symlink. I will look for a
> better
> >>>> solution and submit a patch.
> >>>> The symlink might be a temporary solution if another solution is not
> >> found.
> >>>
> >>> Can you please try setting the environment variable
> >> LIBXDP_OBJECT_PATH=/usr/local/lib64/bpf/
> >>> And see if your test works without the symlink?
> >>> This worked for me and the getenv succeeded.
> >>> If it works for you too, I'll create a patch for the docs instructing users to
> do
> >> the same.
> >>>
> >>
> >> I confirm it works, and +1 to document it.
> >>
> >>
> >> btw, when this environment variable is not set (and no symlink), af_xdp
> fails
> >> and testpmd crashes. I think af_xdp failure shouldn't cause a crash in
> >> testpmd,
> >> most probably some error checks are needed in the af_xdp driver.
> >
> > When I trigger the error case in my environment I get a graceful exit:
> >
> > libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
> > xsk_configure(): Failed to create xsk socket.
> > eth_rx_queue_setup(): Failed to configure xdp socket
> > Fail to configure port 0 rx queues
> > EAL: Error - exiting with code: 1
> >    Cause: Start ports failed
> >
> > Can you please provide more info on your crash?
> >
> 
> $ ./build/app/dpdk-testpmd --vdev net_af_xdp0,iface=enp24s0f1 --vdev
> net_af_xdp1,iface=enp94s0f1 -- -i
> ...
> Configuring Port 2 (socket 0)
> libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
> xsk_configure(): Failed to create xsk socket.
> eth_rx_queue_setup(): Failed to configure xdp socket
> Fail to configure port 2 rx queues
> EAL: Error - exiting with code: 1
>    Cause: Start ports failed
> Segmentation fault (core dumped)
> 
> (I have two physical interfaces too, af_xdp ports are port 2 & 3)

Thanks for providing more info. I have reproduced this issue.
It doesn't happen when --no-pci is used.
The issue is not related to the libxdp patch. It just occurs when xsk_configure() fails. As you said, some extra error checks/handling are probably required.
Thanks for reporting. I'll work on a fix.

Ciara

^ permalink raw reply	[flat|nested] 36+ messages in thread

* RE: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-11 15:32                             ` Loftus, Ciara
@ 2022-02-16 11:23                               ` Loftus, Ciara
  0 siblings, 0 replies; 36+ messages in thread
From: Loftus, Ciara @ 2022-02-16 11:23 UTC (permalink / raw)
  To: Yigit, Ferruh, dev; +Cc: stephen, Burakov, Anatoly, Singh, Aman Deep

> Subject: RE: [PATCH v4] net/af_xdp: re-enable secondary process support
> 
> >
> > On 2/11/2022 1:01 PM, Loftus, Ciara wrote:
> > >>
> > >> On 2/11/2022 9:26 AM, Loftus, Ciara wrote:
> > >>>>>
> > >>>>> On 2/10/2022 5:47 PM, Loftus, Ciara wrote:
> > >>>>>>> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary process
> > >>>> support
> > >>>>>>>
> > >>>>>>> On 2/10/2022 3:40 PM, Loftus, Ciara wrote:
> > >>>>>>>>> Subject: Re: [PATCH v4] net/af_xdp: re-enable secondary
> > process
> > >>>>> support
> > >>>>>>>>>
> > >>>>>>>>> On 2/9/2022 9:48 AM, Ciara Loftus wrote:
> > >>>>>>>>>> Secondary process support had been disabled for the
> AF_XDP
> > >> PMD
> > >>>>>>>>> because
> > >>>>>>>>>> there was no logic in place to share the AF_XDP socket file
> > >>>> descriptors
> > >>>>>>>>>> between the processes. This commit introduces this logic
> using
> > >> the
> > >>>>> IPC
> > >>>>>>>>>> APIs.
> > >>>>>>>>>>
> > >>>>>>>>>> Rx and Tx are disabled in the secondary process due to
> memory
> > >>>>> mapping
> > >>>>>>> of
> > >>>>>>>>>> the AF_XDP rings being assigned by the kernel in the primary
> > >>>> process
> > >>>>>>> only.
> > >>>>>>>>>> However other operations including retrieval of stats are
> > >> permitted.
> > >>>>>>>>>>
> > >>>>>>>>>> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> > >>>>>>>>>>
> > >>>>>>>>>
> > >>>>>>>>> Hi Ciara,
> > >>>>>>>>>
> > >>>>>>>>> When I tried to test the patch getting following error [1], it
> > doesn't
> > >>>> look
> > >>>>>>>>> related to this patch but can you help to fix the issue, thanks.
> > >>>>>>>>>
> > >>>>>>>>> [1]
> > >>>>>>>>> libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
> > >>>>>>>>> xsk_configure(): Failed to create xsk socket.
> > >>>>>>>>> eth_rx_queue_setup(): Failed to configure xdp socket
> > >>>>>>>>> Fail to configure port 2 rx queues
> > >>>>>>>>> EAL: Error - exiting with code: 1
> > >>>>>>>>
> > >>>>>>>>
> > >>>>>>>> Hi Ferruh,
> > >>>>>>>>
> > >>>>>>>> This file should be generated when libxdp is compiled.
> > >>>>>>>> Mine is located @ /usr/local/lib/bpf/xsk_def_xdp_prog.o
> > >>>>>>>> Can you check if that file is there for you? It could be in
> > >>>>>>> /usr/local/lib64/bpf/ on your machine.
> > >>>>>>>> What kernel are you running on?
> > >>>>>>>>
> > >>>>>>>
> > >>>>>>> It is in: /usr/local/lib64/bpf/xsk_def_xdp_prog.o
> > >>>>>>>
> > >>>>>>> I had to compile libxdp from source because OS package version
> > was
> > >> old
> > >>>>>>> to work with af_xdp.
> > >>>>>>> Is something required to point location of this file to af_xdp
> PMD?
> > >>>>>>>
> > >>>>>>> I run kernel:
> > >>>>>>> 5.15.16-200.fc35.x86_64
> > >>>>>>
> > >>>>>> I read through the libxdp code to figure out what happens when
> > >>>> searching
> > >>>>> for the file:
> > >>>>>> https://github.com/xdp-project/xdp-
> > >>>>> tools/blob/v1.2.2/lib/libxdp/libxdp.c#L1055
> > >>>>>>
> > >>>>>> secure_getenv(XDP_OBJECT_ENVVAR) is called which according to
> > the
> > >>>>> README "defaults to /usr/lib/bpf (or /usr/lib64/bpf on systems
> using
> > a
> > >> split
> > >>>>> library path)".
> > >>>>>> If that fails, BPF_OBJECT_PATH will be searched, which points to
> > >>>>> /usr/lib/bpf
> > >>>>>>
> > >>>>>> I discovered that on my system the getenv() call fails, but the file is
> > >>>>> eventually found because luckily BPF_OBJECT_PATH points to the
> > >>>>> appropriate place for me (lib):
> > >>>>>> https://github.com/xdp-project/xdp-
> > >> tools/blob/v1.2.2/lib/util/util.h#L24
> > >>>>>> I suspect the same failure is happening for you, but since
> > >>>>> BPF_OBJECT_PATH points to lib and not lib64, the file is not found.
> > >>>>>> As a temporary measure can you create a symlink in
> > /usr/local/lib/bpf/
> > >> to
> > >>>>> point to /usr/local/lib/bpf/xsk_def_xdp_prog.o
> > >>>>>> I will investigate the libxdp issue further. Maybe a change is
> needed
> > in
> > >>>> the
> > >>>>> library. If a change or setup recommendation is needed in DPDK I
> will
> > >>>> create a
> > >>>>> patch.
> > >>>>>>
> > >>>>>
> > >>>>>
> > >>>>> I don't have XDP_OBJECT_ENVVAR or BPF_OBJECT_PATH
> > environment
> > >>>>> variables set,
> > >>>>> if they should be we should document them.
> > >>>>>
> > >>>>> When I created '/usr/local/lib/bpf/' link, the BPF file found.
> > >>>>> This should be clarified/documented for users.
> > >>>>
> > >>>> Ok. Ideally we shouldn't have to create the symlink. I will look for a
> > better
> > >>>> solution and submit a patch.
> > >>>> The symlink might be a temporary solution if another solution is not
> > >> found.
> > >>>
> > >>> Can you please try setting the environment variable
> > >> LIBXDP_OBJECT_PATH=/usr/local/lib64/bpf/
> > >>> And see if your test works without the symlink?
> > >>> This worked for me and the getenv succeeded.
> > >>> If it works for you too, I'll create a patch for the docs instructing users
> to
> > do
> > >> the same.
> > >>>
> > >>
> > >> I confirm it works, and +1 to document it.
> > >>
> > >>
> > >> btw, when this environment variable is not set (and no symlink), af_xdp
> > fails
> > >> and testpmd crashes. I think af_xdp failure shouldn't cause a crash in
> > >> testpmd,
> > >> most probably some error checks are needed in the af_xdp driver.
> > >
> > > When I trigger the error case in my environment I get a graceful exit:
> > >
> > > libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
> > > xsk_configure(): Failed to create xsk socket.
> > > eth_rx_queue_setup(): Failed to configure xdp socket
> > > Fail to configure port 0 rx queues
> > > EAL: Error - exiting with code: 1
> > >    Cause: Start ports failed
> > >
> > > Can you please provide more info on your crash?
> > >
> >
> > $ ./build/app/dpdk-testpmd --vdev net_af_xdp0,iface=enp24s0f1 --vdev
> > net_af_xdp1,iface=enp94s0f1 -- -i
> > ...
> > Configuring Port 2 (socket 0)
> > libxdp: Couldn't find a BPF file with name xsk_def_xdp_prog.o
> > xsk_configure(): Failed to create xsk socket.
> > eth_rx_queue_setup(): Failed to configure xdp socket
> > Fail to configure port 2 rx queues
> > EAL: Error - exiting with code: 1
> >    Cause: Start ports failed
> > Segmentation fault (core dumped)
> >
> > (I have two physical interfaces too, af_xdp ports are port 2 & 3)
> 
> Thanks for providing more info. I have reproduced this issue.
> It doesn't happen when --no-pci is used.
> The issue is not related to the libxdp patch. It just occurs when
> xsk_configure() fails. As you said, some extra error checks/handling are
> probably required.
> Thanks for reporting. I'll work on a fix.
> 

I reproduced the issue with some other vdevs (af_packet and null) by forcing an error return from their respective rx_queue_setup functions.
I traced the issue back to dfbc61a2f9a6 ("mem: detach memsegs on cleanup"). Before this commit there is no SEGV after the vdev setup fails.
The SEGVs occur in the phy driver code.
For i40e it occurs @ i40e_dev_alarm_handler()
For ixgbe it occurs @ ixgbe_dev_setup_link_thread_handler()
Not sure exactly why they occur though. Anatoly, you authored the commit mentioned above. Can you think of any reason why it might cause this behaviour?

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-09  9:48       ` [PATCH v4] " Ciara Loftus
                           ` (2 preceding siblings ...)
  2022-02-10 15:19         ` Ferruh Yigit
@ 2022-02-17 12:44         ` David Marchand
  2022-02-17 12:47           ` Ferruh Yigit
  3 siblings, 1 reply; 36+ messages in thread
From: David Marchand @ 2022-02-17 12:44 UTC (permalink / raw)
  To: Ciara Loftus; +Cc: dev, Stephen Hemminger, Yigit, Ferruh, Burakov, Anatoly

On Wed, Feb 9, 2022 at 10:48 AM Ciara Loftus <ciara.loftus@intel.com> wrote:
> @@ -1836,6 +2008,17 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
>                 return -1;
>         }
>
> +       /* Register IPC callback which shares xsk fds from primary to secondary */
> +       if (!afxdp_dev_count) {
> +               ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY, afxdp_mp_send_fds);
> +               if (ret < 0) {

This breaks --in-memory mode.
It should be instead ret < 0 && rte_errno != ENOTSUP

Can you double check?
Thanks.


> +                       AF_XDP_LOG(ERR, "%s: Failed to register multi-process IPC callback: %s",
> +                                  name, strerror(rte_errno));
> +                       return -1;
> +               }
> +       }


-- 
David Marchand


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-17 12:44         ` David Marchand
@ 2022-02-17 12:47           ` Ferruh Yigit
  2022-02-17 12:53             ` David Marchand
  0 siblings, 1 reply; 36+ messages in thread
From: Ferruh Yigit @ 2022-02-17 12:47 UTC (permalink / raw)
  To: David Marchand, Ciara Loftus; +Cc: dev, Stephen Hemminger, Burakov, Anatoly

On 2/17/2022 12:44 PM, David Marchand wrote:
> On Wed, Feb 9, 2022 at 10:48 AM Ciara Loftus <ciara.loftus@intel.com> wrote:
>> @@ -1836,6 +2008,17 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
>>                  return -1;
>>          }
>>
>> +       /* Register IPC callback which shares xsk fds from primary to secondary */
>> +       if (!afxdp_dev_count) {
>> +               ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY, afxdp_mp_send_fds);
>> +               if (ret < 0) {
> 
> This breaks --in-memory mode.
> It should be instead ret < 0 && rte_errno != ENOTSUP
> 
> Can you double check?
> Thanks.
> 

There is already a patch for it [1], if it is reviewed/tested,
I can merge quickly.

[1]
https://patches.dpdk.org/project/dpdk/patch/2c422567d3640972@cs.arizona.edu/

> 
>> +                       AF_XDP_LOG(ERR, "%s: Failed to register multi-process IPC callback: %s",
>> +                                  name, strerror(rte_errno));
>> +                       return -1;
>> +               }
>> +       }
> 
> 


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v4] net/af_xdp: re-enable secondary process support
  2022-02-17 12:47           ` Ferruh Yigit
@ 2022-02-17 12:53             ` David Marchand
  0 siblings, 0 replies; 36+ messages in thread
From: David Marchand @ 2022-02-17 12:53 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: Ciara Loftus, dev, Stephen Hemminger, Burakov, Anatoly

On Thu, Feb 17, 2022 at 1:48 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>
> On 2/17/2022 12:44 PM, David Marchand wrote:
> > On Wed, Feb 9, 2022 at 10:48 AM Ciara Loftus <ciara.loftus@intel.com> wrote:
> >> @@ -1836,6 +2008,17 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
> >>                  return -1;
> >>          }
> >>
> >> +       /* Register IPC callback which shares xsk fds from primary to secondary */
> >> +       if (!afxdp_dev_count) {
> >> +               ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY, afxdp_mp_send_fds);
> >> +               if (ret < 0) {
> >
> > This breaks --in-memory mode.
> > It should be instead ret < 0 && rte_errno != ENOTSUP
> >
> > Can you double check?
> > Thanks.
> >
>
> There is already a patch for it [1], if it is reviewed/tested,
> I can merge quickly.

I had tested my suggestion.
The patch [1] does the same and lgtm, I'll let Ciara ack it.

>
> [1]
> https://patches.dpdk.org/project/dpdk/patch/2c422567d3640972@cs.arizona.edu/


-- 
David Marchand


^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2022-02-17 12:53 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-10 15:32 [RFC PATCH] net/af_xdp: reenable secondary process support Ciara Loftus
2021-12-11 21:49 ` Stephen Hemminger
2022-01-12  7:54 ` [PATCH] net/af_xdp: re-enable " Ciara Loftus
2022-02-04 12:54   ` [PATCH v2] " Ciara Loftus
2022-02-04 14:18     ` Ferruh Yigit
2022-02-07  7:49       ` Loftus, Ciara
2022-02-07 10:27         ` Ferruh Yigit
2022-02-07 11:39           ` Loftus, Ciara
2022-02-08 10:58             ` Ferruh Yigit
2022-02-08 13:48     ` [PATCH v3] " Ciara Loftus
2022-02-08 17:45       ` Stephen Hemminger
2022-02-08 18:00         ` Ferruh Yigit
2022-02-08 18:42           ` Stephen Hemminger
2022-02-08 18:56             ` Ferruh Yigit
2022-02-09  7:41               ` Loftus, Ciara
2022-02-09  9:48       ` [PATCH v4] " Ciara Loftus
2022-02-09 15:29         ` Stephen Hemminger
2022-02-11 13:32           ` Ferruh Yigit
2022-02-09 17:55         ` Ferruh Yigit
2022-02-10 15:08           ` Ferruh Yigit
2022-02-10 15:19         ` Ferruh Yigit
2022-02-10 15:40           ` Loftus, Ciara
2022-02-10 16:06             ` Ferruh Yigit
2022-02-10 17:47               ` Loftus, Ciara
2022-02-10 20:12                 ` Ferruh Yigit
2022-02-11  7:28                   ` Loftus, Ciara
2022-02-11  9:26                     ` Loftus, Ciara
2022-02-11 12:29                       ` Ferruh Yigit
2022-02-11 13:01                         ` Loftus, Ciara
2022-02-11 13:07                           ` Ferruh Yigit
2022-02-11 15:32                             ` Loftus, Ciara
2022-02-16 11:23                               ` Loftus, Ciara
2022-02-11 11:31                     ` Ferruh Yigit
2022-02-17 12:44         ` David Marchand
2022-02-17 12:47           ` Ferruh Yigit
2022-02-17 12:53             ` David Marchand

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).