DPDK patches and discussions
 help / color / mirror / Atom feed
From: Ciara Loftus <ciara.loftus@intel.com>
To: dev@dpdk.org, xiaolong.ye@intel.com, kevin.laatz@intel.com,
	bruce.richardson@intel.com, ciara.loftus@intel.com
Subject: [dpdk-dev] [PATCH 2/3] net/af_xdp: support pinning of IRQs
Date: Thu, 19 Sep 2019 14:15:19 +0000	[thread overview]
Message-ID: <20190919141520.4227-3-ciara.loftus@intel.com> (raw)
In-Reply-To: <20190919141520.4227-1-ciara.loftus@intel.com>

Network devices using the AF_XDP PMD will trigger interrupts
on reception of packets. The new PMD argument 'queue_irq'
allows the user to specify a core on which to pin interrupts
for a given queue. Multiple queue_irq arguments can be specified.
For example:

  --vdev=net_af_xdp1,iface=eth0,queue_count=2,
           queue_irq=0:2,queue_irq=1:5

...will pin queue 0 interrupts to core 2 and queue 1 interrupts
to core 5.

The queue argument refers to the ethdev queue as opposed to the
netdev queue. These values are the same unless a value greater
than 0 is specified in a start_queue argument.

The drivers supported for this feature are those with support for
AF_XDP zero copy in the kernel, namely ixgbe, i40e and mlx5_core.

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 doc/guides/nics/af_xdp.rst             |  15 ++
 doc/guides/rel_notes/release_19_11.rst |   7 +
 drivers/net/af_xdp/rte_eth_af_xdp.c    | 357 ++++++++++++++++++++++++-
 3 files changed, 374 insertions(+), 5 deletions(-)

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index ec46f08f0..a255ba4e7 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -36,6 +36,11 @@ The following options can be provided to set up an af_xdp port in DPDK.
 *   ``start_queue`` - starting netdev queue id (optional, default 0);
 *   ``queue_count`` - total netdev queue number (optional, default 1);
 *   ``pmd_zero_copy`` - enable zero copy or not (optional, default 0);
+*   ``queue_irq`` - pin a queue's IRQs to the specified core, given as
+    <queue:core> (optional, default no pinning). The queue refers to the ethdev
+    queue, not the netdev queue; the two are the same unless start_queue is
+    greater than 0. Supported for the ixgbe, i40e and mlx5_core kernel
+    drivers;
 
 Prerequisites
 -------------
@@ -57,3 +62,13 @@ The following example will set up an af_xdp interface in DPDK:
 .. code-block:: console
 
     --vdev net_af_xdp,iface=ens786f1
+
+Pin queue IRQs
+--------------
+The following example will pin queue 0 interrupts to core 2 and queue 1
+interrupts to core 5.
+
+.. code-block:: console
+
+      --vdev=net_af_xdp1,iface=eth0,queue_count=2,
+               queue_irq=0:2,queue_irq=1:5
diff --git a/doc/guides/rel_notes/release_19_11.rst b/doc/guides/rel_notes/release_19_11.rst
index 27cfbd9e3..06bf57c42 100644
--- a/doc/guides/rel_notes/release_19_11.rst
+++ b/doc/guides/rel_notes/release_19_11.rst
@@ -56,6 +56,13 @@ New Features
      Also, make sure to start the actual text at the margin.
      =========================================================
 
+* **Updated the AF_XDP PMD.**
+
+  Updated the AF_XDP PMD. The new features include:
+
+  * Support for pinning netdev queue IRQs to cores specified by the user.
+    Available for the ixgbe, i40e and mlx5_core kernel drivers.
+
 
 Removed Items
 -------------
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index e496e9aaa..a00eb6460 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -3,6 +3,7 @@
  */
 #include <unistd.h>
 #include <errno.h>
+#include <regex.h>
 #include <stdlib.h>
 #include <string.h>
 #include <poll.h>
@@ -10,6 +11,7 @@
 #include <net/if.h>
 #include <sys/socket.h>
 #include <sys/ioctl.h>
+#include <sys/sysinfo.h>
 #include <linux/if_ether.h>
 #include <linux/if_xdp.h>
 #include <linux/if_link.h>
@@ -17,6 +19,8 @@
 #include <linux/sockios.h>
 #include "af_xdp_deps.h"
 #include <bpf/xsk.h>
+#include <sys/stat.h>
+#include <libgen.h>
 
 #include <rte_ethdev.h>
 #include <rte_ethdev_driver.h>
@@ -116,6 +120,7 @@ struct pmd_internals {
 	int queue_cnt;
 	int max_queue_cnt;
 	int combined_queue_cnt;
+	int queue_irqs[RTE_MAX_QUEUES_PER_PORT];
 
 	int pmd_zc;
 	struct rte_ether_addr eth_addr;
@@ -128,12 +133,14 @@ struct pmd_internals {
 #define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
 #define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
 #define ETH_AF_XDP_PMD_ZC_ARG			"pmd_zero_copy"
+#define ETH_AF_XDP_QUEUE_IRQ_ARG		"queue_irq"
 
 static const char * const valid_arguments[] = {
 	ETH_AF_XDP_IFACE_ARG,
 	ETH_AF_XDP_START_QUEUE_ARG,
 	ETH_AF_XDP_QUEUE_COUNT_ARG,
 	ETH_AF_XDP_PMD_ZC_ARG,
+	ETH_AF_XDP_QUEUE_IRQ_ARG,
 	NULL
 };
 
@@ -144,6 +151,21 @@ static const struct rte_eth_link pmd_link = {
 	.link_autoneg = ETH_LINK_AUTONEG
 };
 
+/*
+ * Kernel drivers supported for the queue_irq option. Each enumerator is the
+ * index of the matching name in 'driver_array'; NUM_DRIVERS is the number of
+ * entries.
+ */
+enum {I40E_DRIVER, IXGBE_DRIVER, MLX5_DRIVER, NUM_DRIVERS};
+
+/* read-only name table, private to this file */
+static const char driver_array[NUM_DRIVERS][NAME_MAX] = {
+	[I40E_DRIVER] = "i40e",
+	[IXGBE_DRIVER] = "ixgbe",
+	[MLX5_DRIVER] = "mlx5_core",
+};
+
+/*
+ * Function pointer type implemented once per driver in 'driver_array'.
+ *
+ * An implementation writes into 'iface_regex_str' the regular expression to
+ * search for in /proc/interrupts in order to identify the IRQ number of
+ * 'netdev_qid' on the interface described by 'internals'.
+ *
+ * Returns 0 on success; callers treat any non-zero value as failure.
+ */
+typedef
+int (*generate_driver_regex_func)(char *iface_regex_str,
+				  struct pmd_internals *internals,
+				  uint16_t netdev_qid);
+
 static inline int
 reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
 {
@@ -660,6 +682,283 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 	return ret;
 }
 
+/**
+ * Get the interface's kernel driver name, which determines the format of its
+ * /proc/interrupts entries.
+ *
+ * The name is the last path component of the target of the
+ * /sys/class/net/<if_name>/device/driver symbolic link.
+ *
+ * @param internals
+ *   PMD private data; only if_name is read.
+ * @param driver
+ *   Output buffer; up to NAME_MAX bytes (including the terminator) are
+ *   written, so it must be at least that large.
+ * @return
+ *   0 on success, a negative errno value on failure.
+ */
+static int
+get_driver_name(struct pmd_internals *internals, char *driver)
+{
+	char driver_path[PATH_MAX];
+	char link[PATH_MAX];
+	struct stat s;
+	ssize_t len;
+	int err;
+
+	snprintf(driver_path, sizeof(driver_path),
+			"/sys/class/net/%s/device/driver", internals->if_name);
+	if (lstat(driver_path, &s)) {
+		/* save errno: the logging call may overwrite it */
+		err = errno;
+		AF_XDP_LOG(ERR, "Error reading %s: %s\n",
+					driver_path, strerror(err));
+		return -err;
+	}
+
+	/* driver_path should link to /sys/bus/pci/drivers/<driver_name> */
+	len = readlink(driver_path, link, sizeof(link) - 1);
+	if (len == -1) {
+		err = errno;
+		AF_XDP_LOG(ERR, "Error reading symbolic link %s: %s\n",
+					driver_path, strerror(err));
+		return -err;
+	}
+
+	/* readlink() does not terminate the buffer */
+	link[len] = '\0';
+	strlcpy(driver, basename(link), NAME_MAX);
+	if (driver[0] == '\0' || !strcmp(driver, ".")) {
+		/*
+		 * No system call failed here, so errno carries no meaning;
+		 * return a fixed non-zero error instead of -errno, which
+		 * could be 0 and be mistaken for success.
+		 */
+		AF_XDP_LOG(ERR, "Error getting driver name from %s\n", link);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * Build the /proc/interrupts search pattern shared by the ixgbe and i40e
+ * handlers: "-<if_name>.*-<netdev_qid>".
+ *
+ * At most 128 bytes are written to 'iface_regex_str'. Returns 0 on success
+ * and -1 when the pattern does not fit in 128 bytes.
+ */
+static int
+generate_ixgbe_i40e_regex(char *iface_regex_str,
+			  struct pmd_internals *internals, uint16_t netdev_qid)
+{
+	const int written = snprintf(iface_regex_str, 128, "-%s.*-%d",
+					internals->if_name, netdev_qid);
+
+	if (written < 128)
+		return 0;
+
+	/* pattern was truncated */
+	AF_XDP_LOG(INFO, "Cannot get interrupt for %s q %i\n",
+				internals->if_name, netdev_qid);
+	return -1;
+}
+
+/**
+ * Build the /proc/interrupts search pattern for an mlx5_core queue:
+ * ".*p<netdev_qid>@pci:<pci_addr>".
+ *
+ * The PCI address is the last path component of the target of the
+ * /sys/class/net/<if_name>/device symbolic link.
+ *
+ * @param iface_regex_str
+ *   Output buffer; at most 128 bytes are written.
+ * @param internals
+ *   PMD private data; only if_name is read.
+ * @param netdev_qid
+ *   Netdev queue id to search for.
+ * @return
+ *   0 on success, a negative errno value if the link cannot be read, -1 if
+ *   the PCI address cannot be extracted or the pattern does not fit.
+ */
+static int
+generate_mlx5_regex(char *iface_regex_str, struct pmd_internals *internals,
+		    uint16_t netdev_qid)
+{
+	char pci_path[PATH_MAX];
+	/*
+	 * Fixed-size buffer sized to match the readlink() limit below. A
+	 * buffer sized from st_size can be smaller than what readlink()
+	 * writes and overflow.
+	 */
+	char link[PATH_MAX];
+	struct stat s;
+	char *pci;
+	ssize_t len;
+	int err;
+
+	snprintf(pci_path, sizeof(pci_path),
+			"/sys/class/net/%s/device", internals->if_name);
+	if (lstat(pci_path, &s)) {
+		/* save errno: the logging call may overwrite it */
+		err = errno;
+		AF_XDP_LOG(ERR, "Error reading %s: %s\n",
+					pci_path, strerror(err));
+		return -err;
+	}
+
+	/* pci_path should link to a directory whose name is the pci addr */
+	len = readlink(pci_path, link, sizeof(link) - 1);
+	if (len == -1) {
+		err = errno;
+		AF_XDP_LOG(ERR, "Error reading symbolic link %s: %s\n",
+					pci_path, strerror(err));
+		return -err;
+	}
+
+	/* readlink() does not terminate the buffer */
+	link[len] = '\0';
+	pci = basename(link);
+	if (pci[0] == '\0' || !strcmp(pci, ".")) {
+		AF_XDP_LOG(ERR, "Error getting pci from %s\n", link);
+		return -1;
+	}
+
+	if (snprintf(iface_regex_str, 128, ".*p%i@pci:%s", netdev_qid, pci) >=
+			128) {
+		AF_XDP_LOG(INFO, "Cannot get interrupt for %s q %i\n",
+					internals->if_name, netdev_qid);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Per-driver handlers that generate the regex used to search
+ * /proc/interrupts. Entries are indexed by the driver enumerators, so the
+ * handler for a driver sits at the same index as its name in 'driver_array'.
+ * ixgbe and i40e share one handler.
+ */
+generate_driver_regex_func driver_handlers[NUM_DRIVERS] = {
+	[I40E_DRIVER] = generate_ixgbe_i40e_regex,
+	[IXGBE_DRIVER] = generate_ixgbe_i40e_regex,
+	[MLX5_DRIVER] = generate_mlx5_regex,
+};
+
+/*
+ * Look up 'driver' in 'driver_array' and return its index, which is also the
+ * index of its handler in 'driver_handlers'. Returns -1 when no entry matches.
+ *
+ * Only the first strlen(<supported name>) characters are compared, so a
+ * driver string that begins with a supported name also matches.
+ */
+static int
+get_driver_idx(char *driver)
+{
+	int idx = 0;
+
+	while (idx < NUM_DRIVERS) {
+		const char *candidate = driver_array[idx];
+
+		if (strncmp(driver, candidate, strlen(candidate)) == 0)
+			return idx;
+		idx++;
+	}
+
+	return -1;
+}
+
+/**
+ * Generate and compile the /proc/interrupts search regex for a queue, using
+ * the handler registered for the interface's driver.
+ *
+ * @param driver
+ *   Kernel driver name of the interface.
+ * @param internals
+ *   PMD private data, passed on to the driver handler.
+ * @param netdev_qid
+ *   Netdev queue id to search for.
+ * @param r
+ *   Output; holds the compiled regex on success. The caller releases it with
+ *   regfree().
+ * @return
+ *   0 on success, -1 on failure ('r' is not compiled in that case).
+ */
+static int
+generate_search_regex(const char *driver, struct pmd_internals *internals,
+		      uint16_t netdev_qid, regex_t *r)
+{
+	char iface_regex_str[128];
+	int ret = -1;
+	char *driver_dup;
+	int idx;
+
+	/* get_driver_idx() takes a non-const string, hence the copy */
+	driver_dup = strdup(driver);
+	if (driver_dup == NULL) {
+		AF_XDP_LOG(ERR, "Failed to allocate memory for driver name\n");
+		return -1;
+	}
+
+	idx = get_driver_idx(driver_dup);
+	if (idx == -1) {
+		AF_XDP_LOG(ERR, "Error getting driver index for %s\n",
+					internals->if_name);
+		goto out;
+	}
+
+	if (driver_handlers[idx](iface_regex_str, internals, netdev_qid)) {
+		AF_XDP_LOG(ERR, "Error getting regex string for %s\n",
+					internals->if_name);
+		goto out;
+	}
+
+	if (regcomp(r, iface_regex_str, 0)) {
+		AF_XDP_LOG(ERR, "Error computing regex %s\n", iface_regex_str);
+		goto out;
+	}
+
+	ret = 0;
+
+out:
+	free(driver_dup);
+	return ret;
+}
+
+/**
+ * Find the interrupt number of the first /proc/interrupts line that matches
+ * the compiled regex.
+ *
+ * @param r
+ *   Compiled regex identifying the interface queue's line.
+ * @param interrupt
+ *   Output; set to the number parsed from the start of the matching line.
+ *   Left untouched on failure.
+ * @param internals
+ *   PMD private data; if_name is used in log messages only.
+ * @return
+ *   0 if a matching line was found and parsed, -1 otherwise.
+ */
+static int
+get_interrupt_number(regex_t *r, int *interrupt,
+		     struct pmd_internals *internals)
+{
+	FILE *f_int_proc;
+	char line[4096];
+	int reported = 0;
+	int ret = -1;
+
+	f_int_proc = fopen("/proc/interrupts", "r");
+	if (f_int_proc == NULL) {
+		AF_XDP_LOG(ERR, "Failed to open /proc/interrupts.\n");
+		return -1;
+	}
+
+	while (fgets(line, sizeof(line), f_int_proc) != NULL) {
+		size_t len = strlen(line);
+		char *end;
+		long irq;
+
+		/* Make sure a full line was read */
+		if (len == 0 || line[len - 1] != '\n') {
+			AF_XDP_LOG(ERR, "Error reading from interrupts file\n");
+			reported = 1;
+			break;
+		}
+
+		if (regexec(r, line, 0, NULL, 0) != 0)
+			continue;
+
+		/* The interrupt number is the leading integer of the line */
+		irq = strtol(line, &end, 10);
+		if (end == line || irq < 0 || irq != (int)irq) {
+			AF_XDP_LOG(ERR,
+				"Cannot parse interrupt number for %s\n",
+				internals->if_name);
+			reported = 1;
+			break;
+		}
+
+		*interrupt = (int)irq;
+		ret = 0;
+		AF_XDP_LOG(INFO, "Got interrupt %d for %s\n",
+					*interrupt, internals->if_name);
+		break;
+	}
+
+	if (ret != 0 && !reported) {
+		/* tell a read failure apart from simply finding no match */
+		if (ferror(f_int_proc))
+			AF_XDP_LOG(ERR, "Error reading from interrupts file\n");
+		else
+			AF_XDP_LOG(ERR, "No interrupt found for %s\n",
+						internals->if_name);
+	}
+
+	fclose(f_int_proc);
+
+	return ret;
+}
+
+/**
+ * Affinitise an interrupt to a core by writing a bitmask to
+ * /proc/irq/<interrupt>/smp_affinity.
+ *
+ * @param coreid
+ *   Core to pin to; the caller is expected to pass a non-negative value.
+ * @param internals
+ *   PMD private data; if_name is used in log messages only.
+ * @param rx_queue_id
+ *   Ethdev queue id, used in log messages only.
+ * @param netdev_qid
+ *   Netdev queue id, used in log messages only.
+ * @param interrupt
+ *   Interrupt number to affinitise.
+ * @return
+ *   0 on success, -1 if the bitmask cannot be built or written.
+ */
+static int
+set_irq_affinity(int coreid, struct pmd_internals *internals,
+		 uint16_t rx_queue_id, uint16_t netdev_qid, int interrupt)
+{
+	char bitmask[128];
+	char smp_affinity_filename[NAME_MAX];
+	FILE *f_int_smp_affinity;
+	int write_failed = 0;
+	int i;
+
+	/*
+	 * Create affinity bitmask. Every 32 bits are separated by a comma.
+	 * The shift is done on an unsigned value: bit 31 of a signed int
+	 * cannot be set by shifting without undefined behaviour.
+	 */
+	snprintf(bitmask, sizeof(bitmask), "%x", 1U << (coreid % 32));
+	for (i = 0; i < coreid / 32; i++) {
+		if (strlcat(bitmask, ",00000000", sizeof(bitmask)) >=
+				sizeof(bitmask)) {
+			/* a truncated mask would select the wrong core */
+			AF_XDP_LOG(ERR, "Affinity bitmask too long for core %i\n",
+						coreid);
+			return -1;
+		}
+	}
+
+	/* Write the new affinity bitmask */
+	snprintf(smp_affinity_filename, sizeof(smp_affinity_filename),
+			"/proc/irq/%d/smp_affinity", interrupt);
+	f_int_smp_affinity = fopen(smp_affinity_filename, "w");
+	if (f_int_smp_affinity == NULL) {
+		AF_XDP_LOG(ERR, "Error opening %s\n", smp_affinity_filename);
+		return -1;
+	}
+
+	if (fwrite(bitmask, strlen(bitmask), 1, f_int_smp_affinity) != 1)
+		write_failed = 1;
+	/* stdio buffers the data, so a failed write may only show up here */
+	if (fclose(f_int_smp_affinity) != 0)
+		write_failed = 1;
+	if (write_failed) {
+		AF_XDP_LOG(ERR, "Error writing %s to %s\n",
+					bitmask, smp_affinity_filename);
+		return -1;
+	}
+
+	AF_XDP_LOG(INFO, "IRQs for %s ethdev queue %i (netdev queue %i)"
+				" affinitised to core %i\n",
+				internals->if_name, rx_queue_id,
+				netdev_qid, coreid);
+
+	return 0;
+}
+
+/**
+ * Pin the interrupts of an ethdev rx queue to the core requested through the
+ * queue_irq argument, if any.
+ *
+ * Does nothing when no core was requested for the queue (negative entry in
+ * queue_irqs). Failures are logged and otherwise ignored, so queue setup
+ * continues without pinning.
+ *
+ * @param internals
+ *   PMD private data; queue_irqs, start_queue_idx and if_name are read.
+ * @param rx_queue_id
+ *   Ethdev queue id; the netdev queue id is this plus start_queue_idx.
+ */
+static void
+configure_irqs(struct pmd_internals *internals, uint16_t rx_queue_id)
+{
+	int coreid = internals->queue_irqs[rx_queue_id];
+	char driver[NAME_MAX];
+	uint16_t netdev_qid = rx_queue_id + internals->start_queue_idx;
+	regex_t r;
+	int interrupt = -1;
+	int ret;
+
+	if (coreid < 0)
+		return;
+
+	if (coreid > (get_nprocs() - 1)) {
+		AF_XDP_LOG(ERR, "Affinitisation failed - invalid coreid %i\n",
+					coreid);
+		return;
+	}
+
+	if (get_driver_name(internals, driver)) {
+		AF_XDP_LOG(ERR, "Error retrieving driver name for %s\n",
+					internals->if_name);
+		return;
+	}
+
+	if (generate_search_regex(driver, internals, netdev_qid, &r)) {
+		AF_XDP_LOG(ERR, "Error generating search regex for %s\n",
+					internals->if_name);
+		return;
+	}
+
+	ret = get_interrupt_number(&r, &interrupt, internals);
+	/* the compiled regex is only needed for the lookup above */
+	regfree(&r);
+	if (ret) {
+		AF_XDP_LOG(ERR, "Error getting interrupt number for %s\n",
+					internals->if_name);
+		return;
+	}
+
+	if (set_irq_affinity(coreid, internals, rx_queue_id, netdev_qid,
+				interrupt)) {
+		AF_XDP_LOG(ERR, "Error setting interrupt affinity for %s\n",
+					internals->if_name);
+		return;
+	}
+}
+
 static int
 eth_rx_queue_setup(struct rte_eth_dev *dev,
 		   uint16_t rx_queue_id,
@@ -697,6 +996,8 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 		goto err;
 	}
 
+	configure_irqs(internals, rx_queue_id);
+
 	rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
 	rxq->fds[0].events = POLLIN;
 
@@ -834,6 +1135,39 @@ parse_name_arg(const char *key __rte_unused,
 	return 0;
 }
 
+/**
+ * Parse one queue_irq argument of the form "<queue>:<core>" and record the
+ * core for that queue.
+ *
+ * @param key
+ *   Unused.
+ * @param value
+ *   Argument text. <queue> must be in [0, RTE_MAX_QUEUES_PER_PORT) and
+ *   <core> in [0, get_nprocs()); nothing may follow <core>.
+ * @param extra_args
+ *   Pointer to an int[RTE_MAX_QUEUES_PER_PORT] array; entry <queue> is set
+ *   to <core> on success.
+ * @return
+ *   0 on success, -1 if the argument is malformed or out of range.
+ */
+static int
+parse_queue_irq_arg(const char *key __rte_unused,
+		   const char *value, void *extra_args)
+{
+	int (*queue_irqs)[RTE_MAX_QUEUES_PER_PORT] = extra_args;
+	const char *core_str;
+	char *end;
+	long queue;
+	long core;
+
+	if (value == NULL)
+		goto invalid;
+
+	/* <queue> must be a number immediately followed by the delimiter */
+	errno = 0;
+	queue = strtol(value, &end, 10);
+	if (end == value || *end != ':' || errno != 0 ||
+			queue < 0 || queue >= RTE_MAX_QUEUES_PER_PORT)
+		goto invalid;
+
+	/* <core> must be a number that runs to the end of the string */
+	core_str = end + 1;
+	errno = 0;
+	core = strtol(core_str, &end, 10);
+	if (end == core_str || *end != '\0' || errno != 0 ||
+			core < 0 || core >= get_nprocs())
+		goto invalid;
+
+	(*queue_irqs)[queue] = (int)core;
+	return 0;
+
+invalid:
+	AF_XDP_LOG(ERR, "Invalid queue_irq argument.\n");
+	return -1;
+}
+
 static int
 xdp_get_channels_info(const char *if_name, int *max_queues,
 				int *combined_queues)
@@ -877,7 +1211,8 @@ xdp_get_channels_info(const char *if_name, int *max_queues,
 
 static int
 parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
-			int *queue_cnt, int *pmd_zc)
+			int *queue_cnt, int *pmd_zc,
+			int (*queue_irqs)[RTE_MAX_QUEUES_PER_PORT])
 {
 	int ret;
 
@@ -903,6 +1238,11 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
 	if (ret < 0)
 		goto free_kvlist;
 
+	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_QUEUE_IRQ_ARG,
+				 &parse_queue_irq_arg, queue_irqs);
+	if (ret < 0)
+		goto free_kvlist;
+
 free_kvlist:
 	rte_kvargs_free(kvlist);
 	return ret;
@@ -940,7 +1280,8 @@ get_iface_info(const char *if_name,
 
 static struct rte_eth_dev *
 init_internals(struct rte_vdev_device *dev, const char *if_name,
-			int start_queue_idx, int queue_cnt, int pmd_zc)
+			int start_queue_idx, int queue_cnt, int pmd_zc,
+			int queue_irqs[RTE_MAX_QUEUES_PER_PORT])
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
@@ -957,6 +1298,8 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	internals->queue_cnt = queue_cnt;
 	internals->pmd_zc = pmd_zc;
 	strlcpy(internals->if_name, if_name, IFNAMSIZ);
+	memcpy(internals->queue_irqs, queue_irqs,
+		sizeof(int) * RTE_MAX_QUEUES_PER_PORT);
 
 	if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
 				  &internals->combined_queue_cnt)) {
@@ -1035,6 +1378,9 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	struct rte_eth_dev *eth_dev = NULL;
 	const char *name;
 	int pmd_zc = 0;
+	int queue_irqs[RTE_MAX_QUEUES_PER_PORT];
+
+	memset(queue_irqs, -1, sizeof(int) * RTE_MAX_QUEUES_PER_PORT);
 
 	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
 		rte_vdev_device_name(dev));
@@ -1062,7 +1408,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		dev->device.numa_node = rte_socket_id();
 
 	if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
-			     &xsk_queue_cnt, &pmd_zc) < 0) {
+			     &xsk_queue_cnt, &pmd_zc, &queue_irqs) < 0) {
 		AF_XDP_LOG(ERR, "Invalid kvargs value\n");
 		return -EINVAL;
 	}
@@ -1073,7 +1419,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	}
 
 	eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
-					xsk_queue_cnt, pmd_zc);
+					xsk_queue_cnt, pmd_zc, queue_irqs);
 	if (eth_dev == NULL) {
 		AF_XDP_LOG(ERR, "Failed to init internals\n");
 		return -1;
@@ -1117,7 +1463,8 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
 			      "iface=<string> "
 			      "start_queue=<int> "
 			      "queue_count=<int> "
-			      "pmd_zero_copy=<0|1>");
+			      "pmd_zero_copy=<0|1> "
+			      "queue_irq=<int>:<int>");
 
 RTE_INIT(af_xdp_init_log)
 {
-- 
2.17.1


  parent reply	other threads:[~2019-09-19 14:17 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-19 14:15 [dpdk-dev] [PATCH 0/3] AF_XDP tx halt fix, IRQ pinning and unaligned chunks Ciara Loftus
2019-09-19 14:15 ` [dpdk-dev] [PATCH 1/3] net/af_xdp: fix Tx halt when no recv packets Ciara Loftus
2019-09-19 14:15 ` Ciara Loftus [this message]
2019-09-24 14:12   ` [dpdk-dev] [PATCH 2/3] net/af_xdp: support pinning of IRQs Ye Xiaolong
2019-09-27 13:21     ` Loftus, Ciara
2019-09-27 14:06       ` Ye Xiaolong
2019-09-24 16:42   ` Stephen Hemminger
2019-09-19 14:15 ` [dpdk-dev] [PATCH 3/3] net/af_xdp: enable support for unaligned umem chunks Ciara Loftus

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190919141520.4227-3-ciara.loftus@intel.com \
    --to=ciara.loftus@intel.com \
    --cc=bruce.richardson@intel.com \
    --cc=dev@dpdk.org \
    --cc=kevin.laatz@intel.com \
    --cc=xiaolong.ye@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).