From: Robin Jarry <rjarry@redhat.com>
To: dev@dpdk.org, Stephen Hemminger <stephen@networkplumber.org>
Subject: [PATCH dpdk 3/4] net/tap: use netlink if possible
Date: Mon, 27 Oct 2025 16:37:54 +0100 [thread overview]
Message-ID: <20251027153750.445275-9-rjarry@redhat.com> (raw)
In-Reply-To: <20251027153750.445275-6-rjarry@redhat.com>
Make netlink socket available unconditionally, not just for rte_flow.
Use netlink for get/set operations on link flags, MAC, and MTU when
available. Fall back to ioctl if netlink socket creation fails.
Signed-off-by: Robin Jarry <rjarry@redhat.com>
---
drivers/net/tap/rte_eth_tap.c | 143 ++++++++++++++++++++++++++++------
drivers/net/tap/rte_eth_tap.h | 2 +-
2 files changed, 122 insertions(+), 23 deletions(-)
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 5b98e381b424..b53c85746056 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -774,6 +774,89 @@ tap_ctrl_req2str(unsigned long request)
return "UNKNOWN";
}
+/*
+ * Netlink counterpart of tap_ioctl(): apply a SIOC* request by interface index.
+ *
+ * SIOCS* requests go to the tap netdevice for LOCAL_ONLY/LOCAL_AND_REMOTE and
+ * to the remote netdevice (when one is configured) for REMOTE_ONLY/
+ * LOCAL_AND_REMOTE; the remote is skipped if the tap update failed.
+ * SIOCG* requests read the remote netdevice only for REMOTE_ONLY with a
+ * remote configured, the tap netdevice otherwise; results are stored in ifr.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported request, -errno otherwise.
+ *
+ * NOTE(review): assumes the tap_nl_*() helpers return a negative value and set
+ * errno on failure, as the error path below relies on -- confirm.
+ */
+static int
+tap_nl_ctrl(struct pmd_internals *pmd, unsigned long request,
+	    struct ifreq *ifr, int set, enum ctrl_mode mode)
+{
+	bool remote = pmd->remote_if_index && (mode == REMOTE_ONLY || mode == LOCAL_AND_REMOTE);
+	bool local = mode == LOCAL_ONLY || mode == LOCAL_AND_REMOTE;
+	/* Getters read a single netdevice: the remote one only for REMOTE_ONLY. */
+	int ifindex = (mode == REMOTE_ONLY && pmd->remote_if_index) ?
+		pmd->remote_if_index : pmd->if_index;
+	struct rte_ether_addr *mac;
+	unsigned int flags = 0;
+	int ret = 0;
+	int err;
+
+	switch (request) {
+	case SIOCSIFFLAGS:
+		if (local)
+			ret = tap_nl_set_link_flags(pmd->nlsk_fd, pmd->if_index,
+						    ifr->ifr_flags, set);
+		/* A local failure falls through to the common error path below. */
+		if (ret >= 0 && remote)
+			ret = tap_nl_set_link_flags(pmd->nlsk_fd, pmd->remote_if_index,
+						    ifr->ifr_flags, set);
+		break;
+
+	case SIOCGIFFLAGS:
+		ret = tap_nl_get_link_flags(pmd->nlsk_fd, ifindex, &flags);
+		if (ret == 0)
+			ifr->ifr_flags = flags;
+		break;
+
+	case SIOCGIFHWADDR:
+		mac = (struct rte_ether_addr *)ifr->ifr_hwaddr.sa_data;
+		ret = tap_nl_get_link_mac(pmd->nlsk_fd, ifindex, mac);
+		if (ret == 0)
+			ifr->ifr_hwaddr.sa_family = AF_LOCAL;
+		break;
+
+	case SIOCSIFHWADDR:
+		mac = (struct rte_ether_addr *)ifr->ifr_hwaddr.sa_data;
+		if (local)
+			ret = tap_nl_set_link_mac(pmd->nlsk_fd, pmd->if_index, mac);
+		if (ret >= 0 && remote)
+			ret = tap_nl_set_link_mac(pmd->nlsk_fd, pmd->remote_if_index, mac);
+		break;
+
+	case SIOCSIFMTU:
+		if (local)
+			ret = tap_nl_set_link_mtu(pmd->nlsk_fd, pmd->if_index, ifr->ifr_mtu);
+		if (ret >= 0 && remote)
+			ret = tap_nl_set_link_mtu(pmd->nlsk_fd, pmd->remote_if_index, ifr->ifr_mtu);
+		break;
+
+	default:
+		TAP_LOG(WARNING, "%s: unsupported netlink request", pmd->name);
+		return -EINVAL;
+	}
+
+	if (ret < 0) {
+		/* Save errno first: logging and strerror() may overwrite it. */
+		err = errno;
+		TAP_LOG(DEBUG, "%s: netlink %s failed: %s(%d)", pmd->name,
+			tap_ctrl_req2str(request), strerror(err), err);
+		return -err;
+	}
+
+	return 0;
+}
+
static int
tap_ioctl(struct pmd_internals *pmd, unsigned long request,
struct ifreq *ifr, int set, enum ctrl_mode mode)
@@ -782,8 +865,6 @@ tap_ioctl(struct pmd_internals *pmd, unsigned long request,
int remote = pmd->remote_if_index &&
(mode == REMOTE_ONLY || mode == LOCAL_AND_REMOTE);
- if (!pmd->remote_if_index && mode == REMOTE_ONLY)
- return 0;
/*
* If there is a remote netdevice, apply ioctl on it, then apply it on
* the tap netdevice.
@@ -829,6 +910,14 @@ static int
tap_ctrl(struct pmd_internals *pmd, unsigned long request,
struct ifreq *ifr, int set, enum ctrl_mode mode)
{
+ if (!pmd->remote_if_index && mode == REMOTE_ONLY)
+ return 0;
+
+ /* Use netlink if available */
+ if (pmd->nlsk_fd >= 0 && pmd->if_index > 0)
+ return tap_nl_ctrl(pmd, request, ifr, set, mode);
+
+ /* Otherwise, fall back to ioctl */
return tap_ioctl(pmd, request, ifr, set, mode);
}
@@ -1138,12 +1227,15 @@ tap_dev_close(struct rte_eth_dev *dev)
if (internals->nlsk_fd != -1) {
tap_flow_flush(dev, NULL);
tap_flow_implicit_flush(internals, NULL);
- tap_nl_final(internals->nlsk_fd);
- internals->nlsk_fd = -1;
tap_flow_bpf_destroy(internals);
}
#endif
+ if (internals->nlsk_fd != -1) {
+ tap_nl_final(internals->nlsk_fd);
+ internals->nlsk_fd = -1;
+ }
+
for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
struct rx_queue *rxq = &internals->rxq[i];
@@ -1953,10 +2045,7 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
strlcpy(pmd->name, tap_name, sizeof(pmd->name));
pmd->type = type;
pmd->ka_fd = -1;
-
-#ifdef HAVE_TCA_FLOWER
pmd->nlsk_fd = -1;
-#endif
pmd->gso_ctx_mp = NULL;
pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0);
@@ -2035,26 +2124,38 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
/* Make network device persist after application exit */
pmd->persist = persist;
-#ifdef HAVE_TCA_FLOWER
/*
- * Set up everything related to rte_flow:
- * - netlink socket
- * - tap / remote if_index
- * - mandatory QDISCs
- * - rte_flow actual/implicit lists
- * - implicit rules
+ * Try to create netlink socket for better interface control.
+ * This provides ifindex-based operations and is more namespace-safe.
+ * Fall back to ioctl if netlink is not available.
*/
pmd->nlsk_fd = tap_nl_init(0);
if (pmd->nlsk_fd == -1) {
- TAP_LOG(WARNING, "%s: failed to create netlink socket.",
+ TAP_LOG(INFO, "%s: netlink unavailable, using ioctl fallback.",
+ pmd->name);
+ } else {
+ pmd->if_index = if_nametoindex(pmd->name);
+ if (!pmd->if_index) {
+ TAP_LOG(WARNING, "%s: failed to get if_index.",
+ pmd->name);
+ close(pmd->nlsk_fd);
+ pmd->nlsk_fd = -1;
+ }
+ }
+
+#ifdef HAVE_TCA_FLOWER
+ /*
+ * Set up everything related to rte_flow:
+ * - mandatory QDISCs (requires netlink)
+ * - rte_flow actual/implicit lists
+ * - implicit rules
+ */
+ if (pmd->nlsk_fd == -1) {
+ TAP_LOG(WARNING, "%s: rte_flow requires netlink support.",
pmd->name);
goto disable_rte_flow;
}
- pmd->if_index = if_nametoindex(pmd->name);
- if (!pmd->if_index) {
- TAP_LOG(ERR, "%s: failed to get if_index.", pmd->name);
- goto disable_rte_flow;
- }
+
if (qdisc_create_multiq(pmd->nlsk_fd, pmd->if_index) < 0) {
TAP_LOG(ERR, "%s: failed to create multiq qdisc.",
pmd->name);
@@ -2141,10 +2242,8 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
#endif
error_exit:
-#ifdef HAVE_TCA_FLOWER
if (pmd->nlsk_fd != -1)
close(pmd->nlsk_fd);
-#endif
if (pmd->ka_fd != -1)
close(pmd->ka_fd);
if (pmd->ioctl_sock != -1)
diff --git a/drivers/net/tap/rte_eth_tap.h b/drivers/net/tap/rte_eth_tap.h
index ce4322ad046e..bb5aa8966bb0 100644
--- a/drivers/net/tap/rte_eth_tap.h
+++ b/drivers/net/tap/rte_eth_tap.h
@@ -77,9 +77,9 @@ struct pmd_internals {
int remote_if_index; /* remote netdevice IF_INDEX */
int if_index; /* IF_INDEX for the port */
int ioctl_sock; /* socket for ioctl calls */
+ int nlsk_fd; /* Netlink socket fd */
#ifdef HAVE_TCA_FLOWER
- int nlsk_fd; /* Netlink socket fd */
int flow_isolate; /* 1 if flow isolation is enabled */
struct tap_rss *rss; /* BPF program */
--
2.51.0
next prev parent reply other threads:[~2025-10-27 15:38 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-27 15:37 [PATCH dpdk 0/4] net/tap: add network namespace support Robin Jarry
2025-10-27 15:37 ` [PATCH dpdk 1/4] net/tap: add netlink helpers Robin Jarry
2025-10-27 15:37 ` [PATCH dpdk 2/4] net/tap: rename internal ioctl wrapper Robin Jarry
2025-10-27 15:37 ` Robin Jarry [this message]
2025-10-27 16:06 ` [PATCH dpdk 3/4] net/tap: use netlink if possible Stephen Hemminger
2025-10-27 16:10 ` Robin Jarry
2025-10-27 16:58 ` Stephen Hemminger
2025-10-27 15:37 ` [PATCH dpdk 4/4] net/tap: detect namespace change Robin Jarry
2025-10-27 18:19 ` [PATCH dpdk v2 0/3] net/tap: add network namespace support Robin Jarry
2025-10-27 18:19 ` [PATCH dpdk v2 1/3] net/tap: add netlink helpers Robin Jarry
2025-10-27 18:19 ` [PATCH dpdk v2 2/3] net/tap: replace ioctl with netlink Robin Jarry
2025-10-27 18:19 ` [PATCH dpdk v2 3/3] net/tap: detect namespace change Robin Jarry
2025-10-27 21:55 ` [PATCH dpdk v2 0/3] net/tap: add network namespace support Stephen Hemminger
2025-10-27 22:16 ` [PATCH dpdk v3 " Robin Jarry
2025-10-27 22:16 ` [PATCH dpdk v3 1/3] net/tap: add netlink helpers Robin Jarry
2025-10-27 22:16 ` [PATCH dpdk v3 2/3] net/tap: replace ioctl with netlink Robin Jarry
2025-10-27 22:16 ` [PATCH dpdk v3 3/3] net/tap: detect namespace change Robin Jarry
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251027153750.445275-9-rjarry@redhat.com \
--to=rjarry@redhat.com \
--cc=dev@dpdk.org \
--cc=stephen@networkplumber.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).