DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH v1] net/tap: add tun support
@ 2018-02-20 23:12 Vipin Varghese
  2018-02-22 12:22 ` [dpdk-dev] [dpdk-dev,v1] " Pascal Mazon
  2018-02-26  6:15 ` [dpdk-dev] [PATCH v2] " Vipin Varghese
  0 siblings, 2 replies; 21+ messages in thread
From: Vipin Varghese @ 2018-02-20 23:12 UTC (permalink / raw)
  To: dev, john.mcnamara, ferruh.yigit; +Cc: marko.kovacevic, Vipin Varghese

The change adds TUN PMD logic to the existing TAP PMD. TUN PMD can
be initialized with 'net_tunX' where 'X' represents unique id. PMD
supports argument interface, while MAC address and remote are not
supported.

Signed-off-by: Vipin Varghese <vipin.varghese@intel.com>
---
 doc/guides/nics/tap.rst       |  15 ++++-
 drivers/net/tap/rte_eth_tap.c | 132 +++++++++++++++++++++++++++++++++---------
 2 files changed, 118 insertions(+), 29 deletions(-)

diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
index ea61be3..fe337c3 100644
--- a/doc/guides/nics/tap.rst
+++ b/doc/guides/nics/tap.rst
@@ -1,8 +1,8 @@
 ..  SPDX-License-Identifier: BSD-3-Clause
     Copyright(c) 2016 Intel Corporation.
 
-Tap Poll Mode Driver
-====================
+Tun|Tap Poll Mode Driver
+========================
 
 The ``rte_eth_tap.c`` PMD creates a device using TAP interfaces on the
 local host. The PMD allows for DPDK and the host to communicate using a raw
@@ -77,6 +77,17 @@ can utilize that stack to handle the network protocols. Plus you would be able
 to address the interface using an IP address assigned to the internal
 interface.
 
+The TUN PMD allows user to create a TUN device on host. The PMD allows user
+to transmit and recieve packets via DPDK API calls with L3 header and payload.
+The devices in host can be accessed via ``ifconfig`` or ``ip`` command. TUN
+interfaces are passed to DPDK ``rte_eal_init`` arguments as ``--vdev=net_tunX``,
+where X stands for unique id, example::
+
+   --vdev=net_tun0 --vdev=net_tun1,iface=foo1, ...
+
+Unlike TAP PMD, TUN PMD does not support user arguments as ``MAC`` or ``remote`` user
+options. Default interface name is ``dtunX``, where X stands for unique id.
+
 Flow API support
 ----------------
 
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index f09db0e..42c9db4 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -42,6 +42,7 @@
 /* Linux based path to the TUN device */
 #define TUN_TAP_DEV_PATH        "/dev/net/tun"
 #define DEFAULT_TAP_NAME        "dtap"
+#define DEFAULT_TUN_NAME        "dtun"
 
 #define ETH_TAP_IFACE_ARG       "iface"
 #define ETH_TAP_REMOTE_ARG      "remote"
@@ -49,6 +50,7 @@
 #define ETH_TAP_MAC_FIXED       "fixed"
 
 static struct rte_vdev_driver pmd_tap_drv;
+static struct rte_vdev_driver pmd_tun_drv;
 
 static const char *valid_arguments[] = {
 	ETH_TAP_IFACE_ARG,
@@ -58,6 +60,10 @@
 };
 
 static int tap_unit;
+static int tun_unit;
+
+static int tap_type;
+static char tuntap_name[8];
 
 static volatile uint32_t tap_trigger;	/* Rx trigger */
 
@@ -104,24 +110,26 @@ enum ioctl_mode {
 	 * Do not set IFF_NO_PI as packet information header will be needed
 	 * to check if a received packet has been truncated.
 	 */
-	ifr.ifr_flags = IFF_TAP;
+	ifr.ifr_flags = (tap_type) ? IFF_TAP : IFF_TUN;
 	snprintf(ifr.ifr_name, IFNAMSIZ, "%s", pmd->name);
 
 	RTE_LOG(DEBUG, PMD, "ifr_name '%s'\n", ifr.ifr_name);
 
 	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
 	if (fd < 0) {
-		RTE_LOG(ERR, PMD, "Unable to create TAP interface\n");
+		RTE_LOG(ERR, PMD, "Unable to create %s interface\n",
+			tuntap_name);
 		goto error;
 	}
 
 #ifdef IFF_MULTI_QUEUE
 	/* Grab the TUN features to verify we can work multi-queue */
 	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
-		RTE_LOG(ERR, PMD, "TAP unable to get TUN/TAP features\n");
+		RTE_LOG(ERR, PMD, "%s unable to get TUN/TAP features\n",
+			tuntap_name);
 		goto error;
 	}
-	RTE_LOG(DEBUG, PMD, "  TAP Features %08x\n", features);
+	RTE_LOG(DEBUG, PMD, " %s Features %08x\n", tuntap_name, features);
 
 	if (features & IFF_MULTI_QUEUE) {
 		RTE_LOG(DEBUG, PMD, "  Multi-queue support for %d queues\n",
@@ -1108,7 +1116,7 @@ enum ioctl_mode {
 		tmp = &(*tmp)->next;
 	}
 
-	RTE_LOG(DEBUG, PMD, "  RX TAP device name %s, qid %d on fd %d\n",
+	RTE_LOG(DEBUG, PMD, "  RX TUNTAP device name %s, qid %d on fd %d\n",
 		internals->name, rx_queue_id, internals->rxq[rx_queue_id].fd);
 
 	return 0;
@@ -1163,7 +1171,7 @@ enum ioctl_mode {
 	if (ret == -1)
 		return -1;
 	RTE_LOG(DEBUG, PMD,
-		"  TX TAP device name %s, qid %d on fd %d csum %s\n",
+		"  TX TUNTAP device name %s, qid %d on fd %d csum %s\n",
 		internals->name, tx_queue_id, internals->txq[tx_queue_id].fd,
 		txq->csum ? "on" : "off");
 
@@ -1346,17 +1354,19 @@ enum ioctl_mode {
 	struct ifreq ifr;
 	int i;
 
-	RTE_LOG(DEBUG, PMD, "  TAP device on numa %u\n", rte_socket_id());
+	RTE_LOG(DEBUG, PMD, "  %s device on numa %u\n",
+		tuntap_name, rte_socket_id());
 
 	data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
 	if (!data) {
-		RTE_LOG(ERR, PMD, "TAP Failed to allocate data\n");
+		RTE_LOG(ERR, PMD, "%s Failed to allocate data\n", tuntap_name);
 		goto error_exit_nodev;
 	}
 
 	dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
 	if (!dev) {
-		RTE_LOG(ERR, PMD, "TAP Unable to allocate device struct\n");
+		RTE_LOG(ERR, PMD, "%s Unable to allocate device struct\n",
+			tuntap_name);
 		goto error_exit_nodev;
 	}
 
@@ -1367,8 +1377,8 @@ enum ioctl_mode {
 	pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0);
 	if (pmd->ioctl_sock == -1) {
 		RTE_LOG(ERR, PMD,
-			"TAP Unable to get a socket for management: %s\n",
-			strerror(errno));
+			"%s Unable to get a socket for management: %s\n",
+			tuntap_name, strerror(errno));
 		goto error_exit;
 	}
 
@@ -1399,15 +1409,17 @@ enum ioctl_mode {
 		pmd->txq[i].fd = -1;
 	}
 
-	if (fixed_mac_type) {
-		/* fixed mac = 00:64:74:61:70:<iface_idx> */
-		static int iface_idx;
-		char mac[ETHER_ADDR_LEN] = "\0dtap";
+	if (tap_type) {
+		if (fixed_mac_type) {
+			/* fixed mac = 00:64:74:61:70:<iface_idx> */
+			static int iface_idx;
+			char mac[ETHER_ADDR_LEN] = "\0dtap";
 
-		mac[ETHER_ADDR_LEN - 1] = iface_idx++;
-		rte_memcpy(&pmd->eth_addr, mac, ETHER_ADDR_LEN);
-	} else {
-		eth_random_addr((uint8_t *)&pmd->eth_addr);
+			mac[ETHER_ADDR_LEN - 1] = iface_idx++;
+			rte_memcpy(&pmd->eth_addr, mac, ETHER_ADDR_LEN);
+		} else {
+			eth_random_addr((uint8_t *)&pmd->eth_addr);
+		}
 	}
 
 	/* Immediately create the netdevice (this will create the 1st queue). */
@@ -1422,11 +1434,13 @@ enum ioctl_mode {
 	if (tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE) < 0)
 		goto error_exit;
 
-	memset(&ifr, 0, sizeof(struct ifreq));
-	ifr.ifr_hwaddr.sa_family = AF_LOCAL;
-	rte_memcpy(ifr.ifr_hwaddr.sa_data, &pmd->eth_addr, ETHER_ADDR_LEN);
-	if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0)
-		goto error_exit;
+	if (tap_type) {
+		memset(&ifr, 0, sizeof(struct ifreq));
+		ifr.ifr_hwaddr.sa_family = AF_LOCAL;
+		rte_memcpy(ifr.ifr_hwaddr.sa_data, &pmd->eth_addr, ETHER_ADDR_LEN);
+		if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0)
+			goto error_exit;
+	}
 
 	/*
 	 * Set up everything related to rte_flow:
@@ -1533,8 +1547,8 @@ enum ioctl_mode {
 	rte_eth_dev_release_port(dev);
 
 error_exit_nodev:
-	RTE_LOG(ERR, PMD, "TAP Unable to initialize %s\n",
-		rte_vdev_device_name(vdev));
+	RTE_LOG(ERR, PMD, "%s Unable to initialize %s\n",
+		tuntap_name, rte_vdev_device_name(vdev));
 
 	rte_free(data);
 	return -EINVAL;
@@ -1580,6 +1594,61 @@ enum ioctl_mode {
 	return 0;
 }
 
+/* Open a TUN interface device.
+ */
+static int
+rte_pmd_tun_probe(struct rte_vdev_device *dev)
+{
+	const char *name, *params;
+	int ret;
+	struct rte_kvargs *kvlist = NULL;
+	char tun_name[RTE_ETH_NAME_MAX_LEN];
+	char remote_iface[RTE_ETH_NAME_MAX_LEN];
+
+	tap_type = 0;
+	strcpy(tuntap_name, "TUN");
+
+	name = rte_vdev_device_name(dev);
+	params = rte_vdev_device_args(dev);
+
+	memset(remote_iface, 0, RTE_ETH_NAME_MAX_LEN);
+	snprintf(tun_name, sizeof(tun_name), "%s%d",
+		DEFAULT_TUN_NAME, tun_unit++);
+
+	if (params && (params[0] != '\0')) {
+		RTE_LOG(DEBUG, PMD, "parameters (%s)\n", params);
+
+		kvlist = rte_kvargs_parse(params, valid_arguments);
+		if (kvlist) {
+			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+					ETH_TAP_IFACE_ARG,
+					&set_interface_name,
+					tun_name);
+
+				if (ret == -1)
+					goto leave;
+			}
+		}
+	}
+	pmd_link.link_speed = ETH_SPEED_NUM_10G;
+
+	RTE_LOG(NOTICE, PMD, "Initializing pmd_tun for %s as %s\n",
+		name, tun_name);
+
+	ret = eth_dev_tap_create(dev, tun_name, remote_iface, 0);
+
+leave:
+	if (ret == -1) {
+		RTE_LOG(ERR, PMD, "Failed to create pmd for %s as %s\n",
+			name, tun_name);
+		tun_unit--; /* Restore the unit number */
+	}
+	rte_kvargs_free(kvlist);
+
+	return ret;
+}
+
 /* Open a TAP interface device.
  */
 static int
@@ -1593,6 +1662,9 @@ enum ioctl_mode {
 	char remote_iface[RTE_ETH_NAME_MAX_LEN];
 	int fixed_mac_type = 0;
 
+	tap_type = 1;
+	strcpy(tuntap_name, "TAP");
+
 	name = rte_vdev_device_name(dev);
 	params = rte_vdev_device_args(dev);
 
@@ -1652,7 +1724,7 @@ enum ioctl_mode {
 	return ret;
 }
 
-/* detach a TAP device.
+/* detach a TUNTAP device.
  */
 static int
 rte_pmd_tap_remove(struct rte_vdev_device *dev)
@@ -1695,11 +1767,17 @@ enum ioctl_mode {
 	return 0;
 }
 
+static struct rte_vdev_driver pmd_tun_drv = {
+	.probe = rte_pmd_tun_probe,
+	.remove = rte_pmd_tap_remove,
+};
+
 static struct rte_vdev_driver pmd_tap_drv = {
 	.probe = rte_pmd_tap_probe,
 	.remove = rte_pmd_tap_remove,
 };
 RTE_PMD_REGISTER_VDEV(net_tap, pmd_tap_drv);
+RTE_PMD_REGISTER_VDEV(net_tun, pmd_tun_drv);
 RTE_PMD_REGISTER_ALIAS(net_tap, eth_tap);
 RTE_PMD_REGISTER_PARAM_STRING(net_tap,
 			      ETH_TAP_IFACE_ARG "=<string> "
-- 
1.9.1

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [dpdk-dev] [dpdk-dev,v1] net/tap: add tun support
  2018-02-20 23:12 [dpdk-dev] [PATCH v1] net/tap: add tun support Vipin Varghese
@ 2018-02-22 12:22 ` Pascal Mazon
  2018-02-26 11:01   ` Varghese, Vipin
  2018-02-26  6:15 ` [dpdk-dev] [PATCH v2] " Vipin Varghese
  1 sibling, 1 reply; 21+ messages in thread
From: Pascal Mazon @ 2018-02-22 12:22 UTC (permalink / raw)
  To: Vipin Varghese, dev, john.mcnamara, ferruh.yigit; +Cc: marko.kovacevic

Personally, I'm mostly ok with this.
I added a couple of comments inline.

On 21/02/2018 00:12, Vipin Varghese wrote:
> The change adds TUN PMD logic to the existing TAP PMD. TUN PMD can
> be initialized with 'net_tunX' where 'X' represents unique id. PMD
> supports argument interface, while MAC address and remote are not
> supported.
>
> Signed-off-by: Vipin Varghese <vipin.varghese@intel.com>
> ---
>  doc/guides/nics/tap.rst       |  15 ++++-
>  drivers/net/tap/rte_eth_tap.c | 132 +++++++++++++++++++++++++++++++++---------
>  2 files changed, 118 insertions(+), 29 deletions(-)
>
> diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
> index ea61be3..fe337c3 100644
> --- a/doc/guides/nics/tap.rst
> +++ b/doc/guides/nics/tap.rst
> @@ -1,8 +1,8 @@
>  ..  SPDX-License-Identifier: BSD-3-Clause
>      Copyright(c) 2016 Intel Corporation.
>  
> -Tap Poll Mode Driver
> -====================
> +Tun|Tap Poll Mode Driver
> +========================
>  
>  The ``rte_eth_tap.c`` PMD creates a device using TAP interfaces on the
>  local host. The PMD allows for DPDK and the host to communicate using a raw
> @@ -77,6 +77,17 @@ can utilize that stack to handle the network protocols. Plus you would be able
>  to address the interface using an IP address assigned to the internal
>  interface.
>  
> +The TUN PMD allows user to create a TUN device on host. The PMD allows user
> +to transmit and recieve packets via DPDK API calls with L3 header and payload.
s/recieve/receive/
> +The devices in host can be accessed via ``ifconfig`` or ``ip`` command. TUN
> +interfaces are passed to DPDK ``rte_eal_init`` arguments as ``--vdev=net_tunX``,
> +where X stands for unique id, example::
> +
> +   --vdev=net_tun0 --vdev=net_tun1,iface=foo1, ...
> +
> +Unlike TAP PMD, TUN PMD does not support user arguments as ``MAC`` or ``remote`` user
> +options. Default interface name is ``dtunX``, where X stands for unique id.
> +
>  Flow API support
>  ----------------
>  
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index f09db0e..42c9db4 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -42,6 +42,7 @@
>  /* Linux based path to the TUN device */
>  #define TUN_TAP_DEV_PATH        "/dev/net/tun"
>  #define DEFAULT_TAP_NAME        "dtap"
> +#define DEFAULT_TUN_NAME        "dtun"
>  
>  #define ETH_TAP_IFACE_ARG       "iface"
>  #define ETH_TAP_REMOTE_ARG      "remote"
> @@ -49,6 +50,7 @@
>  #define ETH_TAP_MAC_FIXED       "fixed"
>  
>  static struct rte_vdev_driver pmd_tap_drv;
> +static struct rte_vdev_driver pmd_tun_drv;
>  
>  static const char *valid_arguments[] = {
>  	ETH_TAP_IFACE_ARG,
> @@ -58,6 +60,10 @@
>  };
>  
>  static int tap_unit;
> +static int tun_unit;
> +
> +static int tap_type;
I'm not a huge fan of considering "tap_type = 1 means it's a TAP device".
Wouldn't it be clearer to have a tuntap_type variable and an enum
TUNTAP_TYPE with the two types;
then checking  for tuntap_type == TUN_TYPE or tuntap == TAP_TYPE?
It's not a deal breaker though, just a consideration.
> +static char tuntap_name[8];
>  
>  static volatile uint32_t tap_trigger;	/* Rx trigger */
>  
> @@ -104,24 +110,26 @@ enum ioctl_mode {
>  	 * Do not set IFF_NO_PI as packet information header will be needed
>  	 * to check if a received packet has been truncated.
>  	 */
> -	ifr.ifr_flags = IFF_TAP;
> +	ifr.ifr_flags = (tap_type) ? IFF_TAP : IFF_TUN;
>  	snprintf(ifr.ifr_name, IFNAMSIZ, "%s", pmd->name);
>  
>  	RTE_LOG(DEBUG, PMD, "ifr_name '%s'\n", ifr.ifr_name);
>  
>  	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
>  	if (fd < 0) {
> -		RTE_LOG(ERR, PMD, "Unable to create TAP interface\n");
> +		RTE_LOG(ERR, PMD, "Unable to create %s interface\n",
> +			tuntap_name);
>  		goto error;
>  	}
>  
>  #ifdef IFF_MULTI_QUEUE
>  	/* Grab the TUN features to verify we can work multi-queue */
>  	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
> -		RTE_LOG(ERR, PMD, "TAP unable to get TUN/TAP features\n");
> +		RTE_LOG(ERR, PMD, "%s unable to get TUN/TAP features\n",
> +			tuntap_name);
>  		goto error;
>  	}
> -	RTE_LOG(DEBUG, PMD, "  TAP Features %08x\n", features);
> +	RTE_LOG(DEBUG, PMD, " %s Features %08x\n", tuntap_name, features);
>  
>  	if (features & IFF_MULTI_QUEUE) {
>  		RTE_LOG(DEBUG, PMD, "  Multi-queue support for %d queues\n",
> @@ -1108,7 +1116,7 @@ enum ioctl_mode {
>  		tmp = &(*tmp)->next;
>  	}
>  
> -	RTE_LOG(DEBUG, PMD, "  RX TAP device name %s, qid %d on fd %d\n",
> +	RTE_LOG(DEBUG, PMD, "  RX TUNTAP device name %s, qid %d on fd %d\n",
>  		internals->name, rx_queue_id, internals->rxq[rx_queue_id].fd);
>  
>  	return 0;
> @@ -1163,7 +1171,7 @@ enum ioctl_mode {
>  	if (ret == -1)
>  		return -1;
>  	RTE_LOG(DEBUG, PMD,
> -		"  TX TAP device name %s, qid %d on fd %d csum %s\n",
> +		"  TX TUNTAP device name %s, qid %d on fd %d csum %s\n",
>  		internals->name, tx_queue_id, internals->txq[tx_queue_id].fd,
>  		txq->csum ? "on" : "off");
>  
> @@ -1346,17 +1354,19 @@ enum ioctl_mode {
>  	struct ifreq ifr;
>  	int i;
>  
> -	RTE_LOG(DEBUG, PMD, "  TAP device on numa %u\n", rte_socket_id());
> +	RTE_LOG(DEBUG, PMD, "  %s device on numa %u\n",
> +		tuntap_name, rte_socket_id());
>  
>  	data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
>  	if (!data) {
> -		RTE_LOG(ERR, PMD, "TAP Failed to allocate data\n");
> +		RTE_LOG(ERR, PMD, "%s Failed to allocate data\n", tuntap_name);
>  		goto error_exit_nodev;
>  	}
>  
>  	dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
>  	if (!dev) {
> -		RTE_LOG(ERR, PMD, "TAP Unable to allocate device struct\n");
> +		RTE_LOG(ERR, PMD, "%s Unable to allocate device struct\n",
> +			tuntap_name);
>  		goto error_exit_nodev;
>  	}
>  
> @@ -1367,8 +1377,8 @@ enum ioctl_mode {
>  	pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0);
>  	if (pmd->ioctl_sock == -1) {
>  		RTE_LOG(ERR, PMD,
> -			"TAP Unable to get a socket for management: %s\n",
> -			strerror(errno));
> +			"%s Unable to get a socket for management: %s\n",
> +			tuntap_name, strerror(errno));
>  		goto error_exit;
>  	}
>  
> @@ -1399,15 +1409,17 @@ enum ioctl_mode {
>  		pmd->txq[i].fd = -1;
>  	}
>  
> -	if (fixed_mac_type) {
> -		/* fixed mac = 00:64:74:61:70:<iface_idx> */
> -		static int iface_idx;
> -		char mac[ETHER_ADDR_LEN] = "\0dtap";
> +	if (tap_type) {
> +		if (fixed_mac_type) {
> +			/* fixed mac = 00:64:74:61:70:<iface_idx> */
> +			static int iface_idx;
> +			char mac[ETHER_ADDR_LEN] = "\0dtap";
>  
> -		mac[ETHER_ADDR_LEN - 1] = iface_idx++;
> -		rte_memcpy(&pmd->eth_addr, mac, ETHER_ADDR_LEN);
> -	} else {
> -		eth_random_addr((uint8_t *)&pmd->eth_addr);
> +			mac[ETHER_ADDR_LEN - 1] = iface_idx++;
> +			rte_memcpy(&pmd->eth_addr, mac, ETHER_ADDR_LEN);
> +		} else {
> +			eth_random_addr((uint8_t *)&pmd->eth_addr);
> +		}
>  	}
>  
>  	/* Immediately create the netdevice (this will create the 1st queue). */
> @@ -1422,11 +1434,13 @@ enum ioctl_mode {
>  	if (tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE) < 0)
>  		goto error_exit;
>  
> -	memset(&ifr, 0, sizeof(struct ifreq));
> -	ifr.ifr_hwaddr.sa_family = AF_LOCAL;
> -	rte_memcpy(ifr.ifr_hwaddr.sa_data, &pmd->eth_addr, ETHER_ADDR_LEN);
> -	if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0)
> -		goto error_exit;
> +	if (tap_type) {
> +		memset(&ifr, 0, sizeof(struct ifreq));
> +		ifr.ifr_hwaddr.sa_family = AF_LOCAL;
> +		rte_memcpy(ifr.ifr_hwaddr.sa_data, &pmd->eth_addr, ETHER_ADDR_LEN);
> +		if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0)
> +			goto error_exit;
> +	}
>  
>  	/*
>  	 * Set up everything related to rte_flow:
> @@ -1533,8 +1547,8 @@ enum ioctl_mode {
>  	rte_eth_dev_release_port(dev);
>  
>  error_exit_nodev:
> -	RTE_LOG(ERR, PMD, "TAP Unable to initialize %s\n",
> -		rte_vdev_device_name(vdev));
> +	RTE_LOG(ERR, PMD, "%s Unable to initialize %s\n",
> +		tuntap_name, rte_vdev_device_name(vdev));
>  
>  	rte_free(data);
>  	return -EINVAL;
> @@ -1580,6 +1594,61 @@ enum ioctl_mode {
>  	return 0;
>  }
>  
> +/* Open a TUN interface device.
> + */
> +static int
> +rte_pmd_tun_probe(struct rte_vdev_device *dev)
> +{
> +	const char *name, *params;
> +	int ret;
> +	struct rte_kvargs *kvlist = NULL;
> +	char tun_name[RTE_ETH_NAME_MAX_LEN];
> +	char remote_iface[RTE_ETH_NAME_MAX_LEN];
> +
> +	tap_type = 0;
> +	strcpy(tuntap_name, "TUN");
> +
> +	name = rte_vdev_device_name(dev);
> +	params = rte_vdev_device_args(dev);
> +
> +	memset(remote_iface, 0, RTE_ETH_NAME_MAX_LEN);
> +	snprintf(tun_name, sizeof(tun_name), "%s%d",
> +		DEFAULT_TUN_NAME, tun_unit++);
> +
> +	if (params && (params[0] != '\0')) {
> +		RTE_LOG(DEBUG, PMD, "parameters (%s)\n", params);
> +
> +		kvlist = rte_kvargs_parse(params, valid_arguments);
> +		if (kvlist) {
> +			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
> +				ret = rte_kvargs_process(kvlist,
> +					ETH_TAP_IFACE_ARG,
> +					&set_interface_name,
> +					tun_name);
> +
> +				if (ret == -1)
> +					goto leave;
> +			}
> +		}
> +	}
> +	pmd_link.link_speed = ETH_SPEED_NUM_10G;
> +
> +	RTE_LOG(NOTICE, PMD, "Initializing pmd_tun for %s as %s\n",
> +		name, tun_name);
> +
> +	ret = eth_dev_tap_create(dev, tun_name, remote_iface, 0);
> +
> +leave:
> +	if (ret == -1) {
> +		RTE_LOG(ERR, PMD, "Failed to create pmd for %s as %s\n",
> +			name, tun_name);
> +		tun_unit--; /* Restore the unit number */
> +	}
> +	rte_kvargs_free(kvlist);
> +
> +	return ret;
> +}
> +
>  /* Open a TAP interface device.
>   */
>  static int
> @@ -1593,6 +1662,9 @@ enum ioctl_mode {
>  	char remote_iface[RTE_ETH_NAME_MAX_LEN];
>  	int fixed_mac_type = 0;
>  
> +	tap_type = 1;
> +	strcpy(tuntap_name, "TAP");
> +
>  	name = rte_vdev_device_name(dev);
>  	params = rte_vdev_device_args(dev);
>  
> @@ -1652,7 +1724,7 @@ enum ioctl_mode {
>  	return ret;
>  }
>  
> -/* detach a TAP device.
> +/* detach a TUNTAP device.
>   */
>  static int
>  rte_pmd_tap_remove(struct rte_vdev_device *dev)
> @@ -1695,11 +1767,17 @@ enum ioctl_mode {
>  	return 0;
>  }
>  
> +static struct rte_vdev_driver pmd_tun_drv = {
> +	.probe = rte_pmd_tun_probe,
> +	.remove = rte_pmd_tap_remove,
> +};
> +
>  static struct rte_vdev_driver pmd_tap_drv = {
>  	.probe = rte_pmd_tap_probe,
>  	.remove = rte_pmd_tap_remove,
>  };
>  RTE_PMD_REGISTER_VDEV(net_tap, pmd_tap_drv);
> +RTE_PMD_REGISTER_VDEV(net_tun, pmd_tun_drv);
>  RTE_PMD_REGISTER_ALIAS(net_tap, eth_tap);
>  RTE_PMD_REGISTER_PARAM_STRING(net_tap,
>  			      ETH_TAP_IFACE_ARG "=<string> "
The rest looks fine.

Regards,
Pascal

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [dpdk-dev] [PATCH v2] net/tap: add tun support
  2018-02-20 23:12 [dpdk-dev] [PATCH v1] net/tap: add tun support Vipin Varghese
  2018-02-22 12:22 ` [dpdk-dev] [dpdk-dev,v1] " Pascal Mazon
@ 2018-02-26  6:15 ` Vipin Varghese
  2018-02-27 13:06   ` Pascal Mazon
  2018-04-02 21:37   ` [dpdk-dev] [PATCH 1/2] " Vipin Varghese
  1 sibling, 2 replies; 21+ messages in thread
From: Vipin Varghese @ 2018-02-26  6:15 UTC (permalink / raw)
  To: dev, ferruh.yigit, pascal.mazon
  Cc: john.mcnamara, marko.kovacevic, Vipin Varghese

The change adds TUN PMD logic to the existing TAP PMD. TUN PMD can
be initialized with 'net_tunX' where 'X' represents unique id. PMD
supports argument interface, while MAC address and remote are not
supported.

Signed-off-by: Vipin Varghese <vipin.varghese@intel.com>
---

Changes in V2:
 - updated the documentation word error - Pascal
---
 doc/guides/nics/tap.rst       |  15 ++++-
 drivers/net/tap/rte_eth_tap.c | 132 +++++++++++++++++++++++++++++++++---------
 2 files changed, 118 insertions(+), 29 deletions(-)

diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
index ea61be3..0fbbbba 100644
--- a/doc/guides/nics/tap.rst
+++ b/doc/guides/nics/tap.rst
@@ -1,8 +1,8 @@
 ..  SPDX-License-Identifier: BSD-3-Clause
     Copyright(c) 2016 Intel Corporation.
 
-Tap Poll Mode Driver
-====================
+Tun|Tap Poll Mode Driver
+========================
 
 The ``rte_eth_tap.c`` PMD creates a device using TAP interfaces on the
 local host. The PMD allows for DPDK and the host to communicate using a raw
@@ -77,6 +77,17 @@ can utilize that stack to handle the network protocols. Plus you would be able
 to address the interface using an IP address assigned to the internal
 interface.
 
+The TUN PMD allows user to create a TUN device on host. The PMD allows user
+to transmit and receive packets via DPDK API calls with L3 header and payload.
+The devices in host can be accessed via ``ifconfig`` or ``ip`` command. TUN
+interfaces are passed to DPDK ``rte_eal_init`` arguments as ``--vdev=net_tunX``,
+where X stands for unique id, example::
+
+   --vdev=net_tun0 --vdev=net_tun1,iface=foo1, ...
+
+Unlike TAP PMD, TUN PMD does not support user arguments as ``MAC`` or ``remote`` user
+options. Default interface name is ``dtunX``, where X stands for unique id.
+
 Flow API support
 ----------------
 
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index f09db0e..42c9db4 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -42,6 +42,7 @@
 /* Linux based path to the TUN device */
 #define TUN_TAP_DEV_PATH        "/dev/net/tun"
 #define DEFAULT_TAP_NAME        "dtap"
+#define DEFAULT_TUN_NAME        "dtun"
 
 #define ETH_TAP_IFACE_ARG       "iface"
 #define ETH_TAP_REMOTE_ARG      "remote"
@@ -49,6 +50,7 @@
 #define ETH_TAP_MAC_FIXED       "fixed"
 
 static struct rte_vdev_driver pmd_tap_drv;
+static struct rte_vdev_driver pmd_tun_drv;
 
 static const char *valid_arguments[] = {
 	ETH_TAP_IFACE_ARG,
@@ -58,6 +60,10 @@
 };
 
 static int tap_unit;
+static int tun_unit;
+
+static int tap_type;
+static char tuntap_name[8];
 
 static volatile uint32_t tap_trigger;	/* Rx trigger */
 
@@ -104,24 +110,26 @@ enum ioctl_mode {
 	 * Do not set IFF_NO_PI as packet information header will be needed
 	 * to check if a received packet has been truncated.
 	 */
-	ifr.ifr_flags = IFF_TAP;
+	ifr.ifr_flags = (tap_type) ? IFF_TAP : IFF_TUN;
 	snprintf(ifr.ifr_name, IFNAMSIZ, "%s", pmd->name);
 
 	RTE_LOG(DEBUG, PMD, "ifr_name '%s'\n", ifr.ifr_name);
 
 	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
 	if (fd < 0) {
-		RTE_LOG(ERR, PMD, "Unable to create TAP interface\n");
+		RTE_LOG(ERR, PMD, "Unable to create %s interface\n",
+			tuntap_name);
 		goto error;
 	}
 
 #ifdef IFF_MULTI_QUEUE
 	/* Grab the TUN features to verify we can work multi-queue */
 	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
-		RTE_LOG(ERR, PMD, "TAP unable to get TUN/TAP features\n");
+		RTE_LOG(ERR, PMD, "%s unable to get TUN/TAP features\n",
+			tuntap_name);
 		goto error;
 	}
-	RTE_LOG(DEBUG, PMD, "  TAP Features %08x\n", features);
+	RTE_LOG(DEBUG, PMD, " %s Features %08x\n", tuntap_name, features);
 
 	if (features & IFF_MULTI_QUEUE) {
 		RTE_LOG(DEBUG, PMD, "  Multi-queue support for %d queues\n",
@@ -1108,7 +1116,7 @@ enum ioctl_mode {
 		tmp = &(*tmp)->next;
 	}
 
-	RTE_LOG(DEBUG, PMD, "  RX TAP device name %s, qid %d on fd %d\n",
+	RTE_LOG(DEBUG, PMD, "  RX TUNTAP device name %s, qid %d on fd %d\n",
 		internals->name, rx_queue_id, internals->rxq[rx_queue_id].fd);
 
 	return 0;
@@ -1163,7 +1171,7 @@ enum ioctl_mode {
 	if (ret == -1)
 		return -1;
 	RTE_LOG(DEBUG, PMD,
-		"  TX TAP device name %s, qid %d on fd %d csum %s\n",
+		"  TX TUNTAP device name %s, qid %d on fd %d csum %s\n",
 		internals->name, tx_queue_id, internals->txq[tx_queue_id].fd,
 		txq->csum ? "on" : "off");
 
@@ -1346,17 +1354,19 @@ enum ioctl_mode {
 	struct ifreq ifr;
 	int i;
 
-	RTE_LOG(DEBUG, PMD, "  TAP device on numa %u\n", rte_socket_id());
+	RTE_LOG(DEBUG, PMD, "  %s device on numa %u\n",
+		tuntap_name, rte_socket_id());
 
 	data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
 	if (!data) {
-		RTE_LOG(ERR, PMD, "TAP Failed to allocate data\n");
+		RTE_LOG(ERR, PMD, "%s Failed to allocate data\n", tuntap_name);
 		goto error_exit_nodev;
 	}
 
 	dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
 	if (!dev) {
-		RTE_LOG(ERR, PMD, "TAP Unable to allocate device struct\n");
+		RTE_LOG(ERR, PMD, "%s Unable to allocate device struct\n",
+			tuntap_name);
 		goto error_exit_nodev;
 	}
 
@@ -1367,8 +1377,8 @@ enum ioctl_mode {
 	pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0);
 	if (pmd->ioctl_sock == -1) {
 		RTE_LOG(ERR, PMD,
-			"TAP Unable to get a socket for management: %s\n",
-			strerror(errno));
+			"%s Unable to get a socket for management: %s\n",
+			tuntap_name, strerror(errno));
 		goto error_exit;
 	}
 
@@ -1399,15 +1409,17 @@ enum ioctl_mode {
 		pmd->txq[i].fd = -1;
 	}
 
-	if (fixed_mac_type) {
-		/* fixed mac = 00:64:74:61:70:<iface_idx> */
-		static int iface_idx;
-		char mac[ETHER_ADDR_LEN] = "\0dtap";
+	if (tap_type) {
+		if (fixed_mac_type) {
+			/* fixed mac = 00:64:74:61:70:<iface_idx> */
+			static int iface_idx;
+			char mac[ETHER_ADDR_LEN] = "\0dtap";
 
-		mac[ETHER_ADDR_LEN - 1] = iface_idx++;
-		rte_memcpy(&pmd->eth_addr, mac, ETHER_ADDR_LEN);
-	} else {
-		eth_random_addr((uint8_t *)&pmd->eth_addr);
+			mac[ETHER_ADDR_LEN - 1] = iface_idx++;
+			rte_memcpy(&pmd->eth_addr, mac, ETHER_ADDR_LEN);
+		} else {
+			eth_random_addr((uint8_t *)&pmd->eth_addr);
+		}
 	}
 
 	/* Immediately create the netdevice (this will create the 1st queue). */
@@ -1422,11 +1434,13 @@ enum ioctl_mode {
 	if (tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE) < 0)
 		goto error_exit;
 
-	memset(&ifr, 0, sizeof(struct ifreq));
-	ifr.ifr_hwaddr.sa_family = AF_LOCAL;
-	rte_memcpy(ifr.ifr_hwaddr.sa_data, &pmd->eth_addr, ETHER_ADDR_LEN);
-	if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0)
-		goto error_exit;
+	if (tap_type) {
+		memset(&ifr, 0, sizeof(struct ifreq));
+		ifr.ifr_hwaddr.sa_family = AF_LOCAL;
+		rte_memcpy(ifr.ifr_hwaddr.sa_data, &pmd->eth_addr, ETHER_ADDR_LEN);
+		if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0)
+			goto error_exit;
+	}
 
 	/*
 	 * Set up everything related to rte_flow:
@@ -1533,8 +1547,8 @@ enum ioctl_mode {
 	rte_eth_dev_release_port(dev);
 
 error_exit_nodev:
-	RTE_LOG(ERR, PMD, "TAP Unable to initialize %s\n",
-		rte_vdev_device_name(vdev));
+	RTE_LOG(ERR, PMD, "%s Unable to initialize %s\n",
+		tuntap_name, rte_vdev_device_name(vdev));
 
 	rte_free(data);
 	return -EINVAL;
@@ -1580,6 +1594,61 @@ enum ioctl_mode {
 	return 0;
 }
 
+/* Open a TUN interface device.
+ */
+static int
+rte_pmd_tun_probe(struct rte_vdev_device *dev)
+{
+	const char *name, *params;
+	int ret;
+	struct rte_kvargs *kvlist = NULL;
+	char tun_name[RTE_ETH_NAME_MAX_LEN];
+	char remote_iface[RTE_ETH_NAME_MAX_LEN];
+
+	tap_type = 0;
+	strcpy(tuntap_name, "TUN");
+
+	name = rte_vdev_device_name(dev);
+	params = rte_vdev_device_args(dev);
+
+	memset(remote_iface, 0, RTE_ETH_NAME_MAX_LEN);
+	snprintf(tun_name, sizeof(tun_name), "%s%d",
+		DEFAULT_TUN_NAME, tun_unit++);
+
+	if (params && (params[0] != '\0')) {
+		RTE_LOG(DEBUG, PMD, "parameters (%s)\n", params);
+
+		kvlist = rte_kvargs_parse(params, valid_arguments);
+		if (kvlist) {
+			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+					ETH_TAP_IFACE_ARG,
+					&set_interface_name,
+					tun_name);
+
+				if (ret == -1)
+					goto leave;
+			}
+		}
+	}
+	pmd_link.link_speed = ETH_SPEED_NUM_10G;
+
+	RTE_LOG(NOTICE, PMD, "Initializing pmd_tun for %s as %s\n",
+		name, tun_name);
+
+	ret = eth_dev_tap_create(dev, tun_name, remote_iface, 0);
+
+leave:
+	if (ret == -1) {
+		RTE_LOG(ERR, PMD, "Failed to create pmd for %s as %s\n",
+			name, tun_name);
+		tun_unit--; /* Restore the unit number */
+	}
+	rte_kvargs_free(kvlist);
+
+	return ret;
+}
+
 /* Open a TAP interface device.
  */
 static int
@@ -1593,6 +1662,9 @@ enum ioctl_mode {
 	char remote_iface[RTE_ETH_NAME_MAX_LEN];
 	int fixed_mac_type = 0;
 
+	tap_type = 1;
+	strcpy(tuntap_name, "TAP");
+
 	name = rte_vdev_device_name(dev);
 	params = rte_vdev_device_args(dev);
 
@@ -1652,7 +1724,7 @@ enum ioctl_mode {
 	return ret;
 }
 
-/* detach a TAP device.
+/* detach a TUNTAP device.
  */
 static int
 rte_pmd_tap_remove(struct rte_vdev_device *dev)
@@ -1695,11 +1767,17 @@ enum ioctl_mode {
 	return 0;
 }
 
+static struct rte_vdev_driver pmd_tun_drv = {
+	.probe = rte_pmd_tun_probe,
+	.remove = rte_pmd_tap_remove,
+};
+
 static struct rte_vdev_driver pmd_tap_drv = {
 	.probe = rte_pmd_tap_probe,
 	.remove = rte_pmd_tap_remove,
 };
 RTE_PMD_REGISTER_VDEV(net_tap, pmd_tap_drv);
+RTE_PMD_REGISTER_VDEV(net_tun, pmd_tun_drv);
 RTE_PMD_REGISTER_ALIAS(net_tap, eth_tap);
 RTE_PMD_REGISTER_PARAM_STRING(net_tap,
 			      ETH_TAP_IFACE_ARG "=<string> "
-- 
1.9.1

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [dpdk-dev] [dpdk-dev,v1] net/tap: add tun support
  2018-02-22 12:22 ` [dpdk-dev] [dpdk-dev,v1] " Pascal Mazon
@ 2018-02-26 11:01   ` Varghese, Vipin
  0 siblings, 0 replies; 21+ messages in thread
From: Varghese, Vipin @ 2018-02-26 11:01 UTC (permalink / raw)
  To: Pascal Mazon, dev, Mcnamara, John, Yigit, Ferruh; +Cc: Kovacevic, Marko

Hi Pascal,

Thanks for the comments, Please find my updates inline

> -----Original Message-----
> From: Pascal Mazon [mailto:pascal.mazon@6wind.com]
> Sent: Thursday, February 22, 2018 12:22 PM
> To: Varghese, Vipin <vipin.varghese@intel.com>; dev@dpdk.org; Mcnamara,
> John <john.mcnamara@intel.com>; Yigit, Ferruh <ferruh.yigit@intel.com>
> Cc: Kovacevic, Marko <marko.kovacevic@intel.com>
> Subject: Re: [dpdk-dev,v1] net/tap: add tun support
> 
> Personally, I'm mostly ok with this.
> I added a couple of comments inline.
> 

<snipped>

> > +The TUN PMD allows user to create a TUN device on host. The PMD
> > +allows user to transmit and recieve packets via DPDK API calls with L3
> header and payload.
> s/recieve/receive/

Thanks, making the changes for the next version

> > +The devices in host can be accessed via ``ifconfig`` or ``ip``
> > +command. TUN interfaces are passed to DPDK ``rte_eal_init`` arguments
> > +as ``--vdev=net_tunX``, where X stands for unique id, example::

<snipped>

> >
> >  static int tap_unit;
> > +static int tun_unit;
> > +
> > +static int tap_type;
> I'm not a huge fan of considering "tap_type = 1 means it's a TAP device".
> Wouldn't it be clearer to have a tuntap_type variable and an enum
> TUNTAP_TYPE with the two types; then checking  for tuntap_type ==
> TUN_TYPE or tuntap == TAP_TYPE?
> It's not a deal breaker though, just a consideration.

Thanks, my first idea was use the same. Later argued myself in using 'tap_type' since the check for assigning MAC address goes well. Hence I  hope not making the change 'tuntap_type' is ok?

> > +static char tuntap_name[8];
> >
> >  static volatile uint32_t tap_trigger;	/* Rx trigger */
> >

<snipped> 

> > RTE_PMD_REGISTER_PARAM_STRING(net_tap,
> >  			      ETH_TAP_IFACE_ARG "=<string> "
> The rest looks fine.
> 
> Regards,
> Pascal

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [dpdk-dev] [PATCH v2] net/tap: add tun support
  2018-02-26  6:15 ` [dpdk-dev] [PATCH v2] " Vipin Varghese
@ 2018-02-27 13:06   ` Pascal Mazon
  2018-04-02 21:37   ` [dpdk-dev] [PATCH 1/2] " Vipin Varghese
  1 sibling, 0 replies; 21+ messages in thread
From: Pascal Mazon @ 2018-02-27 13:06 UTC (permalink / raw)
  To: Vipin Varghese, dev, ferruh.yigit; +Cc: john.mcnamara, marko.kovacevic

> Thanks, my first idea was use the
same.                                                                                                                                                                     

> Later argued myself in using 'tap_type' since the check for assigning
MAC                                                                                                                                   

> address goes well. Hence I  hope not making the change 'tuntap_type'
is ok?

Well yeah it's still readable, and makes that check simpler.
As we don't use that variable much, I'm ok with it.

Acked-by: Pascal Mazon <pascal.mazon@6wind.com>

On 26/02/2018 07:15, Vipin Varghese wrote:
> The change adds TUN PMD logic to the existing TAP PMD. TUN PMD can
> be initialized with 'net_tunX' where 'X' represents unique id. PMD
> supports argument interface, while MAC address and remote are not
> supported.
>
> Signed-off-by: Vipin Varghese <vipin.varghese@intel.com>
> ---
>
> Changes in V2:
>  - updated the documentation word error - Pascal
> ---
>  doc/guides/nics/tap.rst       |  15 ++++-
>  drivers/net/tap/rte_eth_tap.c | 132 +++++++++++++++++++++++++++++++++---------
>  2 files changed, 118 insertions(+), 29 deletions(-)
>
> diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
> index ea61be3..0fbbbba 100644
> --- a/doc/guides/nics/tap.rst
> +++ b/doc/guides/nics/tap.rst
> @@ -1,8 +1,8 @@
>  ..  SPDX-License-Identifier: BSD-3-Clause
>      Copyright(c) 2016 Intel Corporation.
>  
> -Tap Poll Mode Driver
> -====================
> +Tun|Tap Poll Mode Driver
> +========================
>  
>  The ``rte_eth_tap.c`` PMD creates a device using TAP interfaces on the
>  local host. The PMD allows for DPDK and the host to communicate using a raw
> @@ -77,6 +77,17 @@ can utilize that stack to handle the network protocols. Plus you would be able
>  to address the interface using an IP address assigned to the internal
>  interface.
>  
> +The TUN PMD allows user to create a TUN device on host. The PMD allows user
> +to transmit and receive packets via DPDK API calls with L3 header and payload.
> +The devices in host can be accessed via ``ifconfig`` or ``ip`` command. TUN
> +interfaces are passed to DPDK ``rte_eal_init`` arguments as ``--vdev=net_tunX``,
> +where X stands for unique id, example::
> +
> +   --vdev=net_tun0 --vdev=net_tun1,iface=foo1, ...
> +
> +Unlike TAP PMD, TUN PMD does not support user arguments as ``MAC`` or ``remote`` user
> +options. Default interface name is ``dtunX``, where X stands for unique id.
> +
>  Flow API support
>  ----------------
>  
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index f09db0e..42c9db4 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -42,6 +42,7 @@
>  /* Linux based path to the TUN device */
>  #define TUN_TAP_DEV_PATH        "/dev/net/tun"
>  #define DEFAULT_TAP_NAME        "dtap"
> +#define DEFAULT_TUN_NAME        "dtun"
>  
>  #define ETH_TAP_IFACE_ARG       "iface"
>  #define ETH_TAP_REMOTE_ARG      "remote"
> @@ -49,6 +50,7 @@
>  #define ETH_TAP_MAC_FIXED       "fixed"
>  
>  static struct rte_vdev_driver pmd_tap_drv;
> +static struct rte_vdev_driver pmd_tun_drv;
>  
>  static const char *valid_arguments[] = {
>  	ETH_TAP_IFACE_ARG,
> @@ -58,6 +60,10 @@
>  };
>  
>  static int tap_unit;
> +static int tun_unit;
> +
> +static int tap_type;
> +static char tuntap_name[8];
>  
>  static volatile uint32_t tap_trigger;	/* Rx trigger */
>  
> @@ -104,24 +110,26 @@ enum ioctl_mode {
>  	 * Do not set IFF_NO_PI as packet information header will be needed
>  	 * to check if a received packet has been truncated.
>  	 */
> -	ifr.ifr_flags = IFF_TAP;
> +	ifr.ifr_flags = (tap_type) ? IFF_TAP : IFF_TUN;
>  	snprintf(ifr.ifr_name, IFNAMSIZ, "%s", pmd->name);
>  
>  	RTE_LOG(DEBUG, PMD, "ifr_name '%s'\n", ifr.ifr_name);
>  
>  	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
>  	if (fd < 0) {
> -		RTE_LOG(ERR, PMD, "Unable to create TAP interface\n");
> +		RTE_LOG(ERR, PMD, "Unable to create %s interface\n",
> +			tuntap_name);
>  		goto error;
>  	}
>  
>  #ifdef IFF_MULTI_QUEUE
>  	/* Grab the TUN features to verify we can work multi-queue */
>  	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
> -		RTE_LOG(ERR, PMD, "TAP unable to get TUN/TAP features\n");
> +		RTE_LOG(ERR, PMD, "%s unable to get TUN/TAP features\n",
> +			tuntap_name);
>  		goto error;
>  	}
> -	RTE_LOG(DEBUG, PMD, "  TAP Features %08x\n", features);
> +	RTE_LOG(DEBUG, PMD, " %s Features %08x\n", tuntap_name, features);
>  
>  	if (features & IFF_MULTI_QUEUE) {
>  		RTE_LOG(DEBUG, PMD, "  Multi-queue support for %d queues\n",
> @@ -1108,7 +1116,7 @@ enum ioctl_mode {
>  		tmp = &(*tmp)->next;
>  	}
>  
> -	RTE_LOG(DEBUG, PMD, "  RX TAP device name %s, qid %d on fd %d\n",
> +	RTE_LOG(DEBUG, PMD, "  RX TUNTAP device name %s, qid %d on fd %d\n",
>  		internals->name, rx_queue_id, internals->rxq[rx_queue_id].fd);
>  
>  	return 0;
> @@ -1163,7 +1171,7 @@ enum ioctl_mode {
>  	if (ret == -1)
>  		return -1;
>  	RTE_LOG(DEBUG, PMD,
> -		"  TX TAP device name %s, qid %d on fd %d csum %s\n",
> +		"  TX TUNTAP device name %s, qid %d on fd %d csum %s\n",
>  		internals->name, tx_queue_id, internals->txq[tx_queue_id].fd,
>  		txq->csum ? "on" : "off");
>  
> @@ -1346,17 +1354,19 @@ enum ioctl_mode {
>  	struct ifreq ifr;
>  	int i;
>  
> -	RTE_LOG(DEBUG, PMD, "  TAP device on numa %u\n", rte_socket_id());
> +	RTE_LOG(DEBUG, PMD, "  %s device on numa %u\n",
> +		tuntap_name, rte_socket_id());
>  
>  	data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
>  	if (!data) {
> -		RTE_LOG(ERR, PMD, "TAP Failed to allocate data\n");
> +		RTE_LOG(ERR, PMD, "%s Failed to allocate data\n", tuntap_name);
>  		goto error_exit_nodev;
>  	}
>  
>  	dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
>  	if (!dev) {
> -		RTE_LOG(ERR, PMD, "TAP Unable to allocate device struct\n");
> +		RTE_LOG(ERR, PMD, "%s Unable to allocate device struct\n",
> +			tuntap_name);
>  		goto error_exit_nodev;
>  	}
>  
> @@ -1367,8 +1377,8 @@ enum ioctl_mode {
>  	pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0);
>  	if (pmd->ioctl_sock == -1) {
>  		RTE_LOG(ERR, PMD,
> -			"TAP Unable to get a socket for management: %s\n",
> -			strerror(errno));
> +			"%s Unable to get a socket for management: %s\n",
> +			tuntap_name, strerror(errno));
>  		goto error_exit;
>  	}
>  
> @@ -1399,15 +1409,17 @@ enum ioctl_mode {
>  		pmd->txq[i].fd = -1;
>  	}
>  
> -	if (fixed_mac_type) {
> -		/* fixed mac = 00:64:74:61:70:<iface_idx> */
> -		static int iface_idx;
> -		char mac[ETHER_ADDR_LEN] = "\0dtap";
> +	if (tap_type) {
> +		if (fixed_mac_type) {
> +			/* fixed mac = 00:64:74:61:70:<iface_idx> */
> +			static int iface_idx;
> +			char mac[ETHER_ADDR_LEN] = "\0dtap";
>  
> -		mac[ETHER_ADDR_LEN - 1] = iface_idx++;
> -		rte_memcpy(&pmd->eth_addr, mac, ETHER_ADDR_LEN);
> -	} else {
> -		eth_random_addr((uint8_t *)&pmd->eth_addr);
> +			mac[ETHER_ADDR_LEN - 1] = iface_idx++;
> +			rte_memcpy(&pmd->eth_addr, mac, ETHER_ADDR_LEN);
> +		} else {
> +			eth_random_addr((uint8_t *)&pmd->eth_addr);
> +		}
>  	}
>  
>  	/* Immediately create the netdevice (this will create the 1st queue). */
> @@ -1422,11 +1434,13 @@ enum ioctl_mode {
>  	if (tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE) < 0)
>  		goto error_exit;
>  
> -	memset(&ifr, 0, sizeof(struct ifreq));
> -	ifr.ifr_hwaddr.sa_family = AF_LOCAL;
> -	rte_memcpy(ifr.ifr_hwaddr.sa_data, &pmd->eth_addr, ETHER_ADDR_LEN);
> -	if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0)
> -		goto error_exit;
> +	if (tap_type) {
> +		memset(&ifr, 0, sizeof(struct ifreq));
> +		ifr.ifr_hwaddr.sa_family = AF_LOCAL;
> +		rte_memcpy(ifr.ifr_hwaddr.sa_data, &pmd->eth_addr, ETHER_ADDR_LEN);
> +		if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0)
> +			goto error_exit;
> +	}
>  
>  	/*
>  	 * Set up everything related to rte_flow:
> @@ -1533,8 +1547,8 @@ enum ioctl_mode {
>  	rte_eth_dev_release_port(dev);
>  
>  error_exit_nodev:
> -	RTE_LOG(ERR, PMD, "TAP Unable to initialize %s\n",
> -		rte_vdev_device_name(vdev));
> +	RTE_LOG(ERR, PMD, "%s Unable to initialize %s\n",
> +		tuntap_name, rte_vdev_device_name(vdev));
>  
>  	rte_free(data);
>  	return -EINVAL;
> @@ -1580,6 +1594,61 @@ enum ioctl_mode {
>  	return 0;
>  }
>  
> +/* Open a TUN interface device.
> + */
> +static int
> +rte_pmd_tun_probe(struct rte_vdev_device *dev)
> +{
> +	const char *name, *params;
> +	int ret;
> +	struct rte_kvargs *kvlist = NULL;
> +	char tun_name[RTE_ETH_NAME_MAX_LEN];
> +	char remote_iface[RTE_ETH_NAME_MAX_LEN];
> +
> +	tap_type = 0;
> +	strcpy(tuntap_name, "TUN");
> +
> +	name = rte_vdev_device_name(dev);
> +	params = rte_vdev_device_args(dev);
> +
> +	memset(remote_iface, 0, RTE_ETH_NAME_MAX_LEN);
> +	snprintf(tun_name, sizeof(tun_name), "%s%d",
> +		DEFAULT_TUN_NAME, tun_unit++);
> +
> +	if (params && (params[0] != '\0')) {
> +		RTE_LOG(DEBUG, PMD, "parameters (%s)\n", params);
> +
> +		kvlist = rte_kvargs_parse(params, valid_arguments);
> +		if (kvlist) {
> +			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
> +				ret = rte_kvargs_process(kvlist,
> +					ETH_TAP_IFACE_ARG,
> +					&set_interface_name,
> +					tun_name);
> +
> +				if (ret == -1)
> +					goto leave;
> +			}
> +		}
> +	}
> +	pmd_link.link_speed = ETH_SPEED_NUM_10G;
> +
> +	RTE_LOG(NOTICE, PMD, "Initializing pmd_tun for %s as %s\n",
> +		name, tun_name);
> +
> +	ret = eth_dev_tap_create(dev, tun_name, remote_iface, 0);
> +
> +leave:
> +	if (ret == -1) {
> +		RTE_LOG(ERR, PMD, "Failed to create pmd for %s as %s\n",
> +			name, tun_name);
> +		tun_unit--; /* Restore the unit number */
> +	}
> +	rte_kvargs_free(kvlist);
> +
> +	return ret;
> +}
> +
>  /* Open a TAP interface device.
>   */
>  static int
> @@ -1593,6 +1662,9 @@ enum ioctl_mode {
>  	char remote_iface[RTE_ETH_NAME_MAX_LEN];
>  	int fixed_mac_type = 0;
>  
> +	tap_type = 1;
> +	strcpy(tuntap_name, "TAP");
> +
>  	name = rte_vdev_device_name(dev);
>  	params = rte_vdev_device_args(dev);
>  
> @@ -1652,7 +1724,7 @@ enum ioctl_mode {
>  	return ret;
>  }
>  
> -/* detach a TAP device.
> +/* detach a TUNTAP device.
>   */
>  static int
>  rte_pmd_tap_remove(struct rte_vdev_device *dev)
> @@ -1695,11 +1767,17 @@ enum ioctl_mode {
>  	return 0;
>  }
>  
> +static struct rte_vdev_driver pmd_tun_drv = {
> +	.probe = rte_pmd_tun_probe,
> +	.remove = rte_pmd_tap_remove,
> +};
> +
>  static struct rte_vdev_driver pmd_tap_drv = {
>  	.probe = rte_pmd_tap_probe,
>  	.remove = rte_pmd_tap_remove,
>  };
>  RTE_PMD_REGISTER_VDEV(net_tap, pmd_tap_drv);
> +RTE_PMD_REGISTER_VDEV(net_tun, pmd_tun_drv);
>  RTE_PMD_REGISTER_ALIAS(net_tap, eth_tap);
>  RTE_PMD_REGISTER_PARAM_STRING(net_tap,
>  			      ETH_TAP_IFACE_ARG "=<string> "

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [dpdk-dev] [PATCH 1/2] net/tap: add tun support
  2018-02-26  6:15 ` [dpdk-dev] [PATCH v2] " Vipin Varghese
  2018-02-27 13:06   ` Pascal Mazon
@ 2018-04-02 21:37   ` Vipin Varghese
  2018-04-02 21:37     ` [dpdk-dev] [PATCH 2/2] net/tap: add tun log and documnetation Vipin Varghese
                       ` (2 more replies)
  1 sibling, 3 replies; 21+ messages in thread
From: Vipin Varghese @ 2018-04-02 21:37 UTC (permalink / raw)
  To: dev, pascal.mazon, ferruh.yigit; +Cc: Vipin Varghese

The change adds functional TUN PMD logic to the existing TAP PMD.
TUN PMD can be initialized with 'net_tunX' where 'X' represents unique id.
PMD supports argument interface, while MAC address and remote are not
supported.

Signed-off-by: Vipin Varghese <vipin.varghese@intel.com>
---

Changes in V3:
 - fix the TUN kernel packet error - Vipin
 - seperate out function from logging - Ferruh
 - add TUN PMD_REGISTER_PARAM_STRING - Ferruh

Changes in V2:
 - updated the documentation word error - Pascal
---
 drivers/net/tap/rte_eth_tap.c | 117 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 105 insertions(+), 12 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 3e4f7a8..295db3c 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -42,6 +42,7 @@
 /* Linux based path to the TUN device */
 #define TUN_TAP_DEV_PATH        "/dev/net/tun"
 #define DEFAULT_TAP_NAME        "dtap"
+#define DEFAULT_TUN_NAME        "dtun"
 
 #define ETH_TAP_IFACE_ARG       "iface"
 #define ETH_TAP_REMOTE_ARG      "remote"
@@ -53,6 +54,7 @@
 #define ETH_TAP_MAC_ARG_FMT     ETH_TAP_MAC_FIXED "|" ETH_TAP_USR_MAC_FMT
 
 static struct rte_vdev_driver pmd_tap_drv;
+static struct rte_vdev_driver pmd_tun_drv;
 
 static const char *valid_arguments[] = {
 	ETH_TAP_IFACE_ARG,
@@ -62,6 +64,10 @@
 };
 
 static int tap_unit;
+static int tun_unit;
+
+static int tap_type;
+static char tuntap_name[8];
 
 static volatile uint32_t tap_trigger;	/* Rx trigger */
 
@@ -108,7 +114,7 @@ enum ioctl_mode {
 	 * Do not set IFF_NO_PI as packet information header will be needed
 	 * to check if a received packet has been truncated.
 	 */
-	ifr.ifr_flags = IFF_TAP;
+	ifr.ifr_flags = (tap_type) ? IFF_TAP : IFF_TUN | IFF_POINTOPOINT;
 	snprintf(ifr.ifr_name, IFNAMSIZ, "%s", pmd->name);
 
 	RTE_LOG(DEBUG, PMD, "ifr_name '%s'\n", ifr.ifr_name);
@@ -495,7 +501,7 @@ enum ioctl_mode {
 	for (i = 0; i < nb_pkts; i++) {
 		struct rte_mbuf *mbuf = bufs[num_tx];
 		struct iovec iovecs[mbuf->nb_segs + 1];
-		struct tun_pi pi = { .flags = 0 };
+		struct tun_pi pi = { .flags = 0, .proto = 0x00 };
 		struct rte_mbuf *seg = mbuf;
 		char m_copy[mbuf->data_len];
 		int n;
@@ -505,6 +511,21 @@ enum ioctl_mode {
 		if (rte_pktmbuf_pkt_len(mbuf) > max_size)
 			break;
 
+		/*
+		 * TUN and TAP are created with IFF_NO_PI disabled.
+		 * For TUN PMD this mandatory as fields are used by
+		 * Kernel tun.c to determine whether its IP or non IP
+		 * packets.
+		 *
+		 * The logic fetches the first byte of data from mbuf.
+		 * compares whether its v4 or v6. If none matches default
+		 * value 0x00 is taken for protocol field.
+		 */
+		char *buff_data = rte_pktmbuf_mtod(seg, void *);
+		j = (*buff_data & 0xf0);
+		if (j & (0x40 | 0x60))
+			pi.proto = (j == 0x40) ? 0x0008 : 0xdd86;
+
 		iovecs[0].iov_base = &pi;
 		iovecs[0].iov_len = sizeof(pi);
 		for (j = 1; j <= mbuf->nb_segs; j++) {
@@ -1403,10 +1424,12 @@ enum ioctl_mode {
 		pmd->txq[i].fd = -1;
 	}
 
-	if (is_zero_ether_addr(mac_addr))
-		eth_random_addr((uint8_t *)&pmd->eth_addr);
-	else
-		rte_memcpy(&pmd->eth_addr, mac_addr, sizeof(mac_addr));
+	if (tap_type) {
+		if (is_zero_ether_addr(mac_addr))
+			eth_random_addr((uint8_t *)&pmd->eth_addr);
+		else
+			rte_memcpy(&pmd->eth_addr, mac_addr, sizeof(mac_addr));
+	}
 
 	/* Immediately create the netdevice (this will create the 1st queue). */
 	/* rx queue */
@@ -1420,11 +1443,14 @@ enum ioctl_mode {
 	if (tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE) < 0)
 		goto error_exit;
 
-	memset(&ifr, 0, sizeof(struct ifreq));
-	ifr.ifr_hwaddr.sa_family = AF_LOCAL;
-	rte_memcpy(ifr.ifr_hwaddr.sa_data, &pmd->eth_addr, ETHER_ADDR_LEN);
-	if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0)
-		goto error_exit;
+	if (tap_type) {
+		memset(&ifr, 0, sizeof(struct ifreq));
+		ifr.ifr_hwaddr.sa_family = AF_LOCAL;
+		rte_memcpy(ifr.ifr_hwaddr.sa_data, &pmd->eth_addr,
+				ETHER_ADDR_LEN);
+		if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0)
+			goto error_exit;
+	}
 
 	/*
 	 * Set up everything related to rte_flow:
@@ -1621,6 +1647,62 @@ static int parse_user_mac(struct ether_addr *user_mac,
 	return -1;
 }
 
+/*
+ * Open a TUN interface device. TUN PMD
+ * 1) sets tap_type as false
+ * 2) intakes iface as argument.
+ * 3) as interface is virtual set speed to 10G
+ */
+static int
+rte_pmd_tun_probe(struct rte_vdev_device *dev)
+{
+	const char *name, *params;
+	int ret;
+	struct rte_kvargs *kvlist = NULL;
+	char tun_name[RTE_ETH_NAME_MAX_LEN];
+	char remote_iface[RTE_ETH_NAME_MAX_LEN];
+
+	tap_type = 0;
+	strcpy(tuntap_name, "TUN");
+
+	name = rte_vdev_device_name(dev);
+	params = rte_vdev_device_args(dev);
+	memset(remote_iface, 0, RTE_ETH_NAME_MAX_LEN);
+
+	if (params && (params[0] != '\0')) {
+		RTE_LOG(DEBUG, PMD, "parameters (%s)\n", params);
+
+		kvlist = rte_kvargs_parse(params, valid_arguments);
+		if (kvlist) {
+			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+					ETH_TAP_IFACE_ARG,
+					&set_interface_name,
+					tun_name);
+
+				if (ret == -1)
+					goto leave;
+			}
+		}
+	}
+	pmd_link.link_speed = ETH_SPEED_NUM_10G;
+
+	RTE_LOG(NOTICE, PMD, "Initializing pmd_tun for %s as %s\n",
+		name, tun_name);
+
+	ret = eth_dev_tap_create(dev, tun_name, remote_iface, 0);
+
+leave:
+	if (ret == -1) {
+		RTE_LOG(ERR, PMD, "Failed to create pmd for %s as %s\n",
+			name, tun_name);
+		tun_unit--; /* Restore the unit number */
+	}
+	rte_kvargs_free(kvlist);
+
+	return ret;
+}
+
 /* Open a TAP interface device.
  */
 static int
@@ -1634,6 +1716,9 @@ static int parse_user_mac(struct ether_addr *user_mac,
 	char remote_iface[RTE_ETH_NAME_MAX_LEN];
 	struct ether_addr user_mac = { .addr_bytes = {0} };
 
+	tap_type = 1;
+	strcpy(tuntap_name, "TAP");
+
 	name = rte_vdev_device_name(dev);
 	params = rte_vdev_device_args(dev);
 
@@ -1693,7 +1778,7 @@ static int parse_user_mac(struct ether_addr *user_mac,
 	return ret;
 }
 
-/* detach a TAP device.
+/* detach a TUNTAP device.
  */
 static int
 rte_pmd_tap_remove(struct rte_vdev_device *dev)
@@ -1736,12 +1821,20 @@ static int parse_user_mac(struct ether_addr *user_mac,
 	return 0;
 }
 
+static struct rte_vdev_driver pmd_tun_drv = {
+	.probe = rte_pmd_tun_probe,
+	.remove = rte_pmd_tap_remove,
+};
+
 static struct rte_vdev_driver pmd_tap_drv = {
 	.probe = rte_pmd_tap_probe,
 	.remove = rte_pmd_tap_remove,
 };
 RTE_PMD_REGISTER_VDEV(net_tap, pmd_tap_drv);
+RTE_PMD_REGISTER_VDEV(net_tun, pmd_tun_drv);
 RTE_PMD_REGISTER_ALIAS(net_tap, eth_tap);
+RTE_PMD_REGISTER_PARAM_STRING(net_tun,
+			      ETH_TAP_IFACE_ARG "=<string> ");
 RTE_PMD_REGISTER_PARAM_STRING(net_tap,
 			      ETH_TAP_IFACE_ARG "=<string> "
 			      ETH_TAP_MAC_ARG "=" ETH_TAP_MAC_ARG_FMT " "
-- 
1.9.1

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [dpdk-dev] [PATCH 2/2] net/tap: add tun log and documnetation
  2018-04-02 21:37   ` [dpdk-dev] [PATCH 1/2] " Vipin Varghese
@ 2018-04-02 21:37     ` Vipin Varghese
  2018-04-03  8:27       ` Pascal Mazon
  2018-04-06 17:11     ` [dpdk-dev] [PATCH 1/2] net/tap: add tun support Ferruh Yigit
  2018-04-12 11:49     ` Ophir Munk
  2 siblings, 1 reply; 21+ messages in thread
From: Vipin Varghese @ 2018-04-02 21:37 UTC (permalink / raw)
  To: dev, pascal.mazon, ferruh.yigit; +Cc: Vipin Varghese

The changes add TUN|TAP specific logs and documentation support.

Signed-off-by: Vipin Varghese <vipin.varghese@intel.com>
---
 doc/guides/nics/tap.rst       | 15 +++++++++++++--
 drivers/net/tap/rte_eth_tap.c | 28 ++++++++++++++++------------
 2 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
index 76eb0bd..c97786a 100644
--- a/doc/guides/nics/tap.rst
+++ b/doc/guides/nics/tap.rst
@@ -1,8 +1,8 @@
 ..  SPDX-License-Identifier: BSD-3-Clause
     Copyright(c) 2016 Intel Corporation.
 
-Tap Poll Mode Driver
-====================
+Tun|Tap Poll Mode Driver
+========================
 
 The ``rte_eth_tap.c`` PMD creates a device using TAP interfaces on the
 local host. The PMD allows for DPDK and the host to communicate using a raw
@@ -83,6 +83,17 @@ can utilize that stack to handle the network protocols. Plus you would be able
 to address the interface using an IP address assigned to the internal
 interface.
 
+The TUN PMD allows user to create a TUN device on host. The PMD allows user
+to transmit and receive packets via DPDK API calls with L3 header and payload.
+The devices in host can be accessed via ``ifconfig`` or ``ip`` command. TUN
+interfaces are passed to DPDK ``rte_eal_init`` arguments as ``--vdev=net_tunX``,
+where X stands for unique id, example::
+
+   --vdev=net_tun0 --vdev=net_tun1,iface=foo1, ...
+
+Unlike TAP PMD, TUN PMD does not support user arguments as ``MAC`` or ``remote`` user
+options. Default interface name is ``dtunX``, where X stands for unique id.
+
 Flow API support
 ----------------
 
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 295db3c..7c6704b 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -121,17 +121,19 @@ enum ioctl_mode {
 
 	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
 	if (fd < 0) {
-		RTE_LOG(ERR, PMD, "Unable to create TAP interface\n");
+		RTE_LOG(ERR, PMD, "Unable to create %s interface\n",
+				tuntap_name);
 		goto error;
 	}
 
 #ifdef IFF_MULTI_QUEUE
 	/* Grab the TUN features to verify we can work multi-queue */
 	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
-		RTE_LOG(ERR, PMD, "TAP unable to get TUN/TAP features\n");
+		RTE_LOG(ERR, PMD, "%s unable to get TUN/TAP features\n",
+				tuntap_name);
 		goto error;
 	}
-	RTE_LOG(DEBUG, PMD, "  TAP Features %08x\n", features);
+	RTE_LOG(DEBUG, PMD, "%s Features %08x\n", tuntap_name, features);
 
 	if (features & IFF_MULTI_QUEUE) {
 		RTE_LOG(DEBUG, PMD, "  Multi-queue support for %d queues\n",
@@ -1133,7 +1135,7 @@ enum ioctl_mode {
 		tmp = &(*tmp)->next;
 	}
 
-	RTE_LOG(DEBUG, PMD, "  RX TAP device name %s, qid %d on fd %d\n",
+	RTE_LOG(DEBUG, PMD, "  RX TUNTAP device name %s, qid %d on fd %d\n",
 		internals->name, rx_queue_id, internals->rxq[rx_queue_id].fd);
 
 	return 0;
@@ -1188,7 +1190,7 @@ enum ioctl_mode {
 	if (ret == -1)
 		return -1;
 	RTE_LOG(DEBUG, PMD,
-		"  TX TAP device name %s, qid %d on fd %d csum %s\n",
+		"  TX TUNTAP device name %s, qid %d on fd %d csum %s\n",
 		internals->name, tx_queue_id, internals->txq[tx_queue_id].fd,
 		txq->csum ? "on" : "off");
 
@@ -1371,17 +1373,19 @@ enum ioctl_mode {
 	struct ifreq ifr;
 	int i;
 
-	RTE_LOG(DEBUG, PMD, "  TAP device on numa %u\n", rte_socket_id());
+	RTE_LOG(DEBUG, PMD, "%s device on numa %u\n",
+			tuntap_name, rte_socket_id());
 
 	data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
 	if (!data) {
-		RTE_LOG(ERR, PMD, "TAP Failed to allocate data\n");
+		RTE_LOG(ERR, PMD, "%s Failed to allocate data\n", tuntap_name);
 		goto error_exit_nodev;
 	}
 
 	dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
 	if (!dev) {
-		RTE_LOG(ERR, PMD, "TAP Unable to allocate device struct\n");
+		RTE_LOG(ERR, PMD, "%s Unable to allocate device struct\n",
+				tuntap_name);
 		goto error_exit_nodev;
 	}
 
@@ -1392,8 +1396,8 @@ enum ioctl_mode {
 	pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0);
 	if (pmd->ioctl_sock == -1) {
 		RTE_LOG(ERR, PMD,
-			"TAP Unable to get a socket for management: %s\n",
-			strerror(errno));
+			"%s Unable to get a socket for management: %s\n",
+			tuntap_name, strerror(errno));
 		goto error_exit;
 	}
 
@@ -1557,8 +1561,8 @@ enum ioctl_mode {
 	rte_eth_dev_release_port(dev);
 
 error_exit_nodev:
-	RTE_LOG(ERR, PMD, "TAP Unable to initialize %s\n",
-		rte_vdev_device_name(vdev));
+	RTE_LOG(ERR, PMD, "%s Unable to initialize %s\n",
+		tuntap_name, rte_vdev_device_name(vdev));
 
 	rte_free(data);
 	return -EINVAL;
-- 
1.9.1

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net/tap: add tun log and documnetation
  2018-04-02 21:37     ` [dpdk-dev] [PATCH 2/2] net/tap: add tun log and documnetation Vipin Varghese
@ 2018-04-03  8:27       ` Pascal Mazon
  2018-04-03 10:05         ` Ferruh Yigit
  2018-04-03 10:05         ` Ferruh Yigit
  0 siblings, 2 replies; 21+ messages in thread
From: Pascal Mazon @ 2018-04-03  8:27 UTC (permalink / raw)
  To: Vipin Varghese, dev, ferruh.yigit

Hi,

I'm not sure why you split this patch in two.
It seemed to me that it was fine to change the doc+logs in the same
patch as the code.
Especially since documentation change is very small.

Anyway, there's just the "documnetation" typo in the commit title,
otherwise it's fine by me.

Best regards,
Pascal

On 02/04/2018 23:37, Vipin Varghese wrote:
> The changes add TUN|TAP specific logs and documentation support.
>
> Signed-off-by: Vipin Varghese <vipin.varghese@intel.com>
> ---
>  doc/guides/nics/tap.rst       | 15 +++++++++++++--
>  drivers/net/tap/rte_eth_tap.c | 28 ++++++++++++++++------------
>  2 files changed, 29 insertions(+), 14 deletions(-)
>
> diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
> index 76eb0bd..c97786a 100644
> --- a/doc/guides/nics/tap.rst
> +++ b/doc/guides/nics/tap.rst
> @@ -1,8 +1,8 @@
>  ..  SPDX-License-Identifier: BSD-3-Clause
>      Copyright(c) 2016 Intel Corporation.
>  
> -Tap Poll Mode Driver
> -====================
> +Tun|Tap Poll Mode Driver
> +========================
>  
>  The ``rte_eth_tap.c`` PMD creates a device using TAP interfaces on the
>  local host. The PMD allows for DPDK and the host to communicate using a raw
> @@ -83,6 +83,17 @@ can utilize that stack to handle the network protocols. Plus you would be able
>  to address the interface using an IP address assigned to the internal
>  interface.
>  
> +The TUN PMD allows user to create a TUN device on host. The PMD allows user
> +to transmit and receive packets via DPDK API calls with L3 header and payload.
> +The devices in host can be accessed via ``ifconfig`` or ``ip`` command. TUN
> +interfaces are passed to DPDK ``rte_eal_init`` arguments as ``--vdev=net_tunX``,
> +where X stands for unique id, example::
> +
> +   --vdev=net_tun0 --vdev=net_tun1,iface=foo1, ...
> +
> +Unlike TAP PMD, TUN PMD does not support user arguments as ``MAC`` or ``remote`` user
> +options. Default interface name is ``dtunX``, where X stands for unique id.
> +
>  Flow API support
>  ----------------
>  
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index 295db3c..7c6704b 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -121,17 +121,19 @@ enum ioctl_mode {
>  
>  	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
>  	if (fd < 0) {
> -		RTE_LOG(ERR, PMD, "Unable to create TAP interface\n");
> +		RTE_LOG(ERR, PMD, "Unable to create %s interface\n",
> +				tuntap_name);
>  		goto error;
>  	}
>  
>  #ifdef IFF_MULTI_QUEUE
>  	/* Grab the TUN features to verify we can work multi-queue */
>  	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
> -		RTE_LOG(ERR, PMD, "TAP unable to get TUN/TAP features\n");
> +		RTE_LOG(ERR, PMD, "%s unable to get TUN/TAP features\n",
> +				tuntap_name);
>  		goto error;
>  	}
> -	RTE_LOG(DEBUG, PMD, "  TAP Features %08x\n", features);
> +	RTE_LOG(DEBUG, PMD, "%s Features %08x\n", tuntap_name, features);
>  
>  	if (features & IFF_MULTI_QUEUE) {
>  		RTE_LOG(DEBUG, PMD, "  Multi-queue support for %d queues\n",
> @@ -1133,7 +1135,7 @@ enum ioctl_mode {
>  		tmp = &(*tmp)->next;
>  	}
>  
> -	RTE_LOG(DEBUG, PMD, "  RX TAP device name %s, qid %d on fd %d\n",
> +	RTE_LOG(DEBUG, PMD, "  RX TUNTAP device name %s, qid %d on fd %d\n",
>  		internals->name, rx_queue_id, internals->rxq[rx_queue_id].fd);
>  
>  	return 0;
> @@ -1188,7 +1190,7 @@ enum ioctl_mode {
>  	if (ret == -1)
>  		return -1;
>  	RTE_LOG(DEBUG, PMD,
> -		"  TX TAP device name %s, qid %d on fd %d csum %s\n",
> +		"  TX TUNTAP device name %s, qid %d on fd %d csum %s\n",
>  		internals->name, tx_queue_id, internals->txq[tx_queue_id].fd,
>  		txq->csum ? "on" : "off");
>  
> @@ -1371,17 +1373,19 @@ enum ioctl_mode {
>  	struct ifreq ifr;
>  	int i;
>  
> -	RTE_LOG(DEBUG, PMD, "  TAP device on numa %u\n", rte_socket_id());
> +	RTE_LOG(DEBUG, PMD, "%s device on numa %u\n",
> +			tuntap_name, rte_socket_id());
>  
>  	data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
>  	if (!data) {
> -		RTE_LOG(ERR, PMD, "TAP Failed to allocate data\n");
> +		RTE_LOG(ERR, PMD, "%s Failed to allocate data\n", tuntap_name);
>  		goto error_exit_nodev;
>  	}
>  
>  	dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
>  	if (!dev) {
> -		RTE_LOG(ERR, PMD, "TAP Unable to allocate device struct\n");
> +		RTE_LOG(ERR, PMD, "%s Unable to allocate device struct\n",
> +				tuntap_name);
>  		goto error_exit_nodev;
>  	}
>  
> @@ -1392,8 +1396,8 @@ enum ioctl_mode {
>  	pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0);
>  	if (pmd->ioctl_sock == -1) {
>  		RTE_LOG(ERR, PMD,
> -			"TAP Unable to get a socket for management: %s\n",
> -			strerror(errno));
> +			"%s Unable to get a socket for management: %s\n",
> +			tuntap_name, strerror(errno));
>  		goto error_exit;
>  	}
>  
> @@ -1557,8 +1561,8 @@ enum ioctl_mode {
>  	rte_eth_dev_release_port(dev);
>  
>  error_exit_nodev:
> -	RTE_LOG(ERR, PMD, "TAP Unable to initialize %s\n",
> -		rte_vdev_device_name(vdev));
> +	RTE_LOG(ERR, PMD, "%s Unable to initialize %s\n",
> +		tuntap_name, rte_vdev_device_name(vdev));
>  
>  	rte_free(data);
>  	return -EINVAL;

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net/tap: add tun log and documnetation
  2018-04-03  8:27       ` Pascal Mazon
@ 2018-04-03 10:05         ` Ferruh Yigit
  2018-04-03 10:53           ` Pascal Mazon
  2018-04-03 10:05         ` Ferruh Yigit
  1 sibling, 1 reply; 21+ messages in thread
From: Ferruh Yigit @ 2018-04-03 10:05 UTC (permalink / raw)
  To: Pascal Mazon, Vipin Varghese, dev

On 4/3/2018 9:27 AM, Pascal Mazon wrote:
> Hi,
> 
> I'm not sure why you split this patch in two.
> It seemed to me that it was fine to change the doc+logs in the same
> patch as the code.
> Especially since documentation change is very small.

It was my suggestion indeed, to separate actual functional change from routine
log update noise.

> 
> Anyway, there's just the "documnetation" typo in the commit title,
> otherwise it's fine by me.
> 
> Best regards,
> Pascal
> 
> On 02/04/2018 23:37, Vipin Varghese wrote:
>> The changes add TUN|TAP specific logs and documentation support.
>>
>> Signed-off-by: Vipin Varghese <vipin.varghese@intel.com>

<...>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net/tap: add tun log and documnetation
  2018-04-03  8:27       ` Pascal Mazon
  2018-04-03 10:05         ` Ferruh Yigit
@ 2018-04-03 10:05         ` Ferruh Yigit
  1 sibling, 0 replies; 21+ messages in thread
From: Ferruh Yigit @ 2018-04-03 10:05 UTC (permalink / raw)
  To: Pascal Mazon, Vipin Varghese, dev

On 4/3/2018 9:27 AM, Pascal Mazon wrote:
> Hi,
> 
> I'm not sure why you split this patch in two.
> It seemed to me that it was fine to change the doc+logs in the same
> patch as the code.
> Especially since documentation change is very small.

It was my suggestion indeed, to separate actual functional change from routine
log update noise.

> 
> Anyway, there's just the "documnetation" typo in the commit title,
> otherwise it's fine by me.
> 
> Best regards,
> Pascal
> 
> On 02/04/2018 23:37, Vipin Varghese wrote:
>> The changes add TUN|TAP specific logs and documentation support.
>>
>> Signed-off-by: Vipin Varghese <vipin.varghese@intel.com>

<...>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net/tap: add tun log and documnetation
  2018-04-03 10:05         ` Ferruh Yigit
@ 2018-04-03 10:53           ` Pascal Mazon
  0 siblings, 0 replies; 21+ messages in thread
From: Pascal Mazon @ 2018-04-03 10:53 UTC (permalink / raw)
  To: Ferruh Yigit, Vipin Varghese, dev

Ok Ferruh, I missed that.
Apart for the type in "documentation" the patches are ok with me.

Acked-by: Pascal Mazon <pascal.mazon@6wind.com>

On 03/04/2018 12:05, Ferruh Yigit wrote:
> On 4/3/2018 9:27 AM, Pascal Mazon wrote:
>> Hi,
>>
>> I'm not sure why you split this patch in two.
>> It seemed to me that it was fine to change the doc+logs in the same
>> patch as the code.
>> Especially since documentation change is very small.
> It was my suggestion indeed, to separate actual functional change from routine
> log update noise.
>
>> Anyway, there's just the "documnetation" typo in the commit title,
>> otherwise it's fine by me.
>>
>> Best regards,
>> Pascal
>>
>> On 02/04/2018 23:37, Vipin Varghese wrote:
>>> The changes add TUN|TAP specific logs and documentation support.
>>>
>>> Signed-off-by: Vipin Varghese <vipin.varghese@intel.com>
> <...>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
  2018-04-02 21:37   ` [dpdk-dev] [PATCH 1/2] " Vipin Varghese
  2018-04-02 21:37     ` [dpdk-dev] [PATCH 2/2] net/tap: add tun log and documnetation Vipin Varghese
@ 2018-04-06 17:11     ` Ferruh Yigit
  2018-04-06 17:19       ` Ferruh Yigit
  2018-04-12 11:49     ` Ophir Munk
  2 siblings, 1 reply; 21+ messages in thread
From: Ferruh Yigit @ 2018-04-06 17:11 UTC (permalink / raw)
  To: Vipin Varghese, dev, pascal.mazon

On 4/2/2018 10:37 PM, Vipin Varghese wrote:
> The change adds functional TUN PMD logic to the existing TAP PMD.
> TUN PMD can be initialized with 'net_tunX' where 'X' represents unique id.
> PMD supports argument interface, while MAC address and remote are not
> supported.
> 
> Signed-off-by: Vipin Varghese <vipin.varghese@intel.com>
> ---
> 
> Changes in V3:
>  - fix the TUN kernel packet error - Vipin
>  - seperate out function from logging - Ferruh
>  - add TUN PMD_REGISTER_PARAM_STRING - Ferruh
> 
> Changes in V2:
>  - updated the documentation word error - Pascal

For series,
Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
  2018-04-06 17:11     ` [dpdk-dev] [PATCH 1/2] net/tap: add tun support Ferruh Yigit
@ 2018-04-06 17:19       ` Ferruh Yigit
  0 siblings, 0 replies; 21+ messages in thread
From: Ferruh Yigit @ 2018-04-06 17:19 UTC (permalink / raw)
  To: Vipin Varghese, dev, pascal.mazon

On 4/6/2018 6:11 PM, Ferruh Yigit wrote:
> On 4/2/2018 10:37 PM, Vipin Varghese wrote:
>> The change adds functional TUN PMD logic to the existing TAP PMD.
>> TUN PMD can be initialized with 'net_tunX' where 'X' represents unique id.
>> PMD supports argument interface, while MAC address and remote are not
>> supported.
>>
>> Signed-off-by: Vipin Varghese <vipin.varghese@intel.com>
>> ---
>>
>> Changes in V3:
>>  - fix the TUN kernel packet error - Vipin
>>  - seperate out function from logging - Ferruh
>>  - add TUN PMD_REGISTER_PARAM_STRING - Ferruh
>>
>> Changes in V2:
>>  - updated the documentation word error - Pascal

> For series,
> Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>

Acked-by: Pascal Mazon <pascal.mazon@6wind.com>

Series applied to dpdk-next-net/master, thanks.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
  2018-04-02 21:37   ` [dpdk-dev] [PATCH 1/2] " Vipin Varghese
  2018-04-02 21:37     ` [dpdk-dev] [PATCH 2/2] net/tap: add tun log and documnetation Vipin Varghese
  2018-04-06 17:11     ` [dpdk-dev] [PATCH 1/2] net/tap: add tun support Ferruh Yigit
@ 2018-04-12 11:49     ` Ophir Munk
  2018-04-13  3:18       ` Varghese, Vipin
  2 siblings, 1 reply; 21+ messages in thread
From: Ophir Munk @ 2018-04-12 11:49 UTC (permalink / raw)
  To: Vipin Varghese, dev, pascal.mazon, ferruh.yigit, Thomas Monjalon,
	Olga Shern, Shahaf Shuler

Hi Vipin,
This patch (adding TUN to TAP) has been Acked and accepted in next-net branch.
I have some questions regarding the implementation (please find below).

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Vipin Varghese
> Sent: Tuesday, April 03, 2018 12:38 AM
> To: dev@dpdk.org; pascal.mazon@6wind.com; ferruh.yigit@intel.com
> Cc: Vipin Varghese <vipin.varghese@intel.com>
> Subject: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> 
> The change adds functional TUN PMD logic to the existing TAP PMD.
> TUN PMD can be initialized with 'net_tunX' where 'X' represents unique id.
> PMD supports argument interface, while MAC address and remote are not
> supported.
> 

[...]

> 
> +		/*
> +		 * TUN and TAP are created with IFF_NO_PI disabled.
> +		 * For TUN PMD this mandatory as fields are used by
> +		 * Kernel tun.c to determine whether its IP or non IP
> +		 * packets.
> +		 *
> +		 * The logic fetches the first byte of data from mbuf.
> +		 * compares whether its v4 or v6. If none matches default
> +		 * value 0x00 is taken for protocol field.
> +		 */
> +		char *buff_data = rte_pktmbuf_mtod(seg, void *);
> +		j = (*buff_data & 0xf0);
> +		if (j & (0x40 | 0x60))
> +			pi.proto = (j == 0x40) ? 0x0008 : 0xdd86;
> +

1. Accessing the first byte here assumes it is the first IP header byte (layer 3) which is correct for TUN.
For TAP however the first byte belongs to Ethernet destination address (layer 2). 
Please explain how this logic will work for TAP.

2. If the first TUN byte contains 0x2X (which is neither IPv4 nor IPv6) it will end up by setting ip.proto as 0xdd86. 
Please explain how this logic will work for non-IP packets in TUN

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
  2018-04-12 11:49     ` Ophir Munk
@ 2018-04-13  3:18       ` Varghese, Vipin
  2018-04-20 21:49         ` Ophir Munk
  0 siblings, 1 reply; 21+ messages in thread
From: Varghese, Vipin @ 2018-04-13  3:18 UTC (permalink / raw)
  To: Ophir Munk, dev, pascal.mazon, Yigit, Ferruh, Thomas Monjalon,
	Olga Shern, Shahaf Shuler

Hi Ophir,

Please find my answers inline to the queries.

> -----Original Message-----
> From: Ophir Munk [mailto:ophirmu@mellanox.com]
> Sent: Thursday, April 12, 2018 5:19 PM
> To: Varghese, Vipin <vipin.varghese@intel.com>; dev@dpdk.org;
> pascal.mazon@6wind.com; Yigit, Ferruh <ferruh.yigit@intel.com>; Thomas
> Monjalon <thomas@monjalon.net>; Olga Shern <olgas@mellanox.com>;
> Shahaf Shuler <shahafs@mellanox.com>
> Subject: RE: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> 
> Hi Vipin,
> This patch (adding TUN to TAP) has been Acked and accepted in next-net
> branch.
> I have some questions regarding the implementation (please find below).
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Vipin Varghese
> > Sent: Tuesday, April 03, 2018 12:38 AM
> > To: dev@dpdk.org; pascal.mazon@6wind.com; ferruh.yigit@intel.com
> > Cc: Vipin Varghese <vipin.varghese@intel.com>
> > Subject: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> >
> > The change adds functional TUN PMD logic to the existing TAP PMD.
> > TUN PMD can be initialized with 'net_tunX' where 'X' represents unique id.
> > PMD supports argument interface, while MAC address and remote are not
> > supported.
> >
> 
> [...]
> 
> >
> > +		/*
> > +		 * TUN and TAP are created with IFF_NO_PI disabled.
> > +		 * For TUN PMD this mandatory as fields are used by
> > +		 * Kernel tun.c to determine whether its IP or non IP
> > +		 * packets.
> > +		 *
> > +		 * The logic fetches the first byte of data from mbuf.
> > +		 * compares whether its v4 or v6. If none matches default
> > +		 * value 0x00 is taken for protocol field.
> > +		 */
> > +		char *buff_data = rte_pktmbuf_mtod(seg, void *);
> > +		j = (*buff_data & 0xf0);
> > +		if (j & (0x40 | 0x60))
> > +			pi.proto = (j == 0x40) ? 0x0008 : 0xdd86;
> > +
> 
> 1. Accessing the first byte here assumes it is the first IP header byte (layer 3)
> which is correct for TUN.
> For TAP however the first byte belongs to Ethernet destination address
> (layer 2).
> Please explain how this logic will work for TAP.

Based on linux code base '/driver/net/tap.c' and '/driver/net/tun.c' from 3.13. to  4.16, 

Please find my observation below
1. File: tun.c, function: tun_get_user, check for 'tun->flags & TUN_TYPE_MASK' is done and if non ip is taken counter 'rx_dropped' is updated.
2. File: tap.c, there are no checks for 'tap->flags' for IFF_NO_PI in rx data path. Counter 'rx_dropped' is updated in 'tap_handle_frame'. 

Please find my reasoning below
1. First approach was to have separate function for tap and tun TX and RX. But this will introduce code duplication, hence reworked the code as above.
2. During my internal testing assigning dummy value for protocol field in TAP packets, did not show a difference in behaviour. May be there are some specific cases this failing. 

If there difference in behaviour, can please share the same?

> 
> 2. If the first TUN byte contains 0x2X (which is neither IPv4 nor IPv6) it will
> end up by setting ip.proto as 0xdd86.
> Please explain how this logic will work for non-IP packets in TUN

I see your point. You are correct about this. Thanks for pointing out, may I send correction for this as

"""
-		if (j & (0x40 | 0x60))
-			pi.proto = (j == 0x40) ? 0x0008 : 0xdd86;
+		pi.proto = (j == 0x40) ? 0x0008 : 
+					(j == 0x60) ? 0xdd86 :
+					0x00;
"""

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
  2018-04-13  3:18       ` Varghese, Vipin
@ 2018-04-20 21:49         ` Ophir Munk
  2018-04-20 21:58           ` Ophir Munk
  0 siblings, 1 reply; 21+ messages in thread
From: Ophir Munk @ 2018-04-20 21:49 UTC (permalink / raw)
  To: Varghese, Vipin, dev, pascal.mazon, Yigit, Ferruh,
	Thomas Monjalon, Olga Shern, Shahaf Shuler

Hi Vipin,

Please find comments inline.

> -----Original Message-----
> From: Varghese, Vipin [mailto:vipin.varghese@intel.com]
> Sent: Friday, April 13, 2018 6:18 AM
> To: Ophir Munk <ophirmu@mellanox.com>; dev@dpdk.org;
> pascal.mazon@6wind.com; Yigit, Ferruh <ferruh.yigit@intel.com>; Thomas
> Monjalon <thomas@monjalon.net>; Olga Shern <olgas@mellanox.com>;
> Shahaf Shuler <shahafs@mellanox.com>
> Subject: RE: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> 
> Hi Ophir,
> 
> Please find my answers inline to the queries.
> 
> > -----Original Message-----
> > From: Ophir Munk [mailto:ophirmu@mellanox.com]
> > Sent: Thursday, April 12, 2018 5:19 PM
> > To: Varghese, Vipin <vipin.varghese@intel.com>; dev@dpdk.org;
> > pascal.mazon@6wind.com; Yigit, Ferruh <ferruh.yigit@intel.com>; Thomas
> > Monjalon <thomas@monjalon.net>; Olga Shern <olgas@mellanox.com>;
> > Shahaf Shuler <shahafs@mellanox.com>
> > Subject: RE: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> >
> > Hi Vipin,
> > This patch (adding TUN to TAP) has been Acked and accepted in next-net
> > branch.
> > I have some questions regarding the implementation (please find below).
> >
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Vipin Varghese
> > > Sent: Tuesday, April 03, 2018 12:38 AM
> > > To: dev@dpdk.org; pascal.mazon@6wind.com; ferruh.yigit@intel.com
> > > Cc: Vipin Varghese <vipin.varghese@intel.com>
> > > Subject: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> > >
> > > The change adds functional TUN PMD logic to the existing TAP PMD.
> > > TUN PMD can be initialized with 'net_tunX' where 'X' represents unique
> id.
> > > PMD supports argument interface, while MAC address and remote are
> > > not supported.
> > >
> >
> > [...]
> >
> > >
> > > +		/*
> > > +		 * TUN and TAP are created with IFF_NO_PI disabled.
> > > +		 * For TUN PMD this mandatory as fields are used by
> > > +		 * Kernel tun.c to determine whether its IP or non IP
> > > +		 * packets.
> > > +		 *
> > > +		 * The logic fetches the first byte of data from mbuf.
> > > +		 * compares whether its v4 or v6. If none matches default
> > > +		 * value 0x00 is taken for protocol field.
> > > +		 */
> > > +		char *buff_data = rte_pktmbuf_mtod(seg, void *);
> > > +		j = (*buff_data & 0xf0);
> > > +		if (j & (0x40 | 0x60))
> > > +			pi.proto = (j == 0x40) ? 0x0008 : 0xdd86;
> > > +
> >
> > 1. Accessing the first byte here assumes it is the first IP header
> > byte (layer 3) which is correct for TUN.
> > For TAP however the first byte belongs to Ethernet destination address
> > (layer 2).
> > Please explain how this logic will work for TAP.
> 
> Based on linux code base '/driver/net/tap.c' and '/driver/net/tun.c' from
> 3.13. to  4.16,
> 
> Please find my observation below
> 1. File: tun.c, function: tun_get_user, check for 'tun->flags &
> TUN_TYPE_MASK' is done and if non ip is taken counter 'rx_dropped' is
> updated.
> 2. File: tap.c, there are no checks for 'tap->flags' for IFF_NO_PI in rx data
> path. Counter 'rx_dropped' is updated in 'tap_handle_frame'.
> 

I understand that in kernel implementation there is no check for tap->flags in file tap.c, however I think there is a bug in dpdk rte_eth_tap.c file.
Please find below an example which demonstrates this claim.

> Please find my reasoning below
> 1. First approach was to have separate function for tap and tun TX and RX.
> But this will introduce code duplication, hence reworked the code as above.

I agree. Avoiding code duplication is a good approach. 

> 2. During my internal testing assigning dummy value for protocol field in TAP
> packets, did not show a difference in behaviour. May be there are some
> specific cases this failing.
> 
> If there difference in behaviour, can please share the same?
> 

Please consider the following example:
I am running testpmd with a TAP device, --forward-mode=csum. 
I am injecting a TCP packet, which is forwarded back (mac addresses swapped) to the sender. 
Using gdb I set a breakpoint at pmd_tx_burst() in file rte_eth_tap.c

Looking at the following code inside pmd_tx_burst():

527                 char *buff_data = rte_pktmbuf_mtod(seg, void *);
528                 j = (*buff_data & 0xf0);
529                 pi.proto = (j == 0x40) ? 0x0008 :
530                                 (j == 0x60) ? 0xdd86 : 0x00;

I am printing the first 20 bytes of buff_data in line 527:

(gdb) p/x *(unsigned char *)buff_data@20
$3 = {0x0, 0x25, 0x88, 0x10, 0x66, 0x2, 0xf4, 0x52, 0x14, 0x7a, 0x59, 0x81, 0x8, 0x0, 0x45, 0x0, 0x4, 0xdf, 0x0, 0x1}

The gdb printout refers to:
6 bytes of destination MAC address: 0x0, 0x25, 0x88, 0x10, 0x66, 0x2
6 bytes of source MAC address: 0xf4, 0x52, 0x14, 0x7a, 0x59, 0x81
2 bytes of Ethernet type: 0x8, 0x0 - (IPv4)
IP header starting with 0x45, ... which is the byte (0x45) that "j" should have looked at

In the case of TAP - buff_data starts with the destination MAC address of the sender (0x0, ...). 
The code in line 528 expects that buff_data would start with an IP header protocol (e.g. 0x45), but it is not the case for TAP.
In my case j=0x0 (line 528) which is harmless (as it ends up with setting pi.proto=0x00, which is correct for TAP).
However, if the sender had an Intel NIC - the destination MAC address could have started with:
$3 = {0x40, 0x25, 0xC2, ...
Or-
$3 = {0x64, 0xD4, 0xDA, ...

as 4025C2 and 64D4DA are reserved prefixes for Intel Ethernet MAC addresses, see: http://www.coffer.com/mac_find/?string=intel

In this case pi.proto could end up with 0x0008 or 0xdd86 instead of 0x0 as expected for TAP.

I hope that this example clarifies the bug I am referring to. 

> >
> > 2. If the first TUN byte contains 0x2X (which is neither IPv4 nor
> > IPv6) it will end up by setting ip.proto as 0xdd86.
> > Please explain how this logic will work for non-IP packets in TUN
> 
> I see your point. You are correct about this. Thanks for pointing out, may I
> send correction for this as
> 
> """
> -		if (j & (0x40 | 0x60))
> -			pi.proto = (j == 0x40) ? 0x0008 : 0xdd86;
> +		pi.proto = (j == 0x40) ? 0x0008 :
> +					(j == 0x60) ? 0xdd86 :
> +					0x00;
> """

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
  2018-04-20 21:49         ` Ophir Munk
@ 2018-04-20 21:58           ` Ophir Munk
  2018-04-21 15:09             ` Varghese, Vipin
  0 siblings, 1 reply; 21+ messages in thread
From: Ophir Munk @ 2018-04-20 21:58 UTC (permalink / raw)
  To: Varghese, Vipin, dev, pascal.mazon, Yigit, Ferruh,
	Thomas Monjalon, Olga Shern, Shahaf Shuler

Hi Vipin,
I missed your point:
You claim that TAP should work regardless of any pi.proto values.
Can you confirm that for ALL kernels versions (past and future)?

> -----Original Message-----
> From: Ophir Munk
> Sent: Saturday, April 21, 2018 12:49 AM
> To: Varghese, Vipin <vipin.varghese@intel.com>; dev@dpdk.org;
> pascal.mazon@6wind.com; Yigit, Ferruh <ferruh.yigit@intel.com>; Thomas
> Monjalon <thomas@monjalon.net>; Olga Shern <olgas@mellanox.com>;
> Shahaf Shuler <shahafs@mellanox.com>
> Subject: RE: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> 
> Hi Vipin,
> 
> Please find comments inline.
> 
> > -----Original Message-----
> > From: Varghese, Vipin [mailto:vipin.varghese@intel.com]
> > Sent: Friday, April 13, 2018 6:18 AM
> > To: Ophir Munk <ophirmu@mellanox.com>; dev@dpdk.org;
> > pascal.mazon@6wind.com; Yigit, Ferruh <ferruh.yigit@intel.com>; Thomas
> > Monjalon <thomas@monjalon.net>; Olga Shern <olgas@mellanox.com>;
> > Shahaf Shuler <shahafs@mellanox.com>
> > Subject: RE: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> >
> > Hi Ophir,
> >
> > Please find my answers inline to the queries.
> >
> > > -----Original Message-----
> > > From: Ophir Munk [mailto:ophirmu@mellanox.com]
> > > Sent: Thursday, April 12, 2018 5:19 PM
> > > To: Varghese, Vipin <vipin.varghese@intel.com>; dev@dpdk.org;
> > > pascal.mazon@6wind.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
> > > Thomas Monjalon <thomas@monjalon.net>; Olga Shern
> > > <olgas@mellanox.com>; Shahaf Shuler <shahafs@mellanox.com>
> > > Subject: RE: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> > >
> > > Hi Vipin,
> > > This patch (adding TUN to TAP) has been Acked and accepted in
> > > next-net branch.
> > > I have some questions regarding the implementation (please find below).
> > >
> > > > -----Original Message-----
> > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Vipin
> > > > Varghese
> > > > Sent: Tuesday, April 03, 2018 12:38 AM
> > > > To: dev@dpdk.org; pascal.mazon@6wind.com; ferruh.yigit@intel.com
> > > > Cc: Vipin Varghese <vipin.varghese@intel.com>
> > > > Subject: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> > > >
> > > > The change adds functional TUN PMD logic to the existing TAP PMD.
> > > > TUN PMD can be initialized with 'net_tunX' where 'X' represents
> > > > unique
> > id.
> > > > PMD supports argument interface, while MAC address and remote are
> > > > not supported.
> > > >
> > >
> > > [...]
> > >
> > > >
> > > > +		/*
> > > > +		 * TUN and TAP are created with IFF_NO_PI disabled.
> > > > +		 * For TUN PMD this mandatory as fields are used by
> > > > +		 * Kernel tun.c to determine whether its IP or non IP
> > > > +		 * packets.
> > > > +		 *
> > > > +		 * The logic fetches the first byte of data from mbuf.
> > > > +		 * compares whether its v4 or v6. If none matches default
> > > > +		 * value 0x00 is taken for protocol field.
> > > > +		 */
> > > > +		char *buff_data = rte_pktmbuf_mtod(seg, void *);
> > > > +		j = (*buff_data & 0xf0);
> > > > +		if (j & (0x40 | 0x60))
> > > > +			pi.proto = (j == 0x40) ? 0x0008 : 0xdd86;
> > > > +
> > >
> > > 1. Accessing the first byte here assumes it is the first IP header
> > > byte (layer 3) which is correct for TUN.
> > > For TAP however the first byte belongs to Ethernet destination
> > > address (layer 2).
> > > Please explain how this logic will work for TAP.
> >
> > Based on linux code base '/driver/net/tap.c' and '/driver/net/tun.c'
> > from 3.13. to  4.16,
> >
> > Please find my observation below
> > 1. File: tun.c, function: tun_get_user, check for 'tun->flags &
> > TUN_TYPE_MASK' is done and if non ip is taken counter 'rx_dropped' is
> > updated.
> > 2. File: tap.c, there are no checks for 'tap->flags' for IFF_NO_PI in
> > rx data path. Counter 'rx_dropped' is updated in 'tap_handle_frame'.
> >
> 
> I understand that in kernel implementation there is no check for tap->flags in
> file tap.c, however I think there is a bug in dpdk rte_eth_tap.c file.
> Please find below an example which demonstrates this claim.
> 
> > Please find my reasoning below
> > 1. First approach was to have separate function for tap and tun TX and RX.
> > But this will introduce code duplication, hence reworked the code as
> above.
> 
> I agree. Avoiding code duplication is a good approach.
> 
> > 2. During my internal testing assigning dummy value for protocol field
> > in TAP packets, did not show a difference in behaviour. May be there
> > are some specific cases this failing.
> >
> > If there difference in behaviour, can please share the same?
> >
> 
> Please consider the following example:
> I am running testpmd with a TAP device, --forward-mode=csum.
> I am injecting a TCP packet, which is forwarded back (mac addresses
> swapped) to the sender.
> Using gdb I set a breakpoint at pmd_tx_burst() in file rte_eth_tap.c
> 
> Looking at the following code inside pmd_tx_burst():
> 
> 527                 char *buff_data = rte_pktmbuf_mtod(seg, void *);
> 528                 j = (*buff_data & 0xf0);
> 529                 pi.proto = (j == 0x40) ? 0x0008 :
> 530                                 (j == 0x60) ? 0xdd86 : 0x00;
> 
> I am printing the first 20 bytes of buff_data in line 527:
> 
> (gdb) p/x *(unsigned char *)buff_data@20
> $3 = {0x0, 0x25, 0x88, 0x10, 0x66, 0x2, 0xf4, 0x52, 0x14, 0x7a, 0x59, 0x81,
> 0x8, 0x0, 0x45, 0x0, 0x4, 0xdf, 0x0, 0x1}
> 
> The gdb printout refers to:
> 6 bytes of destination MAC address: 0x0, 0x25, 0x88, 0x10, 0x66, 0x2
> 6 bytes of source MAC address: 0xf4, 0x52, 0x14, 0x7a, 0x59, 0x81
> 2 bytes of Ethernet type: 0x8, 0x0 - (IPv4) IP header starting with 0x45, ...
> which is the byte (0x45) that "j" should have looked at
> 
> In the case of TAP - buff_data starts with the destination MAC address of the
> sender (0x0, ...).
> The code in line 528 expects that buff_data would start with an IP header
> protocol (e.g. 0x45), but it is not the case for TAP.
> In my case j=0x0 (line 528) which is harmless (as it ends up with setting
> pi.proto=0x00, which is correct for TAP).
> However, if the sender had an Intel NIC - the destination MAC address could
> have started with:
> $3 = {0x40, 0x25, 0xC2, ...
> Or-
> $3 = {0x64, 0xD4, 0xDA, ...
> 
> as 4025C2 and 64D4DA are reserved prefixes for Intel Ethernet MAC
> addresses, see: http://www.coffer.com/mac_find/?string=intel
> 
> In this case pi.proto could end up with 0x0008 or 0xdd86 instead of 0x0 as
> expected for TAP.
> 
> I hope that this example clarifies the bug I am referring to.
> 
> > >
> > > 2. If the first TUN byte contains 0x2X (which is neither IPv4 nor
> > > IPv6) it will end up by setting ip.proto as 0xdd86.
> > > Please explain how this logic will work for non-IP packets in TUN
> >
> > I see your point. You are correct about this. Thanks for pointing out,
> > may I send correction for this as
> >
> > """
> > -		if (j & (0x40 | 0x60))
> > -			pi.proto = (j == 0x40) ? 0x0008 : 0xdd86;
> > +		pi.proto = (j == 0x40) ? 0x0008 :
> > +					(j == 0x60) ? 0xdd86 :
> > +					0x00;
> > """

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
  2018-04-20 21:58           ` Ophir Munk
@ 2018-04-21 15:09             ` Varghese, Vipin
  2018-04-23 12:58               ` Varghese, Vipin
  0 siblings, 1 reply; 21+ messages in thread
From: Varghese, Vipin @ 2018-04-21 15:09 UTC (permalink / raw)
  To: Ophir Munk, dev, pascal.mazon, Yigit, Ferruh, Thomas Monjalon,
	Olga Shern, Shahaf Shuler

Hi Ophir,

<Snip>

> Hi Vipin,
> I missed your point:
> You claim that TAP should work regardless of any pi.proto values.
> Can you confirm that for ALL kernels versions (past and future)?

I have tested with 3.13.0 , 4.4.0 with patch fix.

> 
> > -----Original Message-----
> > From: Ophir Munk
> > Sent: Saturday, April 21, 2018 12:49 AM
> > To: Varghese, Vipin <vipin.varghese@intel.com>; dev@dpdk.org;
> > pascal.mazon@6wind.com; Yigit, Ferruh <ferruh.yigit@intel.com>; Thomas
> > Monjalon <thomas@monjalon.net>; Olga Shern <olgas@mellanox.com>;
> > Shahaf Shuler <shahafs@mellanox.com>
> > Subject: RE: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> >
> > Hi Vipin,
> >
> > Please find comments inline.
> >
> > > -----Original Message-----
> > > From: Varghese, Vipin [mailto:vipin.varghese@intel.com]
> > > Sent: Friday, April 13, 2018 6:18 AM
> > > To: Ophir Munk <ophirmu@mellanox.com>; dev@dpdk.org;
> > > pascal.mazon@6wind.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
> > > Thomas Monjalon <thomas@monjalon.net>; Olga Shern
> > > <olgas@mellanox.com>; Shahaf Shuler <shahafs@mellanox.com>
> > > Subject: RE: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> > >

<Snip>

> > > > 1. Accessing the first byte here assumes it is the first IP header
> > > > byte (layer 3) which is correct for TUN.
> > > > For TAP however the first byte belongs to Ethernet destination
> > > > address (layer 2).
> > > > Please explain how this logic will work for TAP.
> > >
> > > Based on linux code base '/driver/net/tap.c' and '/driver/net/tun.c'
> > > from 3.13. to  4.16,
> > >
> > > Please find my observation below
> > > 1. File: tun.c, function: tun_get_user, check for 'tun->flags &
> > > TUN_TYPE_MASK' is done and if non ip is taken counter 'rx_dropped'
> > > is updated.
> > > 2. File: tap.c, there are no checks for 'tap->flags' for IFF_NO_PI
> > > in rx data path. Counter 'rx_dropped' is updated in 'tap_handle_frame'.
> > >
> >
> > I understand that in kernel implementation there is no check for
> > tap->flags in file tap.c, however I think there is a bug in dpdk rte_eth_tap.c
> file.
> > Please find below an example which demonstrates this claim.
> >
> > > Please find my reasoning below
> > > 1. First approach was to have separate function for tap and tun TX and RX.
> > > But this will introduce code duplication, hence reworked the code as
> > above.
> >
> > I agree. Avoiding code duplication is a good approach.
> >
> > > 2. During my internal testing assigning dummy value for protocol
> > > field in TAP packets, did not show a difference in behaviour. May be
> > > there are some specific cases this failing.
> > >
> > > If there difference in behaviour, can please share the same?
> > >
> >
> > Please consider the following example:
> > I am running testpmd with a TAP device, --forward-mode=csum.
> > I am injecting a TCP packet, which is forwarded back (mac addresses
> > swapped) to the sender.
> > Using gdb I set a breakpoint at pmd_tx_burst() in file rte_eth_tap.c
> >
> > Looking at the following code inside pmd_tx_burst():
> >
> > 527                 char *buff_data = rte_pktmbuf_mtod(seg, void *);
> > 528                 j = (*buff_data & 0xf0);
> > 529                 pi.proto = (j == 0x40) ? 0x0008 :
> > 530                                 (j == 0x60) ? 0xdd86 : 0x00;
> >
> > I am printing the first 20 bytes of buff_data in line 527:
> >
> > (gdb) p/x *(unsigned char *)buff_data@20
> > $3 = {0x0, 0x25, 0x88, 0x10, 0x66, 0x2, 0xf4, 0x52, 0x14, 0x7a, 0x59,
> > 0x81, 0x8, 0x0, 0x45, 0x0, 0x4, 0xdf, 0x0, 0x1}
> >
> > The gdb printout refers to:
> > 6 bytes of destination MAC address: 0x0, 0x25, 0x88, 0x10, 0x66, 0x2
> > 6 bytes of source MAC address: 0xf4, 0x52, 0x14, 0x7a, 0x59, 0x81
> > 2 bytes of Ethernet type: 0x8, 0x0 - (IPv4) IP header starting with 0x45, ...
> > which is the byte (0x45) that "j" should have looked at
> >
> > In the case of TAP - buff_data starts with the destination MAC address
> > of the sender (0x0, ...).
> > The code in line 528 expects that buff_data would start with an IP
> > header protocol (e.g. 0x45), but it is not the case for TAP.
> > In my case j=0x0 (line 528) which is harmless (as it ends up with
> > setting pi.proto=0x00, which is correct for TAP).
> > However, if the sender had an Intel NIC - the destination MAC address
> > could have started with:
> > $3 = {0x40, 0x25, 0xC2, ...
> > Or-
> > $3 = {0x64, 0xD4, 0xDA, ...
> >
> > as 4025C2 and 64D4DA are reserved prefixes for Intel Ethernet MAC
> > addresses, see: http://www.coffer.com/mac_find/?string=intel
> >
> > In this case pi.proto could end up with 0x0008 or 0xdd86 instead of
> > 0x0 as expected for TAP.
> >
> > I hope that this example clarifies the bug I am referring to.
> >

Thanks for sharing detailed example overview. But as you mentioned this will break ' 4025C2' and ' 64D4DA', 
This will not solve for the correction patch  ' https://dpdk.org/dev/patchwork/patch/37986/'. 

Only choice left is separate tx_burst for TAP and TUN PMD, as we do not want to check PMD type on each call. 

Questions:
1) Is this ok to split tx_burst and have redundant code?
2) Does applications transparently send packets coming from Physical NIC to TAP interface? Does not the application
Modifies the DEST MAC addr to TAP interface?

> > > >
> > > > 2. If the first TUN byte contains 0x2X (which is neither IPv4 nor
> > > > IPv6) it will end up by setting ip.proto as 0xdd86.
> > > > Please explain how this logic will work for non-IP packets in TUN
> > >
> > > I see your point. You are correct about this. Thanks for pointing
> > > out, may I send correction for this as
> > >
> > > """
> > > -		if (j & (0x40 | 0x60))
> > > -			pi.proto = (j == 0x40) ? 0x0008 : 0xdd86;
> > > +		pi.proto = (j == 0x40) ? 0x0008 :
> > > +					(j == 0x60) ? 0xdd86 :
> > > +					0x00;
> > > """

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
  2018-04-21 15:09             ` Varghese, Vipin
@ 2018-04-23 12:58               ` Varghese, Vipin
  2018-04-23 15:42                 ` Ferruh Yigit
  0 siblings, 1 reply; 21+ messages in thread
From: Varghese, Vipin @ 2018-04-23 12:58 UTC (permalink / raw)
  To: Varghese, Vipin, Ophir Munk, dev, pascal.mazon, Yigit, Ferruh,
	Thomas Monjalon, Olga Shern, Shahaf Shuler

Hi Ophir,

Can you help me with the investigation with the following information?
1) The kernel or distro in which the TAP proto flag set breaks the logic?
2) Is the above still valid even after applying the patch ' https://dpdk.org/dev/patchwork/patch/37986/'?

Note: I am testing with 3.13.0, 4.4.0 and 4.13.0.

Thanks
Vipin Varghese

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Varghese, Vipin
> Sent: Saturday, April 21, 2018 8:40 PM
> To: Ophir Munk <ophirmu@mellanox.com>; dev@dpdk.org;
> pascal.mazon@6wind.com; Yigit, Ferruh <ferruh.yigit@intel.com>; Thomas
> Monjalon <thomas@monjalon.net>; Olga Shern <olgas@mellanox.com>;
> Shahaf Shuler <shahafs@mellanox.com>
> Subject: Re: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> 
> Hi Ophir,
> 
> <Snip>
> 
> > Hi Vipin,
> > I missed your point:
> > You claim that TAP should work regardless of any pi.proto values.
> > Can you confirm that for ALL kernels versions (past and future)?
> 
> I have tested with 3.13.0 , 4.4.0 with patch fix.
> 
> >
> > > -----Original Message-----
> > > From: Ophir Munk
> > > Sent: Saturday, April 21, 2018 12:49 AM
> > > To: Varghese, Vipin <vipin.varghese@intel.com>; dev@dpdk.org;
> > > pascal.mazon@6wind.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
> > > Thomas Monjalon <thomas@monjalon.net>; Olga Shern
> > > <olgas@mellanox.com>; Shahaf Shuler <shahafs@mellanox.com>
> > > Subject: RE: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> > >
> > > Hi Vipin,
> > >
> > > Please find comments inline.
> > >
> > > > -----Original Message-----
> > > > From: Varghese, Vipin [mailto:vipin.varghese@intel.com]
> > > > Sent: Friday, April 13, 2018 6:18 AM
> > > > To: Ophir Munk <ophirmu@mellanox.com>; dev@dpdk.org;
> > > > pascal.mazon@6wind.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
> > > > Thomas Monjalon <thomas@monjalon.net>; Olga Shern
> > > > <olgas@mellanox.com>; Shahaf Shuler <shahafs@mellanox.com>
> > > > Subject: RE: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> > > >
> 
> <Snip>
> 
> > > > > 1. Accessing the first byte here assumes it is the first IP
> > > > > header byte (layer 3) which is correct for TUN.
> > > > > For TAP however the first byte belongs to Ethernet destination
> > > > > address (layer 2).
> > > > > Please explain how this logic will work for TAP.
> > > >
> > > > Based on linux code base '/driver/net/tap.c' and '/driver/net/tun.c'
> > > > from 3.13. to  4.16,
> > > >
> > > > Please find my observation below
> > > > 1. File: tun.c, function: tun_get_user, check for 'tun->flags &
> > > > TUN_TYPE_MASK' is done and if non ip is taken counter 'rx_dropped'
> > > > is updated.
> > > > 2. File: tap.c, there are no checks for 'tap->flags' for IFF_NO_PI
> > > > in rx data path. Counter 'rx_dropped' is updated in 'tap_handle_frame'.
> > > >
> > >
> > > I understand that in kernel implementation there is no check for
> > > tap->flags in file tap.c, however I think there is a bug in dpdk
> > > tap->rte_eth_tap.c
> > file.
> > > Please find below an example which demonstrates this claim.
> > >
> > > > Please find my reasoning below
> > > > 1. First approach was to have separate function for tap and tun TX and
> RX.
> > > > But this will introduce code duplication, hence reworked the code
> > > > as
> > > above.
> > >
> > > I agree. Avoiding code duplication is a good approach.
> > >
> > > > 2. During my internal testing assigning dummy value for protocol
> > > > field in TAP packets, did not show a difference in behaviour. May
> > > > be there are some specific cases this failing.
> > > >
> > > > If there difference in behaviour, can please share the same?
> > > >
> > >
> > > Please consider the following example:
> > > I am running testpmd with a TAP device, --forward-mode=csum.
> > > I am injecting a TCP packet, which is forwarded back (mac addresses
> > > swapped) to the sender.
> > > Using gdb I set a breakpoint at pmd_tx_burst() in file rte_eth_tap.c
> > >
> > > Looking at the following code inside pmd_tx_burst():
> > >
> > > 527                 char *buff_data = rte_pktmbuf_mtod(seg, void *);
> > > 528                 j = (*buff_data & 0xf0);
> > > 529                 pi.proto = (j == 0x40) ? 0x0008 :
> > > 530                                 (j == 0x60) ? 0xdd86 : 0x00;
> > >
> > > I am printing the first 20 bytes of buff_data in line 527:
> > >
> > > (gdb) p/x *(unsigned char *)buff_data@20
> > > $3 = {0x0, 0x25, 0x88, 0x10, 0x66, 0x2, 0xf4, 0x52, 0x14, 0x7a,
> > > 0x59, 0x81, 0x8, 0x0, 0x45, 0x0, 0x4, 0xdf, 0x0, 0x1}
> > >
> > > The gdb printout refers to:
> > > 6 bytes of destination MAC address: 0x0, 0x25, 0x88, 0x10, 0x66, 0x2
> > > 6 bytes of source MAC address: 0xf4, 0x52, 0x14, 0x7a, 0x59, 0x81
> > > 2 bytes of Ethernet type: 0x8, 0x0 - (IPv4) IP header starting with 0x45, ...
> > > which is the byte (0x45) that "j" should have looked at
> > >
> > > In the case of TAP - buff_data starts with the destination MAC
> > > address of the sender (0x0, ...).
> > > The code in line 528 expects that buff_data would start with an IP
> > > header protocol (e.g. 0x45), but it is not the case for TAP.
> > > In my case j=0x0 (line 528) which is harmless (as it ends up with
> > > setting pi.proto=0x00, which is correct for TAP).
> > > However, if the sender had an Intel NIC - the destination MAC
> > > address could have started with:
> > > $3 = {0x40, 0x25, 0xC2, ...
> > > Or-
> > > $3 = {0x64, 0xD4, 0xDA, ...
> > >
> > > as 4025C2 and 64D4DA are reserved prefixes for Intel Ethernet MAC
> > > addresses, see: http://www.coffer.com/mac_find/?string=intel
> > >
> > > In this case pi.proto could end up with 0x0008 or 0xdd86 instead of
> > > 0x0 as expected for TAP.
> > >
> > > I hope that this example clarifies the bug I am referring to.
> > >
> 
> Thanks for sharing detailed example overview. But as you mentioned this will
> break ' 4025C2' and ' 64D4DA', This will not solve for the correction patch  '
> https://dpdk.org/dev/patchwork/patch/37986/'.
> 
> Only choice left is separate tx_burst for TAP and TUN PMD, as we do not
> want to check PMD type on each call.
> 
> Questions:
> 1) Is this ok to split tx_burst and have redundant code?
> 2) Does applications transparently send packets coming from Physical NIC to
> TAP interface? Does not the application Modifies the DEST MAC addr to TAP
> interface?
> 
> > > > >
> > > > > 2. If the first TUN byte contains 0x2X (which is neither IPv4
> > > > > nor
> > > > > IPv6) it will end up by setting ip.proto as 0xdd86.
> > > > > Please explain how this logic will work for non-IP packets in
> > > > > TUN
> > > >
> > > > I see your point. You are correct about this. Thanks for pointing
> > > > out, may I send correction for this as
> > > >
> > > > """
> > > > -		if (j & (0x40 | 0x60))
> > > > -			pi.proto = (j == 0x40) ? 0x0008 : 0xdd86;
> > > > +		pi.proto = (j == 0x40) ? 0x0008 :
> > > > +					(j == 0x60) ? 0xdd86 :
> > > > +					0x00;
> > > > """

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
  2018-04-23 12:58               ` Varghese, Vipin
@ 2018-04-23 15:42                 ` Ferruh Yigit
  2018-05-03  5:59                   ` Varghese, Vipin
  0 siblings, 1 reply; 21+ messages in thread
From: Ferruh Yigit @ 2018-04-23 15:42 UTC (permalink / raw)
  To: Varghese, Vipin, Ophir Munk, dev, pascal.mazon, Thomas Monjalon,
	Olga Shern, Shahaf Shuler

On 4/23/2018 1:58 PM, Varghese, Vipin wrote:
> Hi Ophir,
> 
> Can you help me with the investigation with the following information?
> 1) The kernel or distro in which the TAP proto flag set breaks the logic?

Hi Vipin,

I guess Ophir's point is not this is broken with some kernels but a valid field
set wrong for tap, perhaps someone can be using a custom kernel module to use
those fields, we can't know it.

Instead of duplicating [rt]x_burst() functions, I suggest creating a variable to
set if this is tun or tap and set pi.proto only for tun, this will lead less
comparison for tap and correct proto value.

> 2) Is the above still valid even after applying the patch ' https://dpdk.org/dev/patchwork/patch/37986/'?

I guess his concern is for tap, that some MAC addresses cause wrong pi.proto,
not for tun which your patch fixes.

> 
> Note: I am testing with 3.13.0, 4.4.0 and 4.13.0.
> 
> Thanks
> Vipin Varghese
> 
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Varghese, Vipin
>> Sent: Saturday, April 21, 2018 8:40 PM
>> To: Ophir Munk <ophirmu@mellanox.com>; dev@dpdk.org;
>> pascal.mazon@6wind.com; Yigit, Ferruh <ferruh.yigit@intel.com>; Thomas
>> Monjalon <thomas@monjalon.net>; Olga Shern <olgas@mellanox.com>;
>> Shahaf Shuler <shahafs@mellanox.com>
>> Subject: Re: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
>>
>> Hi Ophir,
>>
>> <Snip>
>>
>>> Hi Vipin,
>>> I missed your point:
>>> You claim that TAP should work regardless of any pi.proto values.
>>> Can you confirm that for ALL kernels versions (past and future)?
>>
>> I have tested with 3.13.0 , 4.4.0 with patch fix.
>>
>>>
>>>> -----Original Message-----
>>>> From: Ophir Munk
>>>> Sent: Saturday, April 21, 2018 12:49 AM
>>>> To: Varghese, Vipin <vipin.varghese@intel.com>; dev@dpdk.org;
>>>> pascal.mazon@6wind.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
>>>> Thomas Monjalon <thomas@monjalon.net>; Olga Shern
>>>> <olgas@mellanox.com>; Shahaf Shuler <shahafs@mellanox.com>
>>>> Subject: RE: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
>>>>
>>>> Hi Vipin,
>>>>
>>>> Please find comments inline.
>>>>
>>>>> -----Original Message-----
>>>>> From: Varghese, Vipin [mailto:vipin.varghese@intel.com]
>>>>> Sent: Friday, April 13, 2018 6:18 AM
>>>>> To: Ophir Munk <ophirmu@mellanox.com>; dev@dpdk.org;
>>>>> pascal.mazon@6wind.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
>>>>> Thomas Monjalon <thomas@monjalon.net>; Olga Shern
>>>>> <olgas@mellanox.com>; Shahaf Shuler <shahafs@mellanox.com>
>>>>> Subject: RE: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
>>>>>
>>
>> <Snip>
>>
>>>>>> 1. Accessing the first byte here assumes it is the first IP
>>>>>> header byte (layer 3) which is correct for TUN.
>>>>>> For TAP however the first byte belongs to Ethernet destination
>>>>>> address (layer 2).
>>>>>> Please explain how this logic will work for TAP.
>>>>>
>>>>> Based on linux code base '/driver/net/tap.c' and '/driver/net/tun.c'
>>>>> from 3.13. to  4.16,
>>>>>
>>>>> Please find my observation below
>>>>> 1. File: tun.c, function: tun_get_user, check for 'tun->flags &
>>>>> TUN_TYPE_MASK' is done and if non ip is taken counter 'rx_dropped'
>>>>> is updated.
>>>>> 2. File: tap.c, there are no checks for 'tap->flags' for IFF_NO_PI
>>>>> in rx data path. Counter 'rx_dropped' is updated in 'tap_handle_frame'.
>>>>>
>>>>
>>>> I understand that in kernel implementation there is no check for
>>>> tap->flags in file tap.c, however I think there is a bug in dpdk
>>>> tap->rte_eth_tap.c
>>> file.
>>>> Please find below an example which demonstrates this claim.
>>>>
>>>>> Please find my reasoning below
>>>>> 1. First approach was to have separate function for tap and tun TX and
>> RX.
>>>>> But this will introduce code duplication, hence reworked the code
>>>>> as
>>>> above.
>>>>
>>>> I agree. Avoiding code duplication is a good approach.
>>>>
>>>>> 2. During my internal testing assigning dummy value for protocol
>>>>> field in TAP packets, did not show a difference in behaviour. May
>>>>> be there are some specific cases this failing.
>>>>>
>>>>> If there difference in behaviour, can please share the same?
>>>>>
>>>>
>>>> Please consider the following example:
>>>> I am running testpmd with a TAP device, --forward-mode=csum.
>>>> I am injecting a TCP packet, which is forwarded back (mac addresses
>>>> swapped) to the sender.
>>>> Using gdb I set a breakpoint at pmd_tx_burst() in file rte_eth_tap.c
>>>>
>>>> Looking at the following code inside pmd_tx_burst():
>>>>
>>>> 527                 char *buff_data = rte_pktmbuf_mtod(seg, void *);
>>>> 528                 j = (*buff_data & 0xf0);
>>>> 529                 pi.proto = (j == 0x40) ? 0x0008 :
>>>> 530                                 (j == 0x60) ? 0xdd86 : 0x00;
>>>>
>>>> I am printing the first 20 bytes of buff_data in line 527:
>>>>
>>>> (gdb) p/x *(unsigned char *)buff_data@20
>>>> $3 = {0x0, 0x25, 0x88, 0x10, 0x66, 0x2, 0xf4, 0x52, 0x14, 0x7a,
>>>> 0x59, 0x81, 0x8, 0x0, 0x45, 0x0, 0x4, 0xdf, 0x0, 0x1}
>>>>
>>>> The gdb printout refers to:
>>>> 6 bytes of destination MAC address: 0x0, 0x25, 0x88, 0x10, 0x66, 0x2
>>>> 6 bytes of source MAC address: 0xf4, 0x52, 0x14, 0x7a, 0x59, 0x81
>>>> 2 bytes of Ethernet type: 0x8, 0x0 - (IPv4) IP header starting with 0x45, ...
>>>> which is the byte (0x45) that "j" should have looked at
>>>>
>>>> In the case of TAP - buff_data starts with the destination MAC
>>>> address of the sender (0x0, ...).
>>>> The code in line 528 expects that buff_data would start with an IP
>>>> header protocol (e.g. 0x45), but it is not the case for TAP.
>>>> In my case j=0x0 (line 528) which is harmless (as it ends up with
>>>> setting pi.proto=0x00, which is correct for TAP).
>>>> However, if the sender had an Intel NIC - the destination MAC
>>>> address could have started with:
>>>> $3 = {0x40, 0x25, 0xC2, ...
>>>> Or-
>>>> $3 = {0x64, 0xD4, 0xDA, ...
>>>>
>>>> as 4025C2 and 64D4DA are reserved prefixes for Intel Ethernet MAC
>>>> addresses, see: http://www.coffer.com/mac_find/?string=intel
>>>>
>>>> In this case pi.proto could end up with 0x0008 or 0xdd86 instead of
>>>> 0x0 as expected for TAP.
>>>>
>>>> I hope that this example clarifies the bug I am referring to.
>>>>
>>
>> Thanks for sharing detailed example overview. But as you mentioned this will
>> break ' 4025C2' and ' 64D4DA', This will not solve for the correction patch  '
>> https://dpdk.org/dev/patchwork/patch/37986/'.
>>
>> Only choice left is separate tx_burst for TAP and TUN PMD, as we do not
>> want to check PMD type on each call.
>>
>> Questions:
>> 1) Is this ok to split tx_burst and have redundant code?
>> 2) Does applications transparently send packets coming from Physical NIC to
>> TAP interface? Does not the application Modifies the DEST MAC addr to TAP
>> interface?
>>
>>>>>>
>>>>>> 2. If the first TUN byte contains 0x2X (which is neither IPv4
>>>>>> nor
>>>>>> IPv6) it will end up by setting ip.proto as 0xdd86.
>>>>>> Please explain how this logic will work for non-IP packets in
>>>>>> TUN
>>>>>
>>>>> I see your point. You are correct about this. Thanks for pointing
>>>>> out, may I send correction for this as
>>>>>
>>>>> """
>>>>> -		if (j & (0x40 | 0x60))
>>>>> -			pi.proto = (j == 0x40) ? 0x0008 : 0xdd86;
>>>>> +		pi.proto = (j == 0x40) ? 0x0008 :
>>>>> +					(j == 0x60) ? 0xdd86 :
>>>>> +					0x00;
>>>>> """

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
  2018-04-23 15:42                 ` Ferruh Yigit
@ 2018-05-03  5:59                   ` Varghese, Vipin
  0 siblings, 0 replies; 21+ messages in thread
From: Varghese, Vipin @ 2018-05-03  5:59 UTC (permalink / raw)
  To: Yigit, Ferruh, Ophir Munk, dev, pascal.mazon, Thomas Monjalon,
	Olga Shern, Shahaf Shuler

Hi Ophir,

Shall I investigate and implement the suggestion from Ferruh to use variable logic?

Thanks
Vipin Varghese

> -----Original Message-----
> From: Yigit, Ferruh
> Sent: Monday, April 23, 2018 9:12 PM
> To: Varghese, Vipin <vipin.varghese@intel.com>; Ophir Munk
> <ophirmu@mellanox.com>; dev@dpdk.org; pascal.mazon@6wind.com;
> Thomas Monjalon <thomas@monjalon.net>; Olga Shern
> <olgas@mellanox.com>; Shahaf Shuler <shahafs@mellanox.com>
> Subject: Re: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> 
> On 4/23/2018 1:58 PM, Varghese, Vipin wrote:
> > Hi Ophir,
> >
> > Can you help me with the investigation with the following information?
> > 1) The kernel or distro in which the TAP proto flag set breaks the logic?
> 
> Hi Vipin,
> 
> I guess Ophir's point is not this is broken with some kernels but a valid field
> set wrong for tap, perhaps someone can be using a custom kernel module to
> use those fields, we can't know it.
> 
> Instead of duplicating [rt]x_burst() functions, I suggest creating a variable to
> set if this is tun or tap and set pi.proto only for tun, this will lead less
> comparison for tap and correct proto value.
> 
> > 2) Is the above still valid even after applying the patch '
> https://dpdk.org/dev/patchwork/patch/37986/'?
> 
> I guess his concern is for tap, that some MAC addresses cause wrong
> pi.proto, not for tun which your patch fixes.
> 
> >
> > Note: I am testing with 3.13.0, 4.4.0 and 4.13.0.
> >
> > Thanks
> > Vipin Varghese
> >
> >> -----Original Message-----
> >> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Varghese, Vipin
> >> Sent: Saturday, April 21, 2018 8:40 PM
> >> To: Ophir Munk <ophirmu@mellanox.com>; dev@dpdk.org;
> >> pascal.mazon@6wind.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
> >> Thomas Monjalon <thomas@monjalon.net>; Olga Shern
> >> <olgas@mellanox.com>; Shahaf Shuler <shahafs@mellanox.com>
> >> Subject: Re: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> >>
> >> Hi Ophir,
> >>
> >> <Snip>
> >>
> >>> Hi Vipin,
> >>> I missed your point:
> >>> You claim that TAP should work regardless of any pi.proto values.
> >>> Can you confirm that for ALL kernels versions (past and future)?
> >>
> >> I have tested with 3.13.0 , 4.4.0 with patch fix.
> >>
> >>>
> >>>> -----Original Message-----
> >>>> From: Ophir Munk
> >>>> Sent: Saturday, April 21, 2018 12:49 AM
> >>>> To: Varghese, Vipin <vipin.varghese@intel.com>; dev@dpdk.org;
> >>>> pascal.mazon@6wind.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
> >>>> Thomas Monjalon <thomas@monjalon.net>; Olga Shern
> >>>> <olgas@mellanox.com>; Shahaf Shuler <shahafs@mellanox.com>
> >>>> Subject: RE: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> >>>>
> >>>> Hi Vipin,
> >>>>
> >>>> Please find comments inline.
> >>>>
> >>>>> -----Original Message-----
> >>>>> From: Varghese, Vipin [mailto:vipin.varghese@intel.com]
> >>>>> Sent: Friday, April 13, 2018 6:18 AM
> >>>>> To: Ophir Munk <ophirmu@mellanox.com>; dev@dpdk.org;
> >>>>> pascal.mazon@6wind.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
> >>>>> Thomas Monjalon <thomas@monjalon.net>; Olga Shern
> >>>>> <olgas@mellanox.com>; Shahaf Shuler <shahafs@mellanox.com>
> >>>>> Subject: RE: [dpdk-dev] [PATCH 1/2] net/tap: add tun support
> >>>>>
> >>
> >> <Snip>
> >>
> >>>>>> 1. Accessing the first byte here assumes it is the first IP
> >>>>>> header byte (layer 3) which is correct for TUN.
> >>>>>> For TAP however the first byte belongs to Ethernet destination
> >>>>>> address (layer 2).
> >>>>>> Please explain how this logic will work for TAP.
> >>>>>
> >>>>> Based on linux code base '/driver/net/tap.c' and '/driver/net/tun.c'
> >>>>> from 3.13. to  4.16,
> >>>>>
> >>>>> Please find my observation below
> >>>>> 1. File: tun.c, function: tun_get_user, check for 'tun->flags &
> >>>>> TUN_TYPE_MASK' is done and if non ip is taken counter 'rx_dropped'
> >>>>> is updated.
> >>>>> 2. File: tap.c, there are no checks for 'tap->flags' for IFF_NO_PI
> >>>>> in rx data path. Counter 'rx_dropped' is updated in
> 'tap_handle_frame'.
> >>>>>
> >>>>
> >>>> I understand that in kernel implementation there is no check for
> >>>> tap->flags in file tap.c, however I think there is a bug in dpdk
> >>>> tap->rte_eth_tap.c
> >>> file.
> >>>> Please find below an example which demonstrates this claim.
> >>>>
> >>>>> Please find my reasoning below
> >>>>> 1. First approach was to have separate function for tap and tun TX
> >>>>> and
> >> RX.
> >>>>> But this will introduce code duplication, hence reworked the code
> >>>>> as
> >>>> above.
> >>>>
> >>>> I agree. Avoiding code duplication is a good approach.
> >>>>
> >>>>> 2. During my internal testing assigning dummy value for protocol
> >>>>> field in TAP packets, did not show a difference in behaviour. May
> >>>>> be there are some specific cases this failing.
> >>>>>
> >>>>> If there difference in behaviour, can please share the same?
> >>>>>
> >>>>
> >>>> Please consider the following example:
> >>>> I am running testpmd with a TAP device, --forward-mode=csum.
> >>>> I am injecting a TCP packet, which is forwarded back (mac addresses
> >>>> swapped) to the sender.
> >>>> Using gdb I set a breakpoint at pmd_tx_burst() in file
> >>>> rte_eth_tap.c
> >>>>
> >>>> Looking at the following code inside pmd_tx_burst():
> >>>>
> >>>> 527                 char *buff_data = rte_pktmbuf_mtod(seg, void *);
> >>>> 528                 j = (*buff_data & 0xf0);
> >>>> 529                 pi.proto = (j == 0x40) ? 0x0008 :
> >>>> 530                                 (j == 0x60) ? 0xdd86 : 0x00;
> >>>>
> >>>> I am printing the first 20 bytes of buff_data in line 527:
> >>>>
> >>>> (gdb) p/x *(unsigned char *)buff_data@20
> >>>> $3 = {0x0, 0x25, 0x88, 0x10, 0x66, 0x2, 0xf4, 0x52, 0x14, 0x7a,
> >>>> 0x59, 0x81, 0x8, 0x0, 0x45, 0x0, 0x4, 0xdf, 0x0, 0x1}
> >>>>
> >>>> The gdb printout refers to:
> >>>> 6 bytes of destination MAC address: 0x0, 0x25, 0x88, 0x10, 0x66,
> >>>> 0x2
> >>>> 6 bytes of source MAC address: 0xf4, 0x52, 0x14, 0x7a, 0x59, 0x81
> >>>> 2 bytes of Ethernet type: 0x8, 0x0 - (IPv4) IP header starting with 0x45,
> ...
> >>>> which is the byte (0x45) that "j" should have looked at
> >>>>
> >>>> In the case of TAP - buff_data starts with the destination MAC
> >>>> address of the sender (0x0, ...).
> >>>> The code in line 528 expects that buff_data would start with an IP
> >>>> header protocol (e.g. 0x45), but it is not the case for TAP.
> >>>> In my case j=0x0 (line 528) which is harmless (as it ends up with
> >>>> setting pi.proto=0x00, which is correct for TAP).
> >>>> However, if the sender had an Intel NIC - the destination MAC
> >>>> address could have started with:
> >>>> $3 = {0x40, 0x25, 0xC2, ...
> >>>> Or-
> >>>> $3 = {0x64, 0xD4, 0xDA, ...
> >>>>
> >>>> as 4025C2 and 64D4DA are reserved prefixes for Intel Ethernet MAC
> >>>> addresses, see: http://www.coffer.com/mac_find/?string=intel
> >>>>
> >>>> In this case pi.proto could end up with 0x0008 or 0xdd86 instead of
> >>>> 0x0 as expected for TAP.
> >>>>
> >>>> I hope that this example clarifies the bug I am referring to.
> >>>>
> >>
> >> Thanks for sharing detailed example overview. But as you mentioned
> >> this will break ' 4025C2' and ' 64D4DA', This will not solve for the correction
> patch  '
> >> https://dpdk.org/dev/patchwork/patch/37986/'.
> >>
> >> Only choice left is separate tx_burst for TAP and TUN PMD, as we do
> >> not want to check PMD type on each call.
> >>
> >> Questions:
> >> 1) Is this ok to split tx_burst and have redundant code?
> >> 2) Does applications transparently send packets coming from Physical
> >> NIC to TAP interface? Does not the application Modifies the DEST MAC
> >> addr to TAP interface?
> >>
> >>>>>>
> >>>>>> 2. If the first TUN byte contains 0x2X (which is neither IPv4 nor
> >>>>>> IPv6) it will end up by setting ip.proto as 0xdd86.
> >>>>>> Please explain how this logic will work for non-IP packets in TUN
> >>>>>
> >>>>> I see your point. You are correct about this. Thanks for pointing
> >>>>> out, may I send correction for this as
> >>>>>
> >>>>> """
> >>>>> -		if (j & (0x40 | 0x60))
> >>>>> -			pi.proto = (j == 0x40) ? 0x0008 : 0xdd86;
> >>>>> +		pi.proto = (j == 0x40) ? 0x0008 :
> >>>>> +					(j == 0x60) ? 0xdd86 :
> >>>>> +					0x00;
> >>>>> """


^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2018-05-03  5:59 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-20 23:12 [dpdk-dev] [PATCH v1] net/tap: add tun support Vipin Varghese
2018-02-22 12:22 ` [dpdk-dev] [dpdk-dev,v1] " Pascal Mazon
2018-02-26 11:01   ` Varghese, Vipin
2018-02-26  6:15 ` [dpdk-dev] [PATCH v2] " Vipin Varghese
2018-02-27 13:06   ` Pascal Mazon
2018-04-02 21:37   ` [dpdk-dev] [PATCH 1/2] " Vipin Varghese
2018-04-02 21:37     ` [dpdk-dev] [PATCH 2/2] net/tap: add tun log and documnetation Vipin Varghese
2018-04-03  8:27       ` Pascal Mazon
2018-04-03 10:05         ` Ferruh Yigit
2018-04-03 10:53           ` Pascal Mazon
2018-04-03 10:05         ` Ferruh Yigit
2018-04-06 17:11     ` [dpdk-dev] [PATCH 1/2] net/tap: add tun support Ferruh Yigit
2018-04-06 17:19       ` Ferruh Yigit
2018-04-12 11:49     ` Ophir Munk
2018-04-13  3:18       ` Varghese, Vipin
2018-04-20 21:49         ` Ophir Munk
2018-04-20 21:58           ` Ophir Munk
2018-04-21 15:09             ` Varghese, Vipin
2018-04-23 12:58               ` Varghese, Vipin
2018-04-23 15:42                 ` Ferruh Yigit
2018-05-03  5:59                   ` Varghese, Vipin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).