- * [dpdk-dev] [PATCH v8 01/10] librte_mbuf:the rte_mbuf structure changes
  2014-10-27  2:13 [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville Jijiang Liu
@ 2014-10-27  2:13 ` Jijiang Liu
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 02/10] librte_ether:add the basic data structures of VxLAN Jijiang Liu
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 47+ messages in thread
From: Jijiang Liu @ 2014-10-27  2:13 UTC (permalink / raw)
  To: dev
Replace the "reserved2" field with the "packet_type" field and add the "inner_l2_l3_len" field in the rte_mbuf structure.
The "packet_type" field is used to indicate ordinary packet format and also tunneling packet format such as IP in IP, IP in GRE, MAC in GRE and MAC in UDP.
The "inner_l2_len" and the "inner_l3_len" fields are added in the second cache line, they use 2 bytes for TX offloading of tunnels.
Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
---
 lib/librte_mbuf/rte_mbuf.h |   25 ++++++++++++++++++++++++-
 1 files changed, 24 insertions(+), 1 deletions(-)
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index ddadc21..497d88b 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -163,7 +163,14 @@ struct rte_mbuf {
 
 	/* remaining bytes are set on RX when pulling packet from descriptor */
 	MARKER rx_descriptor_fields1;
-	uint16_t reserved2;       /**< Unused field. Required for padding */
+
+	/**
+	 * The packet type, which is used to indicate ordinary packet and also
+	 * tunneled packet format, i.e. each number is represented a type of
+	 * packet.
+	 */
+	uint16_t packet_type;
+
 	uint16_t data_len;        /**< Amount of data in segment buffer. */
 	uint32_t pkt_len;         /**< Total pkt len: sum of all segments. */
 	uint16_t vlan_tci;        /**< VLAN Tag Control Identifier (CPU order) */
@@ -196,6 +203,18 @@ struct rte_mbuf {
 			uint16_t l2_len:7;      /**< L2 (MAC) Header Length. */
 		};
 	};
+
+	/* fields for TX offloading of tunnels */
+	union {
+		uint16_t inner_l2_l3_len;
+		/**< combined inner l2/l3 lengths as single var */
+		struct {
+			uint16_t inner_l3_len:9;
+			/**< inner L3 (IP) Header Length. */
+			uint16_t inner_l2_len:7;
+			/**< inner L2 (MAC) Header Length. */
+		};
+	};
 } __rte_cache_aligned;
 
 /**
@@ -546,11 +565,13 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
 	m->next = NULL;
 	m->pkt_len = 0;
 	m->l2_l3_len = 0;
+	m->inner_l2_l3_len = 0;
 	m->vlan_tci = 0;
 	m->nb_segs = 1;
 	m->port = 0xff;
 
 	m->ol_flags = 0;
+	m->packet_type = 0;
 	m->data_off = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ?
 			RTE_PKTMBUF_HEADROOM : m->buf_len;
 
@@ -614,12 +635,14 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *md)
 	mi->port = md->port;
 	mi->vlan_tci = md->vlan_tci;
 	mi->l2_l3_len = md->l2_l3_len;
+	mi->inner_l2_l3_len = md->inner_l2_l3_len;
 	mi->hash = md->hash;
 
 	mi->next = NULL;
 	mi->pkt_len = mi->data_len;
 	mi->nb_segs = 1;
 	mi->ol_flags = md->ol_flags;
+	mi->packet_type = md->packet_type;
 
 	__rte_mbuf_sanity_check(mi, 1);
 	__rte_mbuf_sanity_check(md, 0);
-- 
1.7.7.6
^ permalink raw reply	[flat|nested] 47+ messages in thread
- * [dpdk-dev] [PATCH v8 02/10] librte_ether:add the basic data structures of VxLAN
  2014-10-27  2:13 [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville Jijiang Liu
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 01/10] librte_mbuf:the rte_mbuf structure changes Jijiang Liu
@ 2014-10-27  2:13 ` Jijiang Liu
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 03/10] librte_ether:add VxLAN packet identification API Jijiang Liu
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 47+ messages in thread
From: Jijiang Liu @ 2014-10-27  2:13 UTC (permalink / raw)
  To: dev
Add definations of basic data structures of VxLAN.
Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
---
 lib/librte_ether/rte_eth_ctrl.h |   12 ++++++++++++
 lib/librte_ether/rte_ethdev.h   |    8 ++++++++
 lib/librte_ether/rte_ether.h    |   13 +++++++++++++
 3 files changed, 33 insertions(+), 0 deletions(-)
diff --git a/lib/librte_ether/rte_eth_ctrl.h b/lib/librte_ether/rte_eth_ctrl.h
index df21ac6..9a90d19 100644
--- a/lib/librte_ether/rte_eth_ctrl.h
+++ b/lib/librte_ether/rte_eth_ctrl.h
@@ -71,6 +71,18 @@ enum rte_filter_op {
 	RTE_ETH_FILTER_OP_MAX
 };
 
+/**
+ * Tunneled type.
+ */
+enum rte_eth_tunnel_type {
+	RTE_TUNNEL_TYPE_NONE = 0,
+	RTE_TUNNEL_TYPE_VXLAN,
+	RTE_TUNNEL_TYPE_GENEVE,
+	RTE_TUNNEL_TYPE_TEREDO,
+	RTE_TUNNEL_TYPE_NVGRE,
+	RTE_TUNNEL_TYPE_MAX,
+};
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index b69a6af..46a5568 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -710,6 +710,14 @@ struct rte_fdir_conf {
 };
 
 /**
+ * UDP tunneling configuration.
+ */
+struct rte_eth_udp_tunnel {
+	uint16_t udp_port;
+	uint8_t prot_type;
+};
+
+/**
  *  Possible l4type of FDIR filters.
  */
 enum rte_l4type {
diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h
index 2e08f23..100cc52 100644
--- a/lib/librte_ether/rte_ether.h
+++ b/lib/librte_ether/rte_ether.h
@@ -286,6 +286,16 @@ struct vlan_hdr {
 	uint16_t eth_proto;/**< Ethernet type of encapsulated frame. */
 } __attribute__((__packed__));
 
+/**
+ * VXLAN protocol header.
+ * Contains the 8-bit flag, 24-bit VXLAN Network Identifier and
+ * Reserved fields (24 bits and 8 bits)
+ */
+struct vxlan_hdr {
+	uint32_t vx_flags; /**< flag (8) + Reserved (24). */
+	uint32_t vx_vni;   /**< VNI (24) + Reserved (8). */
+} __attribute__((__packed__));
+
 /* Ethernet frame types */
 #define ETHER_TYPE_IPv4 0x0800 /**< IPv4 Protocol. */
 #define ETHER_TYPE_IPv6 0x86DD /**< IPv6 Protocol. */
@@ -294,6 +304,9 @@ struct vlan_hdr {
 #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
 #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */
 
+#define ETHER_VXLAN_HLEN (sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr))
+/**< VxLAN tunnel header length. */
+
 #ifdef __cplusplus
 }
 #endif
-- 
1.7.7.6
^ permalink raw reply	[flat|nested] 47+ messages in thread
- * [dpdk-dev] [PATCH v8 03/10] librte_ether:add VxLAN packet identification API
  2014-10-27  2:13 [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville Jijiang Liu
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 01/10] librte_mbuf:the rte_mbuf structure changes Jijiang Liu
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 02/10] librte_ether:add the basic data structures of VxLAN Jijiang Liu
@ 2014-10-27  2:13 ` Jijiang Liu
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 04/10] i40e:support VxLAN packet identification in i40e Jijiang Liu
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 47+ messages in thread
From: Jijiang Liu @ 2014-10-27  2:13 UTC (permalink / raw)
  To: dev
There are "some" destination UDP port numbers that have unque meaning.
In terms of VxLAN, "IANA has assigned the value 4789 for the VXLAN UDP port, and this value SHOULD be used by default as the destination UDP port. Some early implementations of VXLAN have used other values for the destination port. To enable interoperability with these implementations, the destination port SHOULD be configurable."
Add two APIs in librte_ether for supporting UDP tunneling port configuration on i40e.
Currently, only VxLAN is implemented in this patch set.
Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
---
 lib/librte_ether/rte_ethdev.c |   52 +++++++++++++++++++++++++++++++++++++++++
 lib/librte_ether/rte_ethdev.h |   46 ++++++++++++++++++++++++++++++++++++
 2 files changed, 98 insertions(+), 0 deletions(-)
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 50f10d9..ff1c769 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -2038,6 +2038,58 @@ rte_eth_dev_rss_hash_conf_get(uint8_t port_id,
 }
 
 int
+rte_eth_dev_udp_tunnel_add(uint8_t port_id,
+			   struct rte_eth_udp_tunnel *udp_tunnel)
+{
+	struct rte_eth_dev *dev;
+
+	if (port_id >= nb_ports) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	if (udp_tunnel == NULL) {
+		PMD_DEBUG_TRACE("Invalid udp_tunnel parameter\n");
+		return -EINVAL;
+	}
+
+	if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) {
+		PMD_DEBUG_TRACE("Invalid tunnel type\n");
+		return -EINVAL;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->udp_tunnel_add, -ENOTSUP);
+	return (*dev->dev_ops->udp_tunnel_add)(dev, udp_tunnel);
+}
+
+int
+rte_eth_dev_udp_tunnel_delete(uint8_t port_id,
+			      struct rte_eth_udp_tunnel *udp_tunnel)
+{
+	struct rte_eth_dev *dev;
+
+	if (port_id >= nb_ports) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+	dev = &rte_eth_devices[port_id];
+
+	if (udp_tunnel == NULL) {
+		PMD_DEBUG_TRACE("Invalid udp_tunnel parametr\n");
+		return -EINVAL;
+	}
+
+	if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) {
+		PMD_DEBUG_TRACE("Invalid tunnel type\n");
+		return -EINVAL;
+	}
+
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->udp_tunnel_del, -ENOTSUP);
+	return (*dev->dev_ops->udp_tunnel_del)(dev, udp_tunnel);
+}
+
+int
 rte_eth_led_on(uint8_t port_id)
 {
 	struct rte_eth_dev *dev;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 46a5568..8bf274d 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1274,6 +1274,15 @@ typedef int (*eth_mirror_rule_reset_t)(struct rte_eth_dev *dev,
 				  uint8_t rule_id);
 /**< @internal Remove a traffic mirroring rule on an Ethernet device */
 
+typedef int (*eth_udp_tunnel_add_t)(struct rte_eth_dev *dev,
+				    struct rte_eth_udp_tunnel *tunnel_udp);
+/**< @internal Add tunneling UDP info */
+
+typedef int (*eth_udp_tunnel_del_t)(struct rte_eth_dev *dev,
+				    struct rte_eth_udp_tunnel *tunnel_udp);
+/**< @internal Delete tunneling UDP info */
+
+
 #ifdef RTE_NIC_BYPASS
 
 enum {
@@ -1454,6 +1463,8 @@ struct eth_dev_ops {
 	eth_set_vf_rx_t            set_vf_rx;  /**< enable/disable a VF receive */
 	eth_set_vf_tx_t            set_vf_tx;  /**< enable/disable a VF transmit */
 	eth_set_vf_vlan_filter_t   set_vf_vlan_filter;  /**< Set VF VLAN filter */
+	eth_udp_tunnel_add_t       udp_tunnel_add;
+	eth_udp_tunnel_del_t       udp_tunnel_del;
 	eth_set_queue_rate_limit_t set_queue_rate_limit;   /**< Set queue rate limit */
 	eth_set_vf_rate_limit_t    set_vf_rate_limit;   /**< Set VF rate limit */
 
@@ -3350,6 +3361,41 @@ int
 rte_eth_dev_rss_hash_conf_get(uint8_t port_id,
 			      struct rte_eth_rss_conf *rss_conf);
 
+ /**
+ * Add UDP tunneling port of an Ethernet device for filtering a specific
+ * tunneling packet by UDP port number.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param tunnel_udp
+ *   UDP tunneling configuration.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if port identifier is invalid.
+ *   - (-ENOTSUP) if hardware doesn't support tunnel type.
+ */
+int
+rte_eth_dev_udp_tunnel_add(uint8_t port_id,
+			   struct rte_eth_udp_tunnel *tunnel_udp);
+
+ /**
+ * Detete UDP tunneling port configuration of Ethernet device
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param tunnel_udp
+ *   UDP tunneling configuration.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if port identifier is invalid.
+ *   - (-ENOTSUP) if hardware doesn't support tunnel type.
+ */
+int
+rte_eth_dev_udp_tunnel_delete(uint8_t port_id,
+			      struct rte_eth_udp_tunnel *tunnel_udp);
+
 /**
  * add syn filter
  *
-- 
1.7.7.6
^ permalink raw reply	[flat|nested] 47+ messages in thread
- * [dpdk-dev] [PATCH v8 04/10] i40e:support VxLAN packet identification in i40e
  2014-10-27  2:13 [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville Jijiang Liu
                   ` (2 preceding siblings ...)
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 03/10] librte_ether:add VxLAN packet identification API Jijiang Liu
@ 2014-10-27  2:13 ` Jijiang Liu
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 05/10] app/test-pmd:test VxLAN packet identification Jijiang Liu
                   ` (7 subsequent siblings)
  11 siblings, 0 replies; 47+ messages in thread
From: Jijiang Liu @ 2014-10-27  2:13 UTC (permalink / raw)
  To: dev
Implement the configuration API of VxLAN destination UDP port in librte_pmd_i40e,
and add new Rx offload flags for supporting VXLAN packet offload.
Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
---
 lib/librte_mbuf/rte_mbuf.h        |    2 +
 lib/librte_pmd_i40e/i40e_ethdev.c |  157 +++++++++++++++++++++++++++++++++++++
 lib/librte_pmd_i40e/i40e_ethdev.h |    8 ++-
 lib/librte_pmd_i40e/i40e_rxtx.c   |  105 +++++++++++++-----------
 4 files changed, 223 insertions(+), 49 deletions(-)
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 497d88b..9af3bd9 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -91,6 +91,8 @@ extern "C" {
 #define PKT_RX_IPV6_HDR_EXT  (1ULL << 8)  /**< RX packet with extended IPv6 header. */
 #define PKT_RX_IEEE1588_PTP  (1ULL << 9)  /**< RX IEEE1588 L2 Ethernet PT Packet. */
 #define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/
+#define PKT_RX_TUNNEL_IPV4_HDR (1ULL << 11) /**< RX tunnel packet with IPv4 header.*/
+#define PKT_RX_TUNNEL_IPV6_HDR (1ULL << 12) /**< RX tunnel packet with IPv6 header. */
 
 #define PKT_TX_VLAN_PKT      (1ULL << 55) /**< TX packet is a 802.1q VLAN packet. */
 #define PKT_TX_IP_CKSUM      (1ULL << 54) /**< IP cksum of TX pkt. computed by NIC. */
diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c b/lib/librte_pmd_i40e/i40e_ethdev.c
index 3b75f0f..eb643e5 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev.c
@@ -186,6 +186,10 @@ static int i40e_dev_rss_hash_update(struct rte_eth_dev *dev,
 				    struct rte_eth_rss_conf *rss_conf);
 static int i40e_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
 				      struct rte_eth_rss_conf *rss_conf);
+static int i40e_dev_udp_tunnel_add(struct rte_eth_dev *dev,
+				struct rte_eth_udp_tunnel *udp_tunnel);
+static int i40e_dev_udp_tunnel_del(struct rte_eth_dev *dev,
+				struct rte_eth_udp_tunnel *udp_tunnel);
 static int i40e_dev_filter_ctrl(struct rte_eth_dev *dev,
 				enum rte_filter_type filter_type,
 				enum rte_filter_op filter_op,
@@ -241,6 +245,8 @@ static struct eth_dev_ops i40e_eth_dev_ops = {
 	.reta_query                   = i40e_dev_rss_reta_query,
 	.rss_hash_update              = i40e_dev_rss_hash_update,
 	.rss_hash_conf_get            = i40e_dev_rss_hash_conf_get,
+	.udp_tunnel_add               = i40e_dev_udp_tunnel_add,
+	.udp_tunnel_del               = i40e_dev_udp_tunnel_del,
 	.filter_ctrl                  = i40e_dev_filter_ctrl,
 };
 
@@ -4092,6 +4098,157 @@ i40e_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
+static int
+i40e_get_vxlan_port_idx(struct i40e_pf *pf, uint16_t port)
+{
+	uint8_t i;
+
+	for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) {
+		if (pf->vxlan_ports[i] == port)
+			return i;
+	}
+
+	return -1;
+}
+
+static int
+i40e_add_vxlan_port(struct i40e_pf *pf, uint16_t port)
+{
+	int  idx, ret;
+	uint8_t filter_idx;
+	struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+
+	idx = i40e_get_vxlan_port_idx(pf, port);
+
+	/* Check if port already exists */
+	if (idx >= 0) {
+		PMD_DRV_LOG(ERR, "Port %d already offloaded\n", port);
+		return -EINVAL;
+	}
+
+	/* Now check if there is space to add the new port */
+	idx = i40e_get_vxlan_port_idx(pf, 0);
+	if (idx < 0) {
+		PMD_DRV_LOG(ERR, "Maximum number of UDP ports reached,"
+			"not adding port %d\n", port);
+		return -ENOSPC;
+	}
+
+	ret =  i40e_aq_add_udp_tunnel(hw, port, I40E_AQC_TUNNEL_TYPE_VXLAN,
+					&filter_idx, NULL);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Failed to add VxLAN UDP port %d\n", port);
+		return -1;
+	}
+
+	PMD_DRV_LOG(INFO, "Added %s port %d with AQ command with index %d\n",
+			 port,  filter_index);
+
+	/* New port: add it and mark its index in the bitmap */
+	pf->vxlan_ports[idx] = port;
+	pf->vxlan_bitmap |= (1 << idx);
+
+	if (!(pf->flags & I40E_FLAG_VXLAN))
+		pf->flags |= I40E_FLAG_VXLAN;
+
+	return 0;
+}
+
+static int
+i40e_del_vxlan_port(struct i40e_pf *pf, uint16_t port)
+{
+	int idx;
+	struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+
+	if (!(pf->flags & I40E_FLAG_VXLAN)) {
+		PMD_DRV_LOG(ERR, "VxLAN UDP port was not configured.\n");
+		return -EINVAL;
+	}
+
+	idx = i40e_get_vxlan_port_idx(pf, port);
+
+	if (idx < 0) {
+		PMD_DRV_LOG(ERR, "Port %d doesn't exist\n", port);
+		return -EINVAL;
+	}
+
+	if (i40e_aq_del_udp_tunnel(hw, idx, NULL) < 0) {
+		PMD_DRV_LOG(ERR, "Failed to delete VxLAN UDP port %d\n", port);
+		return -1;
+	}
+
+	PMD_DRV_LOG(INFO, "Deleted port %d with AQ command with index %d\n",
+			port, idx);
+
+	pf->vxlan_ports[idx] = 0;
+	pf->vxlan_bitmap &= ~(1 << idx);
+
+	if (!pf->vxlan_bitmap)
+		pf->flags &= ~I40E_FLAG_VXLAN;
+
+	return 0;
+}
+
+/* Add UDP tunneling port */
+static int
+i40e_dev_udp_tunnel_add(struct rte_eth_dev *dev,
+			struct rte_eth_udp_tunnel *udp_tunnel)
+{
+	int ret = 0;
+	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+
+	if (udp_tunnel == NULL)
+		return -EINVAL;
+
+	switch (udp_tunnel->prot_type) {
+	case RTE_TUNNEL_TYPE_VXLAN:
+		ret = i40e_add_vxlan_port(pf, udp_tunnel->udp_port);
+		break;
+
+	case RTE_TUNNEL_TYPE_GENEVE:
+	case RTE_TUNNEL_TYPE_TEREDO:
+		PMD_DRV_LOG(ERR, "Tunnel type is not supported now.\n");
+		ret = -1;
+		break;
+
+	default:
+		PMD_DRV_LOG(ERR, "Invalid tunnel type\n");
+		ret = -1;
+		break;
+	}
+
+	return ret;
+}
+
+/* Remove UDP tunneling port */
+static int
+i40e_dev_udp_tunnel_del(struct rte_eth_dev *dev,
+			struct rte_eth_udp_tunnel *udp_tunnel)
+{
+	int ret = 0;
+	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+
+	if (udp_tunnel == NULL)
+		return -EINVAL;
+
+	switch (udp_tunnel->prot_type) {
+	case RTE_TUNNEL_TYPE_VXLAN:
+		ret = i40e_del_vxlan_port(pf, udp_tunnel->udp_port);
+		break;
+	case RTE_TUNNEL_TYPE_GENEVE:
+	case RTE_TUNNEL_TYPE_TEREDO:
+		PMD_DRV_LOG(ERR, "Tunnel type is not supported now.\n");
+		ret = -1;
+		break;
+	default:
+		PMD_DRV_LOG(ERR, "Invalid tunnel type\n");
+		ret = -1;
+		break;
+	}
+
+	return ret;
+}
+
 /* Configure RSS */
 static int
 i40e_pf_config_rss(struct i40e_pf *pf)
diff --git a/lib/librte_pmd_i40e/i40e_ethdev.h b/lib/librte_pmd_i40e/i40e_ethdev.h
index 1d42cd2..826a1fb 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev.h
+++ b/lib/librte_pmd_i40e/i40e_ethdev.h
@@ -60,13 +60,15 @@
 #define I40E_FLAG_HEADER_SPLIT_DISABLED (1ULL << 4)
 #define I40E_FLAG_HEADER_SPLIT_ENABLED  (1ULL << 5)
 #define I40E_FLAG_FDIR                  (1ULL << 6)
+#define I40E_FLAG_VXLAN                 (1ULL << 7)
 #define I40E_FLAG_ALL (I40E_FLAG_RSS | \
 		       I40E_FLAG_DCB | \
 		       I40E_FLAG_VMDQ | \
 		       I40E_FLAG_SRIOV | \
 		       I40E_FLAG_HEADER_SPLIT_DISABLED | \
 		       I40E_FLAG_HEADER_SPLIT_ENABLED | \
-		       I40E_FLAG_FDIR)
+		       I40E_FLAG_FDIR | \
+		       I40E_FLAG_VXLAN)
 
 #define I40E_RSS_OFFLOAD_ALL ( \
 	ETH_RSS_NONF_IPV4_UDP | \
@@ -246,6 +248,10 @@ struct i40e_pf {
 	uint16_t vmdq_nb_qps; /* The number of queue pairs of VMDq */
 	uint16_t vf_nb_qps; /* The number of queue pairs of VF */
 	uint16_t fdir_nb_qps; /* The number of queue pairs of Flow Director */
+
+	/* store VxLAN UDP ports */
+	uint16_t vxlan_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS];
+	uint16_t vxlan_bitmap; /* Vxlan bit mask */
 };
 
 enum pending_msg {
diff --git a/lib/librte_pmd_i40e/i40e_rxtx.c b/lib/librte_pmd_i40e/i40e_rxtx.c
index 2b53677..2108290 100644
--- a/lib/librte_pmd_i40e/i40e_rxtx.c
+++ b/lib/librte_pmd_i40e/i40e_rxtx.c
@@ -208,34 +208,34 @@ i40e_rxd_ptype_to_pkt_flags(uint64_t qword)
 		PKT_RX_IPV4_HDR_EXT, /* PTYPE 56 */
 		PKT_RX_IPV4_HDR_EXT, /* PTYPE 57 */
 		PKT_RX_IPV4_HDR_EXT, /* PTYPE 58 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 59 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 60 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 61 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 59 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 60 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 61 */
 		0, /* PTYPE 62 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 63 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 64 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 65 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 66 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 67 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 68 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 63 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 64 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 65 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 66 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 67 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 68 */
 		0, /* PTYPE 69 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 70 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 71 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 72 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 73 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 74 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 75 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 76 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 70 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 71 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 72 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 73 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 74 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 75 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 76 */
 		0, /* PTYPE 77 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 78 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 79 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 80 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 81 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 82 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 83 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 78 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 79 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 80 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 81 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 82 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 83 */
 		0, /* PTYPE 84 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 85 */
-		PKT_RX_IPV4_HDR_EXT, /* PTYPE 86 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 85 */
+		PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 86 */
 		PKT_RX_IPV4_HDR_EXT, /* PTYPE 87 */
 		PKT_RX_IPV6_HDR, /* PTYPE 88 */
 		PKT_RX_IPV6_HDR, /* PTYPE 89 */
@@ -274,34 +274,34 @@ i40e_rxd_ptype_to_pkt_flags(uint64_t qword)
 		PKT_RX_IPV6_HDR_EXT, /* PTYPE 122 */
 		PKT_RX_IPV6_HDR_EXT, /* PTYPE 123 */
 		PKT_RX_IPV6_HDR_EXT, /* PTYPE 124 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 125 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 126 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 127 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 125 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 126 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 127 */
 		0, /* PTYPE 128 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 129 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 130 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 131 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 132 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 133 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 134 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 129 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 130 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 131 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 132 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 133 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 134 */
 		0, /* PTYPE 135 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 136 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 137 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 138 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 139 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 140 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 141 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 142 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 136 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 137 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 138 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 139 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 140 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 141 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 142 */
 		0, /* PTYPE 143 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 144 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 145 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 146 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 147 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 148 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 149 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 144 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 145 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 146 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 147 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 148 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 149 */
 		0, /* PTYPE 150 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 151 */
-		PKT_RX_IPV6_HDR_EXT, /* PTYPE 152 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 151 */
+		PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 152 */
 		PKT_RX_IPV6_HDR_EXT, /* PTYPE 153 */
 		0, /* PTYPE 154 */
 		0, /* PTYPE 155 */
@@ -638,6 +638,10 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
 			pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
 			pkt_flags |= i40e_rxd_ptype_to_pkt_flags(qword1);
 			mb->ol_flags = pkt_flags;
+
+			mb->packet_type = (uint16_t)((qword1 &
+					I40E_RXD_QW1_PTYPE_MASK) >>
+					I40E_RXD_QW1_PTYPE_SHIFT);
 			if (pkt_flags & PKT_RX_RSS_HASH)
 				mb->hash.rss = rte_le_to_cpu_32(\
 					rxdp->wb.qword0.hi_dword.rss);
@@ -873,6 +877,8 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
 		pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
 		pkt_flags |= i40e_rxd_ptype_to_pkt_flags(qword1);
+		rxm->packet_type = (uint16_t)((qword1 & I40E_RXD_QW1_PTYPE_MASK) >>
+				I40E_RXD_QW1_PTYPE_SHIFT);
 		rxm->ol_flags = pkt_flags;
 		if (pkt_flags & PKT_RX_RSS_HASH)
 			rxm->hash.rss =
@@ -1027,6 +1033,9 @@ i40e_recv_scattered_pkts(void *rx_queue,
 		pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
 		pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
 		pkt_flags |= i40e_rxd_ptype_to_pkt_flags(qword1);
+		first_seg->packet_type = (uint16_t)((qword1 &
+					I40E_RXD_QW1_PTYPE_MASK) >>
+					I40E_RXD_QW1_PTYPE_SHIFT);
 		first_seg->ol_flags = pkt_flags;
 		if (pkt_flags & PKT_RX_RSS_HASH)
 			rxm->hash.rss =
-- 
1.7.7.6
^ permalink raw reply	[flat|nested] 47+ messages in thread
- * [dpdk-dev] [PATCH v8 05/10] app/test-pmd:test VxLAN packet identification
  2014-10-27  2:13 [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville Jijiang Liu
                   ` (3 preceding siblings ...)
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 04/10] i40e:support VxLAN packet identification in i40e Jijiang Liu
@ 2014-10-27  2:13 ` Jijiang Liu
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 06/10] librte_ether:add data structures of VxLAN filter Jijiang Liu
                   ` (6 subsequent siblings)
  11 siblings, 0 replies; 47+ messages in thread
From: Jijiang Liu @ 2014-10-27  2:13 UTC (permalink / raw)
  To: dev
Add two commands to test VxLAN packet identification.
The test steps are as follows:
 1> use commands to add/delete VxLAN UDP port.
 2> use rxonly mode to receive VxLAN packet.
Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
---
 app/test-pmd/cmdline.c |   65 ++++++++++++++++++++++++++++++++++++++++++++++++
 app/test-pmd/rxonly.c  |   55 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 118 insertions(+), 2 deletions(-)
diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 0b972f9..4d7b4d1 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -285,6 +285,12 @@ static void cmd_help_long_parsed(void *parsed_result,
 			"    Set the outer VLAN TPID for Packet Filtering on"
 			" a port\n\n"
 
+			"rx_vxlan_port add (udp_port) (port_id)\n"
+			"    Add an UDP port for VxLAN packet filter on a port\n\n"
+
+			"rx_vxlan_port rm (udp_port) (port_id)\n"
+			"    Remove an UDP port for VxLAN packet filter on a port\n\n"
+
 			"tx_vlan set vlan_id (port_id)\n"
 			"    Set hardware insertion of VLAN ID in packets sent"
 			" on a port.\n\n"
@@ -6225,6 +6231,64 @@ cmdline_parse_inst_t cmd_vf_rate_limit = {
 	},
 };
 
+/* *** CONFIGURE TUNNEL UDP PORT *** */
+struct cmd_tunnel_udp_config {
+	cmdline_fixed_string_t cmd;
+	cmdline_fixed_string_t what;
+	uint16_t udp_port;
+	uint8_t port_id;
+};
+
+static void
+cmd_tunnel_udp_config_parsed(void *parsed_result,
+			  __attribute__((unused)) struct cmdline *cl,
+			  __attribute__((unused)) void *data)
+{
+	struct cmd_tunnel_udp_config *res = parsed_result;
+	struct rte_eth_udp_tunnel tunnel_udp;
+	int ret;
+
+	tunnel_udp.udp_port = res->udp_port;
+
+	if (!strcmp(res->cmd, "rx_vxlan_port"))
+		tunnel_udp.prot_type = RTE_TUNNEL_TYPE_VXLAN;
+
+	if (!strcmp(res->what, "add"))
+		ret = rte_eth_dev_udp_tunnel_add(res->port_id, &tunnel_udp);
+	else
+		ret = rte_eth_dev_udp_tunnel_delete(res->port_id, &tunnel_udp);
+
+	if (ret < 0)
+		printf("udp tunneling add error: (%s)\n", strerror(-ret));
+}
+
+cmdline_parse_token_string_t cmd_tunnel_udp_config_cmd =
+	TOKEN_STRING_INITIALIZER(struct cmd_tunnel_udp_config,
+				cmd, "rx_vxlan_port");
+cmdline_parse_token_string_t cmd_tunnel_udp_config_what =
+	TOKEN_STRING_INITIALIZER(struct cmd_tunnel_udp_config,
+				what, "add#rm");
+cmdline_parse_token_num_t cmd_tunnel_udp_config_udp_port =
+	TOKEN_NUM_INITIALIZER(struct cmd_tunnel_udp_config,
+				udp_port, UINT16);
+cmdline_parse_token_num_t cmd_tunnel_udp_config_port_id =
+	TOKEN_NUM_INITIALIZER(struct cmd_tunnel_udp_config,
+				port_id, UINT8);
+
+cmdline_parse_inst_t cmd_tunnel_udp_config = {
+	.f = cmd_tunnel_udp_config_parsed,
+	.data = (void *)0,
+	.help_str = "add/rm an tunneling UDP port filter: "
+			"rx_vxlan_port add udp_port port_id",
+	.tokens = {
+		(void *)&cmd_tunnel_udp_config_cmd,
+		(void *)&cmd_tunnel_udp_config_what,
+		(void *)&cmd_tunnel_udp_config_udp_port,
+		(void *)&cmd_tunnel_udp_config_port_id,
+		NULL,
+	},
+};
+
 /* *** CONFIGURE VM MIRROR VLAN/POOL RULE *** */
 struct cmd_set_mirror_mask_result {
 	cmdline_fixed_string_t set;
@@ -7518,6 +7582,7 @@ cmdline_parse_ctx_t main_ctx[] = {
 	(cmdline_parse_inst_t *)&cmd_vf_rxvlan_filter,
 	(cmdline_parse_inst_t *)&cmd_queue_rate_limit,
 	(cmdline_parse_inst_t *)&cmd_vf_rate_limit,
+	(cmdline_parse_inst_t *)&cmd_tunnel_udp_config,
 	(cmdline_parse_inst_t *)&cmd_set_mirror_mask,
 	(cmdline_parse_inst_t *)&cmd_set_mirror_link,
 	(cmdline_parse_inst_t *)&cmd_reset_mirror_rule,
diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c
index 98c788b..d3be62e 100644
--- a/app/test-pmd/rxonly.c
+++ b/app/test-pmd/rxonly.c
@@ -66,10 +66,12 @@
 #include <rte_ether.h>
 #include <rte_ethdev.h>
 #include <rte_string_fns.h>
+#include <rte_ip.h>
+#include <rte_udp.h>
 
 #include "testpmd.h"
 
-#define MAX_PKT_RX_FLAGS 11
+#define MAX_PKT_RX_FLAGS 13
 static const char *pkt_rx_flag_names[MAX_PKT_RX_FLAGS] = {
 	"VLAN_PKT",
 	"RSS_HASH",
@@ -84,6 +86,9 @@ static const char *pkt_rx_flag_names[MAX_PKT_RX_FLAGS] = {
 
 	"IEEE1588_PTP",
 	"IEEE1588_TMST",
+
+	"TUNNEL_IPV4_HDR",
+	"TUNNEL_IPV6_HDR",
 };
 
 static inline void
@@ -111,7 +116,9 @@ pkt_burst_receive(struct fwd_stream *fs)
 	uint16_t eth_type;
 	uint64_t ol_flags;
 	uint16_t nb_rx;
-	uint16_t i;
+	uint16_t i, packet_type;
+	uint64_t is_encapsulation;
+
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	uint64_t start_tsc;
 	uint64_t end_tsc;
@@ -152,6 +159,11 @@ pkt_burst_receive(struct fwd_stream *fs)
 		eth_hdr = rte_pktmbuf_mtod(mb, struct ether_hdr *);
 		eth_type = RTE_BE_TO_CPU_16(eth_hdr->ether_type);
 		ol_flags = mb->ol_flags;
+		packet_type = mb->packet_type;
+
+		is_encapsulation = ol_flags & (PKT_RX_TUNNEL_IPV4_HDR |
+				PKT_RX_TUNNEL_IPV6_HDR);
+
 		print_ether_addr("  src=", ð_hdr->s_addr);
 		print_ether_addr(" - dst=", ð_hdr->d_addr);
 		printf(" - type=0x%04x - length=%u - nb_segs=%d",
@@ -166,6 +178,45 @@ pkt_burst_receive(struct fwd_stream *fs)
 			       mb->hash.fdir.hash, mb->hash.fdir.id);
 		if (ol_flags & PKT_RX_VLAN_PKT)
 			printf(" - VLAN tci=0x%x", mb->vlan_tci);
+		if (is_encapsulation) {
+			struct ipv4_hdr *ipv4_hdr;
+			struct ipv6_hdr *ipv6_hdr;
+			struct udp_hdr *udp_hdr;
+			uint8_t l2_len;
+			uint8_t l3_len;
+			uint8_t l4_len;
+			uint8_t l4_proto;
+			struct  vxlan_hdr *vxlan_hdr;
+
+			l2_len  = sizeof(struct ether_hdr);
+
+			 /* Do not support ipv4 option field */
+			if (ol_flags & PKT_RX_TUNNEL_IPV4_HDR) {
+				l3_len = sizeof(struct ipv4_hdr);
+				ipv4_hdr = (struct ipv4_hdr *) (rte_pktmbuf_mtod(mb,
+						unsigned char *) + l2_len);
+				l4_proto = ipv4_hdr->next_proto_id;
+			} else {
+				l3_len = sizeof(struct ipv6_hdr);
+				ipv6_hdr = (struct ipv6_hdr *) (rte_pktmbuf_mtod(mb,
+						unsigned char *) + l2_len);
+				l4_proto = ipv6_hdr->proto;
+			}
+			if (l4_proto == IPPROTO_UDP) {
+				udp_hdr = (struct udp_hdr *) (rte_pktmbuf_mtod(mb,
+						unsigned char *) + l2_len + l3_len);
+				l4_len = sizeof(struct udp_hdr);
+				vxlan_hdr = (struct vxlan_hdr *) (rte_pktmbuf_mtod(mb,
+						unsigned char *) + l2_len + l3_len
+						 + l4_len);
+
+				printf(" - VxLAN packet: packet type =%d, "
+					"Destination UDP port =%d, VNI = %d",
+					packet_type, RTE_BE_TO_CPU_16(udp_hdr->dst_port),
+					rte_be_to_cpu_32(vxlan_hdr->vx_vni) >> 8);
+			}
+		}
+		printf(" - Receive queue=0x%x", (unsigned) fs->rx_queue);
 		printf("\n");
 		if (ol_flags != 0) {
 			int rxf;
-- 
1.7.7.6
^ permalink raw reply	[flat|nested] 47+ messages in thread
- * [dpdk-dev] [PATCH v8 06/10] librte_ether:add data structures of VxLAN filter
  2014-10-27  2:13 [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville Jijiang Liu
                   ` (4 preceding siblings ...)
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 05/10] app/test-pmd:test VxLAN packet identification Jijiang Liu
@ 2014-10-27  2:13 ` Jijiang Liu
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 07/10] i40e:implement the API of VxLAN filter in librte_pmd_i40e Jijiang Liu
                   ` (5 subsequent siblings)
  11 siblings, 0 replies; 47+ messages in thread
From: Jijiang Liu @ 2014-10-27  2:13 UTC (permalink / raw)
  To: dev
Add definations of the data structures of tunneling packet filter in the rte_eth_ctrl.h file.
Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
---
 lib/librte_ether/rte_eth_ctrl.h |   49 +++++++++++++++++++++++++++++++++++++++
 1 files changed, 49 insertions(+), 0 deletions(-)
diff --git a/lib/librte_ether/rte_eth_ctrl.h b/lib/librte_ether/rte_eth_ctrl.h
index 9a90d19..b4ab731 100644
--- a/lib/librte_ether/rte_eth_ctrl.h
+++ b/lib/librte_ether/rte_eth_ctrl.h
@@ -51,6 +51,7 @@ extern "C" {
  */
 enum rte_filter_type {
 	RTE_ETH_FILTER_NONE = 0,
+	RTE_ETH_FILTER_TUNNEL,
 	RTE_ETH_FILTER_MAX
 };
 
@@ -83,6 +84,54 @@ enum rte_eth_tunnel_type {
 	RTE_TUNNEL_TYPE_MAX,
 };
 
+/**
+ * filter type of tunneling packet
+ */
+#define ETH_TUNNEL_FILTER_OMAC  0x01 /**< filter by outer MAC addr */
+#define ETH_TUNNEL_FILTER_OIP   0x02 /**< filter by outer IP Addr */
+#define ETH_TUNNEL_FILTER_TENID 0x04 /**< filter by tenant ID */
+#define ETH_TUNNEL_FILTER_IMAC  0x08 /**< filter by inner MAC addr */
+#define ETH_TUNNEL_FILTER_IVLAN 0x10 /**< filter by inner VLAN ID */
+#define ETH_TUNNEL_FILTER_IIP   0x20 /**< filter by inner IP addr */
+
+#define RTE_TUNNEL_FILTER_IMAC_IVLAN (ETH_TUNNEL_FILTER_IMAC | \
+					ETH_TUNNEL_FILTER_IVLAN)
+#define RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID (ETH_TUNNEL_FILTER_IMAC | \
+					ETH_TUNNEL_FILTER_IVLAN | \
+					ETH_TUNNEL_FILTER_TENID)
+#define RTE_TUNNEL_FILTER_IMAC_TENID (ETH_TUNNEL_FILTER_IMAC | \
+					ETH_TUNNEL_FILTER_TENID)
+#define RTE_TUNNEL_FILTER_OMAC_TENID_IMAC (ETH_TUNNEL_FILTER_OMAC | \
+					ETH_TUNNEL_FILTER_TENID | \
+					ETH_TUNNEL_FILTER_IMAC)
+
+/**
+ *  Select IPv4 or IPv6 for tunnel filters.
+ */
+enum rte_tunnel_iptype {
+	RTE_TUNNEL_IPTYPE_IPV4 = 0, /**< IPv4. */
+	RTE_TUNNEL_IPTYPE_IPV6,     /**< IPv6. */
+};
+
+/**
+ * Tunneling Packet filter configuration.
+ */
+struct rte_eth_tunnel_filter_conf {
+	struct ether_addr *outer_mac;  /**< Outer MAC address filter. */
+	struct ether_addr *inner_mac;  /**< Inner MAC address filter. */
+	uint16_t inner_vlan;           /**< Inner VLAN filter. */
+	enum rte_tunnel_iptype ip_type; /**< IP address type. */
+	union {
+		uint32_t ipv4_addr;    /**< IPv4 source address to match. */
+		uint32_t ipv6_addr[4]; /**< IPv6 source address to match. */
+	} ip_addr; /**< IPv4/IPv6 source address to match (union of above). */
+
+	uint16_t filter_type;   /**< Filter type. */
+	enum rte_eth_tunnel_type tunnel_type; /**< Tunnel Type. */
+	uint32_t tenant_id;     /** < Tenant number. */
+	uint16_t queue_id;      /** < queue number. */
+};
+
 #ifdef __cplusplus
 }
 #endif
-- 
1.7.7.6
^ permalink raw reply	[flat|nested] 47+ messages in thread
- * [dpdk-dev] [PATCH v8 07/10] i40e:implement the API of VxLAN filter in librte_pmd_i40e
  2014-10-27  2:13 [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville Jijiang Liu
                   ` (5 preceding siblings ...)
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 06/10] librte_ether:add data structures of VxLAN filter Jijiang Liu
@ 2014-10-27  2:13 ` Jijiang Liu
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 08/10] app/testpmd:test VxLAN packet filter Jijiang Liu
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 47+ messages in thread
From: Jijiang Liu @ 2014-10-27  2:13 UTC (permalink / raw)
  To: dev
The filter types supported are listed below for VxLAN:
   1. Inner MAC and Inner VLAN ID.
   2. Inner MAC address, inner VLAN ID and tenant ID.
   3. Inner MAC and tenant ID.
   4. Inner MAC address.
   5. Outer MAC address, tenant ID and inner MAC address.
Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
---
 lib/librte_pmd_i40e/i40e_ethdev.c |  174 ++++++++++++++++++++++++++++++++++++-
 1 files changed, 172 insertions(+), 2 deletions(-)
diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c b/lib/librte_pmd_i40e/i40e_ethdev.c
index eb643e5..be83268 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev.c
@@ -48,6 +48,7 @@
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <rte_dev.h>
+#include <rte_eth_ctrl.h>
 
 #include "i40e_logs.h"
 #include "i40e/i40e_register_x710_int.h"
@@ -4099,6 +4100,108 @@ i40e_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
 }
 
 static int
+i40e_dev_get_filter_type(uint16_t filter_type, uint16_t *flag)
+{
+	switch (filter_type) {
+	case RTE_TUNNEL_FILTER_IMAC_IVLAN:
+		*flag = I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN;
+		break;
+	case RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID:
+		*flag = I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID;
+		break;
+	case RTE_TUNNEL_FILTER_IMAC_TENID:
+		*flag = I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID;
+		break;
+	case RTE_TUNNEL_FILTER_OMAC_TENID_IMAC:
+		*flag = I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC;
+		break;
+	case ETH_TUNNEL_FILTER_IMAC:
+		*flag = I40E_AQC_ADD_CLOUD_FILTER_IMAC;
+		break;
+	default:
+		PMD_DRV_LOG(ERR, "invalid tunnel filter type\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+i40e_dev_tunnel_filter_set(struct i40e_pf *pf,
+			struct rte_eth_tunnel_filter_conf *tunnel_filter,
+			uint8_t add)
+{
+	uint16_t ip_type;
+	uint8_t tun_type = 0;
+	int val, ret = 0;
+	struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+	struct i40e_vsi *vsi = pf->main_vsi;
+	struct i40e_aqc_add_remove_cloud_filters_element_data  *cld_filter;
+	struct i40e_aqc_add_remove_cloud_filters_element_data  *pfilter;
+
+	cld_filter = rte_zmalloc("tunnel_filter",
+		sizeof(struct i40e_aqc_add_remove_cloud_filters_element_data),
+		0);
+
+	if (NULL == cld_filter) {
+		PMD_DRV_LOG(ERR, "Failed to alloc memory.\n");
+		return -EINVAL;
+	}
+	pfilter = cld_filter;
+
+	(void)rte_memcpy(&pfilter->outer_mac, tunnel_filter->outer_mac,
+			sizeof(struct ether_addr));
+	(void)rte_memcpy(&pfilter->inner_mac, tunnel_filter->inner_mac,
+			sizeof(struct ether_addr));
+
+	pfilter->inner_vlan = tunnel_filter->inner_vlan;
+	if (tunnel_filter->ip_type == RTE_TUNNEL_IPTYPE_IPV4) {
+		ip_type = I40E_AQC_ADD_CLOUD_FLAGS_IPV4;
+		(void)rte_memcpy(&pfilter->ipaddr.v4.data,
+				&tunnel_filter->ip_addr,
+				sizeof(pfilter->ipaddr.v4.data));
+	} else {
+		ip_type = I40E_AQC_ADD_CLOUD_FLAGS_IPV6;
+		(void)rte_memcpy(&pfilter->ipaddr.v6.data,
+				&tunnel_filter->ip_addr,
+				sizeof(pfilter->ipaddr.v6.data));
+	}
+
+	/* check tunneled type */
+	switch (tunnel_filter->tunnel_type) {
+	case RTE_TUNNEL_TYPE_VXLAN:
+		tun_type = I40E_AQC_ADD_CLOUD_TNL_TYPE_XVLAN;
+		break;
+	default:
+		/* Other tunnel types is not supported. */
+		PMD_DRV_LOG(ERR, "tunnel type is not supported.\n");
+		rte_free(cld_filter);
+		return -EINVAL;
+	}
+
+	val = i40e_dev_get_filter_type(tunnel_filter->filter_type,
+						&pfilter->flags);
+	if (val < 0) {
+		rte_free(cld_filter);
+		return -EINVAL;
+	}
+
+	pfilter->flags |= I40E_AQC_ADD_CLOUD_FLAGS_TO_QUEUE | ip_type |
+		(tun_type << I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT);
+	pfilter->tenant_id = tunnel_filter->tenant_id;
+	pfilter->queue_number = tunnel_filter->queue_id;
+
+	if (add)
+		ret = i40e_aq_add_cloud_filters(hw, vsi->seid, cld_filter, 1);
+	else
+		ret = i40e_aq_remove_cloud_filters(hw, vsi->seid,
+						cld_filter, 1);
+
+	rte_free(cld_filter);
+	return ret;
+}
+
+static int
 i40e_get_vxlan_port_idx(struct i40e_pf *pf, uint16_t port)
 {
 	uint8_t i;
@@ -4286,6 +4389,72 @@ i40e_pf_config_rss(struct i40e_pf *pf)
 }
 
 static int
+i40e_tunnel_filter_param_check(struct i40e_pf *pf,
+			struct rte_eth_tunnel_filter_conf *filter)
+{
+	if (pf == NULL || filter == NULL) {
+		PMD_DRV_LOG(ERR, "Invalid parameter\n");
+		return -EINVAL;
+	}
+
+	if (filter->queue_id >= pf->dev_data->nb_rx_queues) {
+		PMD_DRV_LOG(ERR, "Invalid queue ID\n");
+		return -EINVAL;
+	}
+
+	if (filter->inner_vlan > ETHER_MAX_VLAN_ID) {
+		PMD_DRV_LOG(ERR, "Invalid inner VLAN ID\n");
+		return -EINVAL;
+	}
+
+	if ((filter->filter_type & ETH_TUNNEL_FILTER_OMAC) &&
+		(is_zero_ether_addr(filter->outer_mac))) {
+		PMD_DRV_LOG(ERR, "Cannot add NULL outer MAC address\n");
+		return -EINVAL;
+	}
+
+	if ((filter->filter_type & ETH_TUNNEL_FILTER_IMAC) &&
+		(is_zero_ether_addr(filter->inner_mac))) {
+		PMD_DRV_LOG(ERR, "Cannot add NULL inner MAC address\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+i40e_tunnel_filter_handle(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
+			void *arg)
+{
+	struct rte_eth_tunnel_filter_conf *filter;
+	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+	int ret = I40E_SUCCESS;
+
+	filter = (struct rte_eth_tunnel_filter_conf *)(arg);
+
+	if (i40e_tunnel_filter_param_check(pf, filter) < 0)
+		return I40E_ERR_PARAM;
+
+	switch (filter_op) {
+	case RTE_ETH_FILTER_NOP:
+		if (!(pf->flags & I40E_FLAG_VXLAN))
+			ret = I40E_NOT_SUPPORTED;
+	case RTE_ETH_FILTER_ADD:
+		ret = i40e_dev_tunnel_filter_set(pf, filter, 1);
+		break;
+	case RTE_ETH_FILTER_DELETE:
+		ret = i40e_dev_tunnel_filter_set(pf, filter, 0);
+		break;
+	default:
+		PMD_DRV_LOG(ERR, "unknown operation %u\n", filter_op);
+		ret = I40E_ERR_PARAM;
+		break;
+	}
+
+	return ret;
+}
+
+static int
 i40e_pf_config_mq_rx(struct i40e_pf *pf)
 {
 	if (!pf->dev_data->sriov.active) {
@@ -4309,13 +4478,14 @@ i40e_dev_filter_ctrl(struct rte_eth_dev *dev,
 		     void *arg)
 {
 	int ret = 0;
-	(void)filter_op;
-	(void)arg;
 
 	if (dev == NULL)
 		return -EINVAL;
 
 	switch (filter_type) {
+	case RTE_ETH_FILTER_TUNNEL:
+		ret = i40e_tunnel_filter_handle(dev, filter_op, arg);
+		break;
 	default:
 		PMD_DRV_LOG(WARNING, "Filter type (%d) not supported",
 							filter_type);
-- 
1.7.7.6
^ permalink raw reply	[flat|nested] 47+ messages in thread
- * [dpdk-dev] [PATCH v8 08/10] app/testpmd:test VxLAN packet filter
  2014-10-27  2:13 [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville Jijiang Liu
                   ` (6 preceding siblings ...)
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 07/10] i40e:implement the API of VxLAN filter in librte_pmd_i40e Jijiang Liu
@ 2014-10-27  2:13 ` Jijiang Liu
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 09/10] i40e:support VxLAN Tx checksum offload Jijiang Liu
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 47+ messages in thread
From: Jijiang Liu @ 2014-10-27  2:13 UTC (permalink / raw)
  To: dev
Add the "tunnel_filter" command in testpmd to test the API of VxLAN packet filter.
Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
---
 app/test-pmd/cmdline.c |  150 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 150 insertions(+), 0 deletions(-)
diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 4d7b4d1..da5d272 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -285,6 +285,14 @@ static void cmd_help_long_parsed(void *parsed_result,
 			"    Set the outer VLAN TPID for Packet Filtering on"
 			" a port\n\n"
 
+			"tunnel_filter add (port_id) (outer_mac) (inner_mac) (ip_addr) "
+			"(inner_vlan) (tunnel_type) (filter_type) (tenant_id) (queue_id)\n"
+			"   add a tunnel filter of a port.\n\n"
+
+			"tunnel_filter rm (port_id) (outer_mac) (inner_mac) (ip_addr) "
+			"(inner_vlan) (tunnel_type) (filter_type) (tenant_id) (queue_id)\n"
+			"   remove a tunnel filter of a port.\n\n"
+
 			"rx_vxlan_port add (udp_port) (port_id)\n"
 			"    Add an UDP port for VxLAN packet filter on a port\n\n"
 
@@ -6231,6 +6239,147 @@ cmdline_parse_inst_t cmd_vf_rate_limit = {
 	},
 };
 
+/* *** ADD TUNNEL FILTER OF A PORT *** */
+struct cmd_tunnel_filter_result {
+	cmdline_fixed_string_t cmd;
+	cmdline_fixed_string_t what;
+	uint8_t port_id;
+	struct ether_addr outer_mac;
+	struct ether_addr inner_mac;
+	cmdline_ipaddr_t ip_value;
+	uint16_t inner_vlan;
+	cmdline_fixed_string_t tunnel_type;
+	cmdline_fixed_string_t filter_type;
+	uint32_t tenant_id;
+	uint16_t queue_num;
+};
+
+static void
+cmd_tunnel_filter_parsed(void *parsed_result,
+			  __attribute__((unused)) struct cmdline *cl,
+			  __attribute__((unused)) void *data)
+{
+	struct cmd_tunnel_filter_result *res = parsed_result;
+	struct rte_eth_tunnel_filter_conf tunnel_filter_conf;
+	int ret = 0;
+
+	tunnel_filter_conf.outer_mac = &res->outer_mac;
+	tunnel_filter_conf.inner_mac = &res->inner_mac;
+	tunnel_filter_conf.inner_vlan = res->inner_vlan;
+
+	if (res->ip_value.family == AF_INET) {
+		tunnel_filter_conf.ip_addr.ipv4_addr =
+			res->ip_value.addr.ipv4.s_addr;
+		tunnel_filter_conf.ip_type = RTE_TUNNEL_IPTYPE_IPV4;
+	} else {
+		memcpy(&(tunnel_filter_conf.ip_addr.ipv6_addr),
+			&(res->ip_value.addr.ipv6),
+			sizeof(struct in6_addr));
+		tunnel_filter_conf.ip_type = RTE_TUNNEL_IPTYPE_IPV6;
+	}
+
+	if (!strcmp(res->filter_type, "imac-ivlan"))
+		tunnel_filter_conf.filter_type = RTE_TUNNEL_FILTER_IMAC_IVLAN;
+	else if (!strcmp(res->filter_type, "imac-ivlan-tenid"))
+		tunnel_filter_conf.filter_type =
+			RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID;
+	else if (!strcmp(res->filter_type, "imac-tenid"))
+		tunnel_filter_conf.filter_type = RTE_TUNNEL_FILTER_IMAC_TENID;
+	else if (!strcmp(res->filter_type, "imac"))
+		tunnel_filter_conf.filter_type = ETH_TUNNEL_FILTER_IMAC;
+	else if (!strcmp(res->filter_type, "omac-imac-tenid"))
+		tunnel_filter_conf.filter_type =
+			RTE_TUNNEL_FILTER_OMAC_TENID_IMAC;
+	else {
+		printf("The filter type is not supported");
+		return;
+	}
+
+	if (!strcmp(res->tunnel_type, "vxlan"))
+		tunnel_filter_conf.tunnel_type = RTE_TUNNEL_TYPE_VXLAN;
+	else {
+		printf("Only VxLAN is supported now.\n");
+		return;
+	}
+
+	tunnel_filter_conf.tenant_id = res->tenant_id;
+	tunnel_filter_conf.queue_id = res->queue_num;
+	if (!strcmp(res->what, "add"))
+		ret = rte_eth_dev_filter_ctrl(res->port_id,
+					RTE_ETH_FILTER_TUNNEL,
+					RTE_ETH_FILTER_ADD,
+					&tunnel_filter_conf);
+	else
+		ret = rte_eth_dev_filter_ctrl(res->port_id,
+					RTE_ETH_FILTER_TUNNEL,
+					RTE_ETH_FILTER_DELETE,
+					&tunnel_filter_conf);
+	if (ret < 0)
+		printf("cmd_tunnel_filter_parsed error: (%s)\n",
+				strerror(-ret));
+
+}
+cmdline_parse_token_string_t cmd_tunnel_filter_cmd =
+	TOKEN_STRING_INITIALIZER(struct cmd_tunnel_filter_result,
+	cmd, "tunnel_filter");
+cmdline_parse_token_string_t cmd_tunnel_filter_what =
+	TOKEN_STRING_INITIALIZER(struct cmd_tunnel_filter_result,
+	what, "add#rm");
+cmdline_parse_token_num_t cmd_tunnel_filter_port_id =
+	TOKEN_NUM_INITIALIZER(struct cmd_tunnel_filter_result,
+	port_id, UINT8);
+cmdline_parse_token_etheraddr_t cmd_tunnel_filter_outer_mac =
+	TOKEN_ETHERADDR_INITIALIZER(struct cmd_tunnel_filter_result,
+	outer_mac);
+cmdline_parse_token_etheraddr_t cmd_tunnel_filter_inner_mac =
+	TOKEN_ETHERADDR_INITIALIZER(struct cmd_tunnel_filter_result,
+	inner_mac);
+cmdline_parse_token_num_t cmd_tunnel_filter_innner_vlan =
+	TOKEN_NUM_INITIALIZER(struct cmd_tunnel_filter_result,
+	inner_vlan, UINT16);
+cmdline_parse_token_ipaddr_t cmd_tunnel_filter_ip_value =
+	TOKEN_IPADDR_INITIALIZER(struct cmd_tunnel_filter_result,
+	ip_value);
+cmdline_parse_token_string_t cmd_tunnel_filter_tunnel_type =
+	TOKEN_STRING_INITIALIZER(struct cmd_tunnel_filter_result,
+	tunnel_type, "vxlan");
+
+cmdline_parse_token_string_t cmd_tunnel_filter_filter_type =
+	TOKEN_STRING_INITIALIZER(struct cmd_tunnel_filter_result,
+	filter_type, "imac-ivlan#imac-ivlan-tenid#imac-tenid#"
+		"imac#omac-imac-tenid");
+cmdline_parse_token_num_t cmd_tunnel_filter_tenant_id =
+	TOKEN_NUM_INITIALIZER(struct cmd_tunnel_filter_result,
+	tenant_id, UINT32);
+cmdline_parse_token_num_t cmd_tunnel_filter_queue_num =
+	TOKEN_NUM_INITIALIZER(struct cmd_tunnel_filter_result,
+	queue_num, UINT16);
+
+cmdline_parse_inst_t cmd_tunnel_filter = {
+	.f = cmd_tunnel_filter_parsed,
+	.data = (void *)0,
+	.help_str = "add/rm tunnel filter of a port: "
+			"tunnel_filter add port_id outer_mac inner_mac ip "
+			"inner_vlan tunnel_type(vxlan) filter_type "
+			"(imac-ivlan|imac-ivlan-tenid|imac-tenid|"
+			"imac|omac-imac-tenid) "
+			"tenant_id queue_num",
+	.tokens = {
+		(void *)&cmd_tunnel_filter_cmd,
+		(void *)&cmd_tunnel_filter_what,
+		(void *)&cmd_tunnel_filter_port_id,
+		(void *)&cmd_tunnel_filter_outer_mac,
+		(void *)&cmd_tunnel_filter_inner_mac,
+		(void *)&cmd_tunnel_filter_ip_value,
+		(void *)&cmd_tunnel_filter_innner_vlan,
+		(void *)&cmd_tunnel_filter_tunnel_type,
+		(void *)&cmd_tunnel_filter_filter_type,
+		(void *)&cmd_tunnel_filter_tenant_id,
+		(void *)&cmd_tunnel_filter_queue_num,
+		NULL,
+	},
+};
+
 /* *** CONFIGURE TUNNEL UDP PORT *** */
 struct cmd_tunnel_udp_config {
 	cmdline_fixed_string_t cmd;
@@ -7582,6 +7731,7 @@ cmdline_parse_ctx_t main_ctx[] = {
 	(cmdline_parse_inst_t *)&cmd_vf_rxvlan_filter,
 	(cmdline_parse_inst_t *)&cmd_queue_rate_limit,
 	(cmdline_parse_inst_t *)&cmd_vf_rate_limit,
+	(cmdline_parse_inst_t *)&cmd_tunnel_filter,
 	(cmdline_parse_inst_t *)&cmd_tunnel_udp_config,
 	(cmdline_parse_inst_t *)&cmd_set_mirror_mask,
 	(cmdline_parse_inst_t *)&cmd_set_mirror_link,
-- 
1.7.7.6
^ permalink raw reply	[flat|nested] 47+ messages in thread
- * [dpdk-dev] [PATCH v8 09/10] i40e:support VxLAN Tx checksum offload
  2014-10-27  2:13 [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville Jijiang Liu
                   ` (7 preceding siblings ...)
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 08/10] app/testpmd:test VxLAN packet filter Jijiang Liu
@ 2014-10-27  2:13 ` Jijiang Liu
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 10/10] app/testpmd:test " Jijiang Liu
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 47+ messages in thread
From: Jijiang Liu @ 2014-10-27  2:13 UTC (permalink / raw)
  To: dev
Support VxLAN Tx checksum offload, which include
  - outer L3(IP) checksum offload
  - inner L3(IP) checksum offload
  - inner L4(UDP, TCP and SCTP) checksum offload
Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
---
 lib/librte_mbuf/rte_mbuf.h      |    1 +
 lib/librte_pmd_i40e/i40e_rxtx.c |   46 +++++++++++++++++++++++++++++++++-----
 2 files changed, 41 insertions(+), 6 deletions(-)
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 9af3bd9..a86dedf 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -96,6 +96,7 @@ extern "C" {
 
 #define PKT_TX_VLAN_PKT      (1ULL << 55) /**< TX packet is a 802.1q VLAN packet. */
 #define PKT_TX_IP_CKSUM      (1ULL << 54) /**< IP cksum of TX pkt. computed by NIC. */
+#define PKT_TX_VXLAN_CKSUM   (1ULL << 50) /**< TX checksum of VxLAN computed by NIC */
 #define PKT_TX_IPV4_CSUM     PKT_TX_IP_CKSUM /**< Alias of PKT_TX_IP_CKSUM. */
 #define PKT_TX_IPV4          PKT_RX_IPV4_HDR /**< IPv4 with no IP checksum offload. */
 #define PKT_TX_IPV6          PKT_RX_IPV6_HDR /**< IPv6 packet */
diff --git a/lib/librte_pmd_i40e/i40e_rxtx.c b/lib/librte_pmd_i40e/i40e_rxtx.c
index 2108290..7599df9 100644
--- a/lib/librte_pmd_i40e/i40e_rxtx.c
+++ b/lib/librte_pmd_i40e/i40e_rxtx.c
@@ -411,11 +411,14 @@ i40e_rxd_ptype_to_pkt_flags(uint64_t qword)
 }
 
 static inline void
-i40e_txd_enable_checksum(uint32_t ol_flags,
+i40e_txd_enable_checksum(uint64_t ol_flags,
 			uint32_t *td_cmd,
 			uint32_t *td_offset,
 			uint8_t l2_len,
-			uint8_t l3_len)
+			uint16_t l3_len,
+			uint8_t inner_l2_len,
+			uint16_t inner_l3_len,
+			uint32_t *cd_tunneling)
 {
 	if (!l2_len) {
 		PMD_DRV_LOG(DEBUG, "L2 length set to 0");
@@ -428,6 +431,27 @@ i40e_txd_enable_checksum(uint32_t ol_flags,
 		return;
 	}
 
+	/* VxLAN packet TX checksum offload */
+	if (unlikely(ol_flags & PKT_TX_VXLAN_CKSUM)) {
+		uint8_t l4tun_len;
+
+		l4tun_len = ETHER_VXLAN_HLEN + inner_l2_len;
+
+		if (ol_flags & PKT_TX_IPV4_CSUM)
+			*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
+		else if (ol_flags & PKT_TX_IPV6)
+			*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
+
+		/* Now set the ctx descriptor fields */
+		*cd_tunneling |= (l3_len >> 2) <<
+				I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
+				I40E_TXD_CTX_UDP_TUNNELING |
+				(l4tun_len >> 1) <<
+				I40E_TXD_CTX_QW0_NATLEN_SHIFT;
+
+		l3_len = inner_l3_len;
+	}
+
 	/* Enable L3 checksum offloads */
 	if (ol_flags & PKT_TX_IPV4_CSUM) {
 		*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
@@ -1077,7 +1101,10 @@ i40e_recv_scattered_pkts(void *rx_queue,
 static inline uint16_t
 i40e_calc_context_desc(uint64_t flags)
 {
-	uint16_t mask = 0;
+	uint64_t mask = 0ULL;
+
+	if (flags | PKT_TX_VXLAN_CKSUM)
+		mask |= PKT_TX_VXLAN_CKSUM;
 
 #ifdef RTE_LIBRTE_IEEE1588
 	mask |= PKT_TX_IEEE1588_TMST;
@@ -1098,6 +1125,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	volatile struct i40e_tx_desc *txr;
 	struct rte_mbuf *tx_pkt;
 	struct rte_mbuf *m_seg;
+	uint32_t cd_tunneling_params;
 	uint16_t tx_id;
 	uint16_t nb_tx;
 	uint32_t td_cmd;
@@ -1106,7 +1134,9 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	uint32_t td_tag;
 	uint64_t ol_flags;
 	uint8_t l2_len;
-	uint8_t l3_len;
+	uint16_t l3_len;
+	uint8_t inner_l2_len;
+	uint16_t inner_l3_len;
 	uint16_t nb_used;
 	uint16_t nb_ctx;
 	uint16_t tx_last;
@@ -1134,7 +1164,9 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 
 		ol_flags = tx_pkt->ol_flags;
 		l2_len = tx_pkt->l2_len;
+		inner_l2_len = tx_pkt->inner_l2_len;
 		l3_len = tx_pkt->l3_len;
+		inner_l3_len = tx_pkt->inner_l3_len;
 
 		/* Calculate the number of context descriptors needed. */
 		nb_ctx = i40e_calc_context_desc(ol_flags);
@@ -1182,15 +1214,17 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		td_cmd |= I40E_TX_DESC_CMD_ICRC;
 
 		/* Enable checksum offloading */
+		cd_tunneling_params = 0;
 		i40e_txd_enable_checksum(ol_flags, &td_cmd, &td_offset,
-							l2_len, l3_len);
+						l2_len, l3_len, inner_l2_len,
+						inner_l3_len,
+						&cd_tunneling_params);
 
 		if (unlikely(nb_ctx)) {
 			/* Setup TX context descriptor if required */
 			volatile struct i40e_tx_context_desc *ctx_txd =
 				(volatile struct i40e_tx_context_desc *)\
 							&txr[tx_id];
-			uint32_t cd_tunneling_params = 0;
 			uint16_t cd_l2tag2 = 0;
 			uint64_t cd_type_cmd_tso_mss =
 				I40E_TX_DESC_DTYPE_CONTEXT;
-- 
1.7.7.6
^ permalink raw reply	[flat|nested] 47+ messages in thread
- * [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-10-27  2:13 [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville Jijiang Liu
                   ` (8 preceding siblings ...)
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 09/10] i40e:support VxLAN Tx checksum offload Jijiang Liu
@ 2014-10-27  2:13 ` Jijiang Liu
  2014-11-04  8:19   ` Olivier MATZ
  2014-10-27  2:20 ` [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville Liu, Yong
  2014-10-27  2:41 ` Zhang, Helin
  11 siblings, 1 reply; 47+ messages in thread
From: Jijiang Liu @ 2014-10-27  2:13 UTC (permalink / raw)
  To: dev
Add test cases in testpmd to test VxLAN Tx Checksum offload, which include
 - IPv4 and IPv6 packet
 - outer L3, inner L3 and L4 checksum offload for Tx side.
Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
---
 app/test-pmd/cmdline.c  |   13 ++-
 app/test-pmd/config.c   |    6 +-
 app/test-pmd/csumonly.c |  194 +++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 192 insertions(+), 21 deletions(-)
diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index da5d272..757c399 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -310,13 +310,17 @@ static void cmd_help_long_parsed(void *parsed_result,
 			"    Disable hardware insertion of a VLAN header in"
 			" packets sent on a port.\n\n"
 
-			"tx_checksum set mask (port_id)\n"
+			"tx_checksum set (mask) (port_id)\n"
 			"    Enable hardware insertion of checksum offload with"
-			" the 4-bit mask, 0~0xf, in packets sent on a port.\n"
+			" the 8-bit mask, 0~0xff, in packets sent on a port.\n"
 			"        bit 0 - insert ip   checksum offload if set\n"
 			"        bit 1 - insert udp  checksum offload if set\n"
 			"        bit 2 - insert tcp  checksum offload if set\n"
 			"        bit 3 - insert sctp checksum offload if set\n"
+			"        bit 4 - insert inner ip  checksum offload if set\n"
+			"        bit 5 - insert inner udp checksum offload if set\n"
+			"        bit 6 - insert inner tcp checksum offload if set\n"
+			"        bit 7 - insert inner sctp checksum offload if set\n"
 			"    Please check the NIC datasheet for HW limits.\n\n"
 
 			"set fwd (%s)\n"
@@ -2763,8 +2767,9 @@ cmdline_parse_inst_t cmd_tx_cksum_set = {
 	.f = cmd_tx_cksum_set_parsed,
 	.data = NULL,
 	.help_str = "enable hardware insertion of L3/L4checksum with a given "
-	"mask in packets sent on a port, the bit mapping is given as, Bit 0 for ip"
-	"Bit 1 for UDP, Bit 2 for TCP, Bit 3 for SCTP",
+	"mask in packets sent on a port, the bit mapping is given as, Bit 0 for ip, "
+	"Bit 1 for UDP, Bit 2 for TCP, Bit 3 for SCTP, Bit 4 for inner ip, "
+	"Bit 5 for inner UDP, Bit 6 for inner TCP, Bit 7 for inner SCTP",
 	.tokens = {
 		(void *)&cmd_tx_cksum_set_tx_cksum,
 		(void *)&cmd_tx_cksum_set_set,
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 2a1b93f..9bc08f4 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -1753,9 +1753,9 @@ tx_cksum_set(portid_t port_id, uint64_t ol_flags)
 	uint64_t tx_ol_flags;
 	if (port_id_is_invalid(port_id))
 		return;
-	/* Clear last 4 bits and then set L3/4 checksum mask again */
-	tx_ol_flags = ports[port_id].tx_ol_flags & (~0x0Full);
-	ports[port_id].tx_ol_flags = ((ol_flags & 0xf) | tx_ol_flags);
+	/* Clear last 8 bits and then set L3/4 checksum mask again */
+	tx_ol_flags = ports[port_id].tx_ol_flags & (~0x0FFull);
+	ports[port_id].tx_ol_flags = ((ol_flags & 0xff) | tx_ol_flags);
 }
 
 void
diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index fcc4876..3967476 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -209,10 +209,16 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
 	struct rte_mbuf  *mb;
 	struct ether_hdr *eth_hdr;
 	struct ipv4_hdr  *ipv4_hdr;
+	struct ether_hdr *inner_eth_hdr;
+	struct ipv4_hdr  *inner_ipv4_hdr = NULL;
 	struct ipv6_hdr  *ipv6_hdr;
+	struct ipv6_hdr  *inner_ipv6_hdr = NULL;
 	struct udp_hdr   *udp_hdr;
+	struct udp_hdr   *inner_udp_hdr;
 	struct tcp_hdr   *tcp_hdr;
+	struct tcp_hdr   *inner_tcp_hdr;
 	struct sctp_hdr  *sctp_hdr;
+	struct sctp_hdr  *inner_sctp_hdr;
 
 	uint16_t nb_rx;
 	uint16_t nb_tx;
@@ -221,12 +227,17 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
 	uint64_t pkt_ol_flags;
 	uint64_t tx_ol_flags;
 	uint16_t l4_proto;
+	uint16_t inner_l4_proto = 0;
 	uint16_t eth_type;
 	uint8_t  l2_len;
 	uint8_t  l3_len;
+	uint8_t  inner_l2_len = 0;
+	uint8_t  inner_l3_len = 0;
 
 	uint32_t rx_bad_ip_csum;
 	uint32_t rx_bad_l4_csum;
+	uint8_t  ipv4_tunnel;
+	uint8_t  ipv6_tunnel;
 
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	uint64_t start_tsc;
@@ -262,7 +273,10 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
 		l2_len  = sizeof(struct ether_hdr);
 		pkt_ol_flags = mb->ol_flags;
 		ol_flags = (pkt_ol_flags & (~PKT_TX_L4_MASK));
-
+		ipv4_tunnel = (pkt_ol_flags & PKT_RX_TUNNEL_IPV4_HDR) ?
+				1 : 0;
+		ipv6_tunnel = (pkt_ol_flags & PKT_RX_TUNNEL_IPV6_HDR) ?
+				1 : 0;
 		eth_hdr = rte_pktmbuf_mtod(mb, struct ether_hdr *);
 		eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);
 		if (eth_type == ETHER_TYPE_VLAN) {
@@ -295,7 +309,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
 		 *      + ipv4 or ipv6
 		 *      + udp or tcp or sctp or others
 		 */
-		if (pkt_ol_flags & PKT_RX_IPV4_HDR) {
+		if (pkt_ol_flags & (PKT_RX_IPV4_HDR | PKT_RX_TUNNEL_IPV4_HDR)) {
 
 			/* Do not support ipv4 option field */
 			l3_len = sizeof(struct ipv4_hdr) ;
@@ -325,17 +339,92 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
 				if (tx_ol_flags & 0x2) {
 					/* HW Offload */
 					ol_flags |= PKT_TX_UDP_CKSUM;
-					/* Pseudo header sum need be set properly */
-					udp_hdr->dgram_cksum = get_ipv4_psd_sum(ipv4_hdr);
+					if (ipv4_tunnel)
+						udp_hdr->dgram_cksum = 0;
+					else
+						/* Pseudo header sum need be set properly */
+						udp_hdr->dgram_cksum =
+							get_ipv4_psd_sum(ipv4_hdr);
 				}
 				else {
 					/* SW Implementation, clear checksum field first */
 					udp_hdr->dgram_cksum = 0;
 					udp_hdr->dgram_cksum = get_ipv4_udptcp_checksum(ipv4_hdr,
-							(uint16_t*)udp_hdr);
+									(uint16_t *)udp_hdr);
 				}
-			}
-			else if (l4_proto == IPPROTO_TCP){
+
+				if (ipv4_tunnel) {
+
+					uint16_t len;
+
+					/* Check if inner L3/L4 checkum flag is set */
+					if (tx_ol_flags & 0xF0)
+						ol_flags |= PKT_TX_VXLAN_CKSUM;
+
+					inner_l2_len  = sizeof(struct ether_hdr);
+					inner_eth_hdr = (struct ether_hdr *) (rte_pktmbuf_mtod(mb,
+								unsigned char *) + l2_len + l3_len
+								 + ETHER_VXLAN_HLEN);
+
+					eth_type = rte_be_to_cpu_16(inner_eth_hdr->ether_type);
+					if (eth_type == ETHER_TYPE_VLAN) {
+						inner_l2_len += sizeof(struct vlan_hdr);
+						eth_type = rte_be_to_cpu_16(*(uint16_t *)
+							((uintptr_t)ð_hdr->ether_type +
+								sizeof(struct vlan_hdr)));
+					}
+
+					len = l2_len + l3_len + ETHER_VXLAN_HLEN + inner_l2_len;
+					if (eth_type == ETHER_TYPE_IPv4) {
+						inner_l3_len = sizeof(struct ipv4_hdr);
+						inner_ipv4_hdr = (struct ipv4_hdr *) (rte_pktmbuf_mtod(mb,
+								unsigned char *) + len);
+						inner_l4_proto = inner_ipv4_hdr->next_proto_id;
+
+						if (tx_ol_flags & 0x10) {
+
+							/* Do not delete, this is required by HW*/
+							inner_ipv4_hdr->hdr_checksum = 0;
+							ol_flags |= PKT_TX_IPV4_CSUM;
+						}
+
+					} else if (eth_type == ETHER_TYPE_IPv6) {
+						inner_l3_len = sizeof(struct ipv6_hdr);
+						inner_ipv6_hdr = (struct ipv6_hdr *) (rte_pktmbuf_mtod(mb,
+								unsigned char *) + len);
+						inner_l4_proto = inner_ipv6_hdr->proto;
+					}
+					if ((inner_l4_proto == IPPROTO_UDP) && (tx_ol_flags & 0x20)) {
+
+						/* HW Offload */
+						ol_flags |= PKT_TX_UDP_CKSUM;
+						inner_udp_hdr = (struct udp_hdr *) (rte_pktmbuf_mtod(mb,
+								unsigned char *) + len + inner_l3_len);
+						if (eth_type == ETHER_TYPE_IPv4)
+							inner_udp_hdr->dgram_cksum = get_ipv4_psd_sum(inner_ipv4_hdr);
+						else if (eth_type == ETHER_TYPE_IPv6)
+							inner_udp_hdr->dgram_cksum = get_ipv6_psd_sum(inner_ipv6_hdr);
+
+					} else if ((inner_l4_proto == IPPROTO_TCP) && (tx_ol_flags & 0x40)) {
+						/* HW Offload */
+						ol_flags |= PKT_TX_TCP_CKSUM;
+						inner_tcp_hdr = (struct tcp_hdr *) (rte_pktmbuf_mtod(mb,
+								unsigned char *) + len + inner_l3_len);
+						if (eth_type == ETHER_TYPE_IPv4)
+							inner_tcp_hdr->cksum = get_ipv4_psd_sum(inner_ipv4_hdr);
+						else if (eth_type == ETHER_TYPE_IPv6)
+							inner_tcp_hdr->cksum = get_ipv6_psd_sum(inner_ipv6_hdr);
+					} else if ((inner_l4_proto == IPPROTO_SCTP) && (tx_ol_flags & 0x80)) {
+						/* HW Offload */
+						ol_flags |= PKT_TX_SCTP_CKSUM;
+						inner_sctp_hdr = (struct sctp_hdr *) (rte_pktmbuf_mtod(mb,
+								unsigned char *) + len + inner_l3_len);
+						inner_sctp_hdr->cksum = 0;
+					}
+
+				}
+
+			} else if (l4_proto == IPPROTO_TCP) {
 				tcp_hdr = (struct tcp_hdr*) (rte_pktmbuf_mtod(mb,
 						unsigned char *) + l2_len + l3_len);
 				if (tx_ol_flags & 0x4) {
@@ -347,8 +436,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
 					tcp_hdr->cksum = get_ipv4_udptcp_checksum(ipv4_hdr,
 							(uint16_t*)tcp_hdr);
 				}
-			}
-			else if (l4_proto == IPPROTO_SCTP) {
+			} else if (l4_proto == IPPROTO_SCTP) {
 				sctp_hdr = (struct sctp_hdr*) (rte_pktmbuf_mtod(mb,
 						unsigned char *) + l2_len + l3_len);
 
@@ -367,9 +455,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
 				}
 			}
 			/* End of L4 Handling*/
-		}
-		else if (pkt_ol_flags & PKT_RX_IPV6_HDR) {
-
+		} else if (pkt_ol_flags & (PKT_RX_IPV6_HDR | PKT_RX_TUNNEL_IPV6_HDR)) {
 			ipv6_hdr = (struct ipv6_hdr *) (rte_pktmbuf_mtod(mb,
 					unsigned char *) + l2_len);
 			l3_len = sizeof(struct ipv6_hdr) ;
@@ -382,15 +468,93 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
 				if (tx_ol_flags & 0x2) {
 					/* HW Offload */
 					ol_flags |= PKT_TX_UDP_CKSUM;
-					udp_hdr->dgram_cksum = get_ipv6_psd_sum(ipv6_hdr);
+					if (ipv6_tunnel)
+						udp_hdr->dgram_cksum = 0;
+					else
+						udp_hdr->dgram_cksum =
+							get_ipv6_psd_sum(ipv6_hdr);
 				}
 				else {
 					/* SW Implementation */
 					/* checksum field need be clear first */
 					udp_hdr->dgram_cksum = 0;
 					udp_hdr->dgram_cksum = get_ipv6_udptcp_checksum(ipv6_hdr,
-							(uint16_t*)udp_hdr);
+								(uint16_t *)udp_hdr);
 				}
+
+				if (ipv6_tunnel) {
+
+					uint16_t len;
+
+					/* Check if inner L3/L4 checksum flag is set */
+					if (tx_ol_flags & 0xF0)
+						ol_flags |= PKT_TX_VXLAN_CKSUM;
+
+					inner_l2_len  = sizeof(struct ether_hdr);
+					inner_eth_hdr = (struct ether_hdr *) (rte_pktmbuf_mtod(mb,
+						unsigned char *) + l2_len + l3_len + ETHER_VXLAN_HLEN);
+					eth_type = rte_be_to_cpu_16(inner_eth_hdr->ether_type);
+
+					if (eth_type == ETHER_TYPE_VLAN) {
+						inner_l2_len += sizeof(struct vlan_hdr);
+						eth_type = rte_be_to_cpu_16(*(uint16_t *)
+							((uintptr_t)ð_hdr->ether_type +
+							sizeof(struct vlan_hdr)));
+					}
+
+					len = l2_len + l3_len + ETHER_VXLAN_HLEN + inner_l2_len;
+
+					if (eth_type == ETHER_TYPE_IPv4) {
+						inner_l3_len = sizeof(struct ipv4_hdr);
+						inner_ipv4_hdr = (struct ipv4_hdr *) (rte_pktmbuf_mtod(mb,
+								unsigned char *) + len);
+						inner_l4_proto = inner_ipv4_hdr->next_proto_id;
+
+						/* HW offload */
+						if (tx_ol_flags & 0x10) {
+
+							/* Do not delete, this is required by HW*/
+							inner_ipv4_hdr->hdr_checksum = 0;
+							ol_flags |= PKT_TX_IPV4_CSUM;
+						}
+					} else if (eth_type == ETHER_TYPE_IPv6) {
+						inner_l3_len = sizeof(struct ipv6_hdr);
+						inner_ipv6_hdr = (struct ipv6_hdr *) (rte_pktmbuf_mtod(mb,
+							unsigned char *) + len);
+						inner_l4_proto = inner_ipv6_hdr->proto;
+					}
+
+					if ((inner_l4_proto == IPPROTO_UDP) && (tx_ol_flags & 0x20)) {
+						inner_udp_hdr = (struct udp_hdr *) (rte_pktmbuf_mtod(mb,
+							unsigned char *) + len + inner_l3_len);
+						/* HW offload */
+						ol_flags |= PKT_TX_UDP_CKSUM;
+						inner_udp_hdr->dgram_cksum = 0;
+						if (eth_type == ETHER_TYPE_IPv4)
+							inner_udp_hdr->dgram_cksum = get_ipv4_psd_sum(inner_ipv4_hdr);
+						else if (eth_type == ETHER_TYPE_IPv6)
+							inner_udp_hdr->dgram_cksum = get_ipv6_psd_sum(inner_ipv6_hdr);
+					} else if ((inner_l4_proto == IPPROTO_TCP) && (tx_ol_flags & 0x40)) {
+						/* HW offload */
+						ol_flags |= PKT_TX_TCP_CKSUM;
+						inner_tcp_hdr = (struct tcp_hdr *) (rte_pktmbuf_mtod(mb,
+								unsigned char *) + len + inner_l3_len);
+
+						if (eth_type == ETHER_TYPE_IPv4)
+							inner_tcp_hdr->cksum = get_ipv4_psd_sum(inner_ipv4_hdr);
+						else if (eth_type == ETHER_TYPE_IPv6)
+							inner_tcp_hdr->cksum = get_ipv6_psd_sum(inner_ipv6_hdr);
+
+					} else if ((inner_l4_proto == IPPROTO_SCTP) && (tx_ol_flags & 0x80)) {
+						/* HW offload */
+						ol_flags |= PKT_TX_SCTP_CKSUM;
+						inner_sctp_hdr = (struct sctp_hdr *) (rte_pktmbuf_mtod(mb,
+								unsigned char *) + len + inner_l3_len);
+						inner_sctp_hdr->cksum = 0;
+					}
+
+				}
+
 			}
 			else if (l4_proto == IPPROTO_TCP) {
 				tcp_hdr = (struct tcp_hdr*) (rte_pktmbuf_mtod(mb,
@@ -434,6 +598,8 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
 		/* Combine the packet header write. VLAN is not consider here */
 		mb->l2_len = l2_len;
 		mb->l3_len = l3_len;
+		mb->inner_l2_len = inner_l2_len;
+		mb->inner_l3_len = inner_l3_len;
 		mb->ol_flags = ol_flags;
 	}
 	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
-- 
1.7.7.6
^ permalink raw reply	[flat|nested] 47+ messages in thread
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 10/10] app/testpmd:test " Jijiang Liu
@ 2014-11-04  8:19   ` Olivier MATZ
  2014-11-05  6:02     ` Liu, Jijiang
  0 siblings, 1 reply; 47+ messages in thread
From: Olivier MATZ @ 2014-11-04  8:19 UTC (permalink / raw)
  To: Jijiang Liu; +Cc: dev
Hello Jijiang,
On 10/27/2014 03:13 AM, Jijiang Liu wrote:
> Add test cases in testpmd to test VxLAN Tx Checksum offload, which include
>   - IPv4 and IPv6 packet
>   - outer L3, inner L3 and L4 checksum offload for Tx side.
>
> Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
I'm trying to port the test of TSO in csum_only.c which was originally
part of this patch [1].
Meanwhile, the source was modified by the patch provided by the email
I'm replying to (also available at [2]), and I would like to understand
what is the purpose of it.
First, the code checks if the mbuf has the flag PKT_RX_TUNNEL_IPV4_HDR.
What is the meaning of this flag? It was added by [3], but there is no
description in comments or in the commit log explaining in which case
this flag is set by the driver. The name supposes that this flag is
set when the received packet is an IPv4 tunnel, but the commit log talks
about vxlan.
Then, if this flag was present, the code assumes it's a vxlan packet.
If one of inner checksum is specified by the user in cmdline, the flag
PKT_TX_VXLAN_CKSUM is added to the mbuf. This flag definition was added
by [4] (at the wrong place, I'll fix it in my patchset). What is the
meaning of this flag? Is it enough to checksum outer L3, inner L3, and
inner L4 as specified in commit log? If yes, why are the other flags
PKT_TX_IPV4_CSUM, PKT_TX_UDP_CKSUM, (...) added in the mbuf later?
In my comprehension, these flags are needed in addition to
PKT_TX_VXLAN_CKSUM to do the checksum of the inner headers.
In general, I need to understand how to use all the new offloading
stuff. In [5], new fields were added in the mbuf structure
(inner_l3_len and inner_l2_len). I'm not sure I understand which fields
have to be filled. Below is my understanding, can you please check that
it is correct?
A- To send a Ether/IP/TCP packet and process IP and TCP TX
    checksum in the NIC:
   - set l2_len = 14, l3_len = 20,
     ol_flags = PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM,
     write all checksums to 0 in the packet
B- To send a Ether/IP/UDP/vxlan/Ether/IP/TCP packet and
    process inner IP and inner TCP TX checksum in the NIC:
   - set l2_len = 14+20+8+8+14, l3_len = 20,
     ol_flags = PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM,
     write all checksums to 0 in the packet
C- To send a Ether/IP/UDP/vxlan/Ether/IP/TCP packet and
    process outer IP and UDP, plus inner IP and inner TCP TX
    checksum in the NIC:
   - set l2_len = 14, l3_len = 20, inner_l2_len = 14,
     inner_l3_len = 20,
     ol_flags = PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_VXLAN_CKSUM,
     write all checksums to 0 in the packet
D- To send a Ether/IP/UDP/vxlan/Ether/IP/TCP packet and
    process outer IP and UDP TX checksum in the NIC:
   - set l2_len = 14, l3_len = 20,
     ol_flags = PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM,
     write all checksums to 0 in the packet
First, can you confirm this? I think it is very important to
document it, as this is a public API that can be used by DPDK users.
To validate my modifications, I will try to reproduce the test plan
described in [6]. The test report contains useful information but
I'm not sure to understand the following:
    Enable outer IP,UDP,TCP,SCTP and inner IP,UDP checksum offload
    when inner L4 protocal is UDP.
       testpmd>tx_checksum set 0 0xf3
    Enable outer IP,UDP,TCP,SCTP and inner IP,TCP,SCTP checksum offload
    when inner L4 protocal is TCP or SCTP.
       testpmd>tx_checksum set 0 0xfd
Can you give details about the signification of the bits used in the
tx_checksum command? For instance, there is only one flag to enable
tx checksum in the mbuf, so I don't understand how it's possible to
do 0x44 = (inner tcp + outer tcp).
Today, there are hard-coded values for flags: 4 bits (0-3) for legacy
checksums, 4 bits for inner checksums (4-7), and one bit (bit 11?!)
for vlan header. These bits are checked without defines at several
places in the code. Moreover, there are some places in code where
the testpmd port flags and the mbuf flags are mixed up. Example in
macswap.c : mb->ol_flags = txp->tx_ol_flags;
I suggest to remove all hardcoded values except in
cmd_tx_cksum_set_parsed() and replace them by the flags definitions
from rte_mbuf.h. As a result, the new possible arguments of
cmd_tx_cksum_set_parsed() will map the mbuf flags:
   - a flag to enable ip tx cksum
   - a flag to enable udp tx cksum
   - a flag to enable tcp tx cksum
   - a flag to enable sctp tx cksum
   - a flag to enable vxlan tx cksum
If vxlan tx cksum is set, therefore the other considered checksum
will concern the inner headers. Do you think it matches your needs?
I think that the csum_only forward engine is now a bit complicated.
As it's an example that shows how to configure the tx checksum, I
think its behavior should be described somewhere, maybe in a comment in 
the file.
By the way, I was looking at the mbuf structure, and I see that a new
packet_type field was added. There is no explanation about what this
field should contain and how we shall use it (in commit log or in
comments). Can you please explain it?
To conclude, I'd like to propose a merge of the TSO series but
I'm currently blocked by these questions. Please, could you
help me to progress on this?
Regards,
Olivier
[1] http://dpdk.org/ml/archives/dev/2014-May/002549.html
[2] http://dpdk.org/ml/archives/dev/2014-October/007156.html
[3] 
http://dpdk.org/browse/dpdk/commit/lib/librte_mbuf/rte_mbuf.h?id=15dbb63ef9e9f108e7dcd837b88234f27a1ec258
[4] 
http://dpdk.org/browse/dpdk/commit/lib/librte_mbuf/rte_mbuf.h?id=77b8301733c3cd946648ca4a1375e3db0a952216
[5] 
http://dpdk.org/browse/dpdk/commit/lib/librte_mbuf/rte_mbuf.h?id=73b7d59cf4f6faf5ccd83ce706473e75c6fb8c3b
[6] http://dpdk.org/ml/archives/dev/2014-October/007157.html
^ permalink raw reply	[flat|nested] 47+ messages in thread
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-11-04  8:19   ` Olivier MATZ
@ 2014-11-05  6:02     ` Liu, Jijiang
  2014-11-05 10:28       ` Olivier MATZ
  0 siblings, 1 reply; 47+ messages in thread
From: Liu, Jijiang @ 2014-11-05  6:02 UTC (permalink / raw)
  To: Olivier MATZ; +Cc: dev
Hi Olivier,
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Tuesday, November 4, 2014 4:19 PM
> To: Liu, Jijiang
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum
> offload
> 
> Hello Jijiang,
> 
> On 10/27/2014 03:13 AM, Jijiang Liu wrote:
> > Add test cases in testpmd to test VxLAN Tx Checksum offload, which include
> >   - IPv4 and IPv6 packet
> >   - outer L3, inner L3 and L4 checksum offload for Tx side.
> >
> > Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
> 
> I'm trying to port the test of TSO in csum_only.c which was originally part of this
> patch [1].
> 
> Meanwhile, the source was modified by the patch provided by the email I'm
> replying to (also available at [2]), and I would like to understand what is the
> purpose of it.
> 
> First, the code checks if the mbuf has the flag PKT_RX_TUNNEL_IPV4_HDR.
> What is the meaning of this flag? It was added by [3], but there is no description
> in comments or in the commit log explaining in which case this flag is set by the
> driver. The name supposes that this flag is set when the received packet is an IPv4
> tunnel, but the commit log talks about vxlan.
The flag PKT_RX_TUNNEL_IPV4_HDR can be used for all tunneling packet types with outer IPV4 header.
For example:
IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv4:
MAC, IPV4, GRENAT, MAC, IPV4, SCTP, PAY4 
MAC, IPV4, GRENAT, MAC, IPV6, UDP, PAY4
MAC, IPV4, GRENAT, MAC, IPV6, UDP, PAY4
These tunneling packet formats have a common point that is outer IPv4 header here.
Only VXLAN tunneling packet is supported in DPDK for i40e now, so  the commit log talks about VXLAN .
 
> Then, if this flag was present, the code assumes it's a vxlan packet.
> If one of inner checksum is specified by the user in cmdline, the flag
> PKT_TX_VXLAN_CKSUM is added to the mbuf. This flag definition was added by
> [4] (at the wrong place, I'll fix it in my patchset). 
> What is the meaning of this flag?
> Is it enough to checksum outer L3, inner L3, and inner L4 as specified in commit
> log? If yes, why are the other flags PKT_TX_IPV4_CSUM, PKT_TX_UDP_CKSUM,
> (...) added in the mbuf later?
> In my comprehension, these flags are needed in addition to
> PKT_TX_VXLAN_CKSUM to do the checksum of the inner headers.
Yes, these flags(PKT_TX_IPV4_CSUM, PKT_TX_UDP_CKSUM)  are needed by HW offload of non-tunneling and tunneling  packet.
> In general, I need to understand how to use all the new offloading stuff. In [5],
> new fields were added in the mbuf structure (inner_l3_len and inner_l2_len). I'm
> not sure I understand which fields have to be filled. Below is my understanding,
> can you please check that it is correct?
> 
> A- To send a Ether/IP/TCP packet and process IP and TCP TX
>     checksum in the NIC:
> 
>    - set l2_len = 14, l3_len = 20,
>      ol_flags = PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM,
>      write all checksums to 0 in the packet
   IP checksum is 0, but tcp checksum is not 0.
   tcp_hdr->cksum = get_ipv4_psd_sum(ipv4_hdr);
> B- To send a Ether/IP/UDP/vxlan/Ether/IP/TCP packet and
>     process inner IP and inner TCP TX checksum in the NIC:
> 
>    - set l2_len = 14+20+8+8+14, l3_len = 20,
No, l2_len is outer L2 length, its value also is 14. 
>      ol_flags = PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM,
>      write all checksums to 0 in the packet
   IP checksum is 0, but tcp checksum is not 0.
   tcp_hdr->cksum = get_ipv4_psd_sum(ipv4_hdr);
> C- To send a Ether/IP/UDP/vxlan/Ether/IP/TCP packet and
>     process outer IP and UDP, plus inner IP and inner TCP TX
>     checksum in the NIC:
> 
>    - set l2_len = 14, l3_len = 20, inner_l2_len = 14,
>      inner_l3_len = 20,
Yes
>      ol_flags = PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
> PKT_TX_VXLAN_CKSUM,
Yes
>      write all checksums to 0 in the packet
   Outer and inner IP checksum is 0, but tcp checksum is not 0.
   tcp_hdr->cksum = get_ipv4_psd_sum(ipv4_hdr);
> D- To send a Ether/IP/UDP/vxlan/Ether/IP/TCP packet and
>     process outer IP and UDP TX checksum in the NIC:
> 
>    - set l2_len = 14, l3_len = 20,
>      ol_flags = PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM,
Yes
>      write all checksums to 0 in the packet
Outer  IP checksum is 0, but tcp checksum is not 0.
   tcp_hdr->cksum = get_ipv4_psd_sum(ipv4_hdr);
> First, can you confirm this? I think it is very important to document it, as this is a
> public API that can be used by DPDK users.
These should be included in documents.
> To validate my modifications, I will try to reproduce the test plan described in [6].
> The test report contains useful information but I'm not sure to understand the
> following:
> 
>     Enable outer IP,UDP,TCP,SCTP and inner IP,UDP checksum offload
>     when inner L4 protocal is UDP.
>        testpmd>tx_checksum set 0 0xf3
"tx_checksum set 0 0xf3" should be "tx_checksum set 0 0x33", but the tester use 0xFX (that is to say, enable all inner TX flag)in order to write test script easily.
>     Enable outer IP,UDP,TCP,SCTP and inner IP,TCP,SCTP checksum offload
>     when inner L4 protocal is TCP or SCTP.
>        testpmd>tx_checksum set 0 0xfd
> 
> Can you give details about the signification of the bits used in the tx_checksum
> command? For instance, there is only one flag to enable tx checksum in the mbuf,
> so I don't understand how it's possible to do 0x44 = (inner tcp + outer tcp).
These bits meanings/help information  were provided in the patch
http://dpdk.org/ml/archives/dev/2014-October/007156.html 
> Today, there are hard-coded values for flags: 4 bits (0-3) for legacy checksums, 4
> bits for inner checksums (4-7), and one bit (bit 11?!) for vlan header. These bits
> are checked without defines at several places in the code. Moreover, there are
> some places in code where the testpmd port flags and the mbuf flags are mixed
> up. Example in macswap.c : mb->ol_flags = txp->tx_ol_flags;
I thought this is an issue.
> I suggest to remove all hardcoded values except in
> cmd_tx_cksum_set_parsed() and replace them by the flags definitions from
> rte_mbuf.h. As a result, the new possible arguments of
> cmd_tx_cksum_set_parsed() will map the mbuf flags:
>    - a flag to enable ip tx cksum
>    - a flag to enable udp tx cksum
>    - a flag to enable tcp tx cksum
>    - a flag to enable sctp tx cksum
>    - a flag to enable vxlan tx cksum
> If vxlan tx cksum is set, therefore the other considered checksum will concern the
> inner headers. Do you think it matches your needs?
As to HW TX checksum offload, do you have special requirement for implementing TSO?
I suggest to keep on using bit to indicate different protocol flag. this approach has good flexibility and extensibility.
For example, a VXLAN packet format: MAC/IP/UDP/VXLAN/inner MAC/inner IP/inner TCP/PAY4.
If the ip tx cksum and vxlan tx cksum  are set, but tcp tx cksum is not set,  and HW offload  of inner TCP TX checksum won't work.
In addition, sometimes we want to get the following performance data using testpmd
1. only enable outer IP TX checksum  
2. only enable inner IP TX checksum
3. only enable inner TCP/UDP/SCTP checksum
4. only enable inner IP TX checksum and inner TCP/UDP/SCTP checksum.
Now we have provided inner BIT masks, we can use these combinations to test it easily.
Actually, we just need to know that if  the PKT_TX_IPV4_CSUM, PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM, PKT_TX_UDP_CKSUM and PKT_TX_VXLAN_CKSUM are set in ol_flags in the i40e driver.
Could you please look at the function i40e_txd_enable_checksum() .
> I think that the csum_only forward engine is now a bit complicated.
> As it's an example that shows how to configure the tx checksum, I think its
> behavior should be described somewhere, maybe in a comment in the file.
I agree.
> By the way, I was looking at the mbuf structure, and I see that a new packet_type
> field was added. There is no explanation about what this field should contain and
> how we shall use it (in commit log or in comments). Can you please explain it?
+	/**
+	 * The packet type, which is used to indicate ordinary packet and also
+	 * tunneled packet format, i.e. each number is represented a type of
+	 * packet.
+	 */
+	uint16_t packet_type
Regarding adding a packet_type in mbuf, we ever had a lot of discussions as follows:
http://dpdk.org/ml/archives/dev/2014-October/007027.html
http://dpdk.org/ml/archives/dev/2014-September/005240.html
http://dpdk.org/ml/archives/dev/2014-September/005241.html
http://dpdk.org/ml/archives/dev/2014-September/005274.html
> To conclude, I'd like to propose a merge of the TSO series but I'm currently
> blocked by these questions. Please, could you help me to progress on this?
Let me know if you have any questions? 
> Regards,
> Olivier
> 
> 
> [1] http://dpdk.org/ml/archives/dev/2014-May/002549.html
> [2] http://dpdk.org/ml/archives/dev/2014-October/007156.html
> [3]
> http://dpdk.org/browse/dpdk/commit/lib/librte_mbuf/rte_mbuf.h?id=15dbb63ef
> 9e9f108e7dcd837b88234f27a1ec258
> [4]
> http://dpdk.org/browse/dpdk/commit/lib/librte_mbuf/rte_mbuf.h?id=77b83017
> 33c3cd946648ca4a1375e3db0a952216
> [5]
> http://dpdk.org/browse/dpdk/commit/lib/librte_mbuf/rte_mbuf.h?id=73b7d59cf
> 4f6faf5ccd83ce706473e75c6fb8c3b
> [6] http://dpdk.org/ml/archives/dev/2014-October/007157.html
^ permalink raw reply	[flat|nested] 47+ messages in thread 
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-11-05  6:02     ` Liu, Jijiang
@ 2014-11-05 10:28       ` Olivier MATZ
  2014-11-06 11:24         ` Liu, Jijiang
                           ` (2 more replies)
  0 siblings, 3 replies; 47+ messages in thread
From: Olivier MATZ @ 2014-11-05 10:28 UTC (permalink / raw)
  To: Liu, Jijiang; +Cc: dev
Hi Jijiang,
Thank you for your answer. Please find some comments below.
On 11/05/2014 07:02 AM, Liu, Jijiang wrote:
>> First, the code checks if the mbuf has the flag PKT_RX_TUNNEL_IPV4_HDR.
>> What is the meaning of this flag? It was added by [3], but there is no description
>> in comments or in the commit log explaining in which case this flag is set by the
>> driver. The name supposes that this flag is set when the received packet is an IPv4
>> tunnel, but the commit log talks about vxlan.
>
> The flag PKT_RX_TUNNEL_IPV4_HDR can be used for all tunneling packet types with outer IPV4 header.
> For example:
> IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv4:
> MAC, IPV4, GRENAT, MAC, IPV4, SCTP, PAY4
> MAC, IPV4, GRENAT, MAC, IPV6, UDP, PAY4
> MAC, IPV4, GRENAT, MAC, IPV6, UDP, PAY4
> These tunneling packet formats have a common point that is outer IPv4 header here.
>
> Only VXLAN tunneling packet is supported in DPDK for i40e now, so  the commit log talks about VXLAN .
Is it possible to have a more formal definition? For instance, is the
following definition below correct?
  "the PKT_RX_TUNNEL_IPV4_HDR flag CAN be set by a driver if the packet
   contains a tunneling protocol inside an IPv4 header".
If the definition above is correct, I don't see how this flag can help
an application to run faster. There is already a flag telling if there
is a valid IPv4 header (PKT_RX_IPV4_HDR). As the PKT_RX_TUNNEL_IPV4_HDR
flag does not tell what is ip->proto, the work done by an application
to dissect a packet would be exactly the same with or without this flag.
Please, can you give an example showing in which conditions this flag
can help an application?
>> What is the meaning of this flag?
>> Is it enough to checksum outer L3, inner L3, and inner L4 as specified in commit
>> log? If yes, why are the other flags PKT_TX_IPV4_CSUM, PKT_TX_UDP_CKSUM,
>> (...) added in the mbuf later?
>> In my comprehension, these flags are needed in addition to
>> PKT_TX_VXLAN_CKSUM to do the checksum of the inner headers.
>
> Yes, these flags(PKT_TX_IPV4_CSUM, PKT_TX_UDP_CKSUM)  are needed by HW offload of non-tunneling and tunneling  packet.
OK, so I understand that when PKT_TX_VXLAN_CKSUM is set, if the
driver supports it, it will process IP and UDP checksum of outer
header, using l2_len and l3_len.
>> In general, I need to understand how to use all the new offloading stuff. In [5],
>> new fields were added in the mbuf structure (inner_l3_len and inner_l2_len). I'm
>> not sure I understand which fields have to be filled. Below is my understanding,
>> can you please check that it is correct?
>>
>> A- To send a Ether/IP/TCP packet and process IP and TCP TX
>>      checksum in the NIC:
>>
>>     - set l2_len = 14, l3_len = 20,
>>       ol_flags = PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM,
>>       write all checksums to 0 in the packet
>     IP checksum is 0, but tcp checksum is not 0.
>     tcp_hdr->cksum = get_ipv4_psd_sum(ipv4_hdr);
OK, right. I forgot it but indeed it's in csumonly.c
>> B- To send a Ether/IP/UDP/vxlan/Ether/IP/TCP packet and
>>      process inner IP and inner TCP TX checksum in the NIC:
>>
>>     - set l2_len = 14+20+8+8+14, l3_len = 20,
>>       ol_flags = PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM,
>>       write all checksums to 0 in the packet
>
> No, l2_len is outer L2 length, its value also is 14.
If you set l2_len to 14, how could the driver guess the offset of
the inner IP header? I'm pretty sure it should work with 14+20+8+8+14.
>> C- To send a Ether/IP/UDP/vxlan/Ether/IP/TCP packet and
>>      process outer IP and UDP, plus inner IP and inner TCP TX
>>      checksum in the NIC:
>>
>>     - set l2_len = 14, l3_len = 20, inner_l2_len = 14,
>>       inner_l3_len = 20,
> Yes
>>       ol_flags = PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
>> PKT_TX_VXLAN_CKSUM,
> Yes
>>       write all checksums to 0 in the packet
>
>     Outer and inner IP checksum is 0, but tcp checksum is not 0.
>     tcp_hdr->cksum = get_ipv4_psd_sum(ipv4_hdr);
What about outer udp checksum? Why shouldn't we set it to the
phdr checksum too?
>> D- To send a Ether/IP/UDP/vxlan/Ether/IP/TCP packet and
>>      process outer IP and UDP TX checksum in the NIC:
>>
>>     - set l2_len = 14, l3_len = 20,
>>       ol_flags = PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM,
>
> Yes
>>       write all checksums to 0 in the packet
> Outer  IP checksum is 0, but tcp checksum is not 0.
>     tcp_hdr->cksum = get_ipv4_psd_sum(ipv4_hdr);
>
>> First, can you confirm this? I think it is very important to document it, as this is a
>> public API that can be used by DPDK users.
>
> These should be included in documents.
Another thing is surprising me.
- if PKT_TX_VXLAN_CKSUM is not set (legacy use case), then the
   driver use l2_len and l3_len to offload inner IP/UDP/TCP checksums.
- if PKT_TX_VXLAN_CKSUM is set, then the driver has to use
   inner_l{23}_len instead of l{23}_len for the same operation.
Adding PKT_TX_VXLAN_CKSUM changes the semantic of l2_len and l3_len.
To fix this, I suggest to remove the new fields inner_l{23}_len then
add outer_l{23}_len instead. Therefore, the semantic of l2_len and
l3_len would not change, and a driver would always use the same field
for a specific offload.
For my TSO development, I will follow the current semantic.
>> To validate my modifications, I will try to reproduce the test plan described in [6].
>> The test report contains useful information but I'm not sure to understand the
>> following:
>>
>>      Enable outer IP,UDP,TCP,SCTP and inner IP,UDP checksum offload
>>      when inner L4 protocal is UDP.
>>         testpmd>tx_checksum set 0 0xf3
>
> "tx_checksum set 0 0xf3" should be "tx_checksum set 0 0x33", but the tester use 0xFX (that is to say, enable all inner TX flag)in order to write test script easily.
To me, there is a strange thing:
- the mbuf flags only allows to checksum l3 (ip), l4 (tcp/udp/sctp),
   and vxlan.
- the user api in command line talks about inner and outer l3 + l4
   layers + vxlan.
Therefore many combinations are impossible to do or are non-sense.
Examples:
- outer IP + outer TCP + inner IP  -> no sense, there is no supported
   tunnel protocol above TCP
- outer IP + inner IP -> impossible to set up in mbuf flags
- outer IP: what is the meaning? same than inner IP? or does it
   means that the application has to parse the packet to find a
   tunnel? In this case, which tunnels are supported by the application?
- ...
Anyway, I will try to continue to use the current logic and document
it as much as I can.
>>      Enable outer IP,UDP,TCP,SCTP and inner IP,TCP,SCTP checksum offload
>>      when inner L4 protocal is TCP or SCTP.
>>         testpmd>tx_checksum set 0 0xfd
>>
>> Can you give details about the signification of the bits used in the tx_checksum
>> command? For instance, there is only one flag to enable tx checksum in the mbuf,
>> so I don't understand how it's possible to do 0x44 = (inner tcp + outer tcp).
>
> These bits meanings/help information  were provided in the patch
> http://dpdk.org/ml/archives/dev/2014-October/007156.html
Sorry, but the commit provides very few information about how to use
these flags (although it was already the case before your commit). I
will try to document it from what I understand from the code.
>> Today, there are hard-coded values for flags: 4 bits (0-3) for legacy checksums, 4
>> bits for inner checksums (4-7), and one bit (bit 11?!) for vlan header. These bits
>> are checked without defines at several places in the code. Moreover, there are
>> some places in code where the testpmd port flags and the mbuf flags are mixed
>
>> up. Example in macswap.c : mb->ol_flags = txp->tx_ol_flags;
> I thought this is an issue.
>
>> I suggest to remove all hardcoded values except in
>> cmd_tx_cksum_set_parsed() and replace them by the flags definitions from
>> rte_mbuf.h. As a result, the new possible arguments of
>> cmd_tx_cksum_set_parsed() will map the mbuf flags:
>>     - a flag to enable ip tx cksum
>>     - a flag to enable udp tx cksum
>>     - a flag to enable tcp tx cksum
>>     - a flag to enable sctp tx cksum
>>     - a flag to enable vxlan tx cksum
>> If vxlan tx cksum is set, therefore the other considered checksum will concern the
>> inner headers. Do you think it matches your needs?
>
> As to HW TX checksum offload, do you have special requirement for implementing TSO?
Yes. TSO implies TX TCP and IP checksum offload.
My first requirement is to understand what is currently implemented, and
the second one is to do build an API that is easily understandable and
usable by someone else.
>> By the way, I was looking at the mbuf structure, and I see that a new packet_type
>> field was added. There is no explanation about what this field should contain and
>> how we shall use it (in commit log or in comments). Can you please explain it?
>
> +	/**
> +	 * The packet type, which is used to indicate ordinary packet and also
> +	 * tunneled packet format, i.e. each number is represented a type of
> +	 * packet.
> +	 */
> +	uint16_t packet_type
> Regarding adding a packet_type in mbuf, we ever had a lot of discussions as follows:
>
> http://dpdk.org/ml/archives/dev/2014-October/007027.html
> http://dpdk.org/ml/archives/dev/2014-September/005240.html
> http://dpdk.org/ml/archives/dev/2014-September/005241.html
> http://dpdk.org/ml/archives/dev/2014-September/005274.html
Sure, there was a lot of discussions... and I still don't understand how
I can use it in my application. The reason is simple: I don't know
what kind of data is stored in this field (no #define in rte_mbuf.h).
This generic field is filled by reading a very specific register of i40e
driver. So:
- I cannot code an application that use it
- I cannot code a pmd that would fill this field
Conclusion: this field is absolutely useless for anyone that has not
read the datasheets of i40e... which is probably the case for 95% of
DPDK users.
Regards,
Olivier
^ permalink raw reply	[flat|nested] 47+ messages in thread
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-11-05 10:28       ` Olivier MATZ
@ 2014-11-06 11:24         ` Liu, Jijiang
  2014-11-06 13:08           ` Olivier MATZ
  2014-11-07  0:43         ` Yong Wang
  2014-11-10  6:03         ` [dpdk-dev] " Liu, Jijiang
  2 siblings, 1 reply; 47+ messages in thread
From: Liu, Jijiang @ 2014-11-06 11:24 UTC (permalink / raw)
  To: Olivier MATZ; +Cc: dev
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Wednesday, November 5, 2014 6:28 PM
> To: Liu, Jijiang
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum
> offload
> 
> Hi Jijiang,
> 
> Thank you for your answer. Please find some comments below.
> 
> On 11/05/2014 07:02 AM, Liu, Jijiang wrote:
> >> First, the code checks if the mbuf has the flag PKT_RX_TUNNEL_IPV4_HDR.
> >> What is the meaning of this flag? It was added by [3], but there is
> >> no description in comments or in the commit log explaining in which
> >> case this flag is set by the driver. The name supposes that this flag
> >> is set when the received packet is an IPv4 tunnel, but the commit log talks
> about vxlan.
> >
> > The flag PKT_RX_TUNNEL_IPV4_HDR can be used for all tunneling packet types
> with outer IPV4 header.
> > For example:
> > IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv4:
> > MAC, IPV4, GRENAT, MAC, IPV4, SCTP, PAY4 MAC, IPV4, GRENAT, MAC, IPV6,
> > UDP, PAY4 MAC, IPV4, GRENAT, MAC, IPV6, UDP, PAY4 These tunneling
> > packet formats have a common point that is outer IPv4 header here.
> >
> > Only VXLAN tunneling packet is supported in DPDK for i40e now, so  the commit
> log talks about VXLAN .
> 
> Is it possible to have a more formal definition? For instance, is the following
> definition below correct?
> 
>   "the PKT_RX_TUNNEL_IPV4_HDR flag CAN be set by a driver if the packet
>    contains a tunneling protocol inside an IPv4 header".
Yes, correct.
> If the definition above is correct, I don't see how this flag can help an application
> to run faster. There is already a flag telling if there is a valid IPv4 header
> (PKT_RX_IPV4_HDR). As the PKT_RX_TUNNEL_IPV4_HDR flag does not tell what
> is ip->proto, the work done by an application to dissect a packet would be exactly
> the same with or without this flag.
If the PKT_RX_TUNNEL_IPV4_HDR flag is set, which means driver tell application that incoming packet is encapsulated packet, and application will process / analyse the packet according to tunneling format indicated by packet_type.
In terms of VXLAN packet format (MAC,IPv4,UDP,VXLAN,MAC,IP,TCP,PAY4), if only the PKT_RX_IPV4_HDR flag is set, and application regard its payload as "from VXLAN to PAY4", but actually, the real payload is PAY4.
  
> Please, can you give an example showing in which conditions this flag can help an
> application?
http://dpdk.org/ml/archives/dev/2014-October/007151.html
http://dpdk.org/ml/archives/dev/2014-October/007156.html
We used the PKT_RX_TUNNEL_IPV4_HDR in the two patches to help application identify incoming packet is tunneling packet.
 
^ permalink raw reply	[flat|nested] 47+ messages in thread 
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-11-06 11:24         ` Liu, Jijiang
@ 2014-11-06 13:08           ` Olivier MATZ
  2014-11-06 14:27             ` Liu, Jijiang
  0 siblings, 1 reply; 47+ messages in thread
From: Olivier MATZ @ 2014-11-06 13:08 UTC (permalink / raw)
  To: Liu, Jijiang; +Cc: dev
Hello Jijiang,
On 11/06/2014 12:24 PM, Liu, Jijiang wrote:
>> Is it possible to have a more formal definition? For instance, is the following
>> definition below correct?
>>
>>   "the PKT_RX_TUNNEL_IPV4_HDR flag CAN be set by a driver if the packet
>>    contains a tunneling protocol inside an IPv4 header".
> 
> Yes, correct.
> 
>> If the definition above is correct, I don't see how this flag can help an application
>> to run faster. There is already a flag telling if there is a valid IPv4 header
>> (PKT_RX_IPV4_HDR). As the PKT_RX_TUNNEL_IPV4_HDR flag does not tell what
>> is ip->proto, the work done by an application to dissect a packet would be exactly
>> the same with or without this flag.
> 
> If the PKT_RX_TUNNEL_IPV4_HDR flag is set, which means driver tell application that incoming packet is encapsulated packet, and application will process / analyse the packet according to tunneling format indicated by packet_type.
Where is it written that when the PKT_RX_TUNNEL_IPV4_HDR flag is set,
the packet_type is also set?
To which header packet_type refers to? Inner or Outer? Depends?
What are the possible values for packet_type?
Is the PKT_RX_TUNNEL_IPV4_HDR flag set in mbuf related to the commands
rx_vxlan_port add|del? If yes, it should be written in the API!
(assuming this is the right API design)
When the PKT_RX_TUNNEL_IPV4_HDR flag is set, does PKT_RX_IPV4_HDR or
PKT_RX_VLAN_PKT concerns the inner or outer headers? I hope it still
concerns the first one, else it would break many applications relying
on the these flags.
As you can see, today, an application cannot use PKT_RX_TUNNEL_IPV4_HDR
or m->packet_type because it is not documented.
> In terms of VXLAN packet format (MAC,IPv4,UDP,VXLAN,MAC,IP,TCP,PAY4), if only the PKT_RX_IPV4_HDR flag is set, and application regard its payload as "from VXLAN to PAY4", but actually, the real payload is PAY4.
>   
>> Please, can you give an example showing in which conditions this flag can help an
>> application?
> 
> http://dpdk.org/ml/archives/dev/2014-October/007151.html
> http://dpdk.org/ml/archives/dev/2014-October/007156.html
> 
> We used the PKT_RX_TUNNEL_IPV4_HDR in the two patches to help application identify incoming packet is tunneling packet.
As you agreed on "the PKT_RX_TUNNEL_IPV4_HDR flag CAN be set by a
driver", it means that if the flag is not present, the application
should do the check in software. And there are several reasons why
the flag may not be present:
 - the packet is not a VxLAN packet
 - the hw or driver was not able to recognize it (I don't know, maybe
   if there are IP options the hw will not recognize it?)
 - the hw or driver does not support it (all drivers except i40e)
So the application has to provide the software equivalent code
to process PAY4.
The "csum" testpmd forwarding engine is now a bad example because it
is not able to do the same processing in software or hardware. It
now only works with an i40e driver, which was not the case before. Also,
the semantic of the command line arguments changed. Before, the meaning
was "if the flag is set, process the checksum in the NIC, else in SW".
Now, it's "huh... it depends on the flag."
I will submit a rework of the csum fowarding engine to clarify its
behavior.
Regards,
Olivier
^ permalink raw reply	[flat|nested] 47+ messages in thread 
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-11-06 13:08           ` Olivier MATZ
@ 2014-11-06 14:27             ` Liu, Jijiang
  0 siblings, 0 replies; 47+ messages in thread
From: Liu, Jijiang @ 2014-11-06 14:27 UTC (permalink / raw)
  To: Olivier MATZ; +Cc: dev
Hi Olivier,
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Thursday, November 6, 2014 9:09 PM
> To: Liu, Jijiang
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum
> offload
> 
> Hello Jijiang,
> 
> On 11/06/2014 12:24 PM, Liu, Jijiang wrote:
> >> Is it possible to have a more formal definition? For instance, is the
> >> following definition below correct?
> >>
> >>   "the PKT_RX_TUNNEL_IPV4_HDR flag CAN be set by a driver if the packet
> >>    contains a tunneling protocol inside an IPv4 header".
> >
> > Yes, correct.
> >
> >> If the definition above is correct, I don't see how this flag can
> >> help an application to run faster. There is already a flag telling if
> >> there is a valid IPv4 header (PKT_RX_IPV4_HDR). As the
> >> PKT_RX_TUNNEL_IPV4_HDR flag does not tell what is ip->proto, the work
> >> done by an application to dissect a packet would be exactly the same with or
> without this flag.
> >
> > If the PKT_RX_TUNNEL_IPV4_HDR flag is set, which means driver tell
> application that incoming packet is encapsulated packet, and application will
> process / analyse the packet according to tunneling format indicated by
> packet_type.
> 
> Where is it written that when the PKT_RX_TUNNEL_IPV4_HDR flag is set, the
> packet_type is also set?
> 
> To which header packet_type refers to? Inner or Outer? Depends?
> 
> What are the possible values for packet_type?
> 
> Is the PKT_RX_TUNNEL_IPV4_HDR flag set in mbuf related to the commands
> rx_vxlan_port add|del? If yes, it should be written in the API!
> (assuming this is the right API design)
> 
> When the PKT_RX_TUNNEL_IPV4_HDR flag is set, does PKT_RX_IPV4_HDR or
> PKT_RX_VLAN_PKT concerns the inner or outer headers? I hope it still concerns
> the first one, else it would break many applications relying on the these flags.
> 
> As you can see, today, an application cannot use PKT_RX_TUNNEL_IPV4_HDR or
> m->packet_type because it is not documented.
> 
> 
> > In terms of VXLAN packet format (MAC,IPv4,UDP,VXLAN,MAC,IP,TCP,PAY4), if
> only the PKT_RX_IPV4_HDR flag is set, and application regard its payload as "from
> VXLAN to PAY4", but actually, the real payload is PAY4.
> >
> >> Please, can you give an example showing in which conditions this flag
> >> can help an application?
> >
> > http://dpdk.org/ml/archives/dev/2014-October/007151.html
> > http://dpdk.org/ml/archives/dev/2014-October/007156.html
> >
> > We used the PKT_RX_TUNNEL_IPV4_HDR in the two patches to help
> application identify incoming packet is tunneling packet.
> 
> As you agreed on "the PKT_RX_TUNNEL_IPV4_HDR flag CAN be set by a driver",
> it means that if the flag is not present, the application should do the check in
> software. And there are several reasons why the flag may not be present:
>  - the packet is not a VxLAN packet
As long as it is tunneling packet with IPv4/6 header, the flag should be set by driver.
>  - the hw or driver was not able to recognize it (I don't know, maybe
>    if there are IP options the hw will not recognize it?) 
>  - the hw or driver does not support it (all drivers except i40e)
E1000/ixgbe don't support VXLAN packet and another tunneling packet, so driver don't need to set this flag.
As to other NICs that support tunneling packet , I don't why HW or driver can't recognize it.
> So the application has to provide the software equivalent code to process PAY4.
> 
> The "csum" testpmd forwarding engine is now a bad example because it is not
> able to do the same processing in software or hardware. It now only works with
> an i40e driver, which was not the case before. Also, the semantic of the command
> line arguments changed. Before, the meaning was "if the flag is set, process the
> checksum in the NIC, else in SW".
> Now, it's "huh... it depends on the flag."
Currently, If the packet is non-tunneling packet, I believe the  "csum" testpmd forwarding engine also works well as before.
we changed the engine as follows, which is compatible with previous implementation.
-		if (pkt_ol_flags & PKT_RX_IPV4_HDR) {
+		if (pkt_ol_flags & (PKT_RX_IPV4_HDR | PKT_RX_TUNNEL_IPV4_HDR)) {
...
-		else if (pkt_ol_flags & PKT_RX_IPV6_HDR) {
+		} else if (pkt_ol_flags & (PKT_RX_IPV6_HDR | PKT_RX_TUNNEL_IPV6_HDR)) {
> I will submit a rework of the csum fowarding engine to clarify its behavior.
OK. good.
> Regards,
> Olivier
^ permalink raw reply	[flat|nested] 47+ messages in thread
 
 
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-11-05 10:28       ` Olivier MATZ
  2014-11-06 11:24         ` Liu, Jijiang
@ 2014-11-07  0:43         ` Yong Wang
  2014-11-07 17:16           ` Olivier MATZ
  2014-11-10  6:03         ` [dpdk-dev] " Liu, Jijiang
  2 siblings, 1 reply; 47+ messages in thread
From: Yong Wang @ 2014-11-07  0:43 UTC (permalink / raw)
  To: Olivier MATZ, Liu, Jijiang; +Cc: dev
>> As to HW TX checksum offload, do you have special requirement for implementing TSO?
> Yes. TSO implies TX TCP and IP checksum offload.
Is this a general requirement or something specific to ixgbe/i40e? FWIW, vmxnet3 device does not support tx IP checksum offload but doe support TSO.  In that case, we cannot leave IP checksum field as 0 (the correct checksum needs to be filled in the header) before passing it the the NIC when TSO is enabled.
Yong
^ permalink raw reply	[flat|nested] 47+ messages in thread 
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-11-07  0:43         ` Yong Wang
@ 2014-11-07 17:16           ` Olivier MATZ
  2014-11-10 11:39             ` Ananyev, Konstantin
       [not found]             ` <D0868B54.24DBB%yongwang@vmware.com>
  0 siblings, 2 replies; 47+ messages in thread
From: Olivier MATZ @ 2014-11-07 17:16 UTC (permalink / raw)
  To: Yong Wang, Liu, Jijiang; +Cc: dev
Hello Yong,
On 11/07/2014 01:43 AM, Yong Wang wrote:
>>> As to HW TX checksum offload, do you have special requirement for implementing TSO?
> 
>> Yes. TSO implies TX TCP and IP checksum offload.
> 
> Is this a general requirement or something specific to ixgbe/i40e? FWIW,
> vmxnet3 device does not support tx IP checksum offload but doe support
> TSO.  In that case, we cannot leave IP checksum field as 0 (the correct
> checksum needs to be filled in the header) before passing it the the NIC
> when TSO is enabled.
This is a good question because we need to define the proper API that
will work on other PMDs in the future.
Indeed, there is a hardware specificity in ixgbe: when TSO is enabled,
the IP checksum flag must also be passed to the driver if it's IPv4.
>From 82599 datasheets (7.2.3.2.4 Advanced Transmit Data Descriptor):
    IXSM (bit 0) — Insert IP Checksum: This field indicates that IP
    checksum must be inserted. In IPv6 mode, it must be reset to 0b.
    If DCMD.TSE and TUCMD.IPV4 are set, IXSM must be set as well.
    If this bit is set, the packet should at least contain an
    IP header.
If we allow the user to give the TSO flag without the IP checksum
flag in mbuf flags, the ixgbe driver would have to set the IP checksum
flag in hardware descriptors if the packet is IPv4. The driver would
have to parse the IP header: this is not a problem as we already need
it for TCP checksum.
To summarize, I think we have 3 options when transmitting a packet to be
segmented using TSO:
- set IP checksum to 0 in the application: in this case, it would
  require additional work in virtual drivers if the peer expects
  to receive a packet with a valid IP checksum. But I'm wondering
  what is the need for calculating a checksum when transmitting on
  a virtual device (the peer receiving the packet knows that the
  packet is not corrupted as it comes from memory). Moreover, if the
  device advertise TSO, I assume it can also advertise IP checksum
  offload.
- calculate the IP checksum in the application. It would take additional
  cycles although it may not be needed as the driver probably knows
  how to calculate it.
- if the driver supports both TSO and IP checksum, the 2 flags MUST
  be given to the driver and the IP checksum must be set to 0 and the
  checksum cannot be calculated in software. If the driver only
  supports TSO, the checksum has to be calculated in software.
Currently, I choosen the first solution, but I'm open to change the
design. Maybe the 3rd one is also a good solution.
By the way, we had the same kind of discussion with Konstantin [1]
about what to do with the TCP checksum. My feeling is that setting it
to the pseudo-header checksum is the best we can do:
 - linux does that
 - many hardware requires that (this is not the case for ixgbe, which
   need a pshdr checksum without the IP len)
 - it can be reused if received by a virtual device and sent to a
   physical device supporting TSO
Best regards,
Olivier
[1] http://dpdk.org/ml/archives/dev/2014-May/002766.html
^ permalink raw reply	[flat|nested] 47+ messages in thread
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-11-07 17:16           ` Olivier MATZ
@ 2014-11-10 11:39             ` Ananyev, Konstantin
  2014-11-10 15:57               ` Olivier MATZ
       [not found]             ` <D0868B54.24DBB%yongwang@vmware.com>
  1 sibling, 1 reply; 47+ messages in thread
From: Ananyev, Konstantin @ 2014-11-10 11:39 UTC (permalink / raw)
  To: Olivier MATZ, Yong Wang, Liu, Jijiang; +Cc: dev
Hi Oliver,
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier MATZ
> Sent: Friday, November 07, 2014 5:16 PM
> To: Yong Wang; Liu, Jijiang
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
> 
> Hello Yong,
> 
> On 11/07/2014 01:43 AM, Yong Wang wrote:
> >>> As to HW TX checksum offload, do you have special requirement for implementing TSO?
> >
> >> Yes. TSO implies TX TCP and IP checksum offload.
> >
> > Is this a general requirement or something specific to ixgbe/i40e? FWIW,
> > vmxnet3 device does not support tx IP checksum offload but doe support
> > TSO.  In that case, we cannot leave IP checksum field as 0 (the correct
> > checksum needs to be filled in the header) before passing it the the NIC
> > when TSO is enabled.
> 
> This is a good question because we need to define the proper API that
> will work on other PMDs in the future.
> 
> Indeed, there is a hardware specificity in ixgbe: when TSO is enabled,
> the IP checksum flag must also be passed to the driver if it's IPv4.
> From 82599 datasheets (7.2.3.2.4 Advanced Transmit Data Descriptor):
> 
>     IXSM (bit 0) - Insert IP Checksum: This field indicates that IP
>     checksum must be inserted. In IPv6 mode, it must be reset to 0b.
>     If DCMD.TSE and TUCMD.IPV4 are set, IXSM must be set as well.
>     If this bit is set, the packet should at least contain an
>     IP header.
> 
> If we allow the user to give the TSO flag without the IP checksum
> flag in mbuf flags, the ixgbe driver would have to set the IP checksum
> flag in hardware descriptors if the packet is IPv4. The driver would
> have to parse the IP header: this is not a problem as we already need
> it for TCP checksum.
> 
> To summarize, I think we have 3 options when transmitting a packet to be
> segmented using TSO:
> 
> - set IP checksum to 0 in the application: in this case, it would
>   require additional work in virtual drivers if the peer expects
>   to receive a packet with a valid IP checksum. But I'm wondering
>   what is the need for calculating a checksum when transmitting on
>   a virtual device (the peer receiving the packet knows that the
>   packet is not corrupted as it comes from memory). Moreover, if the
>   device advertise TSO, I assume it can also advertise IP checksum
>   offload.
> 
> - calculate the IP checksum in the application. It would take additional
>   cycles although it may not be needed as the driver probably knows
>   how to calculate it.
> 
> - if the driver supports both TSO and IP checksum, the 2 flags MUST
>   be given to the driver and the IP checksum must be set to 0 and the
>   checksum cannot be calculated in software. If the driver only
>   supports TSO, the checksum has to be calculated in software.
> 
> Currently, I choosen the first solution, but I'm open to change the
> design. Maybe the 3rd one is also a good solution.
> 
> By the way, we had the same kind of discussion with Konstantin [1]
> about what to do with the TCP checksum. My feeling is that setting it
> to the pseudo-header checksum is the best we can do:
>  - linux does that
>  - many hardware requires that (this is not the case for ixgbe, which
>    need a pshdr checksum without the IP len)
>  - it can be reused if received by a virtual device and sent to a
>    physical device supporting TSO
Yes, I remember that discussion.
I still think we better avoid any read/write access of the packet data inside PMD TX routine.
(packet header parsing and/or pseudo-header checksum calculations).
As I said before - if different HW have different requirements of what have to be recalculated for HW TX offloads -
why not introduce a new function dev_prep_tx(portid, queueid, mbuf[], num)?
PMD developer can put all necessary calculations/updates of the packet data and related mbuf fields inside that function.
It would be then a PMD responsibility to provide that function and it would be an app layer responsibility to call it for
mbufs with TX offload flags before calling tx_burst().
Konstantin
> 
> Best regards,
> Olivier
> 
> 
> [1] http://dpdk.org/ml/archives/dev/2014-May/002766.html
^ permalink raw reply	[flat|nested] 47+ messages in thread
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-11-10 11:39             ` Ananyev, Konstantin
@ 2014-11-10 15:57               ` Olivier MATZ
  2014-11-12  9:55                 ` Ananyev, Konstantin
  0 siblings, 1 reply; 47+ messages in thread
From: Olivier MATZ @ 2014-11-10 15:57 UTC (permalink / raw)
  To: Ananyev, Konstantin, Yong Wang, Liu, Jijiang; +Cc: dev
Hello Konstantin,
>> By the way, we had the same kind of discussion with Konstantin [1]
>> about what to do with the TCP checksum. My feeling is that setting it
>> to the pseudo-header checksum is the best we can do:
>>  - linux does that
>>  - many hardware requires that (this is not the case for ixgbe, which
>>    need a pshdr checksum without the IP len)
>>  - it can be reused if received by a virtual device and sent to a
>>    physical device supporting TSO
> 
> Yes, I remember that discussion.
> I still think we better avoid any read/write access of the packet data inside PMD TX routine.
> (packet header parsing and/or pseudo-header checksum calculations).
> As I said before - if different HW have different requirements of what have to be recalculated for HW TX offloads -
> why not introduce a new function dev_prep_tx(portid, queueid, mbuf[], num)?
> PMD developer can put all necessary calculations/updates of the packet data and related mbuf fields inside that function.
> It would be then a PMD responsibility to provide that function and it would be an app layer responsibility to call it for
> mbufs with TX offload flags before calling tx_burst().
I think I understand your point: you don't want to touch the packet
in the PMD because the lcore that transmits the packet can be different
than the one that built it. In this case (i.e. a pipeline case),
reading or writing the packet can produce a cache miss, is it correct?
>From an API perspective, it looks a bit more complex to have to call
dev_prep_tx() before sending the packets if they have been flagged
for offload processing. But I admit I have no other argument. I'll be
happy to have more comments from other people on the list.
I'm sending a first version of the patchset now as it's ready, it does
not take in account this comment, but I'm open to add it in a v2 if
there is a consensus on this.
Now, knowing that:
- adding dev_prep_tx() will also concern hw checksum (TCP L4 checksum
  already requires to set the TCP pseudo header checksum), so adding
  this will change the API of an existing feature
- TSO is a new feature expected for 1.8 (which should be out soon)
Do you think we need to include this for 1.8 or can we postpone your
proposition for after the 1.8 release?
Thank you for your comments,
Regards,
Olivier
>> [1] http://dpdk.org/ml/archives/dev/2014-May/002766.html
^ permalink raw reply	[flat|nested] 47+ messages in thread 
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-11-10 15:57               ` Olivier MATZ
@ 2014-11-12  9:55                 ` Ananyev, Konstantin
  2014-11-12 13:05                   ` Olivier MATZ
  0 siblings, 1 reply; 47+ messages in thread
From: Ananyev, Konstantin @ 2014-11-12  9:55 UTC (permalink / raw)
  To: Olivier MATZ, Yong Wang, Liu, Jijiang; +Cc: dev
Hi Oliver,
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Monday, November 10, 2014 3:58 PM
> To: Ananyev, Konstantin; Yong Wang; Liu, Jijiang
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
> 
> Hello Konstantin,
> 
> >> By the way, we had the same kind of discussion with Konstantin [1]
> >> about what to do with the TCP checksum. My feeling is that setting it
> >> to the pseudo-header checksum is the best we can do:
> >>  - linux does that
> >>  - many hardware requires that (this is not the case for ixgbe, which
> >>    need a pshdr checksum without the IP len)
> >>  - it can be reused if received by a virtual device and sent to a
> >>    physical device supporting TSO
> >
> > Yes, I remember that discussion.
> > I still think we better avoid any read/write access of the packet data inside PMD TX routine.
> > (packet header parsing and/or pseudo-header checksum calculations).
> > As I said before - if different HW have different requirements of what have to be recalculated for HW TX offloads -
> > why not introduce a new function dev_prep_tx(portid, queueid, mbuf[], num)?
> > PMD developer can put all necessary calculations/updates of the packet data and related mbuf fields inside that function.
> > It would be then a PMD responsibility to provide that function and it would be an app layer responsibility to call it for
> > mbufs with TX offload flags before calling tx_burst().
> 
> I think I understand your point: you don't want to touch the packet
> in the PMD because the lcore that transmits the packet can be different
> than the one that built it. In this case (i.e. a pipeline case),
> reading or writing the packet can produce a cache miss, is it correct?
Yes, it is correct. 
That's one of the main reason why current implementations of TX routines avoid touching packet data. 
> From an API perspective, it looks a bit more complex to have to call
> dev_prep_tx() before sending the packets if they have been flagged
> for offload processing. But I admit I have no other argument. I'll be
> happy to have more comments from other people on the list.
> 
> I'm sending a first version of the patchset now as it's ready, it does
> not take in account this comment, but I'm open to add it in a v2 if
> there is a consensus on this.
> 
> Now, knowing that:
> - adding dev_prep_tx() will also concern hw checksum (TCP L4 checksum
>   already requires to set the TCP pseudo header checksum), so adding
>   this will change the API of an existing feature
> - TSO is a new feature expected for 1.8 (which should be out soon)
> 
> Do you think we need to include this for 1.8 or can we postpone your
> proposition for after the 1.8 release?
I'd say it would be good to have it done together with TSO feature.
About changing API: I think existing applications shouldn't be affected.
For existing PMDs/TX offloads we don't change  any rules what need to be filled by the app.
We just add a new function that can do that for user.
If the app fills required manually (as all apps have to do now) it would keep working as expected. 
If you feel like it is too much work for 1.8 timeframe -
can we at least move fix_tcp_phdr_cksum() out of TX PMD as a temporary measure?
Let say create a function  get_ipv4_udptcp_checksum(struct rte_mbuf *m) (in librte_net ?).
It will calculate PSD checksum for both TSO and non-TSO case based on given mbuf flags/fields.
Then we can update testpmd/csumonly.c to use it.
Thanks
Konstantin
 
> Thank you for your comments,
> Regards,
> Olivier
> 
> 
> 
> >> [1] http://dpdk.org/ml/archives/dev/2014-May/002766.html
^ permalink raw reply	[flat|nested] 47+ messages in thread 
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-11-12  9:55                 ` Ananyev, Konstantin
@ 2014-11-12 13:05                   ` Olivier MATZ
  2014-11-12 13:40                     ` Thomas Monjalon
  2014-11-12 14:39                     ` Ananyev, Konstantin
  0 siblings, 2 replies; 47+ messages in thread
From: Olivier MATZ @ 2014-11-12 13:05 UTC (permalink / raw)
  To: Ananyev, Konstantin, Yong Wang, Liu, Jijiang; +Cc: dev
Hi Konstantin,
On 11/12/2014 10:55 AM, Ananyev, Konstantin wrote:
>>  From an API perspective, it looks a bit more complex to have to call
>> dev_prep_tx() before sending the packets if they have been flagged
>> for offload processing. But I admit I have no other argument. I'll be
>> happy to have more comments from other people on the list.
>>
>> I'm sending a first version of the patchset now as it's ready, it does
>> not take in account this comment, but I'm open to add it in a v2 if
>> there is a consensus on this.
>>
>> Now, knowing that:
>> - adding dev_prep_tx() will also concern hw checksum (TCP L4 checksum
>>    already requires to set the TCP pseudo header checksum), so adding
>>    this will change the API of an existing feature
>> - TSO is a new feature expected for 1.8 (which should be out soon)
>>
>> Do you think we need to include this for 1.8 or can we postpone your
>> proposition for after the 1.8 release?
>
> I'd say it would be good to have it done together with TSO feature.
> About changing API: I think existing applications shouldn't be affected.
> For existing PMDs/TX offloads we don't change  any rules what need to be filled by the app.
> We just add a new function that can do that for user.
> If the app fills required manually (as all apps have to do now) it would keep working as expected.
I agree, this proposition could work without changing the current
applications.
> If you feel like it is too much work for 1.8 timeframe -
> can we at least move fix_tcp_phdr_cksum() out of TX PMD as a temporary measure?
> Let say create a function  get_ipv4_udptcp_checksum(struct rte_mbuf *m) (in librte_net ?).
> It will calculate PSD checksum for both TSO and non-TSO case based on given mbuf flags/fields.
> Then we can update testpmd/csumonly.c to use it.
I'm not sure having get_ipv4_udptcp_checksum() in librte_net would
help. The value we have to set in the TCP checksum field depends on the
PMD (altought only ixgbe is supported now). So, it would require
another parameter <portid> and a new PMD eth_ops... which looks very
similar to dev_prep_tx() (except that dev_prep_tx() can be bulked).
I think a stack will not be able to call get_udptcp_checksum(m ,port)
because it does not know the physical port at the time the packet is
built. Moreover, calling a function through a pointer is more efficient
when bulked. So I think the dev_prep_tx() you initially describe is
a better answer to the problem.
I don't know what is the exact timeframe for 1.8, maybe Thomas can help
on this? Depending on it, we have several options:
- implement dev_prep_tx() for 1.8 in the TSO series: this implies that
   the community agrees on this new API. We need to check that it will
   be faster in a pipeline model (I think this is obvious) but also that
   it does not penalize the run-to-completion model: introducing another
   function dev_prep_tx() can result in duplicated tests in the driver
   (ex: test the offload flag values).
- postpone dev_prep_tx() or similar to next version and push the current
   TSO patchset (including the comments done on the list). It does not
   modify the current offload API, it provides the TSO feature on ixgbe
   based on a similar API concept (set the TCP phdr cksum). The drawback
   is a potential performance loss when using a pipeline model.
- another option that you may prefer is to bind the API behavior to
   ixgbe (for 1.8): we can ask the application to set the pseudo-header
   checksum without the IP len when doing TSO, as required by the ixgbe
   driver. Then, for next release, we can think about dev_prep_tx(). The
   drawback of this solution is that we may go back on this choice if the
   dev_prep_tx() approach is not validated by the community.
Regards,
Olivier
^ permalink raw reply	[flat|nested] 47+ messages in thread 
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-11-12 13:05                   ` Olivier MATZ
@ 2014-11-12 13:40                     ` Thomas Monjalon
  2014-11-12 23:14                       ` Ananyev, Konstantin
  2014-11-12 14:39                     ` Ananyev, Konstantin
  1 sibling, 1 reply; 47+ messages in thread
From: Thomas Monjalon @ 2014-11-12 13:40 UTC (permalink / raw)
  To: Olivier MATZ, Ananyev, Konstantin, dev
2014-11-12 14:05, Olivier MATZ:
> On 11/12/2014 10:55 AM, Ananyev, Konstantin wrote:
> >>  From an API perspective, it looks a bit more complex to have to call
> >> dev_prep_tx() before sending the packets if they have been flagged
> >> for offload processing. But I admit I have no other argument. I'll be
> >> happy to have more comments from other people on the list.
> >>
> >> I'm sending a first version of the patchset now as it's ready, it does
> >> not take in account this comment, but I'm open to add it in a v2 if
> >> there is a consensus on this.
> >>
> >> Now, knowing that:
> >> - adding dev_prep_tx() will also concern hw checksum (TCP L4 checksum
> >>    already requires to set the TCP pseudo header checksum), so adding
> >>    this will change the API of an existing feature
> >> - TSO is a new feature expected for 1.8 (which should be out soon)
> >>
> >> Do you think we need to include this for 1.8 or can we postpone your
> >> proposition for after the 1.8 release?
> >
> > I'd say it would be good to have it done together with TSO feature.
> > About changing API: I think existing applications shouldn't be affected.
> > For existing PMDs/TX offloads we don't change  any rules what need to be filled by the app.
> > We just add a new function that can do that for user.
> > If the app fills required manually (as all apps have to do now) it would keep working as expected.
> 
> I agree, this proposition could work without changing the current
> applications.
> 
> > If you feel like it is too much work for 1.8 timeframe -
> > can we at least move fix_tcp_phdr_cksum() out of TX PMD as a temporary measure?
> > Let say create a function  get_ipv4_udptcp_checksum(struct rte_mbuf *m) (in librte_net ?).
> > It will calculate PSD checksum for both TSO and non-TSO case based on given mbuf flags/fields.
> > Then we can update testpmd/csumonly.c to use it.
> 
> I'm not sure having get_ipv4_udptcp_checksum() in librte_net would
> help. The value we have to set in the TCP checksum field depends on the
> PMD (altought only ixgbe is supported now). So, it would require
> another parameter <portid> and a new PMD eth_ops... which looks very
> similar to dev_prep_tx() (except that dev_prep_tx() can be bulked).
> I think a stack will not be able to call get_udptcp_checksum(m ,port)
> because it does not know the physical port at the time the packet is
> built. Moreover, calling a function through a pointer is more efficient
> when bulked. So I think the dev_prep_tx() you initially describe is
> a better answer to the problem.
> 
> I don't know what is the exact timeframe for 1.8, maybe Thomas can help
> on this? Depending on it, we have several options:
> 
> - implement dev_prep_tx() for 1.8 in the TSO series: this implies that
>    the community agrees on this new API. We need to check that it will
>    be faster in a pipeline model (I think this is obvious) but also that
>    it does not penalize the run-to-completion model: introducing another
>    function dev_prep_tx() can result in duplicated tests in the driver
>    (ex: test the offload flag values).
> 
> - postpone dev_prep_tx() or similar to next version and push the current
>    TSO patchset (including the comments done on the list). It does not
>    modify the current offload API, it provides the TSO feature on ixgbe
>    based on a similar API concept (set the TCP phdr cksum). The drawback
>    is a potential performance loss when using a pipeline model.
> 
> - another option that you may prefer is to bind the API behavior to
>    ixgbe (for 1.8): we can ask the application to set the pseudo-header
>    checksum without the IP len when doing TSO, as required by the ixgbe
>    driver. Then, for next release, we can think about dev_prep_tx(). The
>    drawback of this solution is that we may go back on this choice if the
>    dev_prep_tx() approach is not validated by the community.
I feel this question is really important and we need more people to review
the API. We'll also need more validation tests and performance checks with
several use cases.
Release is already late and I'm not comfortable with such change now.
The only chance to have dev_prep_tx() in 1.8 would be to quickly have a large
consensus and some benchmarks in pipeline and run to completion models.
Conclusion: we should integrate TSO without dev_prep_tx (option 2 or 3) and
then speed up dev & tests for dev_prep_tx(). This improvement for pipeline
model could go in 2.0 if it's considered too short or risky for 1.8.
Konstantin, could you be in charge of dev_prep_tx() works?
Thanks for the good discussion
-- 
Thomas
^ permalink raw reply	[flat|nested] 47+ messages in thread 
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-11-12 13:40                     ` Thomas Monjalon
@ 2014-11-12 23:14                       ` Ananyev, Konstantin
  0 siblings, 0 replies; 47+ messages in thread
From: Ananyev, Konstantin @ 2014-11-12 23:14 UTC (permalink / raw)
  To: Thomas Monjalon, Olivier MATZ, dev
Hi Thomas,
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Wednesday, November 12, 2014 1:41 PM
> To: Olivier MATZ; Ananyev, Konstantin; dev@dpdk.org
> Cc: Yong Wang
> Subject: Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
> 
> 2014-11-12 14:05, Olivier MATZ:
> > On 11/12/2014 10:55 AM, Ananyev, Konstantin wrote:
> > >>  From an API perspective, it looks a bit more complex to have to call
> > >> dev_prep_tx() before sending the packets if they have been flagged
> > >> for offload processing. But I admit I have no other argument. I'll be
> > >> happy to have more comments from other people on the list.
> > >>
> > >> I'm sending a first version of the patchset now as it's ready, it does
> > >> not take in account this comment, but I'm open to add it in a v2 if
> > >> there is a consensus on this.
> > >>
> > >> Now, knowing that:
> > >> - adding dev_prep_tx() will also concern hw checksum (TCP L4 checksum
> > >>    already requires to set the TCP pseudo header checksum), so adding
> > >>    this will change the API of an existing feature
> > >> - TSO is a new feature expected for 1.8 (which should be out soon)
> > >>
> > >> Do you think we need to include this for 1.8 or can we postpone your
> > >> proposition for after the 1.8 release?
> > >
> > > I'd say it would be good to have it done together with TSO feature.
> > > About changing API: I think existing applications shouldn't be affected.
> > > For existing PMDs/TX offloads we don't change  any rules what need to be filled by the app.
> > > We just add a new function that can do that for user.
> > > If the app fills required manually (as all apps have to do now) it would keep working as expected.
> >
> > I agree, this proposition could work without changing the current
> > applications.
> >
> > > If you feel like it is too much work for 1.8 timeframe -
> > > can we at least move fix_tcp_phdr_cksum() out of TX PMD as a temporary measure?
> > > Let say create a function  get_ipv4_udptcp_checksum(struct rte_mbuf *m) (in librte_net ?).
> > > It will calculate PSD checksum for both TSO and non-TSO case based on given mbuf flags/fields.
> > > Then we can update testpmd/csumonly.c to use it.
> >
> > I'm not sure having get_ipv4_udptcp_checksum() in librte_net would
> > help. The value we have to set in the TCP checksum field depends on the
> > PMD (altought only ixgbe is supported now). So, it would require
> > another parameter <portid> and a new PMD eth_ops... which looks very
> > similar to dev_prep_tx() (except that dev_prep_tx() can be bulked).
> > I think a stack will not be able to call get_udptcp_checksum(m ,port)
> > because it does not know the physical port at the time the packet is
> > built. Moreover, calling a function through a pointer is more efficient
> > when bulked. So I think the dev_prep_tx() you initially describe is
> > a better answer to the problem.
> >
> > I don't know what is the exact timeframe for 1.8, maybe Thomas can help
> > on this? Depending on it, we have several options:
> >
> > - implement dev_prep_tx() for 1.8 in the TSO series: this implies that
> >    the community agrees on this new API. We need to check that it will
> >    be faster in a pipeline model (I think this is obvious) but also that
> >    it does not penalize the run-to-completion model: introducing another
> >    function dev_prep_tx() can result in duplicated tests in the driver
> >    (ex: test the offload flag values).
> >
> > - postpone dev_prep_tx() or similar to next version and push the current
> >    TSO patchset (including the comments done on the list). It does not
> >    modify the current offload API, it provides the TSO feature on ixgbe
> >    based on a similar API concept (set the TCP phdr cksum). The drawback
> >    is a potential performance loss when using a pipeline model.
> >
> > - another option that you may prefer is to bind the API behavior to
> >    ixgbe (for 1.8): we can ask the application to set the pseudo-header
> >    checksum without the IP len when doing TSO, as required by the ixgbe
> >    driver. Then, for next release, we can think about dev_prep_tx(). The
> >    drawback of this solution is that we may go back on this choice if the
> >    dev_prep_tx() approach is not validated by the community.
> 
> I feel this question is really important and we need more people to review
> the API. We'll also need more validation tests and performance checks with
> several use cases.
> 
> Release is already late and I'm not comfortable with such change now.
> The only chance to have dev_prep_tx() in 1.8 would be to quickly have a large
> consensus and some benchmarks in pipeline and run to completion models.
> 
> Conclusion: we should integrate TSO without dev_prep_tx (option 2 or 3) and
> then speed up dev & tests for dev_prep_tx(). This improvement for pipeline
> model could go in 2.0 if it's considered too short or risky for 1.8.
> Konstantin, could you be in charge of dev_prep_tx() works?
I can have a look at it in 2.0 timeframe.
Unless someone else is interested in doing it before that :)
Konstantin
> 
> Thanks for the good discussion
> --
> Thomas
^ permalink raw reply	[flat|nested] 47+ messages in thread 
 
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-11-12 13:05                   ` Olivier MATZ
  2014-11-12 13:40                     ` Thomas Monjalon
@ 2014-11-12 14:39                     ` Ananyev, Konstantin
  2014-11-12 14:56                       ` Olivier MATZ
  1 sibling, 1 reply; 47+ messages in thread
From: Ananyev, Konstantin @ 2014-11-12 14:39 UTC (permalink / raw)
  To: Olivier MATZ, Yong Wang, Liu, Jijiang; +Cc: dev
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Wednesday, November 12, 2014 1:06 PM
> To: Ananyev, Konstantin; Yong Wang; Liu, Jijiang
> Cc: dev@dpdk.org; Thomas Monjalon
> Subject: Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
> 
> Hi Konstantin,
> 
> On 11/12/2014 10:55 AM, Ananyev, Konstantin wrote:
> >>  From an API perspective, it looks a bit more complex to have to call
> >> dev_prep_tx() before sending the packets if they have been flagged
> >> for offload processing. But I admit I have no other argument. I'll be
> >> happy to have more comments from other people on the list.
> >>
> >> I'm sending a first version of the patchset now as it's ready, it does
> >> not take in account this comment, but I'm open to add it in a v2 if
> >> there is a consensus on this.
> >>
> >> Now, knowing that:
> >> - adding dev_prep_tx() will also concern hw checksum (TCP L4 checksum
> >>    already requires to set the TCP pseudo header checksum), so adding
> >>    this will change the API of an existing feature
> >> - TSO is a new feature expected for 1.8 (which should be out soon)
> >>
> >> Do you think we need to include this for 1.8 or can we postpone your
> >> proposition for after the 1.8 release?
> >
> > I'd say it would be good to have it done together with TSO feature.
> > About changing API: I think existing applications shouldn't be affected.
> > For existing PMDs/TX offloads we don't change  any rules what need to be filled by the app.
> > We just add a new function that can do that for user.
> > If the app fills required manually (as all apps have to do now) it would keep working as expected.
> 
> I agree, this proposition could work without changing the current
> applications.
> 
> > If you feel like it is too much work for 1.8 timeframe -
> > can we at least move fix_tcp_phdr_cksum() out of TX PMD as a temporary measure?
> > Let say create a function  get_ipv4_udptcp_checksum(struct rte_mbuf *m) (in librte_net ?).
> > It will calculate PSD checksum for both TSO and non-TSO case based on given mbuf flags/fields.
> > Then we can update testpmd/csumonly.c to use it.
> 
> I'm not sure having get_ipv4_udptcp_checksum() in librte_net would
> help. The value we have to set in the TCP checksum field depends on the
> PMD (altought only ixgbe is supported now). So, it would require
> another parameter <portid> and a new PMD eth_ops... which looks very
> similar to dev_prep_tx() (except that dev_prep_tx() can be bulked).
> I think a stack will not be able to call get_udptcp_checksum(m ,port)
> because it does not know the physical port at the time the packet is
> built. Moreover, calling a function through a pointer is more efficient
> when bulked. So I think the dev_prep_tx() you initially describe is
> a better answer to the problem.
Yes I understand that it might not be applicable for non-Intel NICs.
Though I thought it is ok as a temporary measure as right now we
support these offloads for Intel NICs only.
Basically my thought was what you proposed as option 3 below.
Why common function in librte_net?
So people don't need to write their own each time.
Plus as I remember all 3 Intel NIC types (ixgbe/igb/i40e) we support have similar 
requirements for what need to be set/calculated for these TX offloads.
So my thought was that having a common function might help to avoid code duplication in future,
If/when will implement dev_prep_tx(). 
> 
> I don't know what is the exact timeframe for 1.8, maybe Thomas can help
> on this? Depending on it, we have several options:
> 
> - implement dev_prep_tx() for 1.8 in the TSO series: this implies that
>    the community agrees on this new API. We need to check that it will
>    be faster in a pipeline model (I think this is obvious) but also that
>    it does not penalize the run-to-completion model: introducing another
>    function dev_prep_tx() can result in duplicated tests in the driver
>    (ex: test the offload flag values).
> 
> - postpone dev_prep_tx() or similar to next version and push the current
>    TSO patchset (including the comments done on the list). It does not
>    modify the current offload API, it provides the TSO feature on ixgbe
>    based on a similar API concept (set the TCP phdr cksum). The drawback
>    is a potential performance loss when using a pipeline model.
> 
> - another option that you may prefer is to bind the API behavior to
>    ixgbe (for 1.8): we can ask the application to set the pseudo-header
>    checksum without the IP len when doing TSO, as required by the ixgbe
>    driver. Then, for next release, we can think about dev_prep_tx(). The
>    drawback of this solution is that we may go back on this choice if the
>    dev_prep_tx() approach is not validated by the community.
My vote would be for option 3 then.
Thanks
Konstantin
> 
> 
> Regards,
> Olivier
^ permalink raw reply	[flat|nested] 47+ messages in thread 
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-11-12 14:39                     ` Ananyev, Konstantin
@ 2014-11-12 14:56                       ` Olivier MATZ
  0 siblings, 0 replies; 47+ messages in thread
From: Olivier MATZ @ 2014-11-12 14:56 UTC (permalink / raw)
  To: Ananyev, Konstantin, Yong Wang, Liu, Jijiang; +Cc: dev
Hi Konstantin,
On 11/12/2014 03:39 PM, Ananyev, Konstantin wrote:
>> I'm not sure having get_ipv4_udptcp_checksum() in librte_net would
>> help. The value we have to set in the TCP checksum field depends on the
>> PMD (altought only ixgbe is supported now). So, it would require
>> another parameter <portid> and a new PMD eth_ops... which looks very
>> similar to dev_prep_tx() (except that dev_prep_tx() can be bulked).
>> I think a stack will not be able to call get_udptcp_checksum(m ,port)
>> because it does not know the physical port at the time the packet is
>> built. Moreover, calling a function through a pointer is more efficient
>> when bulked. So I think the dev_prep_tx() you initially describe is
>> a better answer to the problem.
>
> Yes I understand that it might not be applicable for non-Intel NICs.
> Though I thought it is ok as a temporary measure as right now we
> support these offloads for Intel NICs only.
> Basically my thought was what you proposed as option 3 below.
> Why common function in librte_net?
> So people don't need to write their own each time.
> Plus as I remember all 3 Intel NIC types (ixgbe/igb/i40e) we support have similar
> requirements for what need to be set/calculated for these TX offloads.
> So my thought was that having a common function might help to avoid code duplication in future,
> If/when will implement dev_prep_tx().
OK, now I understand better what you suggest. I'll try to implement
the option 3 (below) with a common checksum function in librte_net
in the next version of the TSO patchset.
Regards,
Olivier
>
>>
>> I don't know what is the exact timeframe for 1.8, maybe Thomas can help
>> on this? Depending on it, we have several options:
>>
>> - implement dev_prep_tx() for 1.8 in the TSO series: this implies that
>>     the community agrees on this new API. We need to check that it will
>>     be faster in a pipeline model (I think this is obvious) but also that
>>     it does not penalize the run-to-completion model: introducing another
>>     function dev_prep_tx() can result in duplicated tests in the driver
>>     (ex: test the offload flag values).
>>
>> - postpone dev_prep_tx() or similar to next version and push the current
>>     TSO patchset (including the comments done on the list). It does not
>>     modify the current offload API, it provides the TSO feature on ixgbe
>>     based on a similar API concept (set the TCP phdr cksum). The drawback
>>     is a potential performance loss when using a pipeline model.
>>
>> - another option that you may prefer is to bind the API behavior to
>>     ixgbe (for 1.8): we can ask the application to set the pseudo-header
>>     checksum without the IP len when doing TSO, as required by the ixgbe
>>     driver. Then, for next release, we can think about dev_prep_tx(). The
>>     drawback of this solution is that we may go back on this choice if the
>>     dev_prep_tx() approach is not validated by the community.
^ permalink raw reply	[flat|nested] 47+ messages in thread 
 
 
 
 
 
- [parent not found: <D0868B54.24DBB%yongwang@vmware.com>] 
- * [dpdk-dev] FW: [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
       [not found]             ` <D0868B54.24DBB%yongwang@vmware.com>
@ 2014-11-11  0:07               ` Yong Wang
  0 siblings, 0 replies; 47+ messages in thread
From: Yong Wang @ 2014-11-11  0:07 UTC (permalink / raw)
  To: Olivier MATZ; +Cc: dev
On 11/7/14, 9:16 AM, "Olivier MATZ" <olivier.matz@6wind.com> wrote:
>Hello Yong,
>
>On 11/07/2014 01:43 AM, Yong Wang wrote:
>>>> As to HW TX checksum offload, do you have special requirement for
>>>>implementing TSO?
>>
>>> Yes. TSO implies TX TCP and IP checksum offload.
>>
>> Is this a general requirement or something specific to ixgbe/i40e? FWIW,
>> vmxnet3 device does not support tx IP checksum offload but doe support
>> TSO.  In that case, we cannot leave IP checksum field as 0 (the correct
>> checksum needs to be filled in the header) before passing it the the NIC
>> when TSO is enabled.
>
>This is a good question because we need to define the proper API that
>will work on other PMDs in the future.
>
>Indeed, there is a hardware specificity in ixgbe: when TSO is enabled,
>the IP checksum flag must also be passed to the driver if it's IPv4.
>From 82599 datasheets (7.2.3.2.4 Advanced Transmit Data Descriptor):
>
>    IXSM (bit 0) ‹ Insert IP Checksum: This field indicates that IP
>    checksum must be inserted. In IPv6 mode, it must be reset to 0b.
>    If DCMD.TSE and TUCMD.IPV4 are set, IXSM must be set as well.
>    If this bit is set, the packet should at least contain an
>    IP header.
>
>If we allow the user to give the TSO flag without the IP checksum
>flag in mbuf flags, the ixgbe driver would have to set the IP checksum
>flag in hardware descriptors if the packet is IPv4. The driver would
>have to parse the IP header: this is not a problem as we already need
>it for TCP checksum.
>
>To summarize, I think we have 3 options when transmitting a packet to be
>segmented using TSO:
>
>- set IP checksum to 0 in the application: in this case, it would
>  require additional work in virtual drivers if the peer expects
>  to receive a packet with a valid IP checksum. But I'm wondering
>  what is the need for calculating a checksum when transmitting on
>  a virtual device (the peer receiving the packet knows that the
>  packet is not corrupted as it comes from memory). Moreover, if the
>  device advertise TSO, I assume it can also advertise IP checksum
>  offload.
Checksum is still needed if the packet has to be transmitted over the wire.
The device is capable of IP checksum but for various reasons, it is
designed to only support TSO and TCP/UDP checksum. So I guess we still
have to deal with this discrepancy.
 
>
>- calculate the IP checksum in the application. It would take additional
>  cycles although it may not be needed as the driver probably knows
>  how to calculate it.
>
>- if the driver supports both TSO and IP checksum, the 2 flags MUST
>  be given to the driver and the IP checksum must be set to 0 and the
>  checksum cannot be calculated in software. If the driver only
>  supports TSO, the checksum has to be calculated in software.
>
>Currently, I choosen the first solution, but I'm open to change the
>design. Maybe the 3rd one is also a good solution.
I think option (3) is cleaner and can accommodate device differences
without requiring a new API.  But I don’t really have a strong preference
here and I am fine with option (1) or a new API (dev_prep_tx()) as long as
the assumptions/requirements are clearly documented.
Thanks,
Yong
>
>By the way, we had the same kind of discussion with Konstantin [1]
>about what to do with the TCP checksum. My feeling is that setting it
>to the pseudo-header checksum is the best we can do:
> - linux does that
> - many hardware requires that (this is not the case for ixgbe, which
>   need a pshdr checksum without the IP len)
> - it can be reused if received by a virtual device and sent to a
>   physical device supporting TSO
>
>Best regards,
>Olivier
>
>
>[1]
>https://urldefense.proofpoint.com/v2/url?u=http-3A__dpdk.org_ml_archives_d
>ev_2014-2DMay_002766.html&d=AAID-g&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNt
>Xt-uEs&r=44mSO5N5yEs4CeCdtQE0xt0F7J0p67_mApYVAzyYms0&m=Sb_uMbXc4QNWb6fbk2n
>yDga1IfEZQeJUbx731-gSHU4&s=p3oIaLnY_38j2i4oxMGmtBAoQsQbeko01aEUojzSnIo&e=
^ permalink raw reply	[flat|nested] 47+ messages in thread
 
 
 
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-11-05 10:28       ` Olivier MATZ
  2014-11-06 11:24         ` Liu, Jijiang
  2014-11-07  0:43         ` Yong Wang
@ 2014-11-10  6:03         ` Liu, Jijiang
  2014-11-10 16:17           ` Olivier MATZ
  2 siblings, 1 reply; 47+ messages in thread
From: Liu, Jijiang @ 2014-11-10  6:03 UTC (permalink / raw)
  To: Olivier MATZ; +Cc: dev
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Wednesday, November 5, 2014 6:28 PM
> To: Liu, Jijiang
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum
> offload
> 
> Hi Jijiang,
> 
> Thank you for your answer. Please find some comments below.
> 
> 
> Another thing is surprising me.
> 
> - if PKT_TX_VXLAN_CKSUM is not set (legacy use case), then the
>    driver use l2_len and l3_len to offload inner IP/UDP/TCP checksums.
If the flag is not set, and imply that it is not VXLAN packet, 
 and do TX checksum offload as regular packet.
> - if PKT_TX_VXLAN_CKSUM is set, then the driver has to use
>    inner_l{23}_len instead of l{23}_len for the same operation.
Your understanding is not fully correct.
The l{23}_len is still used for TX checksum offload, please refer to i40e_txd_enable_checksum()  implementation.
> Adding PKT_TX_VXLAN_CKSUM changes the semantic of l2_len and l3_len.
> To fix this, I suggest to remove the new fields inner_l{23}_len then add
> outer_l{23}_len instead. Therefore, the semantic of l2_len and l3_len would not
> change, and a driver would always use the same field for a specific offload.
Oh...
> For my TSO development, I will follow the current semantic.
For TSO, you still can use l{2,3} _len .
When I develop tunneling TSO, I will use inner_l3_len/inner_l4_len.
> 
^ permalink raw reply	[flat|nested] 47+ messages in thread
- * Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload
  2014-11-10  6:03         ` [dpdk-dev] " Liu, Jijiang
@ 2014-11-10 16:17           ` Olivier MATZ
       [not found]             ` <1ED644BD7E0A5F4091CF203DAFB8E4CC01D8F7A7@SHSMSX101.ccr.corp.intel.com>
  0 siblings, 1 reply; 47+ messages in thread
From: Olivier MATZ @ 2014-11-10 16:17 UTC (permalink / raw)
  To: Liu, Jijiang; +Cc: dev
Hi Jijiang,
On 11/10/2014 07:03 AM, Liu, Jijiang wrote:
>> Another thing is surprising me.
>>
>> - if PKT_TX_VXLAN_CKSUM is not set (legacy use case), then the
>>    driver use l2_len and l3_len to offload inner IP/UDP/TCP checksums.
> If the flag is not set, and imply that it is not VXLAN packet, 
>  and do TX checksum offload as regular packet.
> 
>> - if PKT_TX_VXLAN_CKSUM is set, then the driver has to use
>>    inner_l{23}_len instead of l{23}_len for the same operation.
> Your understanding is not fully correct.
> The l{23}_len is still used for TX checksum offload, please refer to i40e_txd_enable_checksum()  implementation.
This fields are part of public mbuf API. You cannot say to refer to
i40e PMD code to understand how to use it.
>> Adding PKT_TX_VXLAN_CKSUM changes the semantic of l2_len and l3_len.
>> To fix this, I suggest to remove the new fields inner_l{23}_len then add
>> outer_l{23}_len instead. Therefore, the semantic of l2_len and l3_len would not
>> change, and a driver would always use the same field for a specific offload.
> Oh...
Does it mean you agree?
>> For my TSO development, I will follow the current semantic.
> For TSO, you still can use l{2,3} _len .
> When I develop tunneling TSO, I will use inner_l3_len/inner_l4_len.
I've just submitted a first version, please feel free to comment it.
Regards,
Olivier
^ permalink raw reply	[flat|nested] 47+ messages in thread
 
 
 
 
 
- * Re: [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville
  2014-10-27  2:13 [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville Jijiang Liu
                   ` (9 preceding siblings ...)
  2014-10-27  2:13 ` [dpdk-dev] [PATCH v8 10/10] app/testpmd:test " Jijiang Liu
@ 2014-10-27  2:20 ` Liu, Yong
  2014-10-27  2:41 ` Zhang, Helin
  11 siblings, 0 replies; 47+ messages in thread
From: Liu, Yong @ 2014-10-27  2:20 UTC (permalink / raw)
  To: Liu, Jijiang, dev
Tested-by: Yong Liu <yong.liu@intel.com>
- Tested Commit: 455d09e54b92a4626e178b020fe9c23e43ede3f7
- OS: Fedora20 3.15.8-200.fc20.x86_64
- GCC: gcc version 4.8.3 20140624
- CPU: Intel(R) Xeon(R) CPU E5-2680 v2 @ 2.80GHz
- NIC: Intel Corporation Ethernet Controller XL710 for 40GbE QSFP+ [8086:1583]
- Default x86_64-native-linuxapp-gcc configuration
- Total 6 cases, 6 passed, 0 failed
- Case: vxlan_ipv4_detect
  Description: Check testpmd can receive and detect vxlan packet 
  Command / instruction:
	Start testpmd with vxlan enabled and rss disabled
		testpmd -c ffff -n 4 -- -i --tunnel-type=1 --disable-rss --rxq=4 --txq=4 --nb-cores=8 --nb-ports=2
	Enable VxLAN on both ports and UDP dport setting to 4789
		testpmd>rx_vxlan_port add 4789 0
		testpmd>rx_vxlan_port add 4789 1
	Set forward type to rxonly and enable detail log output
		testpmd>set fwd rxonly
		testpmd>set verbose 1
		testpmd>start
	Send packets with udp/tcp/sctp inner L4 data
  Expected test result:
	testpmd can receive the vxlan packet with different inner L4 data and detect whether the packet is vxlan packet
- Case: vxlan_ipv6_detect
  Description: Check testpmd can receive and detect ipv6 vxlan packet
  Command / instruction:
	Start testpmd with vxlan enabled and rss disabled
		testpmd -c ffff -n 4 -- -i --tunnel-type=1 --disable-rss --rxq=4 --txq=4 --nb-cores=8 --nb-ports=2
	Enable VxLAN on both ports and UDP dport setting to 4789
		testpmd>rx_vxlan_port add 4789 0
		testpmd>rx_vxlan_port add 4789 1
	Set forward type to rxonly and enable detail log output
		testpmd>set fwd rxonly
		testpmd>set verbose 1
		testpmd>start
	Send vxlan packets with outer IPv6 header and inner IPv6 header.
  Expected test result:
	testpmd can receive the vxlan packet with different inner L4 data and detect whether the packet is IPv6 vxlan packet
- Case: vxlan_ipv4_checksum_offload
  Description: Check testpmd can offload vxlan checksum and forward the packet
  Command / instruction:
	Start testpmd with vxlan enabled and rss disabled.
		testpmd -c ffff -n 4 -- -i --tunnel-type=1 --disable-rss --rxq=4 --txq=4 --nb-cores=8 --nb-ports=2
	Enable VxLAN on both ports and UDP dport setting to 4789
		testpmd>rx_vxlan_port add 4789 0
		testpmd>rx_vxlan_port add 4789 1
	Set csum packet forwarding mode and enable verbose log.
		testpmd>set fwd csum
		testpmd>set verbose 1
		testpmd>start
	Enable outer IP,UDP,TCP,SCTP and inner IP,UDP checksum offload when inner L4 protocal is UDP.
		testpmd>tx_checksum set 0 0xf3
	Enable outer IP,UDP,TCP,SCTP and inner IP,TCP,SCTP checksum offload when inner L4 protocal is TCP or SCTP.
		testpmd>tx_checksum set 0 0xfd
	Send ipv4 vxlan packet with invalid outer/inner l3 or l4 checksum.	
  Expected test result:
	testpmd can forwarded vxlan packet and the checksum is corrected. The chksum error counter also increased.
- Case: vxlan_ipv6_checksum_offload
  Description: Check testpmd can offload ipv6 vxlan checksum and forward the packet 
  Command / instruction:
	Start testpmd with vxlan enabled and rss disabled.
		testpmd -c ffff -n 4 -- -i --tunnel-type=1 --disable-rss --rxq=4 --txq=4 --nb-cores=8 --nb-ports=2
	Enable VxLAN on both ports and UDP dport setting to 4789
		testpmd>rx_vxlan_port add 4789 0
		testpmd>rx_vxlan_port add 4789 1
	Set csum packet forwarding mode and enable verbose log.
		testpmd>set fwd csum
		testpmd>set verbose 1
		testpmd>start
	Enable outer IP,UDP,TCP,SCTP and inner IP,UDP checksum offload when inner L4 protocal is UDP.
		testpmd>tx_checksum set 0 0xf3
	Enable outer IP,UDP,TCP,SCTP and inner IP,TCP,SCTP checksum offload when inner L4 protocal is TCP or SCTP.
		testpmd>tx_checksum set 0 0xfd
	Send ipv6 vxlan packet with invalid outer/inner l3 or l4 checksum.
  Expected test result:
	testpmd can forwarded vxlan packet and the checksum is corrected. The chksum error counter also increased.
- Case: tunnel_filter
  Description: Check FVL vxlan tunnel filter function work with testpmd.
  Command / instruction:
	Start testpmd with vxlan enabled and rss disabled.
		testpmd -c ffff -n 4 -- -i --tunnel-type=1 --disable-rss --rxq=4 --txq=4 --nb-cores=8 --nb-ports=2
	Enable VxLAN on both ports and UDP dport setting to 4789
		testpmd>rx_vxlan_port add 4789 0
		testpmd>rx_vxlan_port add 4
	Set rxonly forwarding mode and enable verbose log.
		testpmd>set fwd rxonly
		testpmd>set verbose 1
		testpmd>start
	Add cloud filter with specified inner dst mac and inner vlan.
		testpmd>tunnel_filter add 0 11:22:33:44:55:66 00:00:20:00:00:01 192.168.2.2 1 vxlan imac-ivlan 1 3
	Add cloud filter with specified inner dst mac,inner vlan and tunnel id. 
		testpmd>tunnel_filter add 0 11:22:33:44:55:66 00:00:20:00:00:01 192.168.2.2 1 vxlan imac-ivlan-tenid 1 3
	Add cloud filter with specified inner dst mac and tunnel id.
		testpmd>tunnel_filter add 0 11:22:33:44:55:66 00:00:20:00:00:01 192.168.2.2 1 vxlan imac-tenid 1 3
	Add cloud filter with specified inner dst mac.
		testpmd>tunnel_filter add 0 11:22:33:44:55:66 00:00:20:00:00:01 192.168.2.2 1 vxlan imac 1 3
	Add cloud filter with specified outer dst mac, inner dst mac and tunnel id.
		testpmd>tunnel_filter add 0 11:22:33:44:55:66 00:00:20:00:00:01 192.168.2.2 1 vxlan omac-imac-tenid 1 3
	Send vxlan packet matched the specified fields.
  Expected test result:
	testpmd can received the  vxlan packet on the assigned queue 3.
- Case: tunnel_filter_invalid
  Description: Check FVL vxlan tunnel  filter invalid command cannot accepted by testpmd 
  Command / instruction:
	Start testpmd with vxlan enabled and rss disabled.
		testpmd -c ffff -n 4 -- -i --tunnel-type=1 --disble-rss --rxq=4 --txq=4 --nb-cores=8 --nb-ports=2
	Enable VxLAN on both ports and UDP dport setting to 4789
		testpmd>rx_vxlan_port add 4789 0
		testpmd>rx_vxlan_port add 4
	Add Clould filter with invalid Mac address.
	Add Clould filter with invalid ip address.
	Add Clould filter with invalid vlan.
	Add Clould filter with invalid vni.
	Add Clould filter with invalid queue id.
  Expected test result: testpmd will report the parameter is invalid.
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Jijiang Liu
> Sent: Monday, October 27, 2014 10:13 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville
> 
> The patch set supports VxLAN on Fortville based on latest rte_mbuf
> structure.
> 
> It includes:
>  - Support VxLAN packet identification by configuring UDP tunneling port.
>  - Support VxLAN packet filters. It uses MAC and VLAN to point
>    to a queue. The filter types supported are listed below:
>    1. Inner MAC and Inner VLAN ID
>    2. Inner MAC address, inner VLAN ID and tenant ID.
>    3. Inner MAC and tenant ID
>    4. Inner MAC address
>    5. Outer MAC address, tenant ID and inner MAC
>  - Support VxLAN TX checksum offload, which include outer L3(IP), inner L3(IP)
> and inner L4(UDP,TCP and SCTP)
> 
> Change notes:
> 
>  v8)  * Fix the issue of redundant "PKT_RX" and the comma missing in the
> pkt_rx_flag_names[] in the rxonly.c file.
> 
> Jijiang Liu (10):
>   change rte_mbuf structures
>   add data structures of UDP tunneling
>   add VxLAN packet identification API in librte_ether
>   support VxLAN packet identification in i40e
>   test VxLAN packet identification in testpmd.
>   add data structures of tunneling filter in rte_eth_ctrl.h
>   implement the API of VxLAN packet filter in i40e
>   test VxLAN packet filter
>   support VxLAN Tx checksum offload in i40e
>   test VxLAN Tx checksum offload
> 
> 
>  app/test-pmd/cmdline.c            |  228 +++++++++++++++++++++++++-
>  app/test-pmd/config.c             |    6 +-
>  app/test-pmd/csumonly.c           |  194 ++++++++++++++++++++--
>  app/test-pmd/rxonly.c             |   50 ++++++-
>  lib/librte_ether/rte_eth_ctrl.h   |   61 +++++++
>  lib/librte_ether/rte_ethdev.c     |   52 ++++++
>  lib/librte_ether/rte_ethdev.h     |   54 ++++++
>  lib/librte_ether/rte_ether.h      |   13 ++
>  lib/librte_mbuf/rte_mbuf.h        |   28 +++-
>  lib/librte_pmd_i40e/i40e_ethdev.c |  331
> ++++++++++++++++++++++++++++++++++++-
>  lib/librte_pmd_i40e/i40e_ethdev.h |    8 +-
>  lib/librte_pmd_i40e/i40e_rxtx.c   |  151 +++++++++++------
>  12 files changed, 1096 insertions(+), 80 deletions(-)
> 
> --
> 1.7.7.6
^ permalink raw reply	[flat|nested] 47+ messages in thread
- * Re: [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville
  2014-10-27  2:13 [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville Jijiang Liu
                   ` (10 preceding siblings ...)
  2014-10-27  2:20 ` [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville Liu, Yong
@ 2014-10-27  2:41 ` Zhang, Helin
  2014-10-27 13:46   ` Thomas Monjalon
  11 siblings, 1 reply; 47+ messages in thread
From: Zhang, Helin @ 2014-10-27  2:41 UTC (permalink / raw)
  To: Liu, Jijiang, dev
Acked-by: Helin Zhang <helin.zhang@intel.com>
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Jijiang Liu
> Sent: Monday, October 27, 2014 10:13 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville
> 
> The patch set supports VxLAN on Fortville based on latest rte_mbuf structure.
> 
> It includes:
>  - Support VxLAN packet identification by configuring UDP tunneling port.
>  - Support VxLAN packet filters. It uses MAC and VLAN to point
>    to a queue. The filter types supported are listed below:
>    1. Inner MAC and Inner VLAN ID
>    2. Inner MAC address, inner VLAN ID and tenant ID.
>    3. Inner MAC and tenant ID
>    4. Inner MAC address
>    5. Outer MAC address, tenant ID and inner MAC
>  - Support VxLAN TX checksum offload, which include outer L3(IP), inner L3(IP)
> and inner L4(UDP,TCP and SCTP)
> 
> Change notes:
> 
>  v8)  * Fix the issue of redundant "PKT_RX" and the comma missing in the
> pkt_rx_flag_names[] in the rxonly.c file.
> 
> Jijiang Liu (10):
>   change rte_mbuf structures
>   add data structures of UDP tunneling
>   add VxLAN packet identification API in librte_ether
>   support VxLAN packet identification in i40e
>   test VxLAN packet identification in testpmd.
>   add data structures of tunneling filter in rte_eth_ctrl.h
>   implement the API of VxLAN packet filter in i40e
>   test VxLAN packet filter
>   support VxLAN Tx checksum offload in i40e
>   test VxLAN Tx checksum offload
> 
> 
>  app/test-pmd/cmdline.c            |  228 +++++++++++++++++++++++++-
>  app/test-pmd/config.c             |    6 +-
>  app/test-pmd/csumonly.c           |  194 ++++++++++++++++++++--
>  app/test-pmd/rxonly.c             |   50 ++++++-
>  lib/librte_ether/rte_eth_ctrl.h   |   61 +++++++
>  lib/librte_ether/rte_ethdev.c     |   52 ++++++
>  lib/librte_ether/rte_ethdev.h     |   54 ++++++
>  lib/librte_ether/rte_ether.h      |   13 ++
>  lib/librte_mbuf/rte_mbuf.h        |   28 +++-
>  lib/librte_pmd_i40e/i40e_ethdev.c |  331
> ++++++++++++++++++++++++++++++++++++-
>  lib/librte_pmd_i40e/i40e_ethdev.h |    8 +-
>  lib/librte_pmd_i40e/i40e_rxtx.c   |  151 +++++++++++------
>  12 files changed, 1096 insertions(+), 80 deletions(-)
> 
> --
> 1.7.7.6
^ permalink raw reply	[flat|nested] 47+ messages in thread
- * Re: [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville
  2014-10-27  2:41 ` Zhang, Helin
@ 2014-10-27 13:46   ` Thomas Monjalon
  2014-10-27 14:34     ` Liu, Jijiang
  0 siblings, 1 reply; 47+ messages in thread
From: Thomas Monjalon @ 2014-10-27 13:46 UTC (permalink / raw)
  To: Liu, Jijiang; +Cc: dev
2014-10-27 02:41, Zhang, Helin:
> > The patch set supports VxLAN on Fortville based on latest rte_mbuf structure.
> > 
> > It includes:
> >  - Support VxLAN packet identification by configuring UDP tunneling port.
> >  - Support VxLAN packet filters. It uses MAC and VLAN to point
> >    to a queue. The filter types supported are listed below:
> >    1. Inner MAC and Inner VLAN ID
> >    2. Inner MAC address, inner VLAN ID and tenant ID.
> >    3. Inner MAC and tenant ID
> >    4. Inner MAC address
> >    5. Outer MAC address, tenant ID and inner MAC
> >  - Support VxLAN TX checksum offload, which include outer L3(IP), inner L3(IP)
> > and inner L4(UDP,TCP and SCTP)
> > 
> > Change notes:
> > 
> >  v8)  * Fix the issue of redundant "PKT_RX" and the comma missing in the
> > pkt_rx_flag_names[] in the rxonly.c file.
> > 
> > Jijiang Liu (10):
> >   change rte_mbuf structures
> >   add data structures of UDP tunneling
> >   add VxLAN packet identification API in librte_ether
> >   support VxLAN packet identification in i40e
> >   test VxLAN packet identification in testpmd.
> >   add data structures of tunneling filter in rte_eth_ctrl.h
> >   implement the API of VxLAN packet filter in i40e
> >   test VxLAN packet filter
> >   support VxLAN Tx checksum offload in i40e
> >   test VxLAN Tx checksum offload
> 
> Acked-by: Helin Zhang <helin.zhang@intel.com>
Applied
I fixed logs which had \n despite recent log rework.
I think there is also a wording error: you are writing VxLAN with x lowercase
but standard is writing it all uppercase: VXLAN. Do you agree?
Thanks
-- 
Thomas
^ permalink raw reply	[flat|nested] 47+ messages in thread 
- * Re: [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville
  2014-10-27 13:46   ` Thomas Monjalon
@ 2014-10-27 14:34     ` Liu, Jijiang
  2014-10-27 15:15       ` Thomas Monjalon
  0 siblings, 1 reply; 47+ messages in thread
From: Liu, Jijiang @ 2014-10-27 14:34 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Monday, October 27, 2014 9:46 PM
> To: Liu, Jijiang
> Cc: dev@dpdk.org; Zhang, Helin
> Subject: Re: [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville
> 
> 2014-10-27 02:41, Zhang, Helin:
> > > The patch set supports VxLAN on Fortville based on latest rte_mbuf structure.
> > >
> > > It includes:
> > >  - Support VxLAN packet identification by configuring UDP tunneling port.
> > >  - Support VxLAN packet filters. It uses MAC and VLAN to point
> > >    to a queue. The filter types supported are listed below:
> > >    1. Inner MAC and Inner VLAN ID
> > >    2. Inner MAC address, inner VLAN ID and tenant ID.
> > >    3. Inner MAC and tenant ID
> > >    4. Inner MAC address
> > >    5. Outer MAC address, tenant ID and inner MAC
> > >  - Support VxLAN TX checksum offload, which include outer L3(IP),
> > > inner L3(IP) and inner L4(UDP,TCP and SCTP)
> > >
> > > Change notes:
> > >
> > >  v8)  * Fix the issue of redundant "PKT_RX" and the comma missing in
> > > the pkt_rx_flag_names[] in the rxonly.c file.
> > >
> > > Jijiang Liu (10):
> > >   change rte_mbuf structures
> > >   add data structures of UDP tunneling
> > >   add VxLAN packet identification API in librte_ether
> > >   support VxLAN packet identification in i40e
> > >   test VxLAN packet identification in testpmd.
> > >   add data structures of tunneling filter in rte_eth_ctrl.h
> > >   implement the API of VxLAN packet filter in i40e
> > >   test VxLAN packet filter
> > >   support VxLAN Tx checksum offload in i40e
> > >   test VxLAN Tx checksum offload
> >
> > Acked-by: Helin Zhang <helin.zhang@intel.com>
> 
> Applied
> 
> I fixed logs which had \n despite recent log rework.
> I think there is also a wording error: you are writing VxLAN with x lowercase but
> standard is writing it all uppercase: VXLAN. Do you agree?
Virtual eXtensible Local Area Network (VXLAN)
Agree.
> Thanks
> --
> Thomas
^ permalink raw reply	[flat|nested] 47+ messages in thread 
- * Re: [dpdk-dev] [PATCH v8 00/10] Support VxLAN on Fortville
  2014-10-27 14:34     ` Liu, Jijiang
@ 2014-10-27 15:15       ` Thomas Monjalon
  0 siblings, 0 replies; 47+ messages in thread
From: Thomas Monjalon @ 2014-10-27 15:15 UTC (permalink / raw)
  To: Liu, Jijiang; +Cc: dev
2014-10-27 14:34, Liu, Jijiang:
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > I think there is also a wording error: you are writing VxLAN with x lowercase but
> > standard is writing it all uppercase: VXLAN. Do you agree?
> Virtual eXtensible Local Area Network (VXLAN)
> 
> Agree.
Fixed
-- 
Thomas
^ permalink raw reply	[flat|nested] 47+ messages in thread