DPDK patches and discussions
* [dpdk-dev] [PATCH] net/octeontx2: add TSO offload support
@ 2019-09-06  8:53 Nithin Dabilpuram
  2019-09-25  4:36 ` [dpdk-dev] [PATCH v2] " Nithin Dabilpuram
  0 siblings, 1 reply; 4+ messages in thread
From: Nithin Dabilpuram @ 2019-09-06  8:53 UTC (permalink / raw)
  To: Jerin Jacob, Nithin Dabilpuram, Kiran Kumar K, John McNamara,
	Marko Kovacevic, Vamsi Attunuru, Pavan Nikhilesh
  Cc: dev

Add support for the below TCP segmentation offloads on
96xx A1 onwards and 95xx B0 onwards:
- TCPv4, TCPv6
- VXLAN[v4 | v6][v4 | v6]
- GENEVE[v4 | v6][v4 | v6]

This patch also marks a fastpath function as always-inline
for performance reasons in multi-seg mode.

Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
---
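
Usage sketch (illustrative only, not part of this patch): how an
application could request the new TSO offloads and tag an mbuf for
segmentation, assuming the standard rte_ethdev/rte_mbuf API of this
release. Error handling and Rx/Tx queue setup are omitted.

  #include <rte_ethdev.h>
  #include <rte_ether.h>
  #include <rte_ip.h>
  #include <rte_mbuf.h>
  #include <rte_tcp.h>

  static int enable_tso(uint16_t port_id)
  {
  	struct rte_eth_conf conf = { 0 };

  	/* Request plain and tunneled TSO on the Tx side */
  	conf.txmode.offloads = DEV_TX_OFFLOAD_TCP_TSO |
  			       DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
  			       DEV_TX_OFFLOAD_GENEVE_TNL_TSO;

  	/* One Rx and one Tx queue, just for illustration */
  	return rte_eth_dev_configure(port_id, 1, 1, &conf);
  }

  static void mark_for_tso(struct rte_mbuf *m, uint16_t mss)
  {
  	/* Header lengths must cover everything in front of the TCP payload */
  	m->l2_len = sizeof(struct rte_ether_hdr);
  	m->l3_len = sizeof(struct rte_ipv4_hdr);
  	m->l4_len = sizeof(struct rte_tcp_hdr);
  	m->tso_segsz = mss;	/* max payload bytes per segment */
  	m->ol_flags |= PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_IP_CKSUM;
  }
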
 doc/guides/nics/features/octeontx2.ini     |   1 +
 drivers/common/octeontx2/hw/otx2_nix.h     |  12 ++
 drivers/common/octeontx2/otx2_common.h     |  17 ++
 drivers/event/octeontx2/otx2_evdev.c       |  30 +--
 drivers/event/octeontx2/otx2_evdev.h       |   2 +-
 drivers/event/octeontx2/otx2_worker.c      |   4 +-
 drivers/event/octeontx2/otx2_worker.h      |   2 +
 drivers/event/octeontx2/otx2_worker_dual.c |   4 +-
 drivers/net/octeontx2/otx2_ethdev.c        | 222 ++++++++++++++++++++-
 drivers/net/octeontx2/otx2_ethdev.h        |   4 +
 drivers/net/octeontx2/otx2_tx.c            |  55 ++++--
 drivers/net/octeontx2/otx2_tx.h            | 298 ++++++++++++++++++++++-------
 12 files changed, 545 insertions(+), 106 deletions(-)

diff --git a/doc/guides/nics/features/octeontx2.ini b/doc/guides/nics/features/octeontx2.ini
index 6695232..5b73c8b 100644
--- a/doc/guides/nics/features/octeontx2.ini
+++ b/doc/guides/nics/features/octeontx2.ini
@@ -17,6 +17,7 @@ Fast mbuf free       = Y
 Free Tx mbuf on demand = Y
 Queue start/stop     = Y
 MTU update           = Y
+TSO                  = Y
 Promiscuous mode     = Y
 Allmulticast mode    = Y
 Unicast MAC filter   = Y
diff --git a/drivers/common/octeontx2/hw/otx2_nix.h b/drivers/common/octeontx2/hw/otx2_nix.h
index d5ad988..e3b6850 100644
--- a/drivers/common/octeontx2/hw/otx2_nix.h
+++ b/drivers/common/octeontx2/hw/otx2_nix.h
@@ -1376,4 +1376,16 @@ struct nix_lso_format {
 	uint64_t rsvd_19_63  : 45;
 };
 
+#define NIX_LSO_FIELD_MAX	(8)
+#define NIX_LSO_FIELD_ALG_MASK	GENMASK(18, 16)
+#define NIX_LSO_FIELD_SZ_MASK	GENMASK(13, 12)
+#define NIX_LSO_FIELD_LY_MASK	GENMASK(9, 8)
+#define NIX_LSO_FIELD_OFF_MASK	GENMASK(7, 0)
+
+#define NIX_LSO_FIELD_MASK			\
+		(NIX_LSO_FIELD_OFF_MASK |	\
+		 NIX_LSO_FIELD_LY_MASK |	\
+		 NIX_LSO_FIELD_SZ_MASK |	\
+		 NIX_LSO_FIELD_ALG_MASK)
+
 #endif /* __OTX2_NIX_HW_H__ */
diff --git a/drivers/common/octeontx2/otx2_common.h b/drivers/common/octeontx2/otx2_common.h
index a155462..0d0b87e 100644
--- a/drivers/common/octeontx2/otx2_common.h
+++ b/drivers/common/octeontx2/otx2_common.h
@@ -31,6 +31,23 @@
 #define BIT(nr)     (1UL << (nr))
 #endif
 
+#ifndef BITS_PER_LONG
+#define BITS_PER_LONG	(__SIZEOF_LONG__ * 8)
+#endif
+#ifndef BITS_PER_LONG_LONG
+#define BITS_PER_LONG_LONG (__SIZEOF_LONG_LONG__ * 8)
+#endif
+
+#ifndef GENMASK
+#define GENMASK(h, l) \
+		(((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
+#endif
+#ifndef GENMASK_ULL
+#define GENMASK_ULL(h, l) \
+	(((~0ULL) - (1ULL << (l)) + 1) & \
+	 (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
+#endif
+
 /* Compiler attributes */
 #ifndef __hot
 #define __hot   __attribute__((hot))
diff --git a/drivers/event/octeontx2/otx2_evdev.c b/drivers/event/octeontx2/otx2_evdev.c
index 160ac94..4d8860f 100644
--- a/drivers/event/octeontx2/otx2_evdev.c
+++ b/drivers/event/octeontx2/otx2_evdev.c
@@ -169,33 +169,35 @@ SSO_RX_ADPTR_ENQ_FASTPATH_FUNC
 	};
 
 	/* Tx modes */
-	const event_tx_adapter_enqueue ssogws_tx_adptr_enq[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-		[f4][f3][f2][f1][f0] =  otx2_ssogws_tx_adptr_enq_ ## name,
+	const event_tx_adapter_enqueue ssogws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+		[f5][f4][f3][f2][f1][f0] =  otx2_ssogws_tx_adptr_enq_ ## name,
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 	};
 
 	const event_tx_adapter_enqueue
-		ssogws_tx_adptr_enq_seg[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-		[f4][f3][f2][f1][f0] =  otx2_ssogws_tx_adptr_enq_seg_ ## name,
+		ssogws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+		[f5][f4][f3][f2][f1][f0] =				\
+			otx2_ssogws_tx_adptr_enq_seg_ ## name,
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 	};
 
 	const event_tx_adapter_enqueue
-		ssogws_dual_tx_adptr_enq[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-		[f4][f3][f2][f1][f0] =  otx2_ssogws_dual_tx_adptr_enq_ ## name,
+		ssogws_dual_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+		[f5][f4][f3][f2][f1][f0] =				\
+			otx2_ssogws_dual_tx_adptr_enq_ ## name,
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 	};
 
 	const event_tx_adapter_enqueue
-		ssogws_dual_tx_adptr_enq_seg[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-		[f4][f3][f2][f1][f0] =					\
+		ssogws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+		[f5][f4][f3][f2][f1][f0] =				\
 			otx2_ssogws_dual_tx_adptr_enq_seg_ ## name,
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
@@ -274,6 +276,7 @@ SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 	if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
 		/* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
 		event_dev->txa_enqueue = ssogws_tx_adptr_enq_seg
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
@@ -281,6 +284,7 @@ SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
 	} else {
 		event_dev->txa_enqueue = ssogws_tx_adptr_enq
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
@@ -406,6 +410,7 @@ SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 		if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
 		/* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
 			event_dev->txa_enqueue = ssogws_dual_tx_adptr_enq_seg
+				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
 				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
 				[!!(dev->tx_offloads &
 						NIX_TX_OFFLOAD_MBUF_NOFF_F)]
@@ -417,6 +422,7 @@ SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 						NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
 		} else {
 			event_dev->txa_enqueue = ssogws_dual_tx_adptr_enq
+				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
 				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
 				[!!(dev->tx_offloads &
 						NIX_TX_OFFLOAD_MBUF_NOFF_F)]
diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h
index 5cd80e3b2..530060f 100644
--- a/drivers/event/octeontx2/otx2_evdev.h
+++ b/drivers/event/octeontx2/otx2_evdev.h
@@ -331,7 +331,7 @@ uint16_t otx2_ssogws_dual_deq_seg_timeout_burst_ ##name(void *port,	       \
 SSO_RX_ADPTR_ENQ_FASTPATH_FUNC
 #undef R
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				     \
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			     \
 uint16_t otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],\
 					   uint16_t nb_events);		     \
 uint16_t otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port,		     \
diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c
index cd14cd3..34d39f4 100644
--- a/drivers/event/octeontx2/otx2_worker.c
+++ b/drivers/event/octeontx2/otx2_worker.c
@@ -267,7 +267,7 @@ otx2_ssogws_enq_fwd_burst(void *port, const struct rte_event ev[],
 	return 1;
 }
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 uint16_t __hot								\
 otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],	\
 				  uint16_t nb_events)			\
@@ -281,7 +281,7 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],	\
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 uint16_t __hot								\
 otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\
 				      uint16_t nb_events)		\
diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h
index 76f91bb..4e971f2 100644
--- a/drivers/event/octeontx2/otx2_worker.h
+++ b/drivers/event/octeontx2/otx2_worker.h
@@ -256,6 +256,8 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[],
 	struct rte_mbuf *m = ev[0].mbuf;
 	const struct otx2_eth_txq *txq = otx2_ssogws_xtract_meta(m);
 
+	/* Perform header writes before barrier for TSO */
+	otx2_nix_xmit_prepare_tso(m, flags);
 	otx2_ssogws_head_wait(ws, !ev->sched_type);
 	otx2_ssogws_prepare_pkt(txq, m, cmd, flags);
 
diff --git a/drivers/event/octeontx2/otx2_worker_dual.c b/drivers/event/octeontx2/otx2_worker_dual.c
index 37c274a..ff68472 100644
--- a/drivers/event/octeontx2/otx2_worker_dual.c
+++ b/drivers/event/octeontx2/otx2_worker_dual.c
@@ -306,7 +306,7 @@ otx2_ssogws_dual_deq_seg_timeout_burst_ ##name(void *port,		\
 SSO_RX_ADPTR_ENQ_FASTPATH_FUNC
 #undef R
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 uint16_t __hot								\
 otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port,			\
 				       struct rte_event ev[],		\
@@ -323,7 +323,7 @@ otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port,			\
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 uint16_t __hot								\
 otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port,			\
 					   struct rte_event ev[],	\
diff --git a/drivers/net/octeontx2/otx2_ethdev.c b/drivers/net/octeontx2/otx2_ethdev.c
index b84128f..c447f83 100644
--- a/drivers/net/octeontx2/otx2_ethdev.c
+++ b/drivers/net/octeontx2/otx2_ethdev.c
@@ -27,9 +27,14 @@ nix_get_rx_offload_capa(struct otx2_eth_dev *dev)
 static inline uint64_t
 nix_get_tx_offload_capa(struct otx2_eth_dev *dev)
 {
-	RTE_SET_USED(dev);
+	uint64_t capa = NIX_TX_OFFLOAD_CAPA;
 
-	return NIX_TX_OFFLOAD_CAPA;
+	/* TSO not supported for earlier chip revisions */
+	if (otx2_dev_is_96xx_A0(dev) || otx2_dev_is_95xx_Ax(dev))
+		capa &= ~(DEV_TX_OFFLOAD_TCP_TSO |
+			  DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
+			  DEV_TX_OFFLOAD_GENEVE_TNL_TSO);
+	return capa;
 }
 
 static const struct otx2_dev_ops otx2_dev_ops = {
@@ -643,6 +648,18 @@ nix_tx_offload_flags(struct rte_eth_dev *eth_dev)
 	if (conf & DEV_TX_OFFLOAD_MULTI_SEGS)
 		flags |= NIX_TX_MULTI_SEG_F;
 
+	/* Enable Inner checksum for TSO */
+	if (conf & DEV_TX_OFFLOAD_TCP_TSO)
+		flags |= (NIX_TX_OFFLOAD_TSO_F |
+			  NIX_TX_OFFLOAD_L3_L4_CSUM_F);
+
+	/* Enable Inner and Outer checksum for Tunnel TSO */
+	if (conf & (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
+		    DEV_TX_OFFLOAD_GENEVE_TNL_TSO))
+		flags |= (NIX_TX_OFFLOAD_TSO_F |
+			  NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
+			  NIX_TX_OFFLOAD_L3_L4_CSUM_F);
+
 	if ((dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP))
 		flags |= NIX_TX_OFFLOAD_TSTAMP_F;
 
@@ -1205,6 +1222,201 @@ nix_set_nop_rxtx_function(struct rte_eth_dev *eth_dev)
 	rte_mb();
 }
 
+static void
+nix_lso_tcp(struct nix_lso_format_cfg *req, bool v4)
+{
+	volatile struct nix_lso_format *field;
+
+	/* Format works only with TCP packet marked by OL3/OL4 */
+	field = (volatile struct nix_lso_format *)&req->fields[0];
+	req->field_mask = NIX_LSO_FIELD_MASK;
+	/* Outer IPv4/IPv6 */
+	field->layer = NIX_TXLAYER_OL3;
+	field->offset = v4 ? 2 : 4;
+	field->sizem1 = 1; /* 2B */
+	field->alg = NIX_LSOALG_ADD_PAYLEN;
+	field++;
+	if (v4) {
+		/* IPID field */
+		field->layer = NIX_TXLAYER_OL3;
+		field->offset = 4;
+		field->sizem1 = 1;
+		/* Incremented linearly per segment */
+		field->alg = NIX_LSOALG_ADD_SEGNUM;
+		field++;
+	}
+
+	/* TCP sequence number update */
+	field->layer = NIX_TXLAYER_OL4;
+	field->offset = 4;
+	field->sizem1 = 3; /* 4 bytes */
+	field->alg = NIX_LSOALG_ADD_OFFSET;
+	field++;
+	/* TCP flags field */
+	field->layer = NIX_TXLAYER_OL4;
+	field->offset = 12;
+	field->sizem1 = 1;
+	field->alg = NIX_LSOALG_TCP_FLAGS;
+	field++;
+}
+
+static void
+nix_lso_udp_tun_tcp(struct nix_lso_format_cfg *req,
+		    bool outer_v4, bool inner_v4)
+{
+	volatile struct nix_lso_format *field;
+
+	field = (volatile struct nix_lso_format *)&req->fields[0];
+	req->field_mask = NIX_LSO_FIELD_MASK;
+	/* Outer IPv4/IPv6 len */
+	field->layer = NIX_TXLAYER_OL3;
+	field->offset = outer_v4 ? 2 : 4;
+	field->sizem1 = 1; /* 2B */
+	field->alg = NIX_LSOALG_ADD_PAYLEN;
+	field++;
+	if (outer_v4) {
+		/* IPID */
+		field->layer = NIX_TXLAYER_OL3;
+		field->offset = 4;
+		field->sizem1 = 1;
+		/* Incremented linearly per segment */
+		field->alg = NIX_LSOALG_ADD_SEGNUM;
+		field++;
+	}
+
+	/* Outer UDP length */
+	field->layer = NIX_TXLAYER_OL4;
+	field->offset = 4;
+	field->sizem1 = 1;
+	field->alg = NIX_LSOALG_ADD_PAYLEN;
+	field++;
+
+	/* Inner IPv4/IPv6 */
+	field->layer = NIX_TXLAYER_IL3;
+	field->offset = inner_v4 ? 2 : 4;
+	field->sizem1 = 1; /* 2B */
+	field->alg = NIX_LSOALG_ADD_PAYLEN;
+	field++;
+	if (inner_v4) {
+		/* IPID field */
+		field->layer = NIX_TXLAYER_IL3;
+		field->offset = 4;
+		field->sizem1 = 1;
+		/* Incremented linearly per segment */
+		field->alg = NIX_LSOALG_ADD_SEGNUM;
+		field++;
+	}
+
+	/* TCP sequence number update */
+	field->layer = NIX_TXLAYER_IL4;
+	field->offset = 4;
+	field->sizem1 = 3; /* 4 bytes */
+	field->alg = NIX_LSOALG_ADD_OFFSET;
+	field++;
+
+	/* TCP flags field */
+	field->layer = NIX_TXLAYER_IL4;
+	field->offset = 12;
+	field->sizem1 = 1;
+	field->alg = NIX_LSOALG_TCP_FLAGS;
+	field++;
+}
+
+static int
+nix_setup_lso_formats(struct otx2_eth_dev *dev)
+{
+	struct otx2_mbox *mbox = dev->mbox;
+	struct nix_lso_format_cfg_rsp *rsp;
+	struct nix_lso_format_cfg *req;
+	uint8_t base;
+	int rc;
+
+	/* Skip if TSO was not requested */
+	if (!(dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F))
+		return 0;
+	/*
+	 * IPv4/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_tcp(req, true);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+
+	base = rsp->lso_format_idx;
+	if (base != NIX_LSO_FORMAT_IDX_TSOV4)
+		return -EFAULT;
+	dev->lso_base_idx = base;
+	otx2_nix_dbg("tcpv4 lso fmt=%u", base);
+
+
+	/*
+	 * IPv6/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_tcp(req, false);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+
+	if (rsp->lso_format_idx != base + 1)
+		return -EFAULT;
+	otx2_nix_dbg("tcpv6 lso fmt=%u\n", base + 1);
+
+	/*
+	 * IPv4/UDP/TUN HDR/IPv4/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_udp_tun_tcp(req, true, true);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+
+	if (rsp->lso_format_idx != base + 2)
+		return -EFAULT;
+	otx2_nix_dbg("udp tun v4v4 fmt=%u\n", base + 2);
+
+	/*
+	 * IPv4/UDP/TUN HDR/IPv6/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_udp_tun_tcp(req, true, false);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+
+	if (rsp->lso_format_idx != base + 3)
+		return -EFAULT;
+	otx2_nix_dbg("udp tun v4v6 fmt=%u\n", base + 3);
+
+	/*
+	 * IPv6/UDP/TUN HDR/IPv4/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_udp_tun_tcp(req, false, true);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+
+	if (rsp->lso_format_idx != base + 4)
+		return -EFAULT;
+	otx2_nix_dbg("udp tun v6v4 fmt=%u\n", base + 4);
+
+	/*
+	 * IPv6/UDP/TUN HDR/IPv6/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_udp_tun_tcp(req, false, false);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+	if (rsp->lso_format_idx != base + 5)
+		return -EFAULT;
+	otx2_nix_dbg("udp tun v6v6 fmt=%u\n", base + 5);
+
+	return 0;
+}
+
 static int
 otx2_nix_configure(struct rte_eth_dev *eth_dev)
 {
@@ -1292,6 +1504,12 @@ otx2_nix_configure(struct rte_eth_dev *eth_dev)
 		goto fail_offloads;
 	}
 
+	rc = nix_setup_lso_formats(dev);
+	if (rc) {
+		otx2_err("failed to setup nix lso format fields, rc=%d", rc);
+		goto free_nix_lf;
+	}
+
 	/* Configure RSS */
 	rc = otx2_nix_rss_config(eth_dev);
 	if (rc) {
diff --git a/drivers/net/octeontx2/otx2_ethdev.h b/drivers/net/octeontx2/otx2_ethdev.h
index 7b15d6b..633fc40 100644
--- a/drivers/net/octeontx2/otx2_ethdev.h
+++ b/drivers/net/octeontx2/otx2_ethdev.h
@@ -127,6 +127,9 @@
 	DEV_TX_OFFLOAD_TCP_CKSUM	| \
 	DEV_TX_OFFLOAD_UDP_CKSUM	| \
 	DEV_TX_OFFLOAD_SCTP_CKSUM	| \
+	DEV_TX_OFFLOAD_TCP_TSO		| \
+	DEV_TX_OFFLOAD_VXLAN_TNL_TSO    | \
+	DEV_TX_OFFLOAD_GENEVE_TNL_TSO   | \
 	DEV_TX_OFFLOAD_MULTI_SEGS	| \
 	DEV_TX_OFFLOAD_IPV4_CKSUM)
 
@@ -249,6 +252,7 @@ struct otx2_eth_dev {
 	uint8_t tx_chan_cnt;
 	uint8_t lso_tsov4_idx;
 	uint8_t lso_tsov6_idx;
+	uint8_t lso_base_idx;
 	uint8_t mac_addr[RTE_ETHER_ADDR_LEN];
 	uint8_t mkex_pfl_name[MKEX_NAME_LEN];
 	uint8_t max_mac_entries;
diff --git a/drivers/net/octeontx2/otx2_tx.c b/drivers/net/octeontx2/otx2_tx.c
index 0dcadff..fa53300 100644
--- a/drivers/net/octeontx2/otx2_tx.c
+++ b/drivers/net/octeontx2/otx2_tx.c
@@ -32,6 +32,12 @@ nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	otx2_lmt_mov(cmd, &txq->cmd[0], otx2_nix_tx_ext_subs(flags));
 
+	/* Perform header writes before barrier for TSO */
+	if (flags & NIX_TX_OFFLOAD_TSO_F) {
+		for (i = 0; i < pkts; i++)
+			otx2_nix_xmit_prepare_tso(tx_pkts[i], flags);
+	}
+
 	/* Lets commit any changes in the packet */
 	rte_cio_wmb();
 
@@ -62,6 +68,12 @@ nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	otx2_lmt_mov(cmd, &txq->cmd[0], otx2_nix_tx_ext_subs(flags));
 
+	/* Perform header writes before barrier for TSO */
+	if (flags & NIX_TX_OFFLOAD_TSO_F) {
+		for (i = 0; i < pkts; i++)
+			otx2_nix_xmit_prepare_tso(tx_pkts[i], flags);
+	}
+
 	/* Lets commit any changes in the packet */
 	rte_cio_wmb();
 
@@ -933,26 +945,34 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 #endif
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 static uint16_t __rte_noinline	__hot					\
 otx2_nix_xmit_pkts_ ## name(void *tx_queue,				\
 			struct rte_mbuf **tx_pkts, uint16_t pkts)	\
 {									\
 	uint64_t cmd[sz];						\
 									\
+	/* For TSO inner checksum is a must */				\
+	if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&				\
+	    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))			\
+		return 0;						\
 	return nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, flags);	\
 }
 
 NIX_TX_FASTPATH_MODES
 #undef T
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 static uint16_t __rte_noinline	__hot					\
 otx2_nix_xmit_pkts_mseg_ ## name(void *tx_queue,			\
 			struct rte_mbuf **tx_pkts, uint16_t pkts)	\
 {									\
 	uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];			\
 									\
+	/* For TSO inner checksum is a must */				\
+	if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&				\
+	    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))			\
+		return 0;						\
 	return nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd,		\
 				  (flags) | NIX_TX_MULTI_SEG_F);	\
 }
@@ -960,14 +980,15 @@ otx2_nix_xmit_pkts_mseg_ ## name(void *tx_queue,			\
 NIX_TX_FASTPATH_MODES
 #undef T
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 static uint16_t __rte_noinline	__hot					\
 otx2_nix_xmit_pkts_vec_ ## name(void *tx_queue,				\
 			struct rte_mbuf **tx_pkts, uint16_t pkts)	\
 {									\
-	/* VLAN and TSTMP is not supported by vec */			\
+	/* VLAN, TSTMP, TSO is not supported by vec */			\
 	if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F ||			\
-	    (flags) & NIX_TX_OFFLOAD_TSTAMP_F)				\
+	    (flags) & NIX_TX_OFFLOAD_TSTAMP_F ||			\
+	    (flags) & NIX_TX_OFFLOAD_TSO_F)				\
 		return 0;						\
 	return nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, (flags));	\
 }
@@ -977,12 +998,13 @@ NIX_TX_FASTPATH_MODES
 
 static inline void
 pick_tx_func(struct rte_eth_dev *eth_dev,
-	     const eth_tx_burst_t tx_burst[2][2][2][2][2])
+	     const eth_tx_burst_t tx_burst[2][2][2][2][2][2])
 {
 	struct otx2_eth_dev *dev = otx2_eth_pmd_priv(eth_dev);
 
 	/* [TSTMP] [NOFF] [VLAN] [OL3_OL4_CSUM] [IL3_IL4_CSUM] */
 	eth_dev->tx_pkt_burst = tx_burst
+		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)]
 		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_TSTAMP_F)]
 		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
 		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
@@ -995,25 +1017,25 @@ otx2_eth_set_tx_function(struct rte_eth_dev *eth_dev)
 {
 	struct otx2_eth_dev *dev = otx2_eth_pmd_priv(eth_dev);
 
-	const eth_tx_burst_t nix_eth_tx_burst[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-	[f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_ ## name,
+	const eth_tx_burst_t nix_eth_tx_burst[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+	[f5][f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_ ## name,
 
 NIX_TX_FASTPATH_MODES
 #undef T
 	};
 
-	const eth_tx_burst_t nix_eth_tx_burst_mseg[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-	[f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_mseg_ ## name,
+	const eth_tx_burst_t nix_eth_tx_burst_mseg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+	[f5][f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_mseg_ ## name,
 
 NIX_TX_FASTPATH_MODES
 #undef T
 	};
 
-	const eth_tx_burst_t nix_eth_tx_vec_burst[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-	[f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_vec_ ## name,
+	const eth_tx_burst_t nix_eth_tx_vec_burst[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+	[f5][f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_vec_ ## name,
 
 NIX_TX_FASTPATH_MODES
 #undef T
@@ -1021,7 +1043,8 @@ NIX_TX_FASTPATH_MODES
 
 	if (dev->scalar_ena ||
 	    (dev->tx_offload_flags &
-	     (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F)))
+	     (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |
+	      NIX_TX_OFFLOAD_TSO_F)))
 		pick_tx_func(eth_dev, nix_eth_tx_burst);
 	else
 		pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/octeontx2/otx2_tx.h b/drivers/net/octeontx2/otx2_tx.h
index 82a6af6..e919198 100644
--- a/drivers/net/octeontx2/otx2_tx.h
+++ b/drivers/net/octeontx2/otx2_tx.h
@@ -11,6 +11,7 @@
 #define NIX_TX_OFFLOAD_VLAN_QINQ_F	BIT(2)
 #define NIX_TX_OFFLOAD_MBUF_NOFF_F	BIT(3)
 #define NIX_TX_OFFLOAD_TSTAMP_F		BIT(4)
+#define NIX_TX_OFFLOAD_TSO_F		BIT(5)
 
 /* Flags to control xmit_prepare function.
  * Defining it from backwards to denote its been
@@ -20,10 +21,18 @@
 
 #define NIX_TX_NEED_SEND_HDR_W1	\
 	(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |	\
-	 NIX_TX_OFFLOAD_VLAN_QINQ_F)
+	 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
 
 #define NIX_TX_NEED_EXT_HDR \
-	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F)
+	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | \
+	 NIX_TX_OFFLOAD_TSO_F)
+
+#define NIX_UDP_TUN_BITMASK \
+	((1ull << (PKT_TX_TUNNEL_VXLAN >> 45)) | \
+	 (1ull << (PKT_TX_TUNNEL_GENEVE >> 45)))
+
+#define NIX_LSO_FORMAT_IDX_TSOV4	(0)
+#define NIX_LSO_FORMAT_IDX_TSOV6	(1)
 
 /* Function to determine no of tx subdesc required in case ext
  * sub desc is enabled.
@@ -32,7 +41,8 @@ static __rte_always_inline int
 otx2_nix_tx_ext_subs(const uint16_t flags)
 {
 	return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 2 :
-		((flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) ? 1 : 0);
+		((flags & (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ?
+		 1 : 0);
 }
 
 static __rte_always_inline void
@@ -135,7 +145,57 @@ otx2_nix_prefree_seg(struct rte_mbuf *m)
 	return 1;
 }
 
-static inline void
+static __rte_always_inline void
+otx2_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
+{
+	uint64_t mask, ol_flags = m->ol_flags;
+
+	if (flags & NIX_TX_OFFLOAD_TSO_F &&
+	    (ol_flags & PKT_TX_TCP_SEG)) {
+		uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
+		uint16_t *iplen, *oiplen, *oudplen;
+		uint16_t lso_sb, paylen;
+
+		mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
+		lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
+			m->l2_len + m->l3_len + m->l4_len;
+
+		/* Reduce payload len from base headers */
+		paylen = m->pkt_len - lso_sb;
+
+		/* Get iplen position assuming no tunnel hdr */
+		iplen = (uint16_t *)(mdata + m->l2_len +
+				     (2 << !!(ol_flags & PKT_TX_IPV6)));
+		/* Handle tunnel tso */
+		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
+		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
+			const uint8_t is_udp_tun = (NIX_UDP_TUN_BITMASK >>
+				((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) & 0x1;
+
+			oiplen = (uint16_t *)(mdata + m->outer_l2_len +
+				(2 << !!(ol_flags & PKT_TX_OUTER_IPV6)));
+			*oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
+						   paylen);
+
+			/* Update format for UDP tunneled packet */
+			if (is_udp_tun) {
+				oudplen = (uint16_t *)(mdata + m->outer_l2_len +
+						       m->outer_l3_len + 4);
+				*oudplen =
+				rte_cpu_to_be_16(rte_be_to_cpu_16(*oudplen) -
+						 paylen);
+			}
+
+			/* Update iplen position to inner ip hdr */
+			iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
+				m->l4_len + (2 << !!(ol_flags & PKT_TX_IPV6)));
+		}
+
+		*iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
+	}
+}
+
+static __rte_always_inline void
 otx2_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
 {
 	struct nix_send_ext_s *send_hdr_ext;
@@ -258,6 +318,37 @@ otx2_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
 		send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
 	}
 
+	if (flags & NIX_TX_OFFLOAD_TSO_F &&
+	    (ol_flags & PKT_TX_TCP_SEG)) {
+		uint16_t lso_sb;
+		uint64_t mask;
+
+		mask = -(!w1.il3type);
+		lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;
+
+		send_hdr_ext->w0.lso_sb = lso_sb;
+		send_hdr_ext->w0.lso = 1;
+		send_hdr_ext->w0.lso_mps = m->tso_segsz;
+		send_hdr_ext->w0.lso_format =
+			NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
+		w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
+
+		/* Handle tunnel tso */
+		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
+		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
+			const uint8_t is_udp_tun = (NIX_UDP_TUN_BITMASK >>
+				((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) & 0x1;
+
+			w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
+			w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
+			/* Update format for UDP tunneled packet */
+			send_hdr_ext->w0.lso_format += (is_udp_tun << 1);
+
+			send_hdr_ext->w0.lso_format +=
+				!!(ol_flags & PKT_TX_OUTER_IPV6) << 1;
+		}
+	}
+
 	if (flags & NIX_TX_NEED_SEND_HDR_W1)
 		send_hdr->w1.u = w1.u;
 
@@ -378,72 +469,137 @@ otx2_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr,
 #define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
 #define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
 #define TSP_F        NIX_TX_OFFLOAD_TSTAMP_F
+#define TSO_F        NIX_TX_OFFLOAD_TSO_F
 
-/* [TSTMP] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
-#define NIX_TX_FASTPATH_MODES					\
-T(no_offload,				0, 0, 0, 0, 0,	4,	\
-		NIX_TX_OFFLOAD_NONE)				\
-T(l3l4csum,				0, 0, 0, 0, 1,	4,	\
-		L3L4CSUM_F)					\
-T(ol3ol4csum,				0, 0, 0, 1, 0,	4,	\
-		OL3OL4CSUM_F)					\
-T(ol3ol4csum_l3l4csum,			0, 0, 0, 1, 1,	4,	\
-		OL3OL4CSUM_F | L3L4CSUM_F)			\
-T(vlan,					0, 0, 1, 0, 0,	6,	\
-		VLAN_F)						\
-T(vlan_l3l4csum,			0, 0, 1, 0, 1,	6,	\
-		VLAN_F | L3L4CSUM_F)				\
-T(vlan_ol3ol4csum,			0, 0, 1, 1, 0,	6,	\
-		VLAN_F | OL3OL4CSUM_F)				\
-T(vlan_ol3ol4csum_l3l4csum,		0, 0, 1, 1, 1,	6,	\
-		VLAN_F | OL3OL4CSUM_F |	L3L4CSUM_F)		\
-T(noff,					0, 1, 0, 0, 0,	4,	\
-		NOFF_F)						\
-T(noff_l3l4csum,			0, 1, 0, 0, 1,	4,	\
-		NOFF_F | L3L4CSUM_F)				\
-T(noff_ol3ol4csum,			0, 1, 0, 1, 0,	4,	\
-		NOFF_F | OL3OL4CSUM_F)				\
-T(noff_ol3ol4csum_l3l4csum,		0, 1, 0, 1, 1,	4,	\
-		NOFF_F | OL3OL4CSUM_F |	L3L4CSUM_F)		\
-T(noff_vlan,				0, 1, 1, 0, 0,	6,	\
-		NOFF_F | VLAN_F)				\
-T(noff_vlan_l3l4csum,			0, 1, 1, 0, 1,	6,	\
-		NOFF_F | VLAN_F | L3L4CSUM_F)			\
-T(noff_vlan_ol3ol4csum,			0, 1, 1, 1, 0,	6,	\
-		NOFF_F | VLAN_F | OL3OL4CSUM_F)			\
-T(noff_vlan_ol3ol4csum_l3l4csum,	0, 1, 1, 1, 1,	6,	\
-		NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
-T(ts,					1, 0, 0, 0, 0,	8,	\
-		TSP_F)						\
-T(ts_l3l4csum,			1, 0, 0, 0, 1,	8,		\
-		TSP_F | L3L4CSUM_F)				\
-T(ts_ol3ol4csum,			1, 0, 0, 1, 0,	8,	\
-		TSP_F | OL3OL4CSUM_F)				\
-T(ts_ol3ol4csum_l3l4csum,		1, 0, 0, 1, 1,	8,	\
-		TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)		\
-T(ts_vlan,				1, 0, 1, 0, 0,	8,	\
-		TSP_F | VLAN_F)					\
-T(ts_vlan_l3l4csum,			1, 0, 1, 0, 1,	8,	\
-		TSP_F | VLAN_F | L3L4CSUM_F)			\
-T(ts_vlan_ol3ol4csum,		1, 0, 1, 1, 0,	8,		\
-		TSP_F | VLAN_F | OL3OL4CSUM_F)			\
-T(ts_vlan_ol3ol4csum_l3l4csum,	1, 0, 1, 1, 1,	8,		\
-		TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
-T(ts_noff,				1, 1, 0, 0, 0,	8,	\
-		TSP_F | NOFF_F)					\
-T(ts_noff_l3l4csum,			1, 1, 0, 0, 1,	8,	\
-		TSP_F | NOFF_F | L3L4CSUM_F)			\
-T(ts_noff_ol3ol4csum,		1, 1, 0, 1, 0,	8,		\
-		TSP_F | NOFF_F | OL3OL4CSUM_F)			\
-T(ts_noff_ol3ol4csum_l3l4csum,	1, 1, 0, 1, 1,	8,		\
-		TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
-T(ts_noff_vlan,			1, 1, 1, 0, 0,	8,		\
-		TSP_F | NOFF_F | VLAN_F)			\
-T(ts_noff_vlan_l3l4csum,		1, 1, 1, 0, 1,	8,	\
-		TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)		\
-T(ts_noff_vlan_ol3ol4csum,		1, 1, 1, 1, 0,	8,	\
-		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)		\
-T(ts_noff_vlan_ol3ol4csum_l3l4csum,	1, 1, 1, 1, 1,	8,	\
-		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
-
+/* [TSO] [TSTMP] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
+#define NIX_TX_FASTPATH_MODES						\
+T(no_offload,				0, 0, 0, 0, 0, 0,	4,	\
+		NIX_TX_OFFLOAD_NONE)					\
+T(l3l4csum,				0, 0, 0, 0, 0, 1,	4,	\
+		L3L4CSUM_F)						\
+T(ol3ol4csum,				0, 0, 0, 0, 1, 0,	4,	\
+		OL3OL4CSUM_F)						\
+T(ol3ol4csum_l3l4csum,			0, 0, 0, 0, 1, 1,	4,	\
+		OL3OL4CSUM_F | L3L4CSUM_F)				\
+T(vlan,					0, 0, 0, 1, 0, 0,	6,	\
+		VLAN_F)							\
+T(vlan_l3l4csum,			0, 0, 0, 1, 0, 1,	6,	\
+		VLAN_F | L3L4CSUM_F)					\
+T(vlan_ol3ol4csum,			0, 0, 0, 1, 1, 0,	6,	\
+		VLAN_F | OL3OL4CSUM_F)					\
+T(vlan_ol3ol4csum_l3l4csum,		0, 0, 0, 1, 1, 1,	6,	\
+		VLAN_F | OL3OL4CSUM_F |	L3L4CSUM_F)			\
+T(noff,					0, 0, 1, 0, 0, 0,	4,	\
+		NOFF_F)							\
+T(noff_l3l4csum,			0, 0, 1, 0, 0, 1,	4,	\
+		NOFF_F | L3L4CSUM_F)					\
+T(noff_ol3ol4csum,			0, 0, 1, 0, 1, 0,	4,	\
+		NOFF_F | OL3OL4CSUM_F)					\
+T(noff_ol3ol4csum_l3l4csum,		0, 0, 1, 0, 1, 1,	4,	\
+		NOFF_F | OL3OL4CSUM_F |	L3L4CSUM_F)			\
+T(noff_vlan,				0, 0, 1, 1, 0, 0,	6,	\
+		NOFF_F | VLAN_F)					\
+T(noff_vlan_l3l4csum,			0, 0, 1, 1, 0, 1,	6,	\
+		NOFF_F | VLAN_F | L3L4CSUM_F)				\
+T(noff_vlan_ol3ol4csum,			0, 0, 1, 1, 1, 0,	6,	\
+		NOFF_F | VLAN_F | OL3OL4CSUM_F)				\
+T(noff_vlan_ol3ol4csum_l3l4csum,	0, 0, 1, 1, 1, 1,	6,	\
+		NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)		\
+T(ts,					0, 1, 0, 0, 0, 0,	8,	\
+		TSP_F)							\
+T(ts_l3l4csum,				0, 1, 0, 0, 0, 1,	8,	\
+		TSP_F | L3L4CSUM_F)					\
+T(ts_ol3ol4csum,			0, 1, 0, 0, 1, 0,	8,	\
+		TSP_F | OL3OL4CSUM_F)					\
+T(ts_ol3ol4csum_l3l4csum,		0, 1, 0, 0, 1, 1,	8,	\
+		TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)			\
+T(ts_vlan,				0, 1, 0, 1, 0, 0,	8,	\
+		TSP_F | VLAN_F)						\
+T(ts_vlan_l3l4csum,			0, 1, 0, 1, 0, 1,	8,	\
+		TSP_F | VLAN_F | L3L4CSUM_F)				\
+T(ts_vlan_ol3ol4csum,			0, 1, 0, 1, 1, 0,	8,	\
+		TSP_F | VLAN_F | OL3OL4CSUM_F)				\
+T(ts_vlan_ol3ol4csum_l3l4csum,		0, 1, 0, 1, 1, 1,	8,	\
+		TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)		\
+T(ts_noff,				0, 1, 1, 0, 0, 0,	8,	\
+		TSP_F | NOFF_F)						\
+T(ts_noff_l3l4csum,			0, 1, 1, 0, 0, 1,	8,	\
+		TSP_F | NOFF_F | L3L4CSUM_F)				\
+T(ts_noff_ol3ol4csum,			0, 1, 1, 0, 1, 0,	8,	\
+		TSP_F | NOFF_F | OL3OL4CSUM_F)				\
+T(ts_noff_ol3ol4csum_l3l4csum,		0, 1, 1, 0, 1, 1,	8,	\
+		TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)		\
+T(ts_noff_vlan,				0, 1, 1, 1, 0, 0,	8,	\
+		TSP_F | NOFF_F | VLAN_F)				\
+T(ts_noff_vlan_l3l4csum,		0, 1, 1, 1, 0, 1,	8,	\
+		TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)			\
+T(ts_noff_vlan_ol3ol4csum,		0, 1, 1, 1, 1, 0,	8,	\
+		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)			\
+T(ts_noff_vlan_ol3ol4csum_l3l4csum,	0, 1, 1, 1, 1, 1,	8,	\
+		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
+									\
+T(tso,					1, 0, 0, 0, 0, 0,	6,	\
+		TSO_F)							\
+T(tso_l3l4csum,				1, 0, 0, 0, 0, 1,	6,	\
+		TSO_F | L3L4CSUM_F)					\
+T(tso_ol3ol4csum,			1, 0, 0, 0, 1, 0,	6,	\
+		TSO_F | OL3OL4CSUM_F)					\
+T(tso_ol3ol4csum_l3l4csum,		1, 0, 0, 0, 1, 1,	6,	\
+		TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)			\
+T(tso_vlan,				1, 0, 0, 1, 0, 0,	6,	\
+		TSO_F | VLAN_F)						\
+T(tso_vlan_l3l4csum,			1, 0, 0, 1, 0, 1,	6,	\
+		TSO_F | VLAN_F | L3L4CSUM_F)				\
+T(tso_vlan_ol3ol4csum,			1, 0, 0, 1, 1, 0,	6,	\
+		TSO_F | VLAN_F | OL3OL4CSUM_F)				\
+T(tso_vlan_ol3ol4csum_l3l4csum,		1, 0, 0, 1, 1, 1,	6,	\
+		TSO_F | VLAN_F | OL3OL4CSUM_F |	L3L4CSUM_F)		\
+T(tso_noff,				1, 0, 1, 0, 0, 0,	6,	\
+		TSO_F | NOFF_F)						\
+T(tso_noff_l3l4csum,			1, 0, 1, 0, 0, 1,	6,	\
+		TSO_F | NOFF_F | L3L4CSUM_F)				\
+T(tso_noff_ol3ol4csum,			1, 0, 1, 0, 1, 0,	6,	\
+		TSO_F | NOFF_F | OL3OL4CSUM_F)				\
+T(tso_noff_ol3ol4csum_l3l4csum,		1, 0, 1, 0, 1, 1,	6,	\
+		TSO_F | NOFF_F | OL3OL4CSUM_F |	L3L4CSUM_F)		\
+T(tso_noff_vlan,			1, 0, 1, 1, 0, 0,	6,	\
+		TSO_F | NOFF_F | VLAN_F)				\
+T(tso_noff_vlan_l3l4csum,		1, 0, 1, 1, 0, 1,	6,	\
+		TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)			\
+T(tso_noff_vlan_ol3ol4csum,		1, 0, 1, 1, 1, 0,	6,	\
+		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)			\
+T(tso_noff_vlan_ol3ol4csum_l3l4csum,	1, 0, 1, 1, 1, 1,	6,	\
+		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
+T(tso_ts,				1, 1, 0, 0, 0, 0,	8,	\
+		TSO_F | TSP_F)						\
+T(tso_ts_l3l4csum,			1, 1, 0, 0, 0, 1,	8,	\
+		TSO_F | TSP_F | L3L4CSUM_F)				\
+T(tso_ts_ol3ol4csum,			1, 1, 0, 0, 1, 0,	8,	\
+		TSO_F | TSP_F | OL3OL4CSUM_F)				\
+T(tso_ts_ol3ol4csum_l3l4csum,		1, 1, 0, 0, 1, 1,	8,	\
+		TSO_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)		\
+T(tso_ts_vlan,				1, 1, 0, 1, 0, 0,	8,	\
+		TSO_F | TSP_F | VLAN_F)					\
+T(tso_ts_vlan_l3l4csum,			1, 1, 0, 1, 0, 1,	8,	\
+		TSO_F | TSP_F | VLAN_F | L3L4CSUM_F)			\
+T(tso_ts_vlan_ol3ol4csum,		1, 1, 0, 1, 1, 0,	8,	\
+		TSO_F | TSP_F | VLAN_F | OL3OL4CSUM_F)			\
+T(tso_ts_vlan_ol3ol4csum_l3l4csum,	1, 1, 0, 1, 1, 1,	8,	\
+		TSO_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
+T(tso_ts_noff,				1, 1, 1, 0, 0, 0,	8,	\
+		TSO_F | TSP_F | NOFF_F)					\
+T(tso_ts_noff_l3l4csum,			1, 1, 1, 0, 0, 1,	8,	\
+		TSO_F | TSP_F | NOFF_F | L3L4CSUM_F)			\
+T(tso_ts_noff_ol3ol4csum,		1, 1, 1, 0, 1, 0,	8,	\
+		TSO_F | TSP_F | NOFF_F | OL3OL4CSUM_F)			\
+T(tso_ts_noff_ol3ol4csum_l3l4csum,	1, 1, 1, 0, 1, 1,	8,	\
+		TSO_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
+T(tso_ts_noff_vlan,			1, 1, 1, 1, 0, 0,	8,	\
+		TSO_F | TSP_F | NOFF_F | VLAN_F)			\
+T(tso_ts_noff_vlan_l3l4csum,		1, 1, 1, 1, 0, 1,	8,	\
+		TSO_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)		\
+T(tso_ts_noff_vlan_ol3ol4csum,		1, 1, 1, 1, 1, 0,	8,	\
+		TSO_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)		\
+T(tso_ts_noff_vlan_ol3ol4csum_l3l4csum,	1, 1, 1, 1, 1, 1,	8,	\
+		TSO_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
 #endif /* __OTX2_TX_H__ */
-- 
2.8.4



* [dpdk-dev] [PATCH v2] net/octeontx2: add TSO offload support
  2019-09-06  8:53 [dpdk-dev] [PATCH] net/octeontx2: add TSO offload support Nithin Dabilpuram
@ 2019-09-25  4:36 ` Nithin Dabilpuram
  0 siblings, 0 replies; 4+ messages in thread
From: Nithin Dabilpuram @ 2019-09-25  4:36 UTC (permalink / raw)
  To: Jerin Jacob, Nithin Dabilpuram, Kiran Kumar K, John McNamara,
	Marko Kovacevic, Vamsi Attunuru, Pavan Nikhilesh
  Cc: dev

Add support for the below TCP segmentation offloads on
96xx A1 onwards and 95xx B0 onwards:
- TCPv4, TCPv6
- VXLAN[v4 | v6][v4 | v6]
- GENEVE[v4 | v6][v4 | v6]

This patch also marks a fastpath function as always-inline
for performance reasons in multi-seg mode.

Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
---

v2:
* Remove duplicate NIX_LSO_FIELD_MAX define
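
For reference, a sketch of the LSO format index layout that
nix_setup_lso_formats() checks for and that the fastpath arithmetic
relies on (illustrative enum only, not part of the patch; the names
below are made up):

  /*
   * The six HW LSO profiles are requested in this order, so the
   * fastpath can compute the format index as
   *   base + inner_ipv6                              for plain TSO
   *   base + inner_ipv6 + 2 * udp_tun + 2 * outer_ipv6  for UDP-tunneled TSO
   * where base is expected to be NIX_LSO_FORMAT_IDX_TSOV4 (0).
   */
  enum lso_fmt_idx {
  	LSO_FMT_TSOV4 = 0,	/* TCP over IPv4                  */
  	LSO_FMT_TSOV6,		/* TCP over IPv6                  */
  	LSO_FMT_UDP_TUN_V4V4,	/* outer IPv4/UDP, inner IPv4/TCP */
  	LSO_FMT_UDP_TUN_V4V6,	/* outer IPv4/UDP, inner IPv6/TCP */
  	LSO_FMT_UDP_TUN_V6V4,	/* outer IPv6/UDP, inner IPv4/TCP */
  	LSO_FMT_UDP_TUN_V6V6,	/* outer IPv6/UDP, inner IPv6/TCP */
  };
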

 doc/guides/nics/features/octeontx2.ini     |   1 +
 drivers/common/octeontx2/hw/otx2_nix.h     |  12 ++
 drivers/common/octeontx2/otx2_common.h     |  17 ++
 drivers/common/octeontx2/otx2_mbox.h       |   1 -
 drivers/event/octeontx2/otx2_evdev.c       |  30 +--
 drivers/event/octeontx2/otx2_evdev.h       |   2 +-
 drivers/event/octeontx2/otx2_worker.c      |   4 +-
 drivers/event/octeontx2/otx2_worker.h      |   2 +
 drivers/event/octeontx2/otx2_worker_dual.c |   4 +-
 drivers/net/octeontx2/otx2_ethdev.c        | 222 ++++++++++++++++++++-
 drivers/net/octeontx2/otx2_ethdev.h        |   4 +
 drivers/net/octeontx2/otx2_tx.c            |  55 ++++--
 drivers/net/octeontx2/otx2_tx.h            | 298 ++++++++++++++++++++++-------
 13 files changed, 545 insertions(+), 107 deletions(-)

diff --git a/doc/guides/nics/features/octeontx2.ini b/doc/guides/nics/features/octeontx2.ini
index 6695232..5b73c8b 100644
--- a/doc/guides/nics/features/octeontx2.ini
+++ b/doc/guides/nics/features/octeontx2.ini
@@ -17,6 +17,7 @@ Fast mbuf free       = Y
 Free Tx mbuf on demand = Y
 Queue start/stop     = Y
 MTU update           = Y
+TSO                  = Y
 Promiscuous mode     = Y
 Allmulticast mode    = Y
 Unicast MAC filter   = Y
diff --git a/drivers/common/octeontx2/hw/otx2_nix.h b/drivers/common/octeontx2/hw/otx2_nix.h
index d5ad988..e3b6850 100644
--- a/drivers/common/octeontx2/hw/otx2_nix.h
+++ b/drivers/common/octeontx2/hw/otx2_nix.h
@@ -1376,4 +1376,16 @@ struct nix_lso_format {
 	uint64_t rsvd_19_63  : 45;
 };
 
+#define NIX_LSO_FIELD_MAX	(8)
+#define NIX_LSO_FIELD_ALG_MASK	GENMASK(18, 16)
+#define NIX_LSO_FIELD_SZ_MASK	GENMASK(13, 12)
+#define NIX_LSO_FIELD_LY_MASK	GENMASK(9, 8)
+#define NIX_LSO_FIELD_OFF_MASK	GENMASK(7, 0)
+
+#define NIX_LSO_FIELD_MASK			\
+		(NIX_LSO_FIELD_OFF_MASK |	\
+		 NIX_LSO_FIELD_LY_MASK |	\
+		 NIX_LSO_FIELD_SZ_MASK |	\
+		 NIX_LSO_FIELD_ALG_MASK)
+
 #endif /* __OTX2_NIX_HW_H__ */
diff --git a/drivers/common/octeontx2/otx2_common.h b/drivers/common/octeontx2/otx2_common.h
index a155462..0d0b87e 100644
--- a/drivers/common/octeontx2/otx2_common.h
+++ b/drivers/common/octeontx2/otx2_common.h
@@ -31,6 +31,23 @@
 #define BIT(nr)     (1UL << (nr))
 #endif
 
+#ifndef BITS_PER_LONG
+#define BITS_PER_LONG	(__SIZEOF_LONG__ * 8)
+#endif
+#ifndef BITS_PER_LONG_LONG
+#define BITS_PER_LONG_LONG (__SIZEOF_LONG_LONG__ * 8)
+#endif
+
+#ifndef GENMASK
+#define GENMASK(h, l) \
+		(((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
+#endif
+#ifndef GENMASK_ULL
+#define GENMASK_ULL(h, l) \
+	(((~0ULL) - (1ULL << (l)) + 1) & \
+	 (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
+#endif
+
 /* Compiler attributes */
 #ifndef __hot
 #define __hot   __attribute__((hot))
diff --git a/drivers/common/octeontx2/otx2_mbox.h b/drivers/common/octeontx2/otx2_mbox.h
index 9652f4d..8f6aeb6 100644
--- a/drivers/common/octeontx2/otx2_mbox.h
+++ b/drivers/common/octeontx2/otx2_mbox.h
@@ -869,7 +869,6 @@ struct nix_mark_format_cfg_rsp {
 struct nix_lso_format_cfg {
 	struct mbox_msghdr hdr;
 	uint64_t __otx2_io field_mask;
-#define NIX_LSO_FIELD_MAX	(8)
 	uint64_t __otx2_io fields[NIX_LSO_FIELD_MAX];
 };
 
diff --git a/drivers/event/octeontx2/otx2_evdev.c b/drivers/event/octeontx2/otx2_evdev.c
index 160ac94..4d8860f 100644
--- a/drivers/event/octeontx2/otx2_evdev.c
+++ b/drivers/event/octeontx2/otx2_evdev.c
@@ -169,33 +169,35 @@ SSO_RX_ADPTR_ENQ_FASTPATH_FUNC
 	};
 
 	/* Tx modes */
-	const event_tx_adapter_enqueue ssogws_tx_adptr_enq[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-		[f4][f3][f2][f1][f0] =  otx2_ssogws_tx_adptr_enq_ ## name,
+	const event_tx_adapter_enqueue ssogws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+		[f5][f4][f3][f2][f1][f0] =  otx2_ssogws_tx_adptr_enq_ ## name,
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 	};
 
 	const event_tx_adapter_enqueue
-		ssogws_tx_adptr_enq_seg[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-		[f4][f3][f2][f1][f0] =  otx2_ssogws_tx_adptr_enq_seg_ ## name,
+		ssogws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+		[f5][f4][f3][f2][f1][f0] =				\
+			otx2_ssogws_tx_adptr_enq_seg_ ## name,
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 	};
 
 	const event_tx_adapter_enqueue
-		ssogws_dual_tx_adptr_enq[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-		[f4][f3][f2][f1][f0] =  otx2_ssogws_dual_tx_adptr_enq_ ## name,
+		ssogws_dual_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+		[f5][f4][f3][f2][f1][f0] =				\
+			otx2_ssogws_dual_tx_adptr_enq_ ## name,
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 	};
 
 	const event_tx_adapter_enqueue
-		ssogws_dual_tx_adptr_enq_seg[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-		[f4][f3][f2][f1][f0] =					\
+		ssogws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+		[f5][f4][f3][f2][f1][f0] =				\
 			otx2_ssogws_dual_tx_adptr_enq_seg_ ## name,
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
@@ -274,6 +276,7 @@ SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 	if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
 		/* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
 		event_dev->txa_enqueue = ssogws_tx_adptr_enq_seg
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
@@ -281,6 +284,7 @@ SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
 	} else {
 		event_dev->txa_enqueue = ssogws_tx_adptr_enq
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
@@ -406,6 +410,7 @@ SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 		if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
 		/* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
 			event_dev->txa_enqueue = ssogws_dual_tx_adptr_enq_seg
+				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
 				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
 				[!!(dev->tx_offloads &
 						NIX_TX_OFFLOAD_MBUF_NOFF_F)]
@@ -417,6 +422,7 @@ SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 						NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
 		} else {
 			event_dev->txa_enqueue = ssogws_dual_tx_adptr_enq
+				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
 				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
 				[!!(dev->tx_offloads &
 						NIX_TX_OFFLOAD_MBUF_NOFF_F)]
diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h
index 5cd80e3b2..530060f 100644
--- a/drivers/event/octeontx2/otx2_evdev.h
+++ b/drivers/event/octeontx2/otx2_evdev.h
@@ -331,7 +331,7 @@ uint16_t otx2_ssogws_dual_deq_seg_timeout_burst_ ##name(void *port,	       \
 SSO_RX_ADPTR_ENQ_FASTPATH_FUNC
 #undef R
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				     \
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			     \
 uint16_t otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],\
 					   uint16_t nb_events);		     \
 uint16_t otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port,		     \
diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c
index cd14cd3..34d39f4 100644
--- a/drivers/event/octeontx2/otx2_worker.c
+++ b/drivers/event/octeontx2/otx2_worker.c
@@ -267,7 +267,7 @@ otx2_ssogws_enq_fwd_burst(void *port, const struct rte_event ev[],
 	return 1;
 }
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 uint16_t __hot								\
 otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],	\
 				  uint16_t nb_events)			\
@@ -281,7 +281,7 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],	\
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 uint16_t __hot								\
 otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\
 				      uint16_t nb_events)		\
diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h
index 76f91bb..4e971f2 100644
--- a/drivers/event/octeontx2/otx2_worker.h
+++ b/drivers/event/octeontx2/otx2_worker.h
@@ -256,6 +256,8 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[],
 	struct rte_mbuf *m = ev[0].mbuf;
 	const struct otx2_eth_txq *txq = otx2_ssogws_xtract_meta(m);
 
+	/* Perform header writes before barrier for TSO */
+	otx2_nix_xmit_prepare_tso(m, flags);
 	otx2_ssogws_head_wait(ws, !ev->sched_type);
 	otx2_ssogws_prepare_pkt(txq, m, cmd, flags);
 
diff --git a/drivers/event/octeontx2/otx2_worker_dual.c b/drivers/event/octeontx2/otx2_worker_dual.c
index 37c274a..ff68472 100644
--- a/drivers/event/octeontx2/otx2_worker_dual.c
+++ b/drivers/event/octeontx2/otx2_worker_dual.c
@@ -306,7 +306,7 @@ otx2_ssogws_dual_deq_seg_timeout_burst_ ##name(void *port,		\
 SSO_RX_ADPTR_ENQ_FASTPATH_FUNC
 #undef R
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 uint16_t __hot								\
 otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port,			\
 				       struct rte_event ev[],		\
@@ -323,7 +323,7 @@ otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port,			\
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 uint16_t __hot								\
 otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port,			\
 					   struct rte_event ev[],	\
diff --git a/drivers/net/octeontx2/otx2_ethdev.c b/drivers/net/octeontx2/otx2_ethdev.c
index b84128f..c447f83 100644
--- a/drivers/net/octeontx2/otx2_ethdev.c
+++ b/drivers/net/octeontx2/otx2_ethdev.c
@@ -27,9 +27,14 @@ nix_get_rx_offload_capa(struct otx2_eth_dev *dev)
 static inline uint64_t
 nix_get_tx_offload_capa(struct otx2_eth_dev *dev)
 {
-	RTE_SET_USED(dev);
+	uint64_t capa = NIX_TX_OFFLOAD_CAPA;
 
-	return NIX_TX_OFFLOAD_CAPA;
+	/* TSO not supported for earlier chip revisions */
+	if (otx2_dev_is_96xx_A0(dev) || otx2_dev_is_95xx_Ax(dev))
+		capa &= ~(DEV_TX_OFFLOAD_TCP_TSO |
+			  DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
+			  DEV_TX_OFFLOAD_GENEVE_TNL_TSO);
+	return capa;
 }
 
 static const struct otx2_dev_ops otx2_dev_ops = {
@@ -643,6 +648,18 @@ nix_tx_offload_flags(struct rte_eth_dev *eth_dev)
 	if (conf & DEV_TX_OFFLOAD_MULTI_SEGS)
 		flags |= NIX_TX_MULTI_SEG_F;
 
+	/* Enable Inner checksum for TSO */
+	if (conf & DEV_TX_OFFLOAD_TCP_TSO)
+		flags |= (NIX_TX_OFFLOAD_TSO_F |
+			  NIX_TX_OFFLOAD_L3_L4_CSUM_F);
+
+	/* Enable Inner and Outer checksum for Tunnel TSO */
+	if (conf & (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
+		    DEV_TX_OFFLOAD_GENEVE_TNL_TSO))
+		flags |= (NIX_TX_OFFLOAD_TSO_F |
+			  NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
+			  NIX_TX_OFFLOAD_L3_L4_CSUM_F);
+
 	if ((dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP))
 		flags |= NIX_TX_OFFLOAD_TSTAMP_F;
 
@@ -1205,6 +1222,201 @@ nix_set_nop_rxtx_function(struct rte_eth_dev *eth_dev)
 	rte_mb();
 }
 
+static void
+nix_lso_tcp(struct nix_lso_format_cfg *req, bool v4)
+{
+	volatile struct nix_lso_format *field;
+
+	/* Format works only with TCP packet marked by OL3/OL4 */
+	field = (volatile struct nix_lso_format *)&req->fields[0];
+	req->field_mask = NIX_LSO_FIELD_MASK;
+	/* Outer IPv4/IPv6 */
+	field->layer = NIX_TXLAYER_OL3;
+	field->offset = v4 ? 2 : 4;
+	field->sizem1 = 1; /* 2B */
+	field->alg = NIX_LSOALG_ADD_PAYLEN;
+	field++;
+	if (v4) {
+		/* IPID field */
+		field->layer = NIX_TXLAYER_OL3;
+		field->offset = 4;
+		field->sizem1 = 1;
+		/* Incremented linearly per segment */
+		field->alg = NIX_LSOALG_ADD_SEGNUM;
+		field++;
+	}
+
+	/* TCP sequence number update */
+	field->layer = NIX_TXLAYER_OL4;
+	field->offset = 4;
+	field->sizem1 = 3; /* 4 bytes */
+	field->alg = NIX_LSOALG_ADD_OFFSET;
+	field++;
+	/* TCP flags field */
+	field->layer = NIX_TXLAYER_OL4;
+	field->offset = 12;
+	field->sizem1 = 1;
+	field->alg = NIX_LSOALG_TCP_FLAGS;
+	field++;
+}
+
+static void
+nix_lso_udp_tun_tcp(struct nix_lso_format_cfg *req,
+		    bool outer_v4, bool inner_v4)
+{
+	volatile struct nix_lso_format *field;
+
+	field = (volatile struct nix_lso_format *)&req->fields[0];
+	req->field_mask = NIX_LSO_FIELD_MASK;
+	/* Outer IPv4/IPv6 len */
+	field->layer = NIX_TXLAYER_OL3;
+	field->offset = outer_v4 ? 2 : 4;
+	field->sizem1 = 1; /* 2B */
+	field->alg = NIX_LSOALG_ADD_PAYLEN;
+	field++;
+	if (outer_v4) {
+		/* IPID */
+		field->layer = NIX_TXLAYER_OL3;
+		field->offset = 4;
+		field->sizem1 = 1;
+		/* Incremented linearly per segment */
+		field->alg = NIX_LSOALG_ADD_SEGNUM;
+		field++;
+	}
+
+	/* Outer UDP length */
+	field->layer = NIX_TXLAYER_OL4;
+	field->offset = 4;
+	field->sizem1 = 1;
+	field->alg = NIX_LSOALG_ADD_PAYLEN;
+	field++;
+
+	/* Inner IPv4/IPv6 */
+	field->layer = NIX_TXLAYER_IL3;
+	field->offset = inner_v4 ? 2 : 4;
+	field->sizem1 = 1; /* 2B */
+	field->alg = NIX_LSOALG_ADD_PAYLEN;
+	field++;
+	if (inner_v4) {
+		/* IPID field */
+		field->layer = NIX_TXLAYER_IL3;
+		field->offset = 4;
+		field->sizem1 = 1;
+		/* Incremented linearly per segment */
+		field->alg = NIX_LSOALG_ADD_SEGNUM;
+		field++;
+	}
+
+	/* TCP sequence number update */
+	field->layer = NIX_TXLAYER_IL4;
+	field->offset = 4;
+	field->sizem1 = 3; /* 4 bytes */
+	field->alg = NIX_LSOALG_ADD_OFFSET;
+	field++;
+
+	/* TCP flags field */
+	field->layer = NIX_TXLAYER_IL4;
+	field->offset = 12;
+	field->sizem1 = 1;
+	field->alg = NIX_LSOALG_TCP_FLAGS;
+	field++;
+}
+
+static int
+nix_setup_lso_formats(struct otx2_eth_dev *dev)
+{
+	struct otx2_mbox *mbox = dev->mbox;
+	struct nix_lso_format_cfg_rsp *rsp;
+	struct nix_lso_format_cfg *req;
+	uint8_t base;
+	int rc;
+
+	/* Skip if TSO was not requested */
+	if (!(dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F))
+		return 0;
+	/*
+	 * IPv4/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_tcp(req, true);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+
+	base = rsp->lso_format_idx;
+	if (base != NIX_LSO_FORMAT_IDX_TSOV4)
+		return -EFAULT;
+	dev->lso_base_idx = base;
+	otx2_nix_dbg("tcpv4 lso fmt=%u", base);
+
+
+	/*
+	 * IPv6/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_tcp(req, false);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+
+	if (rsp->lso_format_idx != base + 1)
+		return -EFAULT;
+	otx2_nix_dbg("tcpv6 lso fmt=%u\n", base + 1);
+
+	/*
+	 * IPv4/UDP/TUN HDR/IPv4/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_udp_tun_tcp(req, true, true);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+
+	if (rsp->lso_format_idx != base + 2)
+		return -EFAULT;
+	otx2_nix_dbg("udp tun v4v4 fmt=%u\n", base + 2);
+
+	/*
+	 * IPv4/UDP/TUN HDR/IPv6/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_udp_tun_tcp(req, true, false);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+
+	if (rsp->lso_format_idx != base + 3)
+		return -EFAULT;
+	otx2_nix_dbg("udp tun v4v6 fmt=%u\n", base + 3);
+
+	/*
+	 * IPv6/UDP/TUN HDR/IPv4/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_udp_tun_tcp(req, false, true);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+
+	if (rsp->lso_format_idx != base + 4)
+		return -EFAULT;
+	otx2_nix_dbg("udp tun v6v4 fmt=%u\n", base + 4);
+
+	/*
+	 * IPv6/UDP/TUN HDR/IPv6/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_udp_tun_tcp(req, false, false);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+	if (rsp->lso_format_idx != base + 5)
+		return -EFAULT;
+	otx2_nix_dbg("udp tun v6v6 fmt=%u\n", base + 5);
+
+	return 0;
+}
+
 static int
 otx2_nix_configure(struct rte_eth_dev *eth_dev)
 {
@@ -1292,6 +1504,12 @@ otx2_nix_configure(struct rte_eth_dev *eth_dev)
 		goto fail_offloads;
 	}
 
+	rc = nix_setup_lso_formats(dev);
+	if (rc) {
+		otx2_err("failed to setup nix lso format fields, rc=%d", rc);
+		goto free_nix_lf;
+	}
+
 	/* Configure RSS */
 	rc = otx2_nix_rss_config(eth_dev);
 	if (rc) {
diff --git a/drivers/net/octeontx2/otx2_ethdev.h b/drivers/net/octeontx2/otx2_ethdev.h
index 5de0a1d..38f341e 100644
--- a/drivers/net/octeontx2/otx2_ethdev.h
+++ b/drivers/net/octeontx2/otx2_ethdev.h
@@ -127,6 +127,9 @@
 	DEV_TX_OFFLOAD_TCP_CKSUM	| \
 	DEV_TX_OFFLOAD_UDP_CKSUM	| \
 	DEV_TX_OFFLOAD_SCTP_CKSUM	| \
+	DEV_TX_OFFLOAD_TCP_TSO		| \
+	DEV_TX_OFFLOAD_VXLAN_TNL_TSO    | \
+	DEV_TX_OFFLOAD_GENEVE_TNL_TSO   | \
 	DEV_TX_OFFLOAD_MULTI_SEGS	| \
 	DEV_TX_OFFLOAD_IPV4_CKSUM)
 
@@ -249,6 +252,7 @@ struct otx2_eth_dev {
 	uint8_t tx_chan_cnt;
 	uint8_t lso_tsov4_idx;
 	uint8_t lso_tsov6_idx;
+	uint8_t lso_base_idx;
 	uint8_t mac_addr[RTE_ETHER_ADDR_LEN];
 	uint8_t mkex_pfl_name[MKEX_NAME_LEN];
 	uint8_t max_mac_entries;
diff --git a/drivers/net/octeontx2/otx2_tx.c b/drivers/net/octeontx2/otx2_tx.c
index 0dcadff..fa53300 100644
--- a/drivers/net/octeontx2/otx2_tx.c
+++ b/drivers/net/octeontx2/otx2_tx.c
@@ -32,6 +32,12 @@ nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	otx2_lmt_mov(cmd, &txq->cmd[0], otx2_nix_tx_ext_subs(flags));
 
+	/* Perform header writes before barrier for TSO */
+	if (flags & NIX_TX_OFFLOAD_TSO_F) {
+		for (i = 0; i < pkts; i++)
+			otx2_nix_xmit_prepare_tso(tx_pkts[i], flags);
+	}
+
 	/* Lets commit any changes in the packet */
 	rte_cio_wmb();
 
@@ -62,6 +68,12 @@ nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	otx2_lmt_mov(cmd, &txq->cmd[0], otx2_nix_tx_ext_subs(flags));
 
+	/* Perform header writes before barrier for TSO */
+	if (flags & NIX_TX_OFFLOAD_TSO_F) {
+		for (i = 0; i < pkts; i++)
+			otx2_nix_xmit_prepare_tso(tx_pkts[i], flags);
+	}
+
 	/* Lets commit any changes in the packet */
 	rte_cio_wmb();
 
@@ -933,26 +945,34 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 #endif
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 static uint16_t __rte_noinline	__hot					\
 otx2_nix_xmit_pkts_ ## name(void *tx_queue,				\
 			struct rte_mbuf **tx_pkts, uint16_t pkts)	\
 {									\
 	uint64_t cmd[sz];						\
 									\
+	/* For TSO inner checksum is a must */				\
+	if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&				\
+	    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))			\
+		return 0;						\
 	return nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, flags);	\
 }
 
 NIX_TX_FASTPATH_MODES
 #undef T
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 static uint16_t __rte_noinline	__hot					\
 otx2_nix_xmit_pkts_mseg_ ## name(void *tx_queue,			\
 			struct rte_mbuf **tx_pkts, uint16_t pkts)	\
 {									\
 	uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];			\
 									\
+	/* For TSO inner checksum is a must */				\
+	if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&				\
+	    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))			\
+		return 0;						\
 	return nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd,		\
 				  (flags) | NIX_TX_MULTI_SEG_F);	\
 }
@@ -960,14 +980,15 @@ otx2_nix_xmit_pkts_mseg_ ## name(void *tx_queue,			\
 NIX_TX_FASTPATH_MODES
 #undef T
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 static uint16_t __rte_noinline	__hot					\
 otx2_nix_xmit_pkts_vec_ ## name(void *tx_queue,				\
 			struct rte_mbuf **tx_pkts, uint16_t pkts)	\
 {									\
-	/* VLAN and TSTMP is not supported by vec */			\
+	/* VLAN, TSTMP, TSO is not supported by vec */			\
 	if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F ||			\
-	    (flags) & NIX_TX_OFFLOAD_TSTAMP_F)				\
+	    (flags) & NIX_TX_OFFLOAD_TSTAMP_F ||			\
+	    (flags) & NIX_TX_OFFLOAD_TSO_F)				\
 		return 0;						\
 	return nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, (flags));	\
 }
@@ -977,12 +998,13 @@ NIX_TX_FASTPATH_MODES
 
 static inline void
 pick_tx_func(struct rte_eth_dev *eth_dev,
-	     const eth_tx_burst_t tx_burst[2][2][2][2][2])
+	     const eth_tx_burst_t tx_burst[2][2][2][2][2][2])
 {
 	struct otx2_eth_dev *dev = otx2_eth_pmd_priv(eth_dev);
 
 	/* [TSTMP] [NOFF] [VLAN] [OL3_OL4_CSUM] [IL3_IL4_CSUM] */
 	eth_dev->tx_pkt_burst = tx_burst
+		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)]
 		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_TSTAMP_F)]
 		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
 		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
@@ -995,25 +1017,25 @@ otx2_eth_set_tx_function(struct rte_eth_dev *eth_dev)
 {
 	struct otx2_eth_dev *dev = otx2_eth_pmd_priv(eth_dev);
 
-	const eth_tx_burst_t nix_eth_tx_burst[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-	[f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_ ## name,
+	const eth_tx_burst_t nix_eth_tx_burst[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+	[f5][f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_ ## name,
 
 NIX_TX_FASTPATH_MODES
 #undef T
 	};
 
-	const eth_tx_burst_t nix_eth_tx_burst_mseg[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-	[f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_mseg_ ## name,
+	const eth_tx_burst_t nix_eth_tx_burst_mseg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+	[f5][f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_mseg_ ## name,
 
 NIX_TX_FASTPATH_MODES
 #undef T
 	};
 
-	const eth_tx_burst_t nix_eth_tx_vec_burst[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-	[f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_vec_ ## name,
+	const eth_tx_burst_t nix_eth_tx_vec_burst[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+	[f5][f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_vec_ ## name,
 
 NIX_TX_FASTPATH_MODES
 #undef T
@@ -1021,7 +1043,8 @@ NIX_TX_FASTPATH_MODES
 
 	if (dev->scalar_ena ||
 	    (dev->tx_offload_flags &
-	     (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F)))
+	     (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |
+	      NIX_TX_OFFLOAD_TSO_F)))
 		pick_tx_func(eth_dev, nix_eth_tx_burst);
 	else
 		pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/octeontx2/otx2_tx.h b/drivers/net/octeontx2/otx2_tx.h
index 82a6af6..e919198 100644
--- a/drivers/net/octeontx2/otx2_tx.h
+++ b/drivers/net/octeontx2/otx2_tx.h
@@ -11,6 +11,7 @@
 #define NIX_TX_OFFLOAD_VLAN_QINQ_F	BIT(2)
 #define NIX_TX_OFFLOAD_MBUF_NOFF_F	BIT(3)
 #define NIX_TX_OFFLOAD_TSTAMP_F		BIT(4)
+#define NIX_TX_OFFLOAD_TSO_F		BIT(5)
 
 /* Flags to control xmit_prepare function.
  * Defining it from backwards to denote its been
@@ -20,10 +21,18 @@
 
 #define NIX_TX_NEED_SEND_HDR_W1	\
 	(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |	\
-	 NIX_TX_OFFLOAD_VLAN_QINQ_F)
+	 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
 
 #define NIX_TX_NEED_EXT_HDR \
-	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F)
+	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | \
+	 NIX_TX_OFFLOAD_TSO_F)
+
+#define NIX_UDP_TUN_BITMASK \
+	((1ull << (PKT_TX_TUNNEL_VXLAN >> 45)) | \
+	 (1ull << (PKT_TX_TUNNEL_GENEVE >> 45)))
+
+#define NIX_LSO_FORMAT_IDX_TSOV4	(0)
+#define NIX_LSO_FORMAT_IDX_TSOV6	(1)
 
 /* Function to determine no of tx subdesc required in case ext
  * sub desc is enabled.
@@ -32,7 +41,8 @@ static __rte_always_inline int
 otx2_nix_tx_ext_subs(const uint16_t flags)
 {
 	return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 2 :
-		((flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) ? 1 : 0);
+		((flags & (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ?
+		 1 : 0);
 }
 
 static __rte_always_inline void
@@ -135,7 +145,57 @@ otx2_nix_prefree_seg(struct rte_mbuf *m)
 	return 1;
 }
 
-static inline void
+static __rte_always_inline void
+otx2_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
+{
+	uint64_t mask, ol_flags = m->ol_flags;
+
+	if (flags & NIX_TX_OFFLOAD_TSO_F &&
+	    (ol_flags & PKT_TX_TCP_SEG)) {
+		uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
+		uint16_t *iplen, *oiplen, *oudplen;
+		uint16_t lso_sb, paylen;
+
+		mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
+		lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
+			m->l2_len + m->l3_len + m->l4_len;
+
+		/* Reduce payload len from base headers */
+		paylen = m->pkt_len - lso_sb;
+
+		/* Get iplen position assuming no tunnel hdr */
+		iplen = (uint16_t *)(mdata + m->l2_len +
+				     (2 << !!(ol_flags & PKT_TX_IPV6)));
+		/* Handle tunnel tso */
+		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
+		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
+			const uint8_t is_udp_tun = (NIX_UDP_TUN_BITMASK >>
+				((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) & 0x1;
+
+			oiplen = (uint16_t *)(mdata + m->outer_l2_len +
+				(2 << !!(ol_flags & PKT_TX_OUTER_IPV6)));
+			*oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
+						   paylen);
+
+			/* Update format for UDP tunneled packet */
+			if (is_udp_tun) {
+				oudplen = (uint16_t *)(mdata + m->outer_l2_len +
+						       m->outer_l3_len + 4);
+				*oudplen =
+				rte_cpu_to_be_16(rte_be_to_cpu_16(*oudplen) -
+						 paylen);
+			}
+
+			/* Update iplen position to inner ip hdr */
+			iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
+				m->l4_len + (2 << !!(ol_flags & PKT_TX_IPV6)));
+		}
+
+		*iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
+	}
+}
+
+static __rte_always_inline void
 otx2_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
 {
 	struct nix_send_ext_s *send_hdr_ext;
@@ -258,6 +318,37 @@ otx2_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
 		send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
 	}
 
+	if (flags & NIX_TX_OFFLOAD_TSO_F &&
+	    (ol_flags & PKT_TX_TCP_SEG)) {
+		uint16_t lso_sb;
+		uint64_t mask;
+
+		mask = -(!w1.il3type);
+		lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;
+
+		send_hdr_ext->w0.lso_sb = lso_sb;
+		send_hdr_ext->w0.lso = 1;
+		send_hdr_ext->w0.lso_mps = m->tso_segsz;
+		send_hdr_ext->w0.lso_format =
+			NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
+		w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
+
+		/* Handle tunnel tso */
+		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
+		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
+			const uint8_t is_udp_tun = (NIX_UDP_TUN_BITMASK >>
+				((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) & 0x1;
+
+			w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
+			w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
+			/* Update format for UDP tunneled packet */
+			send_hdr_ext->w0.lso_format += (is_udp_tun << 1);
+
+			send_hdr_ext->w0.lso_format +=
+				!!(ol_flags & PKT_TX_OUTER_IPV6) << 1;
+		}
+	}
+
 	if (flags & NIX_TX_NEED_SEND_HDR_W1)
 		send_hdr->w1.u = w1.u;
 
@@ -378,72 +469,137 @@ otx2_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr,
 #define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
 #define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
 #define TSP_F        NIX_TX_OFFLOAD_TSTAMP_F
+#define TSO_F        NIX_TX_OFFLOAD_TSO_F
 
-/* [TSTMP] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
-#define NIX_TX_FASTPATH_MODES					\
-T(no_offload,				0, 0, 0, 0, 0,	4,	\
-		NIX_TX_OFFLOAD_NONE)				\
-T(l3l4csum,				0, 0, 0, 0, 1,	4,	\
-		L3L4CSUM_F)					\
-T(ol3ol4csum,				0, 0, 0, 1, 0,	4,	\
-		OL3OL4CSUM_F)					\
-T(ol3ol4csum_l3l4csum,			0, 0, 0, 1, 1,	4,	\
-		OL3OL4CSUM_F | L3L4CSUM_F)			\
-T(vlan,					0, 0, 1, 0, 0,	6,	\
-		VLAN_F)						\
-T(vlan_l3l4csum,			0, 0, 1, 0, 1,	6,	\
-		VLAN_F | L3L4CSUM_F)				\
-T(vlan_ol3ol4csum,			0, 0, 1, 1, 0,	6,	\
-		VLAN_F | OL3OL4CSUM_F)				\
-T(vlan_ol3ol4csum_l3l4csum,		0, 0, 1, 1, 1,	6,	\
-		VLAN_F | OL3OL4CSUM_F |	L3L4CSUM_F)		\
-T(noff,					0, 1, 0, 0, 0,	4,	\
-		NOFF_F)						\
-T(noff_l3l4csum,			0, 1, 0, 0, 1,	4,	\
-		NOFF_F | L3L4CSUM_F)				\
-T(noff_ol3ol4csum,			0, 1, 0, 1, 0,	4,	\
-		NOFF_F | OL3OL4CSUM_F)				\
-T(noff_ol3ol4csum_l3l4csum,		0, 1, 0, 1, 1,	4,	\
-		NOFF_F | OL3OL4CSUM_F |	L3L4CSUM_F)		\
-T(noff_vlan,				0, 1, 1, 0, 0,	6,	\
-		NOFF_F | VLAN_F)				\
-T(noff_vlan_l3l4csum,			0, 1, 1, 0, 1,	6,	\
-		NOFF_F | VLAN_F | L3L4CSUM_F)			\
-T(noff_vlan_ol3ol4csum,			0, 1, 1, 1, 0,	6,	\
-		NOFF_F | VLAN_F | OL3OL4CSUM_F)			\
-T(noff_vlan_ol3ol4csum_l3l4csum,	0, 1, 1, 1, 1,	6,	\
-		NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
-T(ts,					1, 0, 0, 0, 0,	8,	\
-		TSP_F)						\
-T(ts_l3l4csum,			1, 0, 0, 0, 1,	8,		\
-		TSP_F | L3L4CSUM_F)				\
-T(ts_ol3ol4csum,			1, 0, 0, 1, 0,	8,	\
-		TSP_F | OL3OL4CSUM_F)				\
-T(ts_ol3ol4csum_l3l4csum,		1, 0, 0, 1, 1,	8,	\
-		TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)		\
-T(ts_vlan,				1, 0, 1, 0, 0,	8,	\
-		TSP_F | VLAN_F)					\
-T(ts_vlan_l3l4csum,			1, 0, 1, 0, 1,	8,	\
-		TSP_F | VLAN_F | L3L4CSUM_F)			\
-T(ts_vlan_ol3ol4csum,		1, 0, 1, 1, 0,	8,		\
-		TSP_F | VLAN_F | OL3OL4CSUM_F)			\
-T(ts_vlan_ol3ol4csum_l3l4csum,	1, 0, 1, 1, 1,	8,		\
-		TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
-T(ts_noff,				1, 1, 0, 0, 0,	8,	\
-		TSP_F | NOFF_F)					\
-T(ts_noff_l3l4csum,			1, 1, 0, 0, 1,	8,	\
-		TSP_F | NOFF_F | L3L4CSUM_F)			\
-T(ts_noff_ol3ol4csum,		1, 1, 0, 1, 0,	8,		\
-		TSP_F | NOFF_F | OL3OL4CSUM_F)			\
-T(ts_noff_ol3ol4csum_l3l4csum,	1, 1, 0, 1, 1,	8,		\
-		TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
-T(ts_noff_vlan,			1, 1, 1, 0, 0,	8,		\
-		TSP_F | NOFF_F | VLAN_F)			\
-T(ts_noff_vlan_l3l4csum,		1, 1, 1, 0, 1,	8,	\
-		TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)		\
-T(ts_noff_vlan_ol3ol4csum,		1, 1, 1, 1, 0,	8,	\
-		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)		\
-T(ts_noff_vlan_ol3ol4csum_l3l4csum,	1, 1, 1, 1, 1,	8,	\
-		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
-
+/* [TSO] [TSTMP] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
+#define NIX_TX_FASTPATH_MODES						\
+T(no_offload,				0, 0, 0, 0, 0, 0,	4,	\
+		NIX_TX_OFFLOAD_NONE)					\
+T(l3l4csum,				0, 0, 0, 0, 0, 1,	4,	\
+		L3L4CSUM_F)						\
+T(ol3ol4csum,				0, 0, 0, 0, 1, 0,	4,	\
+		OL3OL4CSUM_F)						\
+T(ol3ol4csum_l3l4csum,			0, 0, 0, 0, 1, 1,	4,	\
+		OL3OL4CSUM_F | L3L4CSUM_F)				\
+T(vlan,					0, 0, 0, 1, 0, 0,	6,	\
+		VLAN_F)							\
+T(vlan_l3l4csum,			0, 0, 0, 1, 0, 1,	6,	\
+		VLAN_F | L3L4CSUM_F)					\
+T(vlan_ol3ol4csum,			0, 0, 0, 1, 1, 0,	6,	\
+		VLAN_F | OL3OL4CSUM_F)					\
+T(vlan_ol3ol4csum_l3l4csum,		0, 0, 0, 1, 1, 1,	6,	\
+		VLAN_F | OL3OL4CSUM_F |	L3L4CSUM_F)			\
+T(noff,					0, 0, 1, 0, 0, 0,	4,	\
+		NOFF_F)							\
+T(noff_l3l4csum,			0, 0, 1, 0, 0, 1,	4,	\
+		NOFF_F | L3L4CSUM_F)					\
+T(noff_ol3ol4csum,			0, 0, 1, 0, 1, 0,	4,	\
+		NOFF_F | OL3OL4CSUM_F)					\
+T(noff_ol3ol4csum_l3l4csum,		0, 0, 1, 0, 1, 1,	4,	\
+		NOFF_F | OL3OL4CSUM_F |	L3L4CSUM_F)			\
+T(noff_vlan,				0, 0, 1, 1, 0, 0,	6,	\
+		NOFF_F | VLAN_F)					\
+T(noff_vlan_l3l4csum,			0, 0, 1, 1, 0, 1,	6,	\
+		NOFF_F | VLAN_F | L3L4CSUM_F)				\
+T(noff_vlan_ol3ol4csum,			0, 0, 1, 1, 1, 0,	6,	\
+		NOFF_F | VLAN_F | OL3OL4CSUM_F)				\
+T(noff_vlan_ol3ol4csum_l3l4csum,	0, 0, 1, 1, 1, 1,	6,	\
+		NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)		\
+T(ts,					0, 1, 0, 0, 0, 0,	8,	\
+		TSP_F)							\
+T(ts_l3l4csum,				0, 1, 0, 0, 0, 1,	8,	\
+		TSP_F | L3L4CSUM_F)					\
+T(ts_ol3ol4csum,			0, 1, 0, 0, 1, 0,	8,	\
+		TSP_F | OL3OL4CSUM_F)					\
+T(ts_ol3ol4csum_l3l4csum,		0, 1, 0, 0, 1, 1,	8,	\
+		TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)			\
+T(ts_vlan,				0, 1, 0, 1, 0, 0,	8,	\
+		TSP_F | VLAN_F)						\
+T(ts_vlan_l3l4csum,			0, 1, 0, 1, 0, 1,	8,	\
+		TSP_F | VLAN_F | L3L4CSUM_F)				\
+T(ts_vlan_ol3ol4csum,			0, 1, 0, 1, 1, 0,	8,	\
+		TSP_F | VLAN_F | OL3OL4CSUM_F)				\
+T(ts_vlan_ol3ol4csum_l3l4csum,		0, 1, 0, 1, 1, 1,	8,	\
+		TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)		\
+T(ts_noff,				0, 1, 1, 0, 0, 0,	8,	\
+		TSP_F | NOFF_F)						\
+T(ts_noff_l3l4csum,			0, 1, 1, 0, 0, 1,	8,	\
+		TSP_F | NOFF_F | L3L4CSUM_F)				\
+T(ts_noff_ol3ol4csum,			0, 1, 1, 0, 1, 0,	8,	\
+		TSP_F | NOFF_F | OL3OL4CSUM_F)				\
+T(ts_noff_ol3ol4csum_l3l4csum,		0, 1, 1, 0, 1, 1,	8,	\
+		TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)		\
+T(ts_noff_vlan,				0, 1, 1, 1, 0, 0,	8,	\
+		TSP_F | NOFF_F | VLAN_F)				\
+T(ts_noff_vlan_l3l4csum,		0, 1, 1, 1, 0, 1,	8,	\
+		TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)			\
+T(ts_noff_vlan_ol3ol4csum,		0, 1, 1, 1, 1, 0,	8,	\
+		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)			\
+T(ts_noff_vlan_ol3ol4csum_l3l4csum,	0, 1, 1, 1, 1, 1,	8,	\
+		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
+									\
+T(tso,					1, 0, 0, 0, 0, 0,	6,	\
+		TSO_F)							\
+T(tso_l3l4csum,				1, 0, 0, 0, 0, 1,	6,	\
+		TSO_F | L3L4CSUM_F)					\
+T(tso_ol3ol4csum,			1, 0, 0, 0, 1, 0,	6,	\
+		TSO_F | OL3OL4CSUM_F)					\
+T(tso_ol3ol4csum_l3l4csum,		1, 0, 0, 0, 1, 1,	6,	\
+		TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)			\
+T(tso_vlan,				1, 0, 0, 1, 0, 0,	6,	\
+		TSO_F | VLAN_F)						\
+T(tso_vlan_l3l4csum,			1, 0, 0, 1, 0, 1,	6,	\
+		TSO_F | VLAN_F | L3L4CSUM_F)				\
+T(tso_vlan_ol3ol4csum,			1, 0, 0, 1, 1, 0,	6,	\
+		TSO_F | VLAN_F | OL3OL4CSUM_F)				\
+T(tso_vlan_ol3ol4csum_l3l4csum,		1, 0, 0, 1, 1, 1,	6,	\
+		TSO_F | VLAN_F | OL3OL4CSUM_F |	L3L4CSUM_F)		\
+T(tso_noff,				1, 0, 1, 0, 0, 0,	6,	\
+		TSO_F | NOFF_F)						\
+T(tso_noff_l3l4csum,			1, 0, 1, 0, 0, 1,	6,	\
+		TSO_F | NOFF_F | L3L4CSUM_F)				\
+T(tso_noff_ol3ol4csum,			1, 0, 1, 0, 1, 0,	6,	\
+		TSO_F | NOFF_F | OL3OL4CSUM_F)				\
+T(tso_noff_ol3ol4csum_l3l4csum,		1, 0, 1, 0, 1, 1,	6,	\
+		TSO_F | NOFF_F | OL3OL4CSUM_F |	L3L4CSUM_F)		\
+T(tso_noff_vlan,			1, 0, 1, 1, 0, 0,	6,	\
+		TSO_F | NOFF_F | VLAN_F)				\
+T(tso_noff_vlan_l3l4csum,		1, 0, 1, 1, 0, 1,	6,	\
+		TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)			\
+T(tso_noff_vlan_ol3ol4csum,		1, 0, 1, 1, 1, 0,	6,	\
+		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)			\
+T(tso_noff_vlan_ol3ol4csum_l3l4csum,	1, 0, 1, 1, 1, 1,	6,	\
+		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
+T(tso_ts,				1, 1, 0, 0, 0, 0,	8,	\
+		TSO_F | TSP_F)						\
+T(tso_ts_l3l4csum,			1, 1, 0, 0, 0, 1,	8,	\
+		TSO_F | TSP_F | L3L4CSUM_F)				\
+T(tso_ts_ol3ol4csum,			1, 1, 0, 0, 1, 0,	8,	\
+		TSO_F | TSP_F | OL3OL4CSUM_F)				\
+T(tso_ts_ol3ol4csum_l3l4csum,		1, 1, 0, 0, 1, 1,	8,	\
+		TSO_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)		\
+T(tso_ts_vlan,				1, 1, 0, 1, 0, 0,	8,	\
+		TSO_F | TSP_F | VLAN_F)					\
+T(tso_ts_vlan_l3l4csum,			1, 1, 0, 1, 0, 1,	8,	\
+		TSO_F | TSP_F | VLAN_F | L3L4CSUM_F)			\
+T(tso_ts_vlan_ol3ol4csum,		1, 1, 0, 1, 1, 0,	8,	\
+		TSO_F | TSP_F | VLAN_F | OL3OL4CSUM_F)			\
+T(tso_ts_vlan_ol3ol4csum_l3l4csum,	1, 1, 0, 1, 1, 1,	8,	\
+		TSO_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
+T(tso_ts_noff,				1, 1, 1, 0, 0, 0,	8,	\
+		TSO_F | TSP_F | NOFF_F)					\
+T(tso_ts_noff_l3l4csum,			1, 1, 1, 0, 0, 1,	8,	\
+		TSO_F | TSP_F | NOFF_F | L3L4CSUM_F)			\
+T(tso_ts_noff_ol3ol4csum,		1, 1, 1, 0, 1, 0,	8,	\
+		TSO_F | TSP_F | NOFF_F | OL3OL4CSUM_F)			\
+T(tso_ts_noff_ol3ol4csum_l3l4csum,	1, 1, 1, 0, 1, 1,	8,	\
+		TSO_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
+T(tso_ts_noff_vlan,			1, 1, 1, 1, 0, 0,	8,	\
+		TSO_F | TSP_F | NOFF_F | VLAN_F)			\
+T(tso_ts_noff_vlan_l3l4csum,		1, 1, 1, 1, 0, 1,	8,	\
+		TSO_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)		\
+T(tso_ts_noff_vlan_ol3ol4csum,		1, 1, 1, 1, 1, 0,	8,	\
+		TSO_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)		\
+T(tso_ts_noff_vlan_ol3ol4csum_l3l4csum,	1, 1, 1, 1, 1, 1,	8,	\
+		TSO_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
 #endif /* __OTX2_TX_H__ */
-- 
2.8.4



* Re: [dpdk-dev] [PATCH v2] net/octeontx2: add TSO offload support
  2019-09-25  4:54 ` [dpdk-dev] [PATCH v2] net/octeontx2: add " Nithin Dabilpuram
@ 2019-10-04  7:57   ` Jerin Jacob
  0 siblings, 0 replies; 4+ messages in thread
From: Jerin Jacob @ 2019-10-04  7:57 UTC (permalink / raw)
  To: Nithin Dabilpuram
  Cc: Jerin Jacob, Kiran Kumar K, John McNamara, Marko Kovacevic,
	Vamsi Attunuru, Pavan Nikhilesh, dpdk-dev, Ferruh Yigit

On Wed, Sep 25, 2019 at 10:28 AM Nithin Dabilpuram
<ndabilpuram@marvell.com> wrote:
>
> Add support to below TCP segmentation offloads for
> 96XX A1 onwards and 95xx B0 onwards.
> - TCPv4, TCPv6
> - VXLAN[v4 | v6][v4 | v6]
> - GENEVE[v4 | v6][v4 | v6]
>
> This patch also modifies a fastpath function to be forced
> inline due to performance reasons for multi-seg mode.
>
> Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
> ---
>

Updated the missing TSO support entry in doc/guides/nics/features/octeontx2_vf.ini.
TSO is unsupported only in vector mode.

Applied to dpdk-next-net-mrvl/master. Thanks
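
For reference, a minimal application-side sketch of how these offloads are
consumed through the ethdev API of this release (port_id, mss and the helper
names are placeholders; queue setup and error handling are omitted):

#include <string.h>
#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_mbuf.h>

/* Request TCP/VXLAN/GENEVE TSO, limited to what the PMD advertises
 * (earlier 96XX/95XX revisions strip these bits from tx_offload_capa).
 */
static int
configure_with_tso(uint16_t port_id, uint16_t nb_rxq, uint16_t nb_txq)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_conf conf;
	uint64_t want = DEV_TX_OFFLOAD_TCP_TSO |
			DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
			DEV_TX_OFFLOAD_GENEVE_TNL_TSO;

	memset(&conf, 0, sizeof(conf));
	rte_eth_dev_info_get(port_id, &dev_info);
	conf.txmode.offloads = want & dev_info.tx_offload_capa;

	return rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
}

/* Per-mbuf metadata for a plain (non-tunnel) TCPv4 TSO packet. */
static void
request_tso(struct rte_mbuf *m, uint16_t mss)
{
	m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_TCP_SEG;
	m->l2_len = sizeof(struct rte_ether_hdr);
	m->l3_len = sizeof(struct rte_ipv4_hdr);
	m->l4_len = sizeof(struct rte_tcp_hdr); /* no TCP options assumed */
	m->tso_segsz = mss;
}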


* [dpdk-dev] [PATCH v2] net/octeontx2: add TSO offload support
  2019-09-25  4:54 [dpdk-dev] [PATCH] net/octeontx2: add GRE " Nithin Dabilpuram
@ 2019-09-25  4:54 ` Nithin Dabilpuram
  2019-10-04  7:57   ` Jerin Jacob
  0 siblings, 1 reply; 4+ messages in thread
From: Nithin Dabilpuram @ 2019-09-25  4:54 UTC (permalink / raw)
  To: Jerin Jacob, Nithin Dabilpuram, Kiran Kumar K, John McNamara,
	Marko Kovacevic, Vamsi Attunuru, Pavan Nikhilesh
  Cc: dev

Add support to below TCP segmentation offloads for
96XX A1 onwards and 95xx B0 onwards.
- TCPv4, TCPv6
- VXLAN[v4 | v6][v4 | v6]
- GENEVE[v4 | v6][v4 | v6]

This patch also modifies a fastpath function to be forced
inline due to performance reasons for multi-seg mode.

Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
---

v2:
* Remove duplicate NIX_LSO_FIELD_MAX define

 doc/guides/nics/features/octeontx2.ini     |   1 +
 drivers/common/octeontx2/hw/otx2_nix.h     |  12 ++
 drivers/common/octeontx2/otx2_common.h     |  17 ++
 drivers/common/octeontx2/otx2_mbox.h       |   1 -
 drivers/event/octeontx2/otx2_evdev.c       |  30 +--
 drivers/event/octeontx2/otx2_evdev.h       |   2 +-
 drivers/event/octeontx2/otx2_worker.c      |   4 +-
 drivers/event/octeontx2/otx2_worker.h      |   2 +
 drivers/event/octeontx2/otx2_worker_dual.c |   4 +-
 drivers/net/octeontx2/otx2_ethdev.c        | 222 ++++++++++++++++++++-
 drivers/net/octeontx2/otx2_ethdev.h        |   4 +
 drivers/net/octeontx2/otx2_tx.c            |  55 ++++--
 drivers/net/octeontx2/otx2_tx.h            | 298 ++++++++++++++++++++++-------
 13 files changed, 545 insertions(+), 107 deletions(-)

diff --git a/doc/guides/nics/features/octeontx2.ini b/doc/guides/nics/features/octeontx2.ini
index 6695232..5b73c8b 100644
--- a/doc/guides/nics/features/octeontx2.ini
+++ b/doc/guides/nics/features/octeontx2.ini
@@ -17,6 +17,7 @@ Fast mbuf free       = Y
 Free Tx mbuf on demand = Y
 Queue start/stop     = Y
 MTU update           = Y
+TSO                  = Y
 Promiscuous mode     = Y
 Allmulticast mode    = Y
 Unicast MAC filter   = Y
diff --git a/drivers/common/octeontx2/hw/otx2_nix.h b/drivers/common/octeontx2/hw/otx2_nix.h
index d5ad988..e3b6850 100644
--- a/drivers/common/octeontx2/hw/otx2_nix.h
+++ b/drivers/common/octeontx2/hw/otx2_nix.h
@@ -1376,4 +1376,16 @@ struct nix_lso_format {
 	uint64_t rsvd_19_63  : 45;
 };
 
+#define NIX_LSO_FIELD_MAX	(8)
+#define NIX_LSO_FIELD_ALG_MASK	GENMASK(18, 16)
+#define NIX_LSO_FIELD_SZ_MASK	GENMASK(13, 12)
+#define NIX_LSO_FIELD_LY_MASK	GENMASK(9, 8)
+#define NIX_LSO_FIELD_OFF_MASK	GENMASK(7, 0)
+
+#define NIX_LSO_FIELD_MASK			\
+		(NIX_LSO_FIELD_OFF_MASK |	\
+		 NIX_LSO_FIELD_LY_MASK |	\
+		 NIX_LSO_FIELD_SZ_MASK |	\
+		 NIX_LSO_FIELD_ALG_MASK)
+
 #endif /* __OTX2_NIX_HW_H__ */
diff --git a/drivers/common/octeontx2/otx2_common.h b/drivers/common/octeontx2/otx2_common.h
index a155462..0d0b87e 100644
--- a/drivers/common/octeontx2/otx2_common.h
+++ b/drivers/common/octeontx2/otx2_common.h
@@ -31,6 +31,23 @@
 #define BIT(nr)     (1UL << (nr))
 #endif
 
+#ifndef BITS_PER_LONG
+#define BITS_PER_LONG	(__SIZEOF_LONG__ * 8)
+#endif
+#ifndef BITS_PER_LONG_LONG
+#define BITS_PER_LONG_LONG (__SIZEOF_LONG_LONG__ * 8)
+#endif
+
+#ifndef GENMASK
+#define GENMASK(h, l) \
+		(((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
+#endif
+#ifndef GENMASK_ULL
+#define GENMASK_ULL(h, l) \
+	(((~0ULL) - (1ULL << (l)) + 1) & \
+	 (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
+#endif
+
 /* Compiler attributes */
 #ifndef __hot
 #define __hot   __attribute__((hot))
diff --git a/drivers/common/octeontx2/otx2_mbox.h b/drivers/common/octeontx2/otx2_mbox.h
index 9652f4d..8f6aeb6 100644
--- a/drivers/common/octeontx2/otx2_mbox.h
+++ b/drivers/common/octeontx2/otx2_mbox.h
@@ -869,7 +869,6 @@ struct nix_mark_format_cfg_rsp {
 struct nix_lso_format_cfg {
 	struct mbox_msghdr hdr;
 	uint64_t __otx2_io field_mask;
-#define NIX_LSO_FIELD_MAX	(8)
 	uint64_t __otx2_io fields[NIX_LSO_FIELD_MAX];
 };
 
diff --git a/drivers/event/octeontx2/otx2_evdev.c b/drivers/event/octeontx2/otx2_evdev.c
index 160ac94..4d8860f 100644
--- a/drivers/event/octeontx2/otx2_evdev.c
+++ b/drivers/event/octeontx2/otx2_evdev.c
@@ -169,33 +169,35 @@ SSO_RX_ADPTR_ENQ_FASTPATH_FUNC
 	};
 
 	/* Tx modes */
-	const event_tx_adapter_enqueue ssogws_tx_adptr_enq[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-		[f4][f3][f2][f1][f0] =  otx2_ssogws_tx_adptr_enq_ ## name,
+	const event_tx_adapter_enqueue ssogws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+		[f5][f4][f3][f2][f1][f0] =  otx2_ssogws_tx_adptr_enq_ ## name,
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 	};
 
 	const event_tx_adapter_enqueue
-		ssogws_tx_adptr_enq_seg[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-		[f4][f3][f2][f1][f0] =  otx2_ssogws_tx_adptr_enq_seg_ ## name,
+		ssogws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+		[f5][f4][f3][f2][f1][f0] =				\
+			otx2_ssogws_tx_adptr_enq_seg_ ## name,
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 	};
 
 	const event_tx_adapter_enqueue
-		ssogws_dual_tx_adptr_enq[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-		[f4][f3][f2][f1][f0] =  otx2_ssogws_dual_tx_adptr_enq_ ## name,
+		ssogws_dual_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+		[f5][f4][f3][f2][f1][f0] =				\
+			otx2_ssogws_dual_tx_adptr_enq_ ## name,
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 	};
 
 	const event_tx_adapter_enqueue
-		ssogws_dual_tx_adptr_enq_seg[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-		[f4][f3][f2][f1][f0] =					\
+		ssogws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+		[f5][f4][f3][f2][f1][f0] =				\
 			otx2_ssogws_dual_tx_adptr_enq_seg_ ## name,
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
@@ -274,6 +276,7 @@ SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 	if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
 		/* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
 		event_dev->txa_enqueue = ssogws_tx_adptr_enq_seg
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
@@ -281,6 +284,7 @@ SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
 	} else {
 		event_dev->txa_enqueue = ssogws_tx_adptr_enq
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
 			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
@@ -406,6 +410,7 @@ SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 		if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
 		/* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
 			event_dev->txa_enqueue = ssogws_dual_tx_adptr_enq_seg
+				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
 				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
 				[!!(dev->tx_offloads &
 						NIX_TX_OFFLOAD_MBUF_NOFF_F)]
@@ -417,6 +422,7 @@ SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 						NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
 		} else {
 			event_dev->txa_enqueue = ssogws_dual_tx_adptr_enq
+				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
 				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
 				[!!(dev->tx_offloads &
 						NIX_TX_OFFLOAD_MBUF_NOFF_F)]
diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h
index 5cd80e3b2..530060f 100644
--- a/drivers/event/octeontx2/otx2_evdev.h
+++ b/drivers/event/octeontx2/otx2_evdev.h
@@ -331,7 +331,7 @@ uint16_t otx2_ssogws_dual_deq_seg_timeout_burst_ ##name(void *port,	       \
 SSO_RX_ADPTR_ENQ_FASTPATH_FUNC
 #undef R
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				     \
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			     \
 uint16_t otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],\
 					   uint16_t nb_events);		     \
 uint16_t otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port,		     \
diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c
index cd14cd3..34d39f4 100644
--- a/drivers/event/octeontx2/otx2_worker.c
+++ b/drivers/event/octeontx2/otx2_worker.c
@@ -267,7 +267,7 @@ otx2_ssogws_enq_fwd_burst(void *port, const struct rte_event ev[],
 	return 1;
 }
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 uint16_t __hot								\
 otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],	\
 				  uint16_t nb_events)			\
@@ -281,7 +281,7 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],	\
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 uint16_t __hot								\
 otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\
 				      uint16_t nb_events)		\
diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h
index 76f91bb..4e971f2 100644
--- a/drivers/event/octeontx2/otx2_worker.h
+++ b/drivers/event/octeontx2/otx2_worker.h
@@ -256,6 +256,8 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[],
 	struct rte_mbuf *m = ev[0].mbuf;
 	const struct otx2_eth_txq *txq = otx2_ssogws_xtract_meta(m);
 
+	/* Perform header writes before barrier for TSO */
+	otx2_nix_xmit_prepare_tso(m, flags);
 	otx2_ssogws_head_wait(ws, !ev->sched_type);
 	otx2_ssogws_prepare_pkt(txq, m, cmd, flags);
 
diff --git a/drivers/event/octeontx2/otx2_worker_dual.c b/drivers/event/octeontx2/otx2_worker_dual.c
index 37c274a..ff68472 100644
--- a/drivers/event/octeontx2/otx2_worker_dual.c
+++ b/drivers/event/octeontx2/otx2_worker_dual.c
@@ -306,7 +306,7 @@ otx2_ssogws_dual_deq_seg_timeout_burst_ ##name(void *port,		\
 SSO_RX_ADPTR_ENQ_FASTPATH_FUNC
 #undef R
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 uint16_t __hot								\
 otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port,			\
 				       struct rte_event ev[],		\
@@ -323,7 +323,7 @@ otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port,			\
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 uint16_t __hot								\
 otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port,			\
 					   struct rte_event ev[],	\
diff --git a/drivers/net/octeontx2/otx2_ethdev.c b/drivers/net/octeontx2/otx2_ethdev.c
index b84128f..c447f83 100644
--- a/drivers/net/octeontx2/otx2_ethdev.c
+++ b/drivers/net/octeontx2/otx2_ethdev.c
@@ -27,9 +27,14 @@ nix_get_rx_offload_capa(struct otx2_eth_dev *dev)
 static inline uint64_t
 nix_get_tx_offload_capa(struct otx2_eth_dev *dev)
 {
-	RTE_SET_USED(dev);
+	uint64_t capa = NIX_TX_OFFLOAD_CAPA;
 
-	return NIX_TX_OFFLOAD_CAPA;
+	/* TSO not supported for earlier chip revisions */
+	if (otx2_dev_is_96xx_A0(dev) || otx2_dev_is_95xx_Ax(dev))
+		capa &= ~(DEV_TX_OFFLOAD_TCP_TSO |
+			  DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
+			  DEV_TX_OFFLOAD_GENEVE_TNL_TSO);
+	return capa;
 }
 
 static const struct otx2_dev_ops otx2_dev_ops = {
@@ -643,6 +648,18 @@ nix_tx_offload_flags(struct rte_eth_dev *eth_dev)
 	if (conf & DEV_TX_OFFLOAD_MULTI_SEGS)
 		flags |= NIX_TX_MULTI_SEG_F;
 
+	/* Enable Inner checksum for TSO */
+	if (conf & DEV_TX_OFFLOAD_TCP_TSO)
+		flags |= (NIX_TX_OFFLOAD_TSO_F |
+			  NIX_TX_OFFLOAD_L3_L4_CSUM_F);
+
+	/* Enable Inner and Outer checksum for Tunnel TSO */
+	if (conf & (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
+		    DEV_TX_OFFLOAD_GENEVE_TNL_TSO))
+		flags |= (NIX_TX_OFFLOAD_TSO_F |
+			  NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
+			  NIX_TX_OFFLOAD_L3_L4_CSUM_F);
+
 	if ((dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP))
 		flags |= NIX_TX_OFFLOAD_TSTAMP_F;
 
@@ -1205,6 +1222,201 @@ nix_set_nop_rxtx_function(struct rte_eth_dev *eth_dev)
 	rte_mb();
 }
 
+static void
+nix_lso_tcp(struct nix_lso_format_cfg *req, bool v4)
+{
+	volatile struct nix_lso_format *field;
+
+	/* Format works only with TCP packet marked by OL3/OL4 */
+	field = (volatile struct nix_lso_format *)&req->fields[0];
+	req->field_mask = NIX_LSO_FIELD_MASK;
+	/* Outer IPv4/IPv6 */
+	field->layer = NIX_TXLAYER_OL3;
+	field->offset = v4 ? 2 : 4;
+	field->sizem1 = 1; /* 2B */
+	field->alg = NIX_LSOALG_ADD_PAYLEN;
+	field++;
+	if (v4) {
+		/* IPID field */
+		field->layer = NIX_TXLAYER_OL3;
+		field->offset = 4;
+		field->sizem1 = 1;
+		/* Incremented linearly per segment */
+		field->alg = NIX_LSOALG_ADD_SEGNUM;
+		field++;
+	}
+
+	/* TCP sequence number update */
+	field->layer = NIX_TXLAYER_OL4;
+	field->offset = 4;
+	field->sizem1 = 3; /* 4 bytes */
+	field->alg = NIX_LSOALG_ADD_OFFSET;
+	field++;
+	/* TCP flags field */
+	field->layer = NIX_TXLAYER_OL4;
+	field->offset = 12;
+	field->sizem1 = 1;
+	field->alg = NIX_LSOALG_TCP_FLAGS;
+	field++;
+}
+
+static void
+nix_lso_udp_tun_tcp(struct nix_lso_format_cfg *req,
+		    bool outer_v4, bool inner_v4)
+{
+	volatile struct nix_lso_format *field;
+
+	field = (volatile struct nix_lso_format *)&req->fields[0];
+	req->field_mask = NIX_LSO_FIELD_MASK;
+	/* Outer IPv4/IPv6 len */
+	field->layer = NIX_TXLAYER_OL3;
+	field->offset = outer_v4 ? 2 : 4;
+	field->sizem1 = 1; /* 2B */
+	field->alg = NIX_LSOALG_ADD_PAYLEN;
+	field++;
+	if (outer_v4) {
+		/* IPID */
+		field->layer = NIX_TXLAYER_OL3;
+		field->offset = 4;
+		field->sizem1 = 1;
+		/* Incremented linearly per segment */
+		field->alg = NIX_LSOALG_ADD_SEGNUM;
+		field++;
+	}
+
+	/* Outer UDP length */
+	field->layer = NIX_TXLAYER_OL4;
+	field->offset = 4;
+	field->sizem1 = 1;
+	field->alg = NIX_LSOALG_ADD_PAYLEN;
+	field++;
+
+	/* Inner IPv4/IPv6 */
+	field->layer = NIX_TXLAYER_IL3;
+	field->offset = inner_v4 ? 2 : 4;
+	field->sizem1 = 1; /* 2B */
+	field->alg = NIX_LSOALG_ADD_PAYLEN;
+	field++;
+	if (inner_v4) {
+		/* IPID field */
+		field->layer = NIX_TXLAYER_IL3;
+		field->offset = 4;
+		field->sizem1 = 1;
+		/* Incremented linearly per segment */
+		field->alg = NIX_LSOALG_ADD_SEGNUM;
+		field++;
+	}
+
+	/* TCP sequence number update */
+	field->layer = NIX_TXLAYER_IL4;
+	field->offset = 4;
+	field->sizem1 = 3; /* 4 bytes */
+	field->alg = NIX_LSOALG_ADD_OFFSET;
+	field++;
+
+	/* TCP flags field */
+	field->layer = NIX_TXLAYER_IL4;
+	field->offset = 12;
+	field->sizem1 = 1;
+	field->alg = NIX_LSOALG_TCP_FLAGS;
+	field++;
+}
+
+static int
+nix_setup_lso_formats(struct otx2_eth_dev *dev)
+{
+	struct otx2_mbox *mbox = dev->mbox;
+	struct nix_lso_format_cfg_rsp *rsp;
+	struct nix_lso_format_cfg *req;
+	uint8_t base;
+	int rc;
+
+	/* Skip if TSO was not requested */
+	if (!(dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F))
+		return 0;
+	/*
+	 * IPv4/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_tcp(req, true);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+
+	base = rsp->lso_format_idx;
+	if (base != NIX_LSO_FORMAT_IDX_TSOV4)
+		return -EFAULT;
+	dev->lso_base_idx = base;
+	otx2_nix_dbg("tcpv4 lso fmt=%u", base);
+
+
+	/*
+	 * IPv6/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_tcp(req, false);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+
+	if (rsp->lso_format_idx != base + 1)
+		return -EFAULT;
+	otx2_nix_dbg("tcpv6 lso fmt=%u\n", base + 1);
+
+	/*
+	 * IPv4/UDP/TUN HDR/IPv4/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_udp_tun_tcp(req, true, true);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+
+	if (rsp->lso_format_idx != base + 2)
+		return -EFAULT;
+	otx2_nix_dbg("udp tun v4v4 fmt=%u\n", base + 2);
+
+	/*
+	 * IPv4/UDP/TUN HDR/IPv6/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_udp_tun_tcp(req, true, false);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+
+	if (rsp->lso_format_idx != base + 3)
+		return -EFAULT;
+	otx2_nix_dbg("udp tun v4v6 fmt=%u\n", base + 3);
+
+	/*
+	 * IPv6/UDP/TUN HDR/IPv4/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_udp_tun_tcp(req, false, true);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+
+	if (rsp->lso_format_idx != base + 4)
+		return -EFAULT;
+	otx2_nix_dbg("udp tun v6v4 fmt=%u\n", base + 4);
+
+	/*
+	 * IPv6/UDP/TUN HDR/IPv6/TCP LSO
+	 */
+	req = otx2_mbox_alloc_msg_nix_lso_format_cfg(mbox);
+	nix_lso_udp_tun_tcp(req, false, false);
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		return rc;
+	if (rsp->lso_format_idx != base + 5)
+		return -EFAULT;
+	otx2_nix_dbg("udp tun v6v6 fmt=%u\n", base + 5);
+
+	return 0;
+}
+
 static int
 otx2_nix_configure(struct rte_eth_dev *eth_dev)
 {
@@ -1292,6 +1504,12 @@ otx2_nix_configure(struct rte_eth_dev *eth_dev)
 		goto fail_offloads;
 	}
 
+	rc = nix_setup_lso_formats(dev);
+	if (rc) {
+		otx2_err("failed to setup nix lso format fields, rc=%d", rc);
+		goto free_nix_lf;
+	}
+
 	/* Configure RSS */
 	rc = otx2_nix_rss_config(eth_dev);
 	if (rc) {
diff --git a/drivers/net/octeontx2/otx2_ethdev.h b/drivers/net/octeontx2/otx2_ethdev.h
index 5de0a1d..38f341e 100644
--- a/drivers/net/octeontx2/otx2_ethdev.h
+++ b/drivers/net/octeontx2/otx2_ethdev.h
@@ -127,6 +127,9 @@
 	DEV_TX_OFFLOAD_TCP_CKSUM	| \
 	DEV_TX_OFFLOAD_UDP_CKSUM	| \
 	DEV_TX_OFFLOAD_SCTP_CKSUM	| \
+	DEV_TX_OFFLOAD_TCP_TSO		| \
+	DEV_TX_OFFLOAD_VXLAN_TNL_TSO    | \
+	DEV_TX_OFFLOAD_GENEVE_TNL_TSO   | \
 	DEV_TX_OFFLOAD_MULTI_SEGS	| \
 	DEV_TX_OFFLOAD_IPV4_CKSUM)
 
@@ -249,6 +252,7 @@ struct otx2_eth_dev {
 	uint8_t tx_chan_cnt;
 	uint8_t lso_tsov4_idx;
 	uint8_t lso_tsov6_idx;
+	uint8_t lso_base_idx;
 	uint8_t mac_addr[RTE_ETHER_ADDR_LEN];
 	uint8_t mkex_pfl_name[MKEX_NAME_LEN];
 	uint8_t max_mac_entries;
diff --git a/drivers/net/octeontx2/otx2_tx.c b/drivers/net/octeontx2/otx2_tx.c
index 0dcadff..fa53300 100644
--- a/drivers/net/octeontx2/otx2_tx.c
+++ b/drivers/net/octeontx2/otx2_tx.c
@@ -32,6 +32,12 @@ nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	otx2_lmt_mov(cmd, &txq->cmd[0], otx2_nix_tx_ext_subs(flags));
 
+	/* Perform header writes before barrier for TSO */
+	if (flags & NIX_TX_OFFLOAD_TSO_F) {
+		for (i = 0; i < pkts; i++)
+			otx2_nix_xmit_prepare_tso(tx_pkts[i], flags);
+	}
+
 	/* Lets commit any changes in the packet */
 	rte_cio_wmb();
 
@@ -62,6 +68,12 @@ nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	otx2_lmt_mov(cmd, &txq->cmd[0], otx2_nix_tx_ext_subs(flags));
 
+	/* Perform header writes before barrier for TSO */
+	if (flags & NIX_TX_OFFLOAD_TSO_F) {
+		for (i = 0; i < pkts; i++)
+			otx2_nix_xmit_prepare_tso(tx_pkts[i], flags);
+	}
+
 	/* Lets commit any changes in the packet */
 	rte_cio_wmb();
 
@@ -933,26 +945,34 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 #endif
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 static uint16_t __rte_noinline	__hot					\
 otx2_nix_xmit_pkts_ ## name(void *tx_queue,				\
 			struct rte_mbuf **tx_pkts, uint16_t pkts)	\
 {									\
 	uint64_t cmd[sz];						\
 									\
+	/* For TSO inner checksum is a must */				\
+	if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&				\
+	    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))			\
+		return 0;						\
 	return nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, flags);	\
 }
 
 NIX_TX_FASTPATH_MODES
 #undef T
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 static uint16_t __rte_noinline	__hot					\
 otx2_nix_xmit_pkts_mseg_ ## name(void *tx_queue,			\
 			struct rte_mbuf **tx_pkts, uint16_t pkts)	\
 {									\
 	uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];			\
 									\
+	/* For TSO inner checksum is a must */				\
+	if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&				\
+	    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))			\
+		return 0;						\
 	return nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd,		\
 				  (flags) | NIX_TX_MULTI_SEG_F);	\
 }
@@ -960,14 +980,15 @@ otx2_nix_xmit_pkts_mseg_ ## name(void *tx_queue,			\
 NIX_TX_FASTPATH_MODES
 #undef T
 
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
 static uint16_t __rte_noinline	__hot					\
 otx2_nix_xmit_pkts_vec_ ## name(void *tx_queue,				\
 			struct rte_mbuf **tx_pkts, uint16_t pkts)	\
 {									\
-	/* VLAN and TSTMP is not supported by vec */			\
+	/* VLAN, TSTMP, TSO is not supported by vec */			\
 	if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F ||			\
-	    (flags) & NIX_TX_OFFLOAD_TSTAMP_F)				\
+	    (flags) & NIX_TX_OFFLOAD_TSTAMP_F ||			\
+	    (flags) & NIX_TX_OFFLOAD_TSO_F)				\
 		return 0;						\
 	return nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, (flags));	\
 }
@@ -977,12 +998,13 @@ NIX_TX_FASTPATH_MODES
 
 static inline void
 pick_tx_func(struct rte_eth_dev *eth_dev,
-	     const eth_tx_burst_t tx_burst[2][2][2][2][2])
+	     const eth_tx_burst_t tx_burst[2][2][2][2][2][2])
 {
 	struct otx2_eth_dev *dev = otx2_eth_pmd_priv(eth_dev);
 
 	/* [TSTMP] [NOFF] [VLAN] [OL3_OL4_CSUM] [IL3_IL4_CSUM] */
 	eth_dev->tx_pkt_burst = tx_burst
+		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)]
 		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_TSTAMP_F)]
 		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
 		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
@@ -995,25 +1017,25 @@ otx2_eth_set_tx_function(struct rte_eth_dev *eth_dev)
 {
 	struct otx2_eth_dev *dev = otx2_eth_pmd_priv(eth_dev);
 
-	const eth_tx_burst_t nix_eth_tx_burst[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-	[f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_ ## name,
+	const eth_tx_burst_t nix_eth_tx_burst[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+	[f5][f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_ ## name,
 
 NIX_TX_FASTPATH_MODES
 #undef T
 	};
 
-	const eth_tx_burst_t nix_eth_tx_burst_mseg[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-	[f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_mseg_ ## name,
+	const eth_tx_burst_t nix_eth_tx_burst_mseg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+	[f5][f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_mseg_ ## name,
 
 NIX_TX_FASTPATH_MODES
 #undef T
 	};
 
-	const eth_tx_burst_t nix_eth_tx_vec_burst[2][2][2][2][2] = {
-#define T(name, f4, f3, f2, f1, f0, sz, flags)				\
-	[f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_vec_ ## name,
+	const eth_tx_burst_t nix_eth_tx_vec_burst[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			\
+	[f5][f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_vec_ ## name,
 
 NIX_TX_FASTPATH_MODES
 #undef T
@@ -1021,7 +1043,8 @@ NIX_TX_FASTPATH_MODES
 
 	if (dev->scalar_ena ||
 	    (dev->tx_offload_flags &
-	     (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F)))
+	     (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |
+	      NIX_TX_OFFLOAD_TSO_F)))
 		pick_tx_func(eth_dev, nix_eth_tx_burst);
 	else
 		pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/octeontx2/otx2_tx.h b/drivers/net/octeontx2/otx2_tx.h
index 82a6af6..e919198 100644
--- a/drivers/net/octeontx2/otx2_tx.h
+++ b/drivers/net/octeontx2/otx2_tx.h
@@ -11,6 +11,7 @@
 #define NIX_TX_OFFLOAD_VLAN_QINQ_F	BIT(2)
 #define NIX_TX_OFFLOAD_MBUF_NOFF_F	BIT(3)
 #define NIX_TX_OFFLOAD_TSTAMP_F		BIT(4)
+#define NIX_TX_OFFLOAD_TSO_F		BIT(5)
 
 /* Flags to control xmit_prepare function.
  * Defining it from backwards to denote its been
@@ -20,10 +21,18 @@
 
 #define NIX_TX_NEED_SEND_HDR_W1	\
 	(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |	\
-	 NIX_TX_OFFLOAD_VLAN_QINQ_F)
+	 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
 
 #define NIX_TX_NEED_EXT_HDR \
-	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F)
+	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | \
+	 NIX_TX_OFFLOAD_TSO_F)
+
+#define NIX_UDP_TUN_BITMASK \
+	((1ull << (PKT_TX_TUNNEL_VXLAN >> 45)) | \
+	 (1ull << (PKT_TX_TUNNEL_GENEVE >> 45)))
+
+#define NIX_LSO_FORMAT_IDX_TSOV4	(0)
+#define NIX_LSO_FORMAT_IDX_TSOV6	(1)
 
 /* Function to determine no of tx subdesc required in case ext
  * sub desc is enabled.
@@ -32,7 +41,8 @@ static __rte_always_inline int
 otx2_nix_tx_ext_subs(const uint16_t flags)
 {
 	return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 2 :
-		((flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) ? 1 : 0);
+		((flags & (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ?
+		 1 : 0);
 }
 
 static __rte_always_inline void
@@ -135,7 +145,57 @@ otx2_nix_prefree_seg(struct rte_mbuf *m)
 	return 1;
 }
 
-static inline void
+static __rte_always_inline void
+otx2_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
+{
+	uint64_t mask, ol_flags = m->ol_flags;
+
+	if (flags & NIX_TX_OFFLOAD_TSO_F &&
+	    (ol_flags & PKT_TX_TCP_SEG)) {
+		uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
+		uint16_t *iplen, *oiplen, *oudplen;
+		uint16_t lso_sb, paylen;
+
+		mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
+		lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
+			m->l2_len + m->l3_len + m->l4_len;
+
+		/* Reduce payload len from base headers */
+		paylen = m->pkt_len - lso_sb;
+
+		/* Get iplen position assuming no tunnel hdr */
+		iplen = (uint16_t *)(mdata + m->l2_len +
+				     (2 << !!(ol_flags & PKT_TX_IPV6)));
+		/* Handle tunnel tso */
+		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
+		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
+			const uint8_t is_udp_tun = (NIX_UDP_TUN_BITMASK >>
+				((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) & 0x1;
+
+			oiplen = (uint16_t *)(mdata + m->outer_l2_len +
+				(2 << !!(ol_flags & PKT_TX_OUTER_IPV6)));
+			*oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
+						   paylen);
+
+			/* Update format for UDP tunneled packet */
+			if (is_udp_tun) {
+				oudplen = (uint16_t *)(mdata + m->outer_l2_len +
+						       m->outer_l3_len + 4);
+				*oudplen =
+				rte_cpu_to_be_16(rte_be_to_cpu_16(*oudplen) -
+						 paylen);
+			}
+
+			/* Update iplen position to inner ip hdr */
+			iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
+				m->l4_len + (2 << !!(ol_flags & PKT_TX_IPV6)));
+		}
+
+		*iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
+	}
+}
+
+static __rte_always_inline void
 otx2_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
 {
 	struct nix_send_ext_s *send_hdr_ext;
@@ -258,6 +318,37 @@ otx2_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
 		send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
 	}
 
+	if (flags & NIX_TX_OFFLOAD_TSO_F &&
+	    (ol_flags & PKT_TX_TCP_SEG)) {
+		uint16_t lso_sb;
+		uint64_t mask;
+
+		mask = -(!w1.il3type);
+		lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;
+
+		send_hdr_ext->w0.lso_sb = lso_sb;
+		send_hdr_ext->w0.lso = 1;
+		send_hdr_ext->w0.lso_mps = m->tso_segsz;
+		send_hdr_ext->w0.lso_format =
+			NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
+		w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
+
+		/* Handle tunnel tso */
+		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
+		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
+			const uint8_t is_udp_tun = (NIX_UDP_TUN_BITMASK >>
+				((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) & 0x1;
+
+			w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
+			w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
+			/* Update format for UDP tunneled packet */
+			send_hdr_ext->w0.lso_format += (is_udp_tun << 1);
+
+			send_hdr_ext->w0.lso_format +=
+				!!(ol_flags & PKT_TX_OUTER_IPV6) << 1;
+		}
+	}
+
 	if (flags & NIX_TX_NEED_SEND_HDR_W1)
 		send_hdr->w1.u = w1.u;
 
@@ -378,72 +469,137 @@ otx2_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr,
 #define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
 #define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
 #define TSP_F        NIX_TX_OFFLOAD_TSTAMP_F
+#define TSO_F        NIX_TX_OFFLOAD_TSO_F
 
-/* [TSTMP] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
-#define NIX_TX_FASTPATH_MODES					\
-T(no_offload,				0, 0, 0, 0, 0,	4,	\
-		NIX_TX_OFFLOAD_NONE)				\
-T(l3l4csum,				0, 0, 0, 0, 1,	4,	\
-		L3L4CSUM_F)					\
-T(ol3ol4csum,				0, 0, 0, 1, 0,	4,	\
-		OL3OL4CSUM_F)					\
-T(ol3ol4csum_l3l4csum,			0, 0, 0, 1, 1,	4,	\
-		OL3OL4CSUM_F | L3L4CSUM_F)			\
-T(vlan,					0, 0, 1, 0, 0,	6,	\
-		VLAN_F)						\
-T(vlan_l3l4csum,			0, 0, 1, 0, 1,	6,	\
-		VLAN_F | L3L4CSUM_F)				\
-T(vlan_ol3ol4csum,			0, 0, 1, 1, 0,	6,	\
-		VLAN_F | OL3OL4CSUM_F)				\
-T(vlan_ol3ol4csum_l3l4csum,		0, 0, 1, 1, 1,	6,	\
-		VLAN_F | OL3OL4CSUM_F |	L3L4CSUM_F)		\
-T(noff,					0, 1, 0, 0, 0,	4,	\
-		NOFF_F)						\
-T(noff_l3l4csum,			0, 1, 0, 0, 1,	4,	\
-		NOFF_F | L3L4CSUM_F)				\
-T(noff_ol3ol4csum,			0, 1, 0, 1, 0,	4,	\
-		NOFF_F | OL3OL4CSUM_F)				\
-T(noff_ol3ol4csum_l3l4csum,		0, 1, 0, 1, 1,	4,	\
-		NOFF_F | OL3OL4CSUM_F |	L3L4CSUM_F)		\
-T(noff_vlan,				0, 1, 1, 0, 0,	6,	\
-		NOFF_F | VLAN_F)				\
-T(noff_vlan_l3l4csum,			0, 1, 1, 0, 1,	6,	\
-		NOFF_F | VLAN_F | L3L4CSUM_F)			\
-T(noff_vlan_ol3ol4csum,			0, 1, 1, 1, 0,	6,	\
-		NOFF_F | VLAN_F | OL3OL4CSUM_F)			\
-T(noff_vlan_ol3ol4csum_l3l4csum,	0, 1, 1, 1, 1,	6,	\
-		NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
-T(ts,					1, 0, 0, 0, 0,	8,	\
-		TSP_F)						\
-T(ts_l3l4csum,			1, 0, 0, 0, 1,	8,		\
-		TSP_F | L3L4CSUM_F)				\
-T(ts_ol3ol4csum,			1, 0, 0, 1, 0,	8,	\
-		TSP_F | OL3OL4CSUM_F)				\
-T(ts_ol3ol4csum_l3l4csum,		1, 0, 0, 1, 1,	8,	\
-		TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)		\
-T(ts_vlan,				1, 0, 1, 0, 0,	8,	\
-		TSP_F | VLAN_F)					\
-T(ts_vlan_l3l4csum,			1, 0, 1, 0, 1,	8,	\
-		TSP_F | VLAN_F | L3L4CSUM_F)			\
-T(ts_vlan_ol3ol4csum,		1, 0, 1, 1, 0,	8,		\
-		TSP_F | VLAN_F | OL3OL4CSUM_F)			\
-T(ts_vlan_ol3ol4csum_l3l4csum,	1, 0, 1, 1, 1,	8,		\
-		TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
-T(ts_noff,				1, 1, 0, 0, 0,	8,	\
-		TSP_F | NOFF_F)					\
-T(ts_noff_l3l4csum,			1, 1, 0, 0, 1,	8,	\
-		TSP_F | NOFF_F | L3L4CSUM_F)			\
-T(ts_noff_ol3ol4csum,		1, 1, 0, 1, 0,	8,		\
-		TSP_F | NOFF_F | OL3OL4CSUM_F)			\
-T(ts_noff_ol3ol4csum_l3l4csum,	1, 1, 0, 1, 1,	8,		\
-		TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
-T(ts_noff_vlan,			1, 1, 1, 0, 0,	8,		\
-		TSP_F | NOFF_F | VLAN_F)			\
-T(ts_noff_vlan_l3l4csum,		1, 1, 1, 0, 1,	8,	\
-		TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)		\
-T(ts_noff_vlan_ol3ol4csum,		1, 1, 1, 1, 0,	8,	\
-		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)		\
-T(ts_noff_vlan_ol3ol4csum_l3l4csum,	1, 1, 1, 1, 1,	8,	\
-		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
-
+/* [TSO] [TSTMP] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
+#define NIX_TX_FASTPATH_MODES						\
+T(no_offload,				0, 0, 0, 0, 0, 0,	4,	\
+		NIX_TX_OFFLOAD_NONE)					\
+T(l3l4csum,				0, 0, 0, 0, 0, 1,	4,	\
+		L3L4CSUM_F)						\
+T(ol3ol4csum,				0, 0, 0, 0, 1, 0,	4,	\
+		OL3OL4CSUM_F)						\
+T(ol3ol4csum_l3l4csum,			0, 0, 0, 0, 1, 1,	4,	\
+		OL3OL4CSUM_F | L3L4CSUM_F)				\
+T(vlan,					0, 0, 0, 1, 0, 0,	6,	\
+		VLAN_F)							\
+T(vlan_l3l4csum,			0, 0, 0, 1, 0, 1,	6,	\
+		VLAN_F | L3L4CSUM_F)					\
+T(vlan_ol3ol4csum,			0, 0, 0, 1, 1, 0,	6,	\
+		VLAN_F | OL3OL4CSUM_F)					\
+T(vlan_ol3ol4csum_l3l4csum,		0, 0, 0, 1, 1, 1,	6,	\
+		VLAN_F | OL3OL4CSUM_F |	L3L4CSUM_F)			\
+T(noff,					0, 0, 1, 0, 0, 0,	4,	\
+		NOFF_F)							\
+T(noff_l3l4csum,			0, 0, 1, 0, 0, 1,	4,	\
+		NOFF_F | L3L4CSUM_F)					\
+T(noff_ol3ol4csum,			0, 0, 1, 0, 1, 0,	4,	\
+		NOFF_F | OL3OL4CSUM_F)					\
+T(noff_ol3ol4csum_l3l4csum,		0, 0, 1, 0, 1, 1,	4,	\
+		NOFF_F | OL3OL4CSUM_F |	L3L4CSUM_F)			\
+T(noff_vlan,				0, 0, 1, 1, 0, 0,	6,	\
+		NOFF_F | VLAN_F)					\
+T(noff_vlan_l3l4csum,			0, 0, 1, 1, 0, 1,	6,	\
+		NOFF_F | VLAN_F | L3L4CSUM_F)				\
+T(noff_vlan_ol3ol4csum,			0, 0, 1, 1, 1, 0,	6,	\
+		NOFF_F | VLAN_F | OL3OL4CSUM_F)				\
+T(noff_vlan_ol3ol4csum_l3l4csum,	0, 0, 1, 1, 1, 1,	6,	\
+		NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)		\
+T(ts,					0, 1, 0, 0, 0, 0,	8,	\
+		TSP_F)							\
+T(ts_l3l4csum,				0, 1, 0, 0, 0, 1,	8,	\
+		TSP_F | L3L4CSUM_F)					\
+T(ts_ol3ol4csum,			0, 1, 0, 0, 1, 0,	8,	\
+		TSP_F | OL3OL4CSUM_F)					\
+T(ts_ol3ol4csum_l3l4csum,		0, 1, 0, 0, 1, 1,	8,	\
+		TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)			\
+T(ts_vlan,				0, 1, 0, 1, 0, 0,	8,	\
+		TSP_F | VLAN_F)						\
+T(ts_vlan_l3l4csum,			0, 1, 0, 1, 0, 1,	8,	\
+		TSP_F | VLAN_F | L3L4CSUM_F)				\
+T(ts_vlan_ol3ol4csum,			0, 1, 0, 1, 1, 0,	8,	\
+		TSP_F | VLAN_F | OL3OL4CSUM_F)				\
+T(ts_vlan_ol3ol4csum_l3l4csum,		0, 1, 0, 1, 1, 1,	8,	\
+		TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)		\
+T(ts_noff,				0, 1, 1, 0, 0, 0,	8,	\
+		TSP_F | NOFF_F)						\
+T(ts_noff_l3l4csum,			0, 1, 1, 0, 0, 1,	8,	\
+		TSP_F | NOFF_F | L3L4CSUM_F)				\
+T(ts_noff_ol3ol4csum,			0, 1, 1, 0, 1, 0,	8,	\
+		TSP_F | NOFF_F | OL3OL4CSUM_F)				\
+T(ts_noff_ol3ol4csum_l3l4csum,		0, 1, 1, 0, 1, 1,	8,	\
+		TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)		\
+T(ts_noff_vlan,				0, 1, 1, 1, 0, 0,	8,	\
+		TSP_F | NOFF_F | VLAN_F)				\
+T(ts_noff_vlan_l3l4csum,		0, 1, 1, 1, 0, 1,	8,	\
+		TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)			\
+T(ts_noff_vlan_ol3ol4csum,		0, 1, 1, 1, 1, 0,	8,	\
+		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)			\
+T(ts_noff_vlan_ol3ol4csum_l3l4csum,	0, 1, 1, 1, 1, 1,	8,	\
+		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
+									\
+T(tso,					1, 0, 0, 0, 0, 0,	6,	\
+		TSO_F)							\
+T(tso_l3l4csum,				1, 0, 0, 0, 0, 1,	6,	\
+		TSO_F | L3L4CSUM_F)					\
+T(tso_ol3ol4csum,			1, 0, 0, 0, 1, 0,	6,	\
+		TSO_F | OL3OL4CSUM_F)					\
+T(tso_ol3ol4csum_l3l4csum,		1, 0, 0, 0, 1, 1,	6,	\
+		TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)			\
+T(tso_vlan,				1, 0, 0, 1, 0, 0,	6,	\
+		TSO_F | VLAN_F)						\
+T(tso_vlan_l3l4csum,			1, 0, 0, 1, 0, 1,	6,	\
+		TSO_F | VLAN_F | L3L4CSUM_F)				\
+T(tso_vlan_ol3ol4csum,			1, 0, 0, 1, 1, 0,	6,	\
+		TSO_F | VLAN_F | OL3OL4CSUM_F)				\
+T(tso_vlan_ol3ol4csum_l3l4csum,		1, 0, 0, 1, 1, 1,	6,	\
+		TSO_F | VLAN_F | OL3OL4CSUM_F |	L3L4CSUM_F)		\
+T(tso_noff,				1, 0, 1, 0, 0, 0,	6,	\
+		TSO_F | NOFF_F)						\
+T(tso_noff_l3l4csum,			1, 0, 1, 0, 0, 1,	6,	\
+		TSO_F | NOFF_F | L3L4CSUM_F)				\
+T(tso_noff_ol3ol4csum,			1, 0, 1, 0, 1, 0,	6,	\
+		TSO_F | NOFF_F | OL3OL4CSUM_F)				\
+T(tso_noff_ol3ol4csum_l3l4csum,		1, 0, 1, 0, 1, 1,	6,	\
+		TSO_F | NOFF_F | OL3OL4CSUM_F |	L3L4CSUM_F)		\
+T(tso_noff_vlan,			1, 0, 1, 1, 0, 0,	6,	\
+		TSO_F | NOFF_F | VLAN_F)				\
+T(tso_noff_vlan_l3l4csum,		1, 0, 1, 1, 0, 1,	6,	\
+		TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)			\
+T(tso_noff_vlan_ol3ol4csum,		1, 0, 1, 1, 1, 0,	6,	\
+		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)			\
+T(tso_noff_vlan_ol3ol4csum_l3l4csum,	1, 0, 1, 1, 1, 1,	6,	\
+		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
+T(tso_ts,				1, 1, 0, 0, 0, 0,	8,	\
+		TSO_F | TSP_F)						\
+T(tso_ts_l3l4csum,			1, 1, 0, 0, 0, 1,	8,	\
+		TSO_F | TSP_F | L3L4CSUM_F)				\
+T(tso_ts_ol3ol4csum,			1, 1, 0, 0, 1, 0,	8,	\
+		TSO_F | TSP_F | OL3OL4CSUM_F)				\
+T(tso_ts_ol3ol4csum_l3l4csum,		1, 1, 0, 0, 1, 1,	8,	\
+		TSO_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)		\
+T(tso_ts_vlan,				1, 1, 0, 1, 0, 0,	8,	\
+		TSO_F | TSP_F | VLAN_F)					\
+T(tso_ts_vlan_l3l4csum,			1, 1, 0, 1, 0, 1,	8,	\
+		TSO_F | TSP_F | VLAN_F | L3L4CSUM_F)			\
+T(tso_ts_vlan_ol3ol4csum,		1, 1, 0, 1, 1, 0,	8,	\
+		TSO_F | TSP_F | VLAN_F | OL3OL4CSUM_F)			\
+T(tso_ts_vlan_ol3ol4csum_l3l4csum,	1, 1, 0, 1, 1, 1,	8,	\
+		TSO_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
+T(tso_ts_noff,				1, 1, 1, 0, 0, 0,	8,	\
+		TSO_F | TSP_F | NOFF_F)					\
+T(tso_ts_noff_l3l4csum,			1, 1, 1, 0, 0, 1,	8,	\
+		TSO_F | TSP_F | NOFF_F | L3L4CSUM_F)			\
+T(tso_ts_noff_ol3ol4csum,		1, 1, 1, 0, 1, 0,	8,	\
+		TSO_F | TSP_F | NOFF_F | OL3OL4CSUM_F)			\
+T(tso_ts_noff_ol3ol4csum_l3l4csum,	1, 1, 1, 0, 1, 1,	8,	\
+		TSO_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)	\
+T(tso_ts_noff_vlan,			1, 1, 1, 1, 0, 0,	8,	\
+		TSO_F | TSP_F | NOFF_F | VLAN_F)			\
+T(tso_ts_noff_vlan_l3l4csum,		1, 1, 1, 1, 0, 1,	8,	\
+		TSO_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)		\
+T(tso_ts_noff_vlan_ol3ol4csum,		1, 1, 1, 1, 1, 0,	8,	\
+		TSO_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)		\
+T(tso_ts_noff_vlan_ol3ol4csum_l3l4csum,	1, 1, 1, 1, 1, 1,	8,	\
+		TSO_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
 #endif /* __OTX2_TX_H__ */
-- 
2.8.4
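
As a side note on how the six LSO formats registered in nix_setup_lso_formats()
line up with the index arithmetic in otx2_nix_xmit_prepare(), a standalone
sketch of the selection logic (helper name is hypothetical; it assumes the
definitions from otx2_tx.h and rte_mbuf.h, and omits the checksum-flag gating
done in the real fast path):

/* Format indices: 0 = TCPv4, 1 = TCPv6,
 * 2 = UDP tun v4/v4, 3 = v4/v6, 4 = v6/v4, 5 = v6/v6.
 */
static inline uint8_t
nix_pick_lso_format(uint64_t ol_flags)
{
	/* Inner IPv6 selects the TSOv6 base format. */
	uint8_t fmt = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
	const uint64_t tun = ol_flags & PKT_TX_TUNNEL_MASK;

	if (tun) {
		/* VXLAN/GENEVE are the UDP tunnels handled here. */
		const uint8_t is_udp_tun =
			(NIX_UDP_TUN_BITMASK >> (tun >> 45)) & 0x1;

		/* Jump from 0/1 to the UDP tunnel formats 2..5. */
		fmt += is_udp_tun << 1;
		/* Outer IPv6 picks the v6-outer variants 4..5. */
		fmt += !!(ol_flags & PKT_TX_OUTER_IPV6) << 1;
	}
	return fmt;
}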



end of thread (newest: 2019-10-04  7:57 UTC)

Thread overview: 4+ messages
2019-09-06  8:53 [dpdk-dev] [PATCH] net/octeontx2: add TSO offload support Nithin Dabilpuram
2019-09-25  4:36 ` [dpdk-dev] [PATCH v2] " Nithin Dabilpuram
2019-09-25  4:54 [dpdk-dev] [PATCH] net/octeontx2: add GRE " Nithin Dabilpuram
2019-09-25  4:54 ` [dpdk-dev] [PATCH v2] net/octeontx2: add " Nithin Dabilpuram
2019-10-04  7:57   ` Jerin Jacob
