DPDK patches and discussions
 help / color / mirror / Atom feed
From: David Miller <dmiller423@gmail.com>
To: dev@dpdk.org
Cc: David Miller <dmiller423@gmail.com>,
	Mathew S Thoennes <tardis@us.ibm.com>,
	Konstantin Ananyev <konstantin.ananyev@intel.com>,
	Xiaoyun Li <xiaoyun.li@intel.com>,
	Aman Singh <aman.deep.singh@intel.com>,
	Yuying Zhang <yuying.zhang@intel.com>,
	Olivier Matz <olivier.matz@6wind.com>,
	Yipeng Wang <yipeng1.wang@intel.com>,
	Sameh Gobriel <sameh.gobriel@intel.com>,
	Bruce Richardson <bruce.richardson@intel.com>,
	Vladimir Medvedkin <vladimir.medvedkin@intel.com>,
	Neil Horman <nhorman@tuxdriver.com>,
	Dmitry Kozlyuk <dmitry.kozliuk@gmail.com>,
	Beilei Xing <beilei.xing@intel.com>,
	Matan Azrad <matan@nvidia.com>,
	Viacheslav Ovsiienko <viacheslavo@nvidia.com>,
	Haiyue Wang <haiyue.wang@intel.com>,
	Jakub Grajciar <jgrajcia@cisco.com>,
	Harman Kalra <hkalra@marvell.com>
Subject: [PATCH] Add support for IBM Z s390x
Date: Fri, 28 Oct 2022 17:52:40 -0400	[thread overview]
Message-ID: <20221028215240.103365-1-dmiller423@gmail.com> (raw)

Signed-off-by: David Miller <dmiller423@gmail.com>
Reviewed-by: Mathew S Thoennes <tardis@us.ibm.com>
---
 app/test-acl/main.c                          |   4 +
 app/test-pmd/config.c                        |  12 +-
 app/test/test_acl.c                          |   1 +
 app/test/test_atomic.c                       |   7 +-
 app/test/test_cmdline.c                      |   6 +-
 app/test/test_cmdline_ipaddr.c               |  11 +
 app/test/test_cmdline_num.c                  | 110 ++++
 app/test/test_hash_functions.c               |  29 +
 app/test/test_xmmt_ops.h                     |  14 +
 buildtools/pmdinfogen.py                     |  11 +-
 config/meson.build                           |   2 +
 config/s390x/meson.build                     |  51 ++
 config/s390x/s390x_linux_clang_ubuntu        |  19 +
 doc/guides/nics/features/i40e.ini            |   1 +
 drivers/common/mlx5/mlx5_common.h            |   9 +
 drivers/net/i40e/i40e_rxtx_vec_s390x.c       | 630 +++++++++++++++++++
 drivers/net/i40e/meson.build                 |   2 +
 drivers/net/ixgbe/ixgbe_rxtx.c               |   2 +-
 drivers/net/memif/rte_eth_memif.h            |   2 +
 drivers/net/mlx5/mlx5_rx.c                   |  22 +-
 drivers/net/octeontx/base/octeontx_pki_var.h |   6 +
 examples/l3fwd-acl/main.c                    |   4 +
 examples/l3fwd/l3fwd_em.c                    |   8 +
 examples/l3fwd/l3fwd_lpm_s390x.h             | 137 ++++
 examples/l3fwd/l3fwd_s390x.h                 | 259 ++++++++
 lib/acl/acl_bld.c                            |   3 +
 lib/acl/acl_gen.c                            |   9 +
 lib/acl/acl_run_scalar.c                     |   8 +
 lib/acl/rte_acl.c                            |  27 +
 lib/acl/rte_acl.h                            |   5 +-
 lib/eal/s390x/include/meson.build            |  16 +
 lib/eal/s390x/include/rte_atomic.h           |  47 ++
 lib/eal/s390x/include/rte_byteorder.h        |  43 ++
 lib/eal/s390x/include/rte_cpuflags.h         |  42 ++
 lib/eal/s390x/include/rte_cycles.h           |  44 ++
 lib/eal/s390x/include/rte_io.h               | 184 ++++++
 lib/eal/s390x/include/rte_mcslock.h          |  18 +
 lib/eal/s390x/include/rte_memcpy.h           |  55 ++
 lib/eal/s390x/include/rte_pause.h            |  22 +
 lib/eal/s390x/include/rte_power_intrinsics.h |  20 +
 lib/eal/s390x/include/rte_prefetch.h         |  46 ++
 lib/eal/s390x/include/rte_rwlock.h           |  42 ++
 lib/eal/s390x/include/rte_spinlock.h         |  85 +++
 lib/eal/s390x/include/rte_ticketlock.h       |  18 +
 lib/eal/s390x/include/rte_vect.h             |  35 ++
 lib/eal/s390x/meson.build                    |  16 +
 lib/eal/s390x/rte_cpuflags.c                 |  91 +++
 lib/eal/s390x/rte_cycles.c                   |  11 +
 lib/eal/s390x/rte_hypervisor.c               |  11 +
 lib/eal/s390x/rte_power_intrinsics.c         |  51 ++
 lib/hash/rte_fbk_hash.h                      |   7 +
 lib/lpm/meson.build                          |   1 +
 lib/lpm/rte_lpm.h                            |   2 +
 lib/lpm/rte_lpm6.c                           |  18 +
 lib/lpm/rte_lpm_s390x.h                      | 130 ++++
 meson.build                                  |   2 +
 56 files changed, 2450 insertions(+), 18 deletions(-)
 create mode 100644 config/s390x/meson.build
 create mode 100644 config/s390x/s390x_linux_clang_ubuntu
 create mode 100644 drivers/net/i40e/i40e_rxtx_vec_s390x.c
 create mode 100644 examples/l3fwd/l3fwd_lpm_s390x.h
 create mode 100644 examples/l3fwd/l3fwd_s390x.h
 create mode 100644 lib/eal/s390x/include/meson.build
 create mode 100644 lib/eal/s390x/include/rte_atomic.h
 create mode 100644 lib/eal/s390x/include/rte_byteorder.h
 create mode 100644 lib/eal/s390x/include/rte_cpuflags.h
 create mode 100644 lib/eal/s390x/include/rte_cycles.h
 create mode 100644 lib/eal/s390x/include/rte_io.h
 create mode 100644 lib/eal/s390x/include/rte_mcslock.h
 create mode 100644 lib/eal/s390x/include/rte_memcpy.h
 create mode 100644 lib/eal/s390x/include/rte_pause.h
 create mode 100644 lib/eal/s390x/include/rte_power_intrinsics.h
 create mode 100644 lib/eal/s390x/include/rte_prefetch.h
 create mode 100644 lib/eal/s390x/include/rte_rwlock.h
 create mode 100644 lib/eal/s390x/include/rte_spinlock.h
 create mode 100644 lib/eal/s390x/include/rte_ticketlock.h
 create mode 100644 lib/eal/s390x/include/rte_vect.h
 create mode 100644 lib/eal/s390x/meson.build
 create mode 100644 lib/eal/s390x/rte_cpuflags.c
 create mode 100644 lib/eal/s390x/rte_cycles.c
 create mode 100644 lib/eal/s390x/rte_hypervisor.c
 create mode 100644 lib/eal/s390x/rte_power_intrinsics.c
 create mode 100644 lib/lpm/rte_lpm_s390x.h

diff --git a/app/test-acl/main.c b/app/test-acl/main.c
index 06e3847ab9..1f567c5359 100644
--- a/app/test-acl/main.c
+++ b/app/test-acl/main.c
@@ -83,6 +83,10 @@ static const struct acl_alg acl_alg[] = {
 		.name = "altivec",
 		.alg = RTE_ACL_CLASSIFY_ALTIVEC,
 	},
+	{
+		.name = "s390x",
+		.alg = RTE_ACL_CLASSIFY_S390X,
+	},
 	{
 		.name = "avx512x16",
 		.alg = RTE_ACL_CLASSIFY_AVX512X16,
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index cc8e7aa138..2a863f3d39 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -245,9 +245,9 @@ nic_stats_display(portid_t port_id)
 	static uint64_t prev_bytes_tx[RTE_MAX_ETHPORTS];
 	static uint64_t prev_ns[RTE_MAX_ETHPORTS];
 	struct timespec cur_time;
-	uint64_t diff_pkts_rx, diff_pkts_tx, diff_bytes_rx, diff_bytes_tx,
-								diff_ns;
-	uint64_t mpps_rx, mpps_tx, mbps_rx, mbps_tx;
+    __uint128_t diff_pkts_rx, diff_pkts_tx, diff_bytes_rx, diff_bytes_tx,
+            diff_ns;
+    __uint128_t mpps_rx, mpps_tx, mbps_rx, mbps_tx;
 	struct rte_eth_stats stats;
 
 	static const char *nic_stats_border = "########################";
@@ -302,9 +302,9 @@ nic_stats_display(portid_t port_id)
 		(double)diff_bytes_tx / diff_ns * NS_PER_SEC : 0;
 
 	printf("\n  Throughput (since last show)\n");
-	printf("  Rx-pps: %12"PRIu64"          Rx-bps: %12"PRIu64"\n  Tx-pps: %12"
-	       PRIu64"          Tx-bps: %12"PRIu64"\n", mpps_rx, mbps_rx * 8,
-	       mpps_tx, mbps_tx * 8);
+    printf("  Rx-pps: %12llu          Rx-bps: %12llu \n  Tx-pps: %12llu          Tx-bps: %12llu \n",
+           (unsigned long long) mpps_rx, (unsigned long long) mbps_rx * 8,
+           (unsigned long long) mpps_tx, (unsigned long long) mbps_tx * 8);
 
 	if (xstats_display_num > 0)
 		nic_xstats_display_periodic(port_id);
diff --git a/app/test/test_acl.c b/app/test/test_acl.c
index 4d51098925..da16365294 100644
--- a/app/test/test_acl.c
+++ b/app/test/test_acl.c
@@ -351,6 +351,7 @@ test_classify_run(struct rte_acl_ctx *acx, struct ipv4_7tuple test_data[],
 		RTE_ACL_CLASSIFY_AVX2,
 		RTE_ACL_CLASSIFY_NEON,
 		RTE_ACL_CLASSIFY_ALTIVEC,
+        RTE_ACL_CLASSIFY_S390X,
 		RTE_ACL_CLASSIFY_AVX512X16,
 		RTE_ACL_CLASSIFY_AVX512X32,
 	};
diff --git a/app/test/test_atomic.c b/app/test/test_atomic.c
index e4b997827e..37ece78425 100644
--- a/app/test/test_atomic.c
+++ b/app/test/test_atomic.c
@@ -17,6 +17,7 @@
 #include <rte_lcore.h>
 #include <rte_random.h>
 #include <rte_hash_crc.h>
+#include <rte_byteorder.h>
 
 #include "test.h"
 
@@ -351,6 +352,7 @@ volatile uint16_t token16;
 volatile uint32_t token32;
 volatile uint64_t token64;
 
+#ifndef RTE_ARCH_S390X
 static void
 build_crc8_table(void)
 {
@@ -441,6 +443,8 @@ test_atomic_exchange(__rte_unused void *arg)
 
 	return 0;
 }
+#endif
+
 static int
 test_atomic(void)
 {
@@ -597,6 +601,7 @@ test_atomic(void)
 	}
 #endif
 
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 	/*
 	 * Test 16/32/64bit atomic exchange.
 	 */
@@ -628,7 +633,7 @@ test_atomic(void)
 		printf("Atomic exchange test failed\n");
 		return -1;
 	}
-
+#endif
 	return 0;
 }
 REGISTER_TEST_COMMAND(atomic_autotest, test_atomic);
diff --git a/app/test/test_cmdline.c b/app/test/test_cmdline.c
index 115bee966d..e0720ff345 100644
--- a/app/test/test_cmdline.c
+++ b/app/test/test_cmdline.c
@@ -10,21 +10,21 @@
 static int
 test_cmdline(void)
 {
-	printf("Testind parsing ethernet addresses...\n");
+	printf("Testing parsing ethernet addresses...\n");
 	if (test_parse_etheraddr_valid() < 0)
 		return -1;
 	if (test_parse_etheraddr_invalid_data() < 0)
 		return -1;
 	if (test_parse_etheraddr_invalid_param() < 0)
 		return -1;
-	printf("Testind parsing port lists...\n");
+	printf("Testing parsing port lists...\n");
 	if (test_parse_portlist_valid() < 0)
 		return -1;
 	if (test_parse_portlist_invalid_data() < 0)
 		return -1;
 	if (test_parse_portlist_invalid_param() < 0)
 		return -1;
-	printf("Testind parsing numbers...\n");
+	printf("Testing parsing numbers...\n");
 	if (test_parse_num_valid() < 0)
 		return -1;
 	if (test_parse_num_invalid_data() < 0)
diff --git a/app/test/test_cmdline_ipaddr.c b/app/test/test_cmdline_ipaddr.c
index f540063508..d950383e10 100644
--- a/app/test/test_cmdline_ipaddr.c
+++ b/app/test/test_cmdline_ipaddr.c
@@ -6,12 +6,14 @@
 #include <inttypes.h>
 
 #include <rte_string_fns.h>
+#include <rte_byteorder.h>
 
 #include <cmdline_parse.h>
 #include <cmdline_parse_ipaddr.h>
 
 #include "test_cmdline.h"
 
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 #define IP4(a,b,c,d) {.s_addr = (uint32_t)(((a) & 0xff) | \
 					   (((b) & 0xff) << 8) | \
 					   (((c) & 0xff) << 16)  | \
@@ -19,6 +21,15 @@
 
 #define U16_SWAP(x) \
 		(((x & 0xFF) << 8) | ((x & 0xFF00) >> 8))
+#else /* big endian: octet a is the most significant byte of s_addr */
+#define IP4(a,b,c,d) {(((uint32_t)((a) & 0xff) << 24) | \
+					   ((uint32_t)((b) & 0xff) << 16) | \
+					   ((uint32_t)((c) & 0xff) << 8)  | \
+					   (uint32_t)((d) & 0xff))}
+/* big-endian hosts need no swap; parenthesized so the argument stays one operand */
+#define U16_SWAP(x) (x)
+
+#endif
 
 /* create IPv6 address, swapping bytes where needed */
 #ifndef s6_addr16
diff --git a/app/test/test_cmdline_num.c b/app/test/test_cmdline_num.c
index 9276de59bd..a710109707 100644
--- a/app/test/test_cmdline_num.c
+++ b/app/test/test_cmdline_num.c
@@ -10,6 +10,7 @@
 
 #include <cmdline_parse.h>
 #include <cmdline_parse_num.h>
+#include <rte_byteorder.h>
 
 #include "test_cmdline.h"
 
@@ -438,6 +439,48 @@ test_parse_num_valid(void)
 			/* check if result matches what it should have matched
 			 * since unsigned numbers don't care about number of bits, we can just convert
 			 * everything to uint64_t without any worries. */
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+            switch (type) {
+                case RTE_UINT8:
+                {
+                    uint8_t *temp = (uint8_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_UINT16:
+                {
+                    uint16_t *temp = (uint16_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_UINT32:
+                {
+                    uint32_t *temp = (uint32_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_INT8:
+                {
+                    int8_t *temp = (int8_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_INT16:
+                {
+                    int16_t *temp = (int16_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_INT32:
+                {
+                    int32_t *temp = (int32_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                default:
+                    break;
+            }
+#endif
 			if (ret > 0 && num_valid_positive_strs[i].result != result) {
 				printf("Error: parsing %s as %s failed: result mismatch!\n",
 						num_valid_positive_strs[i].str, buf);
@@ -467,6 +510,7 @@ test_parse_num_valid(void)
 			 * the result is signed in this case, so we have to account for that */
 			if (ret > 0) {
 				/* detect negative */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 				switch (type) {
 				case RTE_INT8:
 					result = (int8_t) result;
@@ -480,6 +524,30 @@ test_parse_num_valid(void)
 				default:
 					break;
 				}
+#else
+                switch (type) {
+				case RTE_INT8:
+				{
+					int8_t *temp = (int8_t *)&result;
+					result = *temp;
+					break;
+				}
+				case RTE_INT16:
+				{
+					int16_t *temp = (int16_t *)&result;
+					result = *temp;
+					break;
+				}
+				case RTE_INT32:
+				{
+					int32_t *temp = (int32_t *)&result;
+					result = *temp;
+					break;
+				}
+				default:
+					break;
+				}
+#endif
 				if (num_valid_negative_strs[i].result == (int64_t) result)
 					continue;
 				printf("Error: parsing %s as %s failed: result mismatch!\n",
@@ -516,6 +584,48 @@ test_parse_num_valid(void)
 			/* check if result matches what it should have matched
 			 * since unsigned numbers don't care about number of bits, we can just convert
 			 * everything to uint64_t without any worries. */
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+            switch (type) {
+                case RTE_UINT8:
+                {
+                    uint8_t *temp = (uint8_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_UINT16:
+                {
+                    uint16_t *temp = (uint16_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_UINT32:
+                {
+                    uint32_t *temp = (uint32_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_INT8:
+                {
+                    int8_t *temp = (int8_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_INT16:
+                {
+                    int16_t *temp = (int16_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_INT32:
+                {
+                    int32_t *temp = (int32_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                default:
+                    break;
+            }
+#endif
 			if (ret > 0 && num_garbage_positive_strs[i].result != result) {
 				printf("Error: parsing %s as %s failed: result mismatch!\n",
 						num_garbage_positive_strs[i].str, buf);
diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
index 76d51b6e71..b387d0eabb 100644
--- a/app/test/test_hash_functions.c
+++ b/app/test/test_hash_functions.c
@@ -25,6 +25,7 @@
  * e.g.: key size = 4, key = 0x03020100
  *       key size = 8, key = 0x0706050403020100
  */
+#if !defined(RTE_ARCH_S390X)
 static uint32_t hash_values_jhash[2][12] = {{
 	0x8ba9414b, 0xdf0d39c9,
 	0xe4cf1d42, 0xd4ccb93c, 0x5e84eafc, 0x21362cfe,
@@ -51,6 +52,34 @@ static uint32_t hash_values_crc[2][12] = {{
 	0x789c104f, 0x53028d3e
 }
 };
+#else /* RTE_ARCH_S390X */
+static uint32_t hash_values_jhash[2][12] = {{
+	0x8ba9414b, 0x8a2f8eb,
+	0x55dcd60b, 0xf0b95bfe, 0x1a28d94c, 0x003d8f00,
+	0x84c90b2c, 0x24b83acf, 0x5e16af2f, 0x751c9f59,
+	0x665b8254, 0x6e347c81
+},
+{
+	0x5c62c303, 0xb21d4b7b,
+	0xa33cdfcf, 0x47cf3d14, 0x1cae829f, 0x1253a9ea,
+	0x7171efd1, 0xcef21db0, 0x3df3f5fe, 0x35fd67d2,
+	0x2922cbc4, 0xeaee5c5c
+}
+};
+static uint32_t hash_values_crc[2][12] = {{
+	0x00000000, 0x13a29877,
+	0x3eef4343, 0xb6719589, 0x938d3d79, 0xed93196b,
+	0xe710a46c, 0x81f7ab71, 0x702bc9ee, 0x26c72488,
+	0x2e7092a9, 0xf2fbc80b
+},
+{
+	0xbdfd3980, 0x91e95e36,
+	0x37765e57, 0x6559eb17, 0x49c8a164, 0x18daa0d3,
+	0x67065980, 0x62f966d0, 0x4e28a2a0, 0xe342d18f,
+	0x1518c680, 0xebe8026b
+}
+};
+#endif /* !defined(RTE_ARCH_S390X) */
 
 /*******************************************************************************
  * Hash function performance test configuration section. Each performance test
diff --git a/app/test/test_xmmt_ops.h b/app/test/test_xmmt_ops.h
index 3a82d5ecac..a11f759af4 100644
--- a/app/test/test_xmmt_ops.h
+++ b/app/test/test_xmmt_ops.h
@@ -49,6 +49,20 @@ vect_set_epi32(int i3, int i2, int i1, int i0)
 	return data;
 }
 
+#elif defined(RTE_ARCH_S390X)
+
+/* loads the xmm_t value from address p (does not need to be 16-byte aligned) */
+#define vect_loadu_sil128(p) vec_xld2(0, (signed int *)(p))
+
+/* builds an xmm_t from four signed 32-bit integers; i0 is element 0, i3 is element 3 */
+static __rte_always_inline xmm_t
+vect_set_epi32(int i3, int i2, int i1, int i0)
+{
+	xmm_t data = (xmm_t){i0, i1, i2, i3};
+
+	return data;
+}
+
 #endif
 
 #endif /* _TEST_XMMT_OPS_H_ */
diff --git a/buildtools/pmdinfogen.py b/buildtools/pmdinfogen.py
index 2a44f17bda..10467c1a3e 100755
--- a/buildtools/pmdinfogen.py
+++ b/buildtools/pmdinfogen.py
@@ -16,8 +16,15 @@
 except ImportError:
     pass
 
-import coff
+try:
+    import coff
+except TypeError:
+    pass
 
+def decode_asciiz(data):
+    # Keep the bytes in front of the first NUL (all of them when there is
+    # no NUL, since partition() then returns the input unchanged) and decode.
+    return data.partition(b'\x00')[0].decode()
 
 class ELFSymbol:
     def __init__(self, image, symbol):
@@ -28,7 +35,7 @@ def __init__(self, image, symbol):
     def string_value(self):
         size = self._symbol["st_size"]
         value = self.get_value(0, size)
-        return coff.decode_asciiz(value)  # not COFF-specific
+        return decode_asciiz(value)  # not COFF-specific
 
     def get_value(self, offset, size):
         section = self._symbol["st_shndx"]
diff --git a/config/meson.build b/config/meson.build
index 7134e80e8d..407aa1483d 100644
--- a/config/meson.build
+++ b/config/meson.build
@@ -121,6 +121,8 @@ if cpu_instruction_set == 'generic'
         cpu_instruction_set = 'generic'
     elif host_machine.cpu_family().startswith('ppc')
         cpu_instruction_set = 'power8'
+    elif host_machine.cpu_family().startswith('s390x')
+        cpu_instruction_set = 'z13'  # same default as config/s390x/meson.build
     endif
 endif
 
diff --git a/config/s390x/meson.build b/config/s390x/meson.build
new file mode 100644
index 0000000000..b15e74ba44
--- /dev/null
+++ b/config/s390x/meson.build
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# (c) Copyright IBM Corp. 2019, 2020
+
+if not dpdk_conf.get('RTE_ARCH_64')
+	error('Only 64-bit compiles are supported for this platform type')
+endif
+dpdk_conf.set('RTE_ARCH', 's390x')
+dpdk_conf.set('RTE_ARCH_S390X', 1)
+dpdk_conf.set('RTE_FORCE_INTRINSICS', 1)
+
+# overrides specific to s390x
+dpdk_conf.set('RTE_MAX_LCORE', 256)
+dpdk_conf.set('RTE_MAX_NUMA_NODES', 32)
+dpdk_conf.set('RTE_CACHE_LINE_SIZE', 128)
+
+
+
+# default to z13
+cpu_instruction_set = 'z13'
+
+# test compiler support
+cc_march_z14 = cc.has_argument('-march=z14')
+cc_march_z15 = cc.has_argument('-march=z15')
+
+
+machine_args = ['-march=' + cpu_instruction_set, '-mtune=' + cpu_instruction_set]
+
+dpdk_conf.set('RTE_MACHINE','s390x')
+dpdk_conf.set('RTE_MACHINE_CPUFLAG_ZARCH', 1)   # TODO: should this flag be z13-specific?
+#dpdk_conf.set('RTE_MACHINE', cpu_instruction_set)
+
+if (cc.get_define('__s390x__', args: machine_args) != '')
+    compile_time_cpuflags += ['RTE_MACHINE_CPUFLAG_ZARCH']
+endif
+
+
+# Suppress the gcc warning "note: the layout of aggregates containing
+# vectors with 4-byte alignment has changed in GCC 5".
+if (cc.get_id() == 'gcc' and cc.version().version_compare('>=10.0') and
+        cc.version().version_compare('<12.0') and cc.has_argument('-Wno-psabi'))
+    add_project_arguments('-Wno-psabi', language: 'c')
+endif
+
+
+
+
+
+
+
+
+
diff --git a/config/s390x/s390x_linux_clang_ubuntu b/config/s390x/s390x_linux_clang_ubuntu
new file mode 100644
index 0000000000..952d1ce460
--- /dev/null
+++ b/config/s390x/s390x_linux_clang_ubuntu
@@ -0,0 +1,19 @@
+[binaries]
+c = 'clang'
+cpp = 'clang++'
+ar = 'llvm-ar'
+strip = 'llvm-strip'
+llvm-config = 'llvm-config'
+pcap-config = 'llvm-config'
+pkgconfig = 'pkg-config'
+
+[host_machine]
+system = 'linux'
+cpu_family = 's390x'
+cpu = 'z13'
+endian = 'big'
+
+[properties]
+platform = 'generic'
+c_args = ['-target', 's390x-linux-gnu', '--sysroot', '/usr/s390x-linux-gnu']
+c_link_args = ['-target', 's390x-linux-gnu', '-fuse-ld=lld', '--gcc-toolchain=/usr']
diff --git a/doc/guides/nics/features/i40e.ini b/doc/guides/nics/features/i40e.ini
index dd18fec217..bc0c8b1969 100644
--- a/doc/guides/nics/features/i40e.ini
+++ b/doc/guides/nics/features/i40e.ini
@@ -50,6 +50,7 @@ x86-32               = Y
 x86-64               = Y
 ARMv8                = Y
 Power8               = Y
+s390x                = Y
 
 [rte_flow items]
 ah                   = Y
diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h
index 63f31437da..61fd6afa02 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -20,6 +20,11 @@
 #include <rte_spinlock.h>
 #include <rte_os_shim.h>
 
+/* s390x PCI implementation. */
+#ifdef RTE_MACHINE_CPUFLAG_ZARCH
+#include <rte_io.h>
+#endif
+
 #include "mlx5_prm.h"
 #include "mlx5_devx_cmds.h"
 #include "mlx5_common_os.h"
@@ -358,7 +363,11 @@ mlx5_doorbell_ring(struct mlx5_uar_data *uar, uint64_t val, uint32_t index,
 	/* Ensure ordering between DB record actual update and UAR access. */
 	rte_wmb();
 #ifdef RTE_ARCH_64
+# ifndef RTE_MACHINE_CPUFLAG_ZARCH
 	*uar->db = val;
+# else
+    rte_write64_relaxed(val, uar->db);
+# endif
 #else /* !RTE_ARCH_64 */
 	rte_spinlock_lock(uar->sl_p);
 	*(volatile uint32_t *)uar->db = val;
diff --git a/drivers/net/i40e/i40e_rxtx_vec_s390x.c b/drivers/net/i40e/i40e_rxtx_vec_s390x.c
new file mode 100644
index 0000000000..1cee842ad8
--- /dev/null
+++ b/drivers/net/i40e/i40e_rxtx_vec_s390x.c
@@ -0,0 +1,630 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * (c) Copyright IBM Corp. 2017, 2019
+ */
+
+#include <stdint.h>
+#include <vecintrin.h>
+#include <rte_ethdev_driver.h>
+#include <rte_malloc.h>
+
+#include "base/i40e_prototype.h"
+#include "base/i40e_type.h"
+#include "i40e_ethdev.h"
+#include "i40e_rxtx.h"
+#include "i40e_rxtx_vec_common.h"
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+/* GCC vector_size aliases holding 2, 4, 8 and 16 unsigned lanes respectively. */
+typedef unsigned long long vector_unsigned_long_long
+	__attribute__((vector_size(2 * sizeof(unsigned long long))));
+typedef unsigned int vector_unsigned_int
+	__attribute__((vector_size(4 * sizeof(unsigned int))));
+typedef unsigned short vector_unsigned_short
+	__attribute__((vector_size(8 * sizeof(unsigned short))));
+typedef unsigned char vector_unsigned_char
+	__attribute__((vector_size(16 * sizeof(unsigned char))));
+
+/* Refill RTE_I40E_RXQ_REARM_THRESH RX descriptors from rxq->rxrearm_start using rxq->mp. */
+static inline void
+i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+{
+	int i;
+	uint16_t rx_id;
+	volatile union i40e_rx_desc *rxdp;
+
+	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct rte_mbuf *mb0, *mb1;
+
+	vector_unsigned_long_long hdr_room = (vector_unsigned_long_long){
+						RTE_PKTMBUF_HEADROOM,
+						RTE_PKTMBUF_HEADROOM};
+	vector_unsigned_long_long dma_addr0, dma_addr1;
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+	/* Bulk-get the mbufs into the software ring; on failure count it and return */
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void *)rxep,
+				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+		    rxq->nb_rx_desc) {
+			dma_addr0 = (vector_unsigned_long_long){};
+			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+				rxep[i].mbuf = &rxq->fake_mbuf;
+				vec_xstd2(dma_addr0, 0,
+					(unsigned long long *)&rxdp[i].read);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+			RTE_I40E_RXQ_REARM_THRESH;
+		return;
+	}
+
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+		vector_unsigned_long_long vaddr0, vaddr1;
+		uintptr_t p0, p1;
+
+		mb0 = rxep[0].mbuf;
+		mb1 = rxep[1].mbuf;
+
+		 /* Flush mbuf with pkt template.
+		  * Data to be rearmed is 6 bytes long.
+		  * Though, RX will overwrite ol_flags that are coming next
+		  * anyway. So overwrite whole 8 bytes with one 64-bit store:
+		  * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
+		  */
+		p0 = (uintptr_t)&mb0->rearm_data;
+		*(uint64_t *)p0 = rxq->mbuf_initializer;
+		p1 = (uintptr_t)&mb1->rearm_data;
+		*(uint64_t *)p1 = rxq->mbuf_initializer;
+
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		vaddr0 = vec_xld2(0, (unsigned long long *)&mb0->buf_addr);
+		vaddr1 = vec_xld2(0, (unsigned long long *)&mb1->buf_addr);
+
+		/* convert pa to dma_addr hdr/data */
+		dma_addr0 = vec_mergel(vaddr0, vaddr0);
+		dma_addr1 = vec_mergel(vaddr1, vaddr1);
+
+		/* add headroom to pa values */
+		dma_addr0 = dma_addr0 + hdr_room;
+		dma_addr1 = dma_addr1 + hdr_room;
+
+		/* flush desc with pa dma_addr */
+		vec_xstd2(dma_addr0, 0, (unsigned long long *)&rxdp++->read);
+		vec_xstd2(dma_addr1, 0, (unsigned long long *)&rxdp++->read);
+	}
+
+	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Write the index of the last re-armed descriptor to the NIC tail register */
+	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+}
+/* Translate status/error bits of four RX descriptors into ol_flags of rx_pkts[0..3]. */
+static inline void
+desc_to_olflags_v(vector_unsigned_long_long descs[4], struct rte_mbuf **rx_pkts)
+{
+	vector_unsigned_int vlan0, vlan1, rss, l3_l4e;
+
+	/* mask everything except RSS, flow director, VLAN and L3/L4 error
+	 * flags: bit2 is for VLAN tag, bit11 for flow director indication,
+	 * bit13:12 for RSS indication, bit24:22 feed the checksum-error lookup.
+	 */
+	const vector_unsigned_int rss_vlan_msk = (vector_unsigned_int){
+			(int32_t)0x1c03804, (int32_t)0x1c03804,
+			(int32_t)0x1c03804, (int32_t)0x1c03804};
+
+	/* map rss and vlan type to rss hash and vlan flag */
+	const vector_unsigned_char vlan_flags = (vector_unsigned_char){
+			0, 0, 0, 0,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0};
+
+	const vector_unsigned_char rss_flags = (vector_unsigned_char){
+			0, PKT_RX_FDIR, 0, 0,
+			0, 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH | PKT_RX_FDIR,
+			0, 0, 0, 0,
+			0, 0, 0, 0};
+
+	const vector_unsigned_char l3_l4e_flags = (vector_unsigned_char){
+			0,
+			PKT_RX_IP_CKSUM_BAD,
+			PKT_RX_L4_CKSUM_BAD,
+			PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
+			PKT_RX_IP_CKSUM_BAD,
+			PKT_RX_IP_CKSUM_BAD,
+			PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
+			PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
+			0, 0, 0, 0, 0, 0, 0, 0};
+
+	vlan0 = (vector_unsigned_int)vec_mergel(descs[0], descs[1]);
+	vlan1 = (vector_unsigned_int)vec_mergel(descs[2], descs[3]);
+	vlan0 = (vector_unsigned_int)vec_mergeh(vlan0, vlan1);
+
+	vlan1 = vec_and(vlan0, rss_vlan_msk);
+	vlan0 = (vector_unsigned_int)vec_perm(vlan_flags,
+					(vector_unsigned_char){},
+					*(vector_unsigned_char *)&vlan1);
+
+	rss[0] = (uint32_t)vlan1[0] >> 11;
+	rss[1] = (uint32_t)vlan1[1] >> 11;
+	rss[2] = (uint32_t)vlan1[2] >> 11;
+	rss[3] = (uint32_t)vlan1[3] >> 11;
+	rss = (vector_unsigned_int)vec_perm(rss_flags, (vector_unsigned_char){},
+					*(vector_unsigned_char *)&rss);
+
+	l3_l4e[0] = (uint32_t)vlan1[0] >> 22;
+	l3_l4e[1] = (uint32_t)vlan1[1] >> 22;
+	l3_l4e[2] = (uint32_t)vlan1[2] >> 22;
+	l3_l4e[3] = (uint32_t)vlan1[3] >> 22;
+
+	l3_l4e = (vector_unsigned_int)vec_perm(l3_l4e_flags,
+					(vector_unsigned_char){},
+					*(vector_unsigned_char *)&l3_l4e);
+
+	vlan0 = vec_or(vlan0, rss);
+	vlan0 = vec_or(vlan0, l3_l4e);
+
+	rx_pkts[0]->ol_flags = (uint64_t)vlan0[2];
+	rx_pkts[1]->ol_flags = (uint64_t)vlan0[3];
+	rx_pkts[2]->ol_flags = (uint64_t)vlan0[0];
+	rx_pkts[3]->ol_flags = (uint64_t)vlan0[1];
+}
+
+#define PKTLEN_SHIFT     10
+/* Set packet_type of rx_pkts[0..3] from ptype_tbl, indexed by one byte per descriptor. */
+static inline void
+desc_to_ptype_v(vector_unsigned_long_long descs[4], struct rte_mbuf **rx_pkts,
+		uint32_t *ptype_tbl)
+{
+	vector_unsigned_long_long ptype0 = vec_mergel(descs[0], descs[1]);
+	vector_unsigned_long_long ptype1 = vec_mergel(descs[2], descs[3]);
+
+	ptype0[0] = ptype0[0] >> 30;
+	ptype0[1] = ptype0[1] >> 30;
+
+	ptype1[0] = ptype1[0] >> 30;
+	ptype1[1] = ptype1[1] >> 30;
+
+	rx_pkts[0]->packet_type =
+		ptype_tbl[(*(vector_unsigned_char *)&ptype0)[0]];
+	rx_pkts[1]->packet_type =
+		ptype_tbl[(*(vector_unsigned_char *)&ptype0)[8]];
+	rx_pkts[2]->packet_type =
+		ptype_tbl[(*(vector_unsigned_char *)&ptype1)[0]];
+	rx_pkts[3]->packet_type =
+		ptype_tbl[(*(vector_unsigned_char *)&ptype1)[8]];
+}
+
+ /* Notice:
+  * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+  * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST
+  *   numbers of DD bits
+  */
+static inline uint16_t
+_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+		   uint16_t nb_pkts, uint8_t *split_packet)
+{
+	volatile union i40e_rx_desc *rxdp;
+	struct i40e_rx_entry *sw_ring;
+	uint16_t nb_pkts_recd;
+	int pos;
+	uint64_t var;
+	vector_unsigned_char shuf_msk;
+	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	vector_unsigned_short crc_adjust = (vector_unsigned_short){
+		0, 0,         /* ignore pkt_type field */
+		rxq->crc_len, /* sub crc on pkt_len */
+		0,            /* ignore high-16bits of pkt_len */
+		rxq->crc_len, /* sub crc on data_len */
+		0, 0, 0       /* ignore non-length fields */
+		};
+	vector_unsigned_long_long dd_check, eop_check;
+
+	/* nb_pkts shall be less than or equal to RTE_I40E_MAX_RX_BURST */
+	nb_pkts = RTE_MIN(nb_pkts, RTE_I40E_MAX_RX_BURST);
+
+	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP);
+
+	/* Just the act of getting into the function from the application is
+	 * going to cost about 7 cycles
+	 */
+	rxdp = rxq->rx_ring + rxq->rx_tail;
+
+	rte_prefetch0(rxdp);
+
+	/* See if we need to rearm the RX queue - gives the prefetch a bit
+	 * of time to act
+	 */
+	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
+		i40e_rxq_rearm(rxq);
+
+	/* Before we start moving massive data around, check to see if
+	 * there is actually a packet available
+	 */
+	if (!(rxdp->wb.qword1.status_error_len &
+			rte_cpu_to_le_32(1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
+		return 0;
+
+	/* 4 packets DD mask */
+	dd_check = (vector_unsigned_long_long){0x0000000100000001ULL,
+					  0x0000000100000001ULL};
+
+	/* 4 packets EOP mask */
+	eop_check = (vector_unsigned_long_long){0x0000000200000002ULL,
+					   0x0000000200000002ULL};
+
+	/* mask to shuffle from desc. to mbuf */
+	shuf_msk = (vector_unsigned_char){
+		0xFF, 0xFF,   /* pkt_type set as unknown */
+		0xFF, 0xFF,   /* pkt_type set as unknown */
+		14, 15,       /* octet 15~14, low 16 bits pkt_len */
+		0xFF, 0xFF,   /* skip high 16 bits pkt_len, zero out */
+		14, 15,       /* octet 15~14, 16 bits data_len */
+		2, 3,         /* octet 2~3, low 16 bits vlan_macip */
+		4, 5, 6, 7    /* octet 4~7, 32bits rss */
+		};
+
+	/* Cache is empty -> need to scan the buffer rings, but first move
+	 * the next 'n' mbufs into the cache
+	 */
+	sw_ring = &rxq->sw_ring[rxq->rx_tail];
+
+	/* A. load 4 packet in one loop
+	 * [A*. mask out 4 unused dirty field in desc]
+	 * B. copy 4 mbuf point from swring to rx_pkts
+	 * C. calc the number of DD bits among the 4 packets
+	 * [C*. extract the end-of-packet bit, if requested]
+	 * D. fill info. from desc to mbuf
+	 */
+
+	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
+			pos += RTE_I40E_DESCS_PER_LOOP,
+			rxdp += RTE_I40E_DESCS_PER_LOOP) {
+		vector_unsigned_long_long descs[RTE_I40E_DESCS_PER_LOOP];
+		vector_unsigned_char pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+		vector_unsigned_short staterr, sterr_tmp1, sterr_tmp2;
+		vector_unsigned_long_long mbp1, mbp2;  /* two mbuf pointer
+							* in one XMM reg.
+							*/
+
+		/* B.1 load 1 mbuf point */
+		mbp1 = *(vector_unsigned_long_long *)&sw_ring[pos];
+		/* Read desc statuses backwards to avoid race condition */
+		/* A.1 load 4 pkts desc */
+		descs[3] = *(vector_unsigned_long_long *)(rxdp + 3);
+		rte_compiler_barrier();
+
+		/* B.2 copy 2 mbuf point into rx_pkts  */
+		*(vector_unsigned_long_long *)&rx_pkts[pos] = mbp1;
+
+		/* B.1 load 1 mbuf point */
+		mbp2 = *(vector_unsigned_long_long *)&sw_ring[pos + 2];
+
+		descs[2] = *(vector_unsigned_long_long *)(rxdp + 2);
+		rte_compiler_barrier();
+		/* B.1 load 2 mbuf point */
+		descs[1] = *(vector_unsigned_long_long *)(rxdp + 1);
+		rte_compiler_barrier();
+		descs[0] = *(vector_unsigned_long_long *)(rxdp);
+
+		/* B.2 copy 2 mbuf point into rx_pkts  */
+		*(vector_unsigned_long_long *)&rx_pkts[pos + 2] =  mbp2;
+
+		if (split_packet) {
+			rte_mbuf_prefetch_part2(rx_pkts[pos]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
+		}
+
+		/* avoid compiler reorder optimization */
+		rte_compiler_barrier();
+
+		/* pkt 3,4 shift the pktlen field to be 16-bit aligned*/
+		vector_unsigned_int len3_temp = vec_xld2(0,
+				(unsigned int *)&descs[3]);
+		len3_temp[3] = len3_temp[3] << PKTLEN_SHIFT;
+		const vector_unsigned_int len3 = len3_temp;
+
+		vector_unsigned_int len2_temp = vec_xld2(0,
+				(unsigned int *)&descs[2]);
+		len2_temp[3] = len2_temp[3] << PKTLEN_SHIFT;
+		const vector_unsigned_int len2 = len2_temp;
+
+		/* merge the now-aligned packet length fields back in */
+		descs[3] = (vector_unsigned_long_long)len3;
+		descs[2] = (vector_unsigned_long_long)len2;
+
+		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
+		pkt_mb4 = vec_perm((vector_unsigned_char)descs[3],
+				  (vector_unsigned_char){}, shuf_msk);
+		pkt_mb3 = vec_perm((vector_unsigned_char)descs[2],
+				  (vector_unsigned_char){}, shuf_msk);
+
+		/* C.1 4=>2 filter staterr info only */
+		sterr_tmp2 = vec_mergel((vector_unsigned_short)descs[3],
+					(vector_unsigned_short)descs[2]);
+		/* C.1 4=>2 filter staterr info only */
+		sterr_tmp1 = vec_mergel((vector_unsigned_short)descs[1],
+					(vector_unsigned_short)descs[0]);
+		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
+		pkt_mb4 = (vector_unsigned_char)((vector_unsigned_short)pkt_mb4
+				- crc_adjust);
+		pkt_mb3 = (vector_unsigned_char)((vector_unsigned_short)pkt_mb3
+				- crc_adjust);
+
+		/* pkt 1,2 shift the pktlen field to be 16-bit aligned*/
+		const vector_unsigned_int len1 =
+			vec_sll(vec_xld2(0, (unsigned int *)&descs[1]),
+			(vector_unsigned_int){0, 0, 0, PKTLEN_SHIFT});
+		const vector_unsigned_int len0 =
+			vec_sll(vec_xld2(0, (unsigned int *)&descs[0]),
+			(vector_unsigned_int){0, 0, 0, PKTLEN_SHIFT});
+
+		/* merge the now-aligned packet length fields back in */
+		descs[1] = (vector_unsigned_long_long)len1;
+		descs[0] = (vector_unsigned_long_long)len0;
+
+		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
+		pkt_mb2 = vec_perm((vector_unsigned_char)descs[1],
+				   (vector_unsigned_char){}, shuf_msk);
+		pkt_mb1 = vec_perm((vector_unsigned_char)descs[0],
+				   (vector_unsigned_char){}, shuf_msk);
+
+		/* C.2 get 4 pkts staterr value  */
+		staterr = (vector_unsigned_short)vec_mergeh(sterr_tmp1,
+				sterr_tmp2);
+
+		/* D.3 copy final 3,4 data to rx_pkts */
+		vec_xstd2(pkt_mb4, 0, (unsigned char *)&rx_pkts[pos + 3]
+			->rx_descriptor_fields1);
+		vec_xstd2(pkt_mb3, 0, (unsigned char *)&rx_pkts[pos + 2]
+			->rx_descriptor_fields1);
+
+		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+		pkt_mb2 = (vector_unsigned_char)((vector_unsigned_short)pkt_mb2
+				- crc_adjust);
+		pkt_mb1 = (vector_unsigned_char)((vector_unsigned_short)pkt_mb1
+				- crc_adjust);
+
+		/* C* extract and record EOP bit */
+		if (split_packet) {
+			vector_unsigned_char eop_shuf_mask =
+				(vector_unsigned_char){
+					0xFF, 0xFF, 0xFF, 0xFF,
+					0xFF, 0xFF, 0xFF, 0xFF,
+					0xFF, 0xFF, 0xFF, 0xFF,
+					0x04, 0x0C, 0x00, 0x08
+				};
+
+			/* and with mask to extract bits, flipping 1-0 */
+			vector_unsigned_char eop_bits =
+				vec_and((vector_unsigned_char)vec_nor(staterr,
+				staterr), (vector_unsigned_char)eop_check);
+			/* the staterr values are not in order, as the
+			 * count of dd bits doesn't care. However, for end of
+			 * packet tracking, we do care, so shuffle. This also
+			 * compresses the 32-bit values to 8-bit
+			 */
+			eop_bits = vec_perm(eop_bits, (vector_unsigned_char){},
+					    eop_shuf_mask);
+			/* store the resulting 32-bit value */
+			*split_packet = (vec_xld2(0,
+					 (unsigned int *)&eop_bits))[0];
+			split_packet += RTE_I40E_DESCS_PER_LOOP;
+
+			/* zero-out next pointers */
+			rx_pkts[pos]->next = NULL;
+			rx_pkts[pos + 1]->next = NULL;
+			rx_pkts[pos + 2]->next = NULL;
+			rx_pkts[pos + 3]->next = NULL;
+		}
+
+		/* C.3 calc available number of desc */
+		staterr = vec_and(staterr, (vector_unsigned_short)dd_check);
+
+		/* D.3 copy final 1,2 data to rx_pkts */
+		vec_xstd2(pkt_mb2, 0, (unsigned char *)&rx_pkts[pos + 1]
+			->rx_descriptor_fields1);
+		vec_xstd2(pkt_mb1, 0, (unsigned char *)&rx_pkts[pos]
+			->rx_descriptor_fields1);
+
+		desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
+		desc_to_olflags_v(descs, &rx_pkts[pos]);
+
+		/* C.4 calc available number of desc */
+		var = __builtin_popcountll((vec_xld2(0,
+			(unsigned long long *)&staterr)[0]));
+		nb_pkts_recd += var;
+		if (likely(var != RTE_I40E_DESCS_PER_LOOP))
+			break;
+	}
+
+	/* Update our internal tail pointer */
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
+	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);
+
+	return nb_pkts_recd;
+}
+
+ /* vPMD receive routine for non-scattered packets.
+  * Forwards to _recv_raw_pkts_vec() with a NULL split_packet array, so
+  * no end-of-packet flags are recorded.
+  * Notice:
+  * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+  * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST
+  *   numbers of DD bits
+  * NOTE(review): _recv_raw_pkts_vec() clamps nb_pkts with
+  * RTE_I40E_MAX_RX_BURST, not RTE_I40E_VPMD_RX_BURST - confirm that the
+  * two limits agree.
+  */
+uint16_t
+i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+		   uint16_t nb_pkts)
+{
+	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
+}
+
+ /* vPMD receive routine that reassembles scattered packets
+  * Receives a raw burst with per-buffer split flags, then hands the
+  * buffers to reassemble_packets() starting at the first buffer that
+  * needs joining.
+  * Returns nb_bufs unchanged when nothing has to be joined, otherwise
+  * the number of leading untouched buffers plus whatever
+  * reassemble_packets() returns.
+  * Notice:
+  * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+  * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST
+  *   numbers of DD bits
+  */
+uint16_t
+i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+			     uint16_t nb_pkts)
+{
+	struct i40e_rx_queue *rxq = rx_queue;
+	/* one flag byte per received buffer, filled by _recv_raw_pkts_vec() */
+	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
+
+	/* get some new buffers */
+	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
+			split_flags);
+	if (nb_bufs == 0)
+		return 0;
+
+	/* happy day case, full burst + no packets to be joined */
+	/* NOTE(review): the four 64-bit reads below cover exactly 32 flag
+	 * bytes, so this assumes RTE_I40E_VPMD_RX_BURST is 32 - confirm.
+	 */
+	const uint64_t *split_fl64 = (uint64_t *)split_flags;
+
+	if (rxq->pkt_first_seg == NULL &&
+	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
+	    split_fl64[2] == 0 && split_fl64[3] == 0)
+		return nb_bufs;
+
+	/* reassemble any packets that need reassembly*/
+	unsigned int i = 0;
+
+	/* no partial packet pending: skip the leading buffers that are
+	 * not flagged as split, they are returned as they are
+	 */
+	if (!rxq->pkt_first_seg) {
+		/* find the first split flag, and only reassemble then*/
+		while (i < nb_bufs && !split_flags[i])
+			i++;
+		if (i == nb_bufs)
+			return nb_bufs;
+	}
+	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
+		&split_flags[i]);
+}
+
+/*
+ * Write one data descriptor for pkt into the TX ring slot txdp.
+ *
+ * The first quadword is the buffer address (buf_iova + data_off), the
+ * second one combines the data descriptor type, the command flags and
+ * the data length. Both quadwords are converted to little endian before
+ * the store, matching the rte_cpu_to_le_64() used when the RS bit is
+ * set later in this file; without the conversion a big-endian host
+ * would hand byte-swapped descriptors to the device.
+ */
+static inline void
+vtx1(volatile struct i40e_tx_desc *txdp,
+	struct rte_mbuf *pkt, uint64_t flags)
+{
+	uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
+		((uint64_t)flags  << I40E_TXD_QW1_CMD_SHIFT) |
+		((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));
+
+	vector_unsigned_long_long descriptor = (vector_unsigned_long_long){
+		rte_cpu_to_le_64(pkt->buf_iova + pkt->data_off),
+		rte_cpu_to_le_64(high_qw)};
+	*(vector_unsigned_long_long *)txdp = descriptor;
+}
+
+/*
+ * Fill nb_pkts consecutive TX descriptors starting at txdp, one per
+ * mbuf in pkt, all with the same command flags.
+ */
+static inline void
+vtx(volatile struct i40e_tx_desc *txdp,
+	struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)
+{
+	int idx = 0;
+
+	while (idx < nb_pkts) {
+		vtx1(&txdp[idx], pkt[idx], flags);
+		idx++;
+	}
+}
+
+/*
+ * Vector TX burst routine.
+ *
+ * Queues at most tx_rs_thresh packets (and never more than there are
+ * free descriptors) from tx_pkts on tx_queue, writes the new tail to
+ * the queue tail register and returns the number of packets queued.
+ */
+uint16_t
+i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
+			  uint16_t nb_pkts)
+{
+	struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue;
+	volatile struct i40e_tx_desc *txdp;
+	struct i40e_tx_entry *txep;
+	uint16_t n, nb_commit, tx_id;
+	uint64_t flags = I40E_TD_CMD;
+	uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
+	int i;
+
+	/* crossing the tx_rs_thresh boundary is not allowed */
+	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
+
+	/* running low on free descriptors: try to release some first */
+	if (txq->nb_tx_free < txq->tx_free_thresh)
+		i40e_tx_free_bufs(txq);
+
+	nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
+	nb_commit = nb_pkts;
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	tx_id = txq->tx_tail;
+	txdp = &txq->tx_ring[tx_id];
+	txep = &txq->sw_ring[tx_id];
+
+	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
+
+	/* n: descriptors left between the tail and the end of the ring */
+	n = (uint16_t)(txq->nb_tx_desc - tx_id);
+	if (nb_commit >= n) {
+		/* the burst reaches the end of the ring: fill up to the
+		 * last slot, write that one with the RS flag, then
+		 * continue from slot 0
+		 */
+		tx_backlog_entry(txep, tx_pkts, n);
+
+		for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
+			vtx1(txdp, *tx_pkts, flags);
+
+		vtx1(txdp, *tx_pkts++, rs);
+
+		nb_commit = (uint16_t)(nb_commit - n);
+
+		tx_id = 0;
+		txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
+
+		/* avoid reach the end of ring */
+		txdp = &txq->tx_ring[tx_id];
+		txep = &txq->sw_ring[tx_id];
+	}
+
+	tx_backlog_entry(txep, tx_pkts, nb_commit);
+
+	vtx(txdp, tx_pkts, nb_commit, flags);
+
+	tx_id = (uint16_t)(tx_id + nb_commit);
+	/* the burst went past tx_next_rs: set the RS bit on that
+	 * descriptor and advance the marker by tx_rs_thresh
+	 */
+	if (tx_id > txq->tx_next_rs) {
+		txq->tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+			rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
+						I40E_TXD_QW1_CMD_SHIFT);
+		txq->tx_next_rs =
+			(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
+	}
+
+	txq->tx_tail = tx_id;
+
+	/* publish the new tail to the device */
+	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
+
+	return nb_pkts;
+}
+
+/* s390x entry point: forwards to _i40e_rx_queue_release_mbufs_vec(). */
+void __attribute__((cold))
+i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+{
+	_i40e_rx_queue_release_mbufs_vec(rxq);
+}
+
+/* s390x entry point: returns the result of i40e_rxq_vec_setup_default(). */
+int __attribute__((cold))
+i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+{
+	return i40e_rxq_vec_setup_default(rxq);
+}
+
+/* Nothing to set up for the vector TX path: txq is unused, returns 0. */
+int __attribute__((cold))
+i40e_txq_vec_setup(struct i40e_tx_queue __rte_unused * txq)
+{
+	return 0;
+}
+
+/*
+ * s390x entry point: returns the result of
+ * i40e_rx_vec_dev_conf_condition_check_default().
+ */
+int __attribute__((cold))
+i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
+{
+	return i40e_rx_vec_dev_conf_condition_check_default(dev);
+}
diff --git a/drivers/net/i40e/meson.build b/drivers/net/i40e/meson.build
index efc5f93e35..88fac6fc2c 100644
--- a/drivers/net/i40e/meson.build
+++ b/drivers/net/i40e/meson.build
@@ -73,6 +73,8 @@ if arch_subdir == 'x86'
     endif
 elif arch_subdir == 'ppc'
        sources += files('i40e_rxtx_vec_altivec.c')
+elif arch_subdir == 's390x'
+       sources += files('i40e_rxtx_vec_s390x.c')
 elif arch_subdir == 'arm'
        sources += files('i40e_rxtx_vec_neon.c')
 endif
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 9e8ea366a5..98d8eb93eb 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -5958,7 +5958,7 @@ ixgbe_config_rss_filter(struct rte_eth_dev *dev,
 }
 
 /* Stubs needed for linkage when RTE_ARCH_PPC_64 is set */
-#if defined(RTE_ARCH_PPC_64)
+#if defined(RTE_ARCH_PPC_64) || defined(RTE_ARCH_S390X)
 int
 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
 {
diff --git a/drivers/net/memif/rte_eth_memif.h b/drivers/net/memif/rte_eth_memif.h
index a5ee23d42e..0270e7859a 100644
--- a/drivers/net/memif/rte_eth_memif.h
+++ b/drivers/net/memif/rte_eth_memif.h
@@ -178,6 +178,8 @@ const char *memif_version(void);
 #define __NR_memfd_create 279
 #elif defined __powerpc__
 #define __NR_memfd_create 360
+#elif defined __s390x__
+#define __NR_memfd_create 350
 #elif defined __i386__
 #define __NR_memfd_create 356
 #else
diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index e5eea0ad94..7618a68c4c 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -209,6 +209,8 @@ mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
 		snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon");
 #elif defined RTE_ARCH_PPC_64
 		snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec");
+#elif defined RTE_ARCH_S390X
+        snprintf(mode->info, sizeof(mode->info), "%s", "Vector S390X");
 #else
 		return -EINVAL;
 #endif
@@ -219,6 +221,8 @@ mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
 		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon");
 #elif defined RTE_ARCH_PPC_64
 		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec");
+#elif defined RTE_ARCH_S390X
+        snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector S390X");
 #else
 		return -EINVAL;
 #endif
@@ -313,12 +317,24 @@ rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 	uint8_t ptype;
 	uint8_t pinfo = (cqe->pkt_info & 0x3) << 6;
 
+    /*
+     * hdr_type_etc comes from the CQE and is therefore big-endian.
+     * The logic below was written for little-endian hosts and uses
+     * the value without converting it first, so on big-endian hosts
+     * the value has to be byte-swapped for that logic to keep
+     * working.
+     */
+    uint16_t cqe_t_le  = rte_le_to_cpu_16(cqe->hdr_type_etc);
+    uint16_t mcqe_t_le;
+
 	/* Get l3/l4 header from mini-CQE in case L3/L4 format*/
 	if (mcqe == NULL ||
 	    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
-		ptype = (cqe->hdr_type_etc & 0xfc00) >> 10;
-	else
-		ptype = mcqe->hdr_type >> 2;
+		ptype = (cqe_t_le & 0xfc00) >> 10;
+	else {
+        mcqe_t_le = rte_le_to_cpu_16(mcqe->hdr_type);
+		ptype = mcqe_t_le >> 2;
+    }
 	/*
 	 * The index to the array should have:
 	 * bit[1:0] = l3_hdr_type
diff --git a/drivers/net/octeontx/base/octeontx_pki_var.h b/drivers/net/octeontx/base/octeontx_pki_var.h
index 4445369ce7..b37d79eb83 100644
--- a/drivers/net/octeontx/base/octeontx_pki_var.h
+++ b/drivers/net/octeontx/base/octeontx_pki_var.h
@@ -157,6 +157,12 @@ typedef union octtx_wqe_s {
 			uint64_t	lbptr : 8;
 			uint64_t	laptr : 8;
 		} w4;
+
+		struct {
+			uint64_t	size  :16;
+			uint64_t	dwd   : 1;
+			uint64_t	rsvd0 :47;
+		} w5;
 #endif
 	} s;
 
diff --git a/examples/l3fwd-acl/main.c b/examples/l3fwd-acl/main.c
index 2d2ecc7635..e39153f16c 100644
--- a/examples/l3fwd-acl/main.c
+++ b/examples/l3fwd-acl/main.c
@@ -170,6 +170,10 @@ static const struct {
 		.name = "altivec",
 		.alg = RTE_ACL_CLASSIFY_ALTIVEC,
 	},
+	{
+		.name = "s390x",
+		.alg = RTE_ACL_CLASSIFY_S390X,
+	},
 	{
 		.name = "avx512x16",
 		.alg = RTE_ACL_CLASSIFY_AVX512X16,
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 24d0910fe0..dc6c53dc12 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -239,6 +239,14 @@ em_mask_key(void *key, xmm_t mask)
 
 	return vec_and(data, mask);
 }
+#elif defined(__s390x__)
+/*
+ * Load the 16-byte key found at key and keep only the bits selected by
+ * mask (bitwise AND, like the vec_and() used by the sibling
+ * implementation above).
+ */
+static inline xmm_t
+em_mask_key(void *key, xmm_t mask)
+{
+	xmm_t data = (xmm_t) vec_xld2(0, (unsigned int *)(key));
+
+	return data & mask;
+}
 #else
 #error No vector engine (SSE, NEON, ALTIVEC) available, check your toolchain
 #endif
diff --git a/examples/l3fwd/l3fwd_lpm_s390x.h b/examples/l3fwd/l3fwd_lpm_s390x.h
new file mode 100644
index 0000000000..858f696ba9
--- /dev/null
+++ b/examples/l3fwd/l3fwd_lpm_s390x.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2016 Intel Corporation.
+ * (c) Copyright IBM Corp. 2017, 2019
+ */
+#ifndef __L3FWD_LPM_S390X_H__
+#define __L3FWD_LPM_S390X_H__
+
+#include "l3fwd_s390x.h"
+
+typedef unsigned char vector_unsigned_char
+	__attribute__((vector_size(16*sizeof(unsigned char))));
+
+/*
+ * Read packet_type and destination IPV4 addresses from 4 mbufs.
+ *
+ * dip[0] receives the four destination addresses exactly as they are
+ * stored in the packet headers. ipv4_flag[0] keeps only the
+ * RTE_PTYPE_L3_IPV4 bits that are set in the packet_type of all four
+ * packets, so it is zero as soon as one packet lacks them.
+ */
+static inline void
+processx4_step1(struct rte_mbuf *pkt[FWDSTEP],
+		vector_unsigned_int *dip,
+		uint32_t *ipv4_flag)
+{
+	struct rte_ipv4_hdr *ipv4_hdr;
+	struct rte_ether_hdr *eth_hdr;
+	uint32_t x0, x1, x2, x3;
+
+	eth_hdr = rte_pktmbuf_mtod(pkt[0], struct rte_ether_hdr *);
+	ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
+	x0 = ipv4_hdr->dst_addr;
+	ipv4_flag[0] = pkt[0]->packet_type & RTE_PTYPE_L3_IPV4;
+
+	rte_compiler_barrier();
+	eth_hdr = rte_pktmbuf_mtod(pkt[1], struct rte_ether_hdr *);
+	ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
+	x1 = ipv4_hdr->dst_addr;
+	ipv4_flag[0] &= pkt[1]->packet_type;
+
+	rte_compiler_barrier();
+	eth_hdr = rte_pktmbuf_mtod(pkt[2], struct rte_ether_hdr *);
+	ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
+	x2 = ipv4_hdr->dst_addr;
+	ipv4_flag[0] &= pkt[2]->packet_type;
+
+	rte_compiler_barrier();
+	eth_hdr = rte_pktmbuf_mtod(pkt[3], struct rte_ether_hdr *);
+	ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
+	x3 = ipv4_hdr->dst_addr;
+	ipv4_flag[0] &= pkt[3]->packet_type;
+
+	rte_compiler_barrier();
+	dip[0] = (vector_unsigned_int){x0, x1, x2, x3};
+}
+
+/*
+ * Lookup into LPM for destination port.
+ * If lookup fails, use incoming port (portid) as destination port.
+ *
+ * dip holds the four destination addresses as read from the packet
+ * headers. s390x is big endian, so these values are already in CPU
+ * byte order and are used for the lookup as they are; swapping their
+ * bytes here would look up the wrong addresses.
+ * ipv4_flag selects between the 4-wide lookup (non-zero) and the
+ * per-packet lookup (zero). The four resulting ports are written to
+ * dprt[0..3].
+ */
+static inline void
+processx4_step2(const struct lcore_conf *qconf,
+		vector_unsigned_int dip,
+		uint32_t ipv4_flag,
+		uint16_t portid,
+		struct rte_mbuf *pkt[FWDSTEP],
+		uint16_t dprt[FWDSTEP])
+{
+	rte_xmm_t dst;
+
+	/* if all 4 packets are IPV4. */
+	if (likely(ipv4_flag)) {
+		rte_lpm_lookupx4(qconf->ipv4_lookup_struct, (xmm_t)dip,
+			(uint32_t *)&dst, portid);
+		/* get rid of unused upper 16 bit for each dport. */
+		dst.x = (xmm_t)vec_packs(dst.x, dst.x);
+		*(uint64_t *)dprt = dst.u64[0];
+	} else {
+		dst.x = (xmm_t)dip;
+		dprt[0] = lpm_get_dst_port_with_ipv4(qconf, pkt[0],
+							dst.u32[0], portid);
+		dprt[1] = lpm_get_dst_port_with_ipv4(qconf, pkt[1],
+							dst.u32[1], portid);
+		dprt[2] = lpm_get_dst_port_with_ipv4(qconf, pkt[2],
+							dst.u32[2], portid);
+		dprt[3] = lpm_get_dst_port_with_ipv4(qconf, pkt[3],
+							dst.u32[3], portid);
+	}
+}
+
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ *
+ * Resolves the destination port of the nb_rx packets in pkts_burst
+ * (four at a time, then the remaining up to three one by one) and
+ * passes them to send_packets_multi().
+ * portid is 16 bits wide so that port ids above 255 are not truncated.
+ */
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+			uint16_t portid, struct lcore_conf *qconf)
+{
+	int32_t j;
+	uint16_t dst_port[MAX_PKT_BURST];
+	vector_unsigned_int dip[MAX_PKT_BURST / FWDSTEP];
+	uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
+	const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
+
+	for (j = 0; j != k; j += FWDSTEP)
+		processx4_step1(&pkts_burst[j], &dip[j / FWDSTEP],
+				&ipv4_flag[j / FWDSTEP]);
+
+	for (j = 0; j != k; j += FWDSTEP)
+		processx4_step2(qconf, dip[j / FWDSTEP],
+				ipv4_flag[j / FWDSTEP],
+				portid, &pkts_burst[j], &dst_port[j]);
+
+	/* Classify last up to 3 packets one by one (j == k here) */
+	for (; j < nb_rx; j++)
+		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+
+	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+}
+
+#endif /* __L3FWD_LPM_S390X_H__ */
diff --git a/examples/l3fwd/l3fwd_s390x.h b/examples/l3fwd/l3fwd_s390x.h
new file mode 100644
index 0000000000..d027092a49
--- /dev/null
+++ b/examples/l3fwd/l3fwd_s390x.h
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Intel Corporation.
+ * (c) Copyright IBM Corp. 2017, 2019
+ */
+#ifndef _L3FWD_S390X_H_
+#define _L3FWD_S390X_H_
+
+#include "l3fwd.h"
+#include "l3fwd_common.h"
+
+#define vec_sro(a, b) vec_srb(a, (b) << 64) // Vector Shift Right by Octet
+typedef unsigned int vector_unsigned_int
+	__attribute__((vector_size(4*sizeof(unsigned int))));
+typedef unsigned short vector_unsigned_short
+	__attribute__((vector_size(8*sizeof(unsigned short))));
+
+/*
+ * Update source and destination MAC addresses in the ethernet header.
+ * Perform RFC1812 checks and updates for IPV4 packets.
+ *
+ * For each of the four packets the first 16 bytes are loaded, the
+ * first 12 of them are replaced by val_eth[dst_port[i]] and the result
+ * is stored back. rfc1812_process() is then called on the header that
+ * follows the ethernet header and may update dst_port[i].
+ */
+static inline void
+processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
+{
+	/* all-ones lanes keep the packet bytes, zero lanes take val_eth */
+	const vector_unsigned_short keep_msk = (vector_unsigned_short){
+		0, 0, 0, 0, 0, 0, 0xffff, 0xffff};
+	vector_unsigned_int te[FWDSTEP];
+	vector_unsigned_int ve[FWDSTEP];
+	vector_unsigned_int *p[FWDSTEP];
+	int i;
+
+	for (i = 0; i != FWDSTEP; i++) {
+		p[i] = rte_pktmbuf_mtod(pkt[i], vector_unsigned_int *);
+		ve[i] = (vector_unsigned_int)val_eth[dst_port[i]];
+		te[i] = *p[i];
+	}
+
+	/* Update first 12 bytes, keep rest bytes intact. */
+	for (i = 0; i != FWDSTEP; i++) {
+		te[i] = (vector_unsigned_int)vec_sel(
+				(vector_unsigned_short)ve[i],
+				(vector_unsigned_short)te[i],
+				keep_msk);
+		*p[i] = te[i];
+	}
+
+	for (i = 0; i != FWDSTEP; i++)
+		rfc1812_process((struct rte_ipv4_hdr *)
+				((struct rte_ether_hdr *)p[i] + 1),
+			&dst_port[i], pkt[i]->packet_type);
+}
+
+/*
+ * Group consecutive packets with the same destination port in bursts of 4.
+ * Suppose we have array of destination ports:
+ * dst_port[] = {a, b, c, d,, e, ... }
+ * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
+ * We doing 4 comparisons at once and the result is 4 bit mask.
+ * This mask is used as an index into prebuild array of pnum values.
+ *
+ * Only the first FWDSTEP lanes of dp1 and dp2 are compared. Returns
+ * the pointer to the counter of the group that is still open.
+ */
+static inline uint16_t *
+port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, vector_unsigned_short dp1,
+	vector_unsigned_short dp2)
+{
+	union {
+		uint16_t u16[FWDSTEP + 1];
+		uint64_t u64;
+	} *pnum = (void *)pn;
+
+	int32_t v = 0;
+	int32_t i;
+
+	/*
+	 * Build the 4 bit mask: bit i is set when lane i of dp1 and dp2
+	 * are equal. vec_any_eq() only tells whether any lane matches,
+	 * which is not enough to index gptbl[].
+	 */
+	for (i = 0; i != FWDSTEP; i++)
+		v |= (dp1[i] == dp2[i]) << i;
+
+	/* update last port counter. */
+	lp[0] += gptbl[v].lpv;
+
+	/* if dest port value has changed. */
+	if (v != GRPMSK) {
+		/* NOTE(review): this stores four 16-bit counters through
+		 * one 64-bit write, so which counter lands in u16[0]
+		 * depends on how gptbl[].pnum is packed versus the host
+		 * byte order - confirm for big endian.
+		 */
+		pnum->u64 = gptbl[v].pnum;
+		pnum->u16[FWDSTEP] = 1;
+		lp = pnum->u16 + gptbl[v].idx;
+	}
+
+	return lp;
+}
+
+/**
+ * Process one packet:
+ * Update source and destination MAC addresses in the ethernet header.
+ * Perform RFC1812 checks and updates for IPV4 packets.
+ *
+ * The replacement addresses are taken from val_eth[dst_port[0]] before
+ * rfc1812_process() gets a chance to change dst_port[0].
+ */
+static inline void
+process_packet(struct rte_mbuf *pkt, uint16_t *dst_port)
+{
+	struct rte_ether_hdr *eth_hdr;
+	vector_unsigned_int te, ve;
+
+	eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
+
+	te = *(vector_unsigned_int *)eth_hdr;
+	ve = (vector_unsigned_int)val_eth[dst_port[0]];
+
+	rfc1812_process((struct rte_ipv4_hdr *)(eth_hdr + 1), dst_port,
+			pkt->packet_type);
+
+	/* take the first 12 bytes from ve, keep the last 4 bytes of te */
+	te = (vector_unsigned_int)vec_sel(
+		(vector_unsigned_short)ve,
+		(vector_unsigned_short)te,
+		(vector_unsigned_short){0, 0, 0, 0,
+					0, 0, 0xffff, 0xffff});
+
+	*(vector_unsigned_int *)eth_hdr = te;
+}
+
+/**
+ * Send packets burst from pkts_burst to the ports in dst_port array
+ *
+ * Packets are first processed four at a time (processx4_step3) and
+ * then one by one (process_packet) for the remaining up to three.
+ * While doing so pnum[] is filled with the length of each run of
+ * consecutive packets that share a destination port, and each run is
+ * finally sent with one send_packetsx4() call or freed when its port
+ * is BAD_PORT.
+ *
+ * The lane shuffles are written with plain vector element accesses so
+ * that dp1 and dp2 hold exactly the lanes named in the comments below.
+ */
+static __rte_always_inline void
+send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
+		uint16_t dst_port[MAX_PKT_BURST], int nb_rx)
+{
+	int32_t k;
+	int j = 0;
+	uint16_t dlp;
+	uint16_t *lp;
+	uint16_t pnum[MAX_PKT_BURST + 1];
+
+	/*
+	 * Finish packet processing and group consecutive
+	 * packets with the same destination port.
+	 */
+	k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
+	if (k != 0) {
+		vector_unsigned_short dp1, dp2;
+
+		lp = pnum;
+		lp[0] = 1;
+
+		processx4_step3(pkts_burst, dst_port);
+
+		/* dp1: <d[0], d[1], d[2], d[3], ... > */
+		dp1 = *(vector_unsigned_short *)dst_port;
+
+		for (j = FWDSTEP; j != k; j += FWDSTEP) {
+			processx4_step3(&pkts_burst[j], &dst_port[j]);
+
+			/*
+			 * dp2:
+			 * <d[j-3], d[j-2], d[j-1], d[j], ... >
+			 */
+			dp2 = *((vector_unsigned_short *)
+					&dst_port[j - FWDSTEP + 1]);
+			lp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
+
+			/*
+			 * dp1:
+			 * <d[j], d[j+1], d[j+2], d[j+3], ... >
+			 * i.e. dp2 moved down by FWDSTEP - 1 lanes,
+			 * zero filled.
+			 */
+			dp1 = (vector_unsigned_short){
+				dp2[FWDSTEP - 1], dp2[FWDSTEP],
+				dp2[FWDSTEP + 1], dp2[FWDSTEP + 2],
+				dp2[FWDSTEP + 3], 0, 0, 0};
+		}
+
+		/*
+		 * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... >
+		 */
+		dp2 = (vector_unsigned_short){
+			dp1[1], dp1[2], dp1[3], dp1[3],
+			dp1[4], dp1[5], dp1[6], dp1[7]};
+		lp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
+
+		/*
+		 * remove values added by the last repeated
+		 * dst port.
+		 */
+		lp[0]--;
+		dlp = dst_port[j - 1];
+	} else {
+		/* set dlp and lp to the never used values. */
+		dlp = BAD_PORT - 1;
+		lp = pnum + MAX_PKT_BURST;
+	}
+
+	/* Process up to last 3 packets one by one. */
+	switch (nb_rx % FWDSTEP) {
+	case 3:
+		process_packet(pkts_burst[j], dst_port + j);
+		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+		j++;
+		/* fall-through */
+	case 2:
+		process_packet(pkts_burst[j], dst_port + j);
+		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+		j++;
+		/* fall-through */
+	case 1:
+		process_packet(pkts_burst[j], dst_port + j);
+		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+		j++;
+	}
+
+	/*
+	 * Send packets out, through destination port.
+	 * Consecutive packets with the same destination port
+	 * are already grouped together.
+	 * If destination port for the packet equals BAD_PORT,
+	 * then free the packet without sending it out.
+	 */
+	for (j = 0; j < nb_rx; j += k) {
+
+		int32_t m;
+		uint16_t pn;
+
+		pn = dst_port[j];
+		/* k is reused here as the length of the current run */
+		k = pnum[j];
+
+		if (likely(pn != BAD_PORT))
+			send_packetsx4(qconf, pn, pkts_burst + j, k);
+		else
+			for (m = j; m != j + k; m++)
+				rte_pktmbuf_free(pkts_burst[m]);
+
+	}
+}
+
+#endif /* _L3FWD_S390X_H_ */
diff --git a/lib/acl/acl_bld.c b/lib/acl/acl_bld.c
index 7ea30f4186..04f5f0a820 100644
--- a/lib/acl/acl_bld.c
+++ b/lib/acl/acl_bld.c
@@ -777,6 +777,9 @@ acl_build_reset(struct rte_acl_ctx *ctx)
 		sizeof(*ctx) - offsetof(struct rte_acl_ctx, num_categories));
 }
 
+
+
+
 static void
 acl_gen_full_range(struct acl_build_context *context, struct rte_acl_node *root,
 	struct rte_acl_node *end, int size, int level)
diff --git a/lib/acl/acl_gen.c b/lib/acl/acl_gen.c
index e759a2ca15..a3c31b0dc9 100644
--- a/lib/acl/acl_gen.c
+++ b/lib/acl/acl_gen.c
@@ -360,7 +360,16 @@ acl_gen_node(struct rte_acl_node *node, uint64_t *node_array,
 		array_ptr = &node_array[index->quad_index];
 		acl_add_ptrs(node, array_ptr, no_match, 0);
 		qtrp = (uint32_t *)node->transitions;
+
+		/* Swap qtrp on big endian so that transitions[0]
+		 * is the least significant byte.
+		 */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+		node->node_index = __bswap_32(qtrp[0]);
+#else
 		node->node_index = qtrp[0];
+#endif
+
 		node->node_index <<= sizeof(index->quad_index) * CHAR_BIT;
 		node->node_index |= index->quad_index | node->node_type;
 		index->quad_index += node->fanout;
diff --git a/lib/acl/acl_run_scalar.c b/lib/acl/acl_run_scalar.c
index 3d61e79409..9f01ef8d8c 100644
--- a/lib/acl/acl_run_scalar.c
+++ b/lib/acl/acl_run_scalar.c
@@ -141,6 +141,14 @@ rte_acl_classify_scalar(const struct rte_acl_ctx *ctx, const uint8_t **data,
 		input0 = GET_NEXT_4BYTES(parms, 0);
 		input1 = GET_NEXT_4BYTES(parms, 1);
 
+		/* input needs to be swapped because the rules get
+		 * swapped while building the trie.
+		 */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+		input0 = __bswap_32(input0);
+		input1 = __bswap_32(input1);
+#endif
+
 		for (n = 0; n < 4; n++) {
 
 			transition0 = scalar_transition(flows.trans,
diff --git a/lib/acl/rte_acl.c b/lib/acl/rte_acl.c
index a61c3ba188..ae42ea5b54 100644
--- a/lib/acl/rte_acl.c
+++ b/lib/acl/rte_acl.c
@@ -101,6 +101,8 @@ static const rte_acl_classify_t classify_fns[] = {
 	[RTE_ACL_CLASSIFY_AVX2] = rte_acl_classify_avx2,
 	[RTE_ACL_CLASSIFY_NEON] = rte_acl_classify_neon,
 	[RTE_ACL_CLASSIFY_ALTIVEC] = rte_acl_classify_altivec,
+	/* use scalar for s390x for now */
+	[RTE_ACL_CLASSIFY_S390X] = rte_acl_classify_scalar,
 	[RTE_ACL_CLASSIFY_AVX512X16] = rte_acl_classify_avx512x16,
 	[RTE_ACL_CLASSIFY_AVX512X32] = rte_acl_classify_avx512x32,
 };
@@ -145,6 +147,27 @@ acl_check_alg_ppc(enum rte_acl_classify_alg alg)
 	return -EINVAL;
 }
 
+
+
+/*
+ * Helper function for acl_check_alg.
+ * Check support for s390x specific classify methods.
+ * Returns -EINVAL for any other algorithm, 0 when the s390x method can
+ * be used and -ENOTSUP otherwise.
+ */
+static int
+acl_check_alg_s390x(enum rte_acl_classify_alg alg)
+{
+	if (alg != RTE_ACL_CLASSIFY_S390X)
+		return -EINVAL;
+
+#if defined(RTE_ARCH_S390X)
+	if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128)
+		return 0;
+#endif
+	return -ENOTSUP;
+}
+
+
 #ifdef CC_AVX512_SUPPORT
 static int
 acl_check_avx512_cpu_flags(void)
@@ -216,6 +239,8 @@ acl_check_alg(enum rte_acl_classify_alg alg)
 		return acl_check_alg_arm(alg);
 	case RTE_ACL_CLASSIFY_ALTIVEC:
 		return acl_check_alg_ppc(alg);
+    case RTE_ACL_CLASSIFY_S390X:
+        return acl_check_alg_s390x(alg);
 	case RTE_ACL_CLASSIFY_AVX512X32:
 	case RTE_ACL_CLASSIFY_AVX512X16:
 	case RTE_ACL_CLASSIFY_AVX2:
@@ -244,6 +269,8 @@ acl_get_best_alg(void)
 		RTE_ACL_CLASSIFY_NEON,
 #elif defined(RTE_ARCH_PPC_64)
 		RTE_ACL_CLASSIFY_ALTIVEC,
+#elif defined(RTE_ARCH_S390X)
+        RTE_ACL_CLASSIFY_S390X,
 #elif defined(RTE_ARCH_X86)
 		RTE_ACL_CLASSIFY_AVX512X32,
 		RTE_ACL_CLASSIFY_AVX512X16,
diff --git a/lib/acl/rte_acl.h b/lib/acl/rte_acl.h
index f7f5f08701..307a78ceac 100644
--- a/lib/acl/rte_acl.h
+++ b/lib/acl/rte_acl.h
@@ -241,8 +241,9 @@ enum rte_acl_classify_alg {
 	RTE_ACL_CLASSIFY_AVX2 = 3,    /**< requires AVX2 support. */
 	RTE_ACL_CLASSIFY_NEON = 4,    /**< requires NEON support. */
 	RTE_ACL_CLASSIFY_ALTIVEC = 5,    /**< requires ALTIVEC support. */
-	RTE_ACL_CLASSIFY_AVX512X16 = 6,  /**< requires AVX512 support. */
-	RTE_ACL_CLASSIFY_AVX512X32 = 7,  /**< requires AVX512 support. */
+    RTE_ACL_CLASSIFY_S390X = 6,    /**< requires s390x z13 support. */
+	RTE_ACL_CLASSIFY_AVX512X16 = 7,  /**< requires AVX512 support. */
+	RTE_ACL_CLASSIFY_AVX512X32 = 8,  /**< requires AVX512 support. */
 };
 
 /**
diff --git a/lib/eal/s390x/include/meson.build b/lib/eal/s390x/include/meson.build
new file mode 100644
index 0000000000..b4561d6a82
--- /dev/null
+++ b/lib/eal/s390x/include/meson.build
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# (c) Copyright IBM Corp. 2018, 2019
+
+install_headers(
+	'rte_atomic.h',
+	'rte_byteorder.h',
+	'rte_cpuflags.h',
+	'rte_cycles.h',
+	'rte_io.h',
+	'rte_memcpy.h',
+	'rte_pause.h',
+	'rte_prefetch.h',
+	'rte_rwlock.h',
+	'rte_spinlock.h',
+	'rte_vect.h',
+	subdir: get_option('include_subdir_arch'))
diff --git a/lib/eal/s390x/include/rte_atomic.h b/lib/eal/s390x/include/rte_atomic.h
new file mode 100644
index 0000000000..5fce6d5f07
--- /dev/null
+++ b/lib/eal/s390x/include/rte_atomic.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_ATOMIC_S390X_H_
+#define _RTE_ATOMIC_S390X_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+#  error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_atomic.h"
+
+/*
+ * z/Architecture may reorder a store after a later load, so a full barrier
+ * must serialize the CPU; a compiler-only barrier is not sufficient.
+ */
+#define rte_mb() __sync_synchronize()
+
+#define rte_wmb() rte_mb()
+
+#define rte_rmb() rte_mb()
+
+#define rte_smp_mb() rte_mb()
+
+/* Store-store and load-load order is kept by hardware: compiler fence only. */
+#define rte_smp_wmb() rte_compiler_barrier()
+#define rte_smp_rmb() rte_compiler_barrier()
+
+#define rte_io_mb() rte_mb()
+
+#define rte_io_wmb() rte_wmb()
+
+#define rte_io_rmb() rte_rmb()
+
+#define rte_cio_wmb() rte_wmb()
+#define rte_cio_rmb() rte_rmb()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_byteorder.h b/lib/eal/s390x/include/rte_byteorder.h
new file mode 100644
index 0000000000..de6e410b4b
--- /dev/null
+++ b/lib/eal/s390x/include/rte_byteorder.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+/* Inspired from FreeBSD src/sys/powerpc/include/endian.h
+ * Copyright (c) 1987, 1991, 1993
+ * The Regents of the University of California.  All rights reserved.
+ */
+
+#ifndef _RTE_BYTEORDER_S390X_H_
+#define _RTE_BYTEORDER_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include "generic/rte_byteorder.h"
+
+/* s390x is big endian
+ */
+
+#define rte_cpu_to_le_16(x) rte_bswap16(x)
+#define rte_cpu_to_le_32(x) rte_bswap32(x)
+#define rte_cpu_to_le_64(x) rte_bswap64(x)
+
+#define rte_cpu_to_be_16(x) (x)
+#define rte_cpu_to_be_32(x) (x)
+#define rte_cpu_to_be_64(x) (x)
+
+#define rte_le_to_cpu_16(x) rte_bswap16(x)
+#define rte_le_to_cpu_32(x) rte_bswap32(x)
+#define rte_le_to_cpu_64(x) rte_bswap64(x)
+
+#define rte_be_to_cpu_16(x) (x)
+#define rte_be_to_cpu_32(x) (x)
+#define rte_be_to_cpu_64(x) (x)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BYTEORDER_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_cpuflags.h b/lib/eal/s390x/include/rte_cpuflags.h
new file mode 100644
index 0000000000..bfeff3f98b
--- /dev/null
+++ b/lib/eal/s390x/include/rte_cpuflags.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_CPUFLAGS_S390X_H_
+#define _RTE_CPUFLAGS_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t {
+	RTE_CPUFLAG_ESAN3 = 0,
+	RTE_CPUFLAG_ZARCH,
+	RTE_CPUFLAG_STFLE,
+	RTE_CPUFLAG_MSA,
+	RTE_CPUFLAG_LDISP,
+	RTE_CPUFLAG_EIMM,
+	RTE_CPUFLAG_DFP,
+	RTE_CPUFLAG_HPAGE, //from elf.h
+	//RTE_CPUFLAG_EDAT, //from hwcap.h
+	RTE_CPUFLAG_ETF3EH,
+	RTE_CPUFLAG_HIGH_GPRS,
+	RTE_CPUFLAG_TE,
+	RTE_CPUFLAG_VXRS,
+	RTE_CPUFLAG_VXRS_BCD,
+	RTE_CPUFLAG_VXRS_EXT,
+	RTE_CPUFLAG_GS,
+	/* The last item */
+	RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */
+};
+
+#include "generic/rte_cpuflags.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CPUFLAGS_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_cycles.h b/lib/eal/s390x/include/rte_cycles.h
new file mode 100644
index 0000000000..7a430e06a8
--- /dev/null
+++ b/lib/eal/s390x/include/rte_cycles.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_CYCLES_S390X_H_
+#define _RTE_CYCLES_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+#include <rte_common.h>
+
+/**
+ * Read the TOD clock with the STCKF (store clock fast) instruction.
+ *
+ * @return
+ *   The 64-bit clock value stored by STCKF.
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+	uint64_t tsc;
+	asm volatile("stckf %0" : "=Q"(tsc) : : "cc");
+	return tsc;
+}
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+	rte_mb();
+	return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_io.h b/lib/eal/s390x/include/rte_io.h
new file mode 100644
index 0000000000..9cb3c1ca7c
--- /dev/null
+++ b/lib/eal/s390x/include/rte_io.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_IO_S390X_H_
+#define _RTE_IO_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#define RTE_OVERRIDE_IO_H
+
+#include "generic/rte_io.h"
+
+#include <unistd.h>
+#include <sys/syscall.h>
+
+union register_pair {
+	__int128_t pair;
+	struct {
+		unsigned long even;
+		unsigned long odd;
+	} even_odd;
+};
+
+/* s390 requires special instructions to access IO memory. */
+static inline uint64_t pcilgi(const volatile void *ioaddr, size_t len)
+{
+	union register_pair ioaddr_len =
+		{.even_odd.even = (uint64_t)ioaddr, .even_odd.odd = len};
+	uint64_t val;
+	int cc = -1;
+
+	asm volatile (
+		"       .insn   rre,0xb9d60000,%[val],%[ioaddr_len]\n"
+		"       ipm     %[cc]\n"
+		"       srl     %[cc],28\n"
+		: [cc] "+d" (cc), [val] "=d" (val),
+		  [ioaddr_len] "+&d" (ioaddr_len.pair) :: "cc");
+	return cc ? UINT64_MAX : val; /* no data was loaded unless cc == 0 */
+}
+
+static inline void pcistgi(volatile void *ioaddr, uint64_t val, size_t len)
+{
+        union register_pair ioaddr_len =
+                {.even_odd.even = (uint64_t)ioaddr, .even_odd.odd = len};
+	int cc = -1; /* set by ipm/srl below; never checked */
+
+	asm volatile (
+		"       .insn   rre,0xb9d40000,%[val],%[ioaddr_len]\n"
+		"       ipm     %[cc]\n"
+		"       srl     %[cc],28\n"
+		: [cc] "+d" (cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
+		: [val] "d" (val)
+		: "cc", "memory");
+}
+
+/* fall back to syscall on old machines ? */
+static __rte_always_inline uint8_t
+rte_read8_relaxed(const volatile void *addr)
+{
+	return pcilgi(addr, 1);
+}
+
+static __rte_always_inline uint16_t
+rte_read16_relaxed(const volatile void *addr)
+{
+	return pcilgi(addr, 2);
+}
+
+static __rte_always_inline uint32_t
+rte_read32_relaxed(const volatile void *addr)
+{
+	return pcilgi(addr, 4);
+}
+
+static __rte_always_inline uint64_t
+rte_read64_relaxed(const volatile void *addr)
+{
+	return pcilgi(addr, 8);
+}
+
+static __rte_always_inline void
+rte_write8_relaxed(uint8_t value, volatile void *addr)
+{
+	pcistgi(addr, value, sizeof(value));
+}
+
+static __rte_always_inline void
+rte_write16_relaxed(uint16_t value, volatile void *addr)
+{
+	pcistgi(addr, value, sizeof(value));
+}
+
+static __rte_always_inline void
+rte_write32_relaxed(uint32_t value, volatile void *addr)
+{
+	pcistgi(addr, value, sizeof(value));
+}
+
+static __rte_always_inline void
+rte_write64_relaxed(uint64_t value, volatile void *addr)
+{
+	pcistgi(addr, value, sizeof(value));
+}
+
+static __rte_always_inline uint8_t
+rte_read8(const volatile void *addr)
+{
+	uint8_t val;
+	val = rte_read8_relaxed(addr);
+	rte_io_rmb();
+	return val;
+}
+
+static __rte_always_inline uint16_t
+rte_read16(const volatile void *addr)
+{
+	uint16_t val;
+	val = rte_read16_relaxed(addr);
+	rte_io_rmb();
+	return val;
+}
+
+static __rte_always_inline uint32_t
+rte_read32(const volatile void *addr)
+{
+	uint32_t val;
+	val = rte_read32_relaxed(addr);
+	rte_io_rmb();
+	return val;
+}
+
+static __rte_always_inline uint64_t
+rte_read64(const volatile void *addr)
+{
+	uint64_t val;
+	val = rte_read64_relaxed(addr);
+	rte_io_rmb();
+	return val;
+}
+
+static __rte_always_inline void
+rte_write8(uint8_t value, volatile void *addr)
+{
+	rte_io_wmb();
+	rte_write8_relaxed(value, addr);
+}
+
+static __rte_always_inline void
+rte_write16(uint16_t value, volatile void *addr)
+{
+	rte_io_wmb();
+	rte_write16_relaxed(value, addr);
+}
+
+static __rte_always_inline void
+rte_write32(uint32_t value, volatile void *addr)
+{
+	rte_io_wmb();
+	rte_write32_relaxed(value, addr);
+}
+
+static __rte_always_inline void
+rte_write32_wc(uint32_t value, volatile void *addr)
+{
+    rte_write32(value, addr);
+}
+
+static __rte_always_inline void
+rte_write64(uint64_t value, volatile void *addr)
+{
+	rte_io_wmb();
+	rte_write64_relaxed(value, addr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_IO_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_mcslock.h b/lib/eal/s390x/include/rte_mcslock.h
new file mode 100644
index 0000000000..9125237dfd
--- /dev/null
+++ b/lib/eal/s390x/include/rte_mcslock.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_MCSLOCK_S390X_H_
+#define _RTE_MCSLOCK_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_mcslock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MCSLOCK_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_memcpy.h b/lib/eal/s390x/include/rte_memcpy.h
new file mode 100644
index 0000000000..1135b1af6f
--- /dev/null
+++ b/lib/eal/s390x/include/rte_memcpy.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_MEMCPY_S390X_H_
+#define _RTE_MEMCPY_S390X_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcpy.h"
+
+
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 16);
+}
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 32);
+}
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 48);
+}
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 64);
+}
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 128);
+}
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 256);
+}
+#define rte_memcpy(d, s, n)	memcpy((d), (s), (n))
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCPY_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_pause.h b/lib/eal/s390x/include/rte_pause.h
new file mode 100644
index 0000000000..be90ce6a1f
--- /dev/null
+++ b/lib/eal/s390x/include/rte_pause.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_PAUSE_S390X_H_
+#define _RTE_PAUSE_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_pause.h"
+
+static inline void rte_pause(void)
+{
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PAUSE_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_power_intrinsics.h b/lib/eal/s390x/include/rte_power_intrinsics.h
new file mode 100644
index 0000000000..c0e9ac279f
--- /dev/null
+++ b/lib/eal/s390x/include/rte_power_intrinsics.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef _RTE_POWER_INTRINSIC_S390X_H_
+#define _RTE_POWER_INTRINSIC_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+
+#include "generic/rte_power_intrinsics.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_POWER_INTRINSIC_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_prefetch.h b/lib/eal/s390x/include/rte_prefetch.h
new file mode 100644
index 0000000000..4a2e73116d
--- /dev/null
+++ b/lib/eal/s390x/include/rte_prefetch.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_PREFETCH_S390X_H_
+#define _RTE_PREFETCH_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_prefetch.h"
+
+static inline void rte_prefetch0(const volatile void *p)
+{
+	asm volatile ("pfd 1, 0(%[p])" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+	asm volatile ("pfd 1, 0(%[p])" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+	asm volatile ("pfd 1, 0(%[p])" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	/* non-temporal version not available, fallback to rte_prefetch0 */
+	rte_prefetch0(p);
+}
+
+__rte_experimental
+static inline void rte_cldemote(const volatile void *p)
+{
+    RTE_SET_USED(p);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_rwlock.h b/lib/eal/s390x/include/rte_rwlock.h
new file mode 100644
index 0000000000..f649484f35
--- /dev/null
+++ b/lib/eal/s390x/include/rte_rwlock.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_RWLOCK_S390X_H_
+#define _RTE_RWLOCK_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_rwlock.h"
+
+static inline void
+rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_read_lock(rwl);
+}
+
+static inline void
+rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_read_unlock(rwl);
+}
+
+static inline void
+rte_rwlock_write_lock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_write_lock(rwl);
+}
+
+static inline void
+rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_write_unlock(rwl);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RWLOCK_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_spinlock.h b/lib/eal/s390x/include/rte_spinlock.h
new file mode 100644
index 0000000000..0434864fbc
--- /dev/null
+++ b/lib/eal/s390x/include/rte_spinlock.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_SPINLOCK_S390X_H_
+#define _RTE_SPINLOCK_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
+#ifndef RTE_FORCE_INTRINSICS
+
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl)
+{
+	while (__sync_lock_test_and_set(&sl->locked, 1))
+		while (sl->locked)
+			rte_pause();
+}
+
+static inline void
+rte_spinlock_unlock(rte_spinlock_t *sl)
+{
+	__sync_lock_release(&sl->locked);
+}
+
+static inline int
+rte_spinlock_trylock(rte_spinlock_t *sl)
+{
+	return __sync_lock_test_and_set(&sl->locked, 1) == 0;
+}
+
+#endif
+
+
+static inline int rte_tm_supported(void)
+{
+	return 0;
+}
+
+static inline void
+rte_spinlock_lock_tm(rte_spinlock_t *sl)
+{
+	rte_spinlock_lock(sl); /* fall-back */
+}
+
+static inline int
+rte_spinlock_trylock_tm(rte_spinlock_t *sl)
+{
+	return rte_spinlock_trylock(sl);
+}
+
+static inline void
+rte_spinlock_unlock_tm(rte_spinlock_t *sl)
+{
+	rte_spinlock_unlock(sl);
+}
+
+static inline void
+rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr)
+{
+	rte_spinlock_recursive_lock(slr); /* fall-back */
+}
+
+static inline void
+rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr)
+{
+	rte_spinlock_recursive_unlock(slr);
+}
+
+static inline int
+rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
+{
+	return rte_spinlock_recursive_trylock(slr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SPINLOCK_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_ticketlock.h b/lib/eal/s390x/include/rte_ticketlock.h
new file mode 100644
index 0000000000..0785363c94
--- /dev/null
+++ b/lib/eal/s390x/include/rte_ticketlock.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2019
+ */
+
+#ifndef _RTE_TICKETLOCK_S390X_H_
+#define _RTE_TICKETLOCK_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_ticketlock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_TICKETLOCK_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_vect.h b/lib/eal/s390x/include/rte_vect.h
new file mode 100644
index 0000000000..8fe3535965
--- /dev/null
+++ b/lib/eal/s390x/include/rte_vect.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_VECT_S390X_H_
+#define _RTE_VECT_S390X_H_
+
+#include <vecintrin.h>
+#include "generic/rte_vect.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_256
+
+typedef int xmm_t __attribute__((vector_size(4*sizeof(int))));
+
+#define	XMM_SIZE	(sizeof(xmm_t))
+#define	XMM_MASK	(XMM_SIZE - 1)
+
+typedef union rte_xmm {
+	xmm_t    x;
+	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
+	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
+	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
+	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
+	double   pd[XMM_SIZE / sizeof(double)];
+} __attribute__((aligned(16))) rte_xmm_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_VECT_S390X_H_ */
diff --git a/lib/eal/s390x/meson.build b/lib/eal/s390x/meson.build
new file mode 100644
index 0000000000..c8cc8d1f3d
--- /dev/null
+++ b/lib/eal/s390x/meson.build
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# (c) Copyright IBM Corp. 2018, 2019
+
+subdir('include')
+
+# 19.xx zarch patches lib/librte_eal/common/arch/s390x/meson.build:
+# var was: eal_common_arch_sources
+#
+sources += files(
+        'rte_cpuflags.c',
+        'rte_cycles.c',
+        'rte_hypervisor.c',
+        'rte_power_intrinsics.c',
+)
+
+
diff --git a/lib/eal/s390x/rte_cpuflags.c b/lib/eal/s390x/rte_cpuflags.c
new file mode 100644
index 0000000000..d57a51d267
--- /dev/null
+++ b/lib/eal/s390x/rte_cpuflags.c
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#include "rte_cpuflags.h"
+
+#include <elf.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <unistd.h>
+#include <string.h>
+
+/* Symbolic values for the entries in the auxiliary table */
+#define AT_HWCAP  16
+#define AT_HWCAP2 26
+
+/* software based registers */
+enum cpu_register_t {
+	REG_NONE = 0,
+	REG_HWCAP,
+	REG_HWCAP2,
+	REG_MAX
+};
+
+typedef uint32_t hwcap_registers_t[REG_MAX];
+
+struct feature_entry {
+	uint32_t reg;
+	uint32_t bit;
+#define CPU_FLAG_NAME_MAX_LEN 64
+	char name[CPU_FLAG_NAME_MAX_LEN];
+};
+
+#define FEAT_DEF(name, reg, bit) \
+	[RTE_CPUFLAG_##name] = {reg, bit, #name},
+
+const struct feature_entry rte_cpu_feature_table[] = {
+	FEAT_DEF(ESAN3,                  REG_HWCAP,   0)
+	FEAT_DEF(ZARCH,                  REG_HWCAP,   1)
+	FEAT_DEF(STFLE,                  REG_HWCAP,   2)
+	FEAT_DEF(MSA,                    REG_HWCAP,   3)
+	FEAT_DEF(LDISP,                  REG_HWCAP,   4)
+	FEAT_DEF(EIMM,                   REG_HWCAP,   5)
+	FEAT_DEF(DFP,                    REG_HWCAP,   6)
+	FEAT_DEF(HPAGE,                  REG_HWCAP,   7)
+	FEAT_DEF(ETF3EH,                 REG_HWCAP,   8)
+	FEAT_DEF(HIGH_GPRS,              REG_HWCAP,   9)
+	FEAT_DEF(TE,                     REG_HWCAP,  10)
+	FEAT_DEF(VXRS,                   REG_HWCAP,  11)
+	FEAT_DEF(VXRS_BCD,               REG_HWCAP,  12)
+	FEAT_DEF(VXRS_EXT,               REG_HWCAP,  13)
+	FEAT_DEF(GS,                     REG_HWCAP,  14)
+};
+
+/*
+ * Read the AT_HWCAP and AT_HWCAP2 auxiliary vector entries for s390x
+ */
+static void
+rte_cpu_get_features(hwcap_registers_t out)
+{
+	out[REG_HWCAP] = rte_cpu_getauxval(AT_HWCAP);
+	out[REG_HWCAP2] = rte_cpu_getauxval(AT_HWCAP2);
+}
+
+/*
+ * Checks if a particular flag is available on current machine.
+ */
+int
+rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
+{
+	const struct feature_entry *feat;
+	hwcap_registers_t regs = {0};
+
+	if (feature >= RTE_CPUFLAG_NUMFLAGS)
+		return -ENOENT;
+
+	feat = &rte_cpu_feature_table[feature];
+	if (feat->reg == REG_NONE)
+		return -EFAULT;
+
+	rte_cpu_get_features(regs);
+	return (regs[feat->reg] >> feat->bit) & 1;
+}
+
+const char *
+rte_cpu_get_flag_name(enum rte_cpu_flag_t feature)
+{
+	if (feature >= RTE_CPUFLAG_NUMFLAGS)
+		return NULL;
+	return rte_cpu_feature_table[feature].name;
+}
diff --git a/lib/eal/s390x/rte_cycles.c b/lib/eal/s390x/rte_cycles.c
new file mode 100644
index 0000000000..b29c4454a1
--- /dev/null
+++ b/lib/eal/s390x/rte_cycles.c
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#include "eal_private.h"
+
+uint64_t
+get_tsc_freq_arch(void)
+{
+	return 0;
+}
diff --git a/lib/eal/s390x/rte_hypervisor.c b/lib/eal/s390x/rte_hypervisor.c
new file mode 100644
index 0000000000..22b0c5cc47
--- /dev/null
+++ b/lib/eal/s390x/rte_hypervisor.c
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#include "rte_hypervisor.h"
+
+enum rte_hypervisor
+rte_hypervisor_get(void)
+{
+	return RTE_HYPERVISOR_UNKNOWN;
+}
diff --git a/lib/eal/s390x/rte_power_intrinsics.c b/lib/eal/s390x/rte_power_intrinsics.c
new file mode 100644
index 0000000000..f00b58ade5
--- /dev/null
+++ b/lib/eal/s390x/rte_power_intrinsics.c
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#include "rte_power_intrinsics.h"
+
+/**
+ * This function is not supported on s390x; it always returns -ENOTSUP.
+ */
+int
+rte_power_monitor(const struct rte_power_monitor_cond *pmc,
+		const uint64_t tsc_timestamp)
+{
+	RTE_SET_USED(pmc);
+	RTE_SET_USED(tsc_timestamp);
+
+	return -ENOTSUP;
+}
+
+/**
+ * This function is not supported on s390x; it always returns -ENOTSUP.
+ */
+int
+rte_power_pause(const uint64_t tsc_timestamp)
+{
+	RTE_SET_USED(tsc_timestamp);
+
+	return -ENOTSUP;
+}
+
+/**
+ * This function is not supported on s390x; it always returns -ENOTSUP.
+ */
+int
+rte_power_monitor_wakeup(const unsigned int lcore_id)
+{
+	RTE_SET_USED(lcore_id);
+
+	return -ENOTSUP;
+}
+
+int
+rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
+		const uint32_t num, const uint64_t tsc_timestamp)
+{
+	RTE_SET_USED(pmc);
+	RTE_SET_USED(num);
+	RTE_SET_USED(tsc_timestamp);
+
+	return -ENOTSUP; /* not supported on s390x; arguments are ignored */
+}
diff --git a/lib/hash/rte_fbk_hash.h b/lib/hash/rte_fbk_hash.h
index b01126999b..956d3f90f9 100644
--- a/lib/hash/rte_fbk_hash.h
+++ b/lib/hash/rte_fbk_hash.h
@@ -123,9 +123,16 @@ rte_fbk_hash_add_key_with_bucket(struct rte_fbk_hash_table *ht,
 	 * corrupted due to race conditions, but it's still possible to
 	 * overwrite entries that have just been made valid.
 	 */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 	const uint64_t new_entry = ((uint64_t)(key) << 32) |
 			((uint64_t)(value) << 16) |
 			1;  /* 1 = is_entry bit. */
+	#else
+	const uint64_t new_entry =
+			((uint64_t)(1) << 48) | /* 1 = is_entry bit. */
+			((uint64_t)(value) << 32) |
+			(uint64_t)(key);
+	#endif
 	uint32_t i;
 
 	for (i = 0; i < ht->entries_per_bucket; i++) {
diff --git a/lib/lpm/meson.build b/lib/lpm/meson.build
index 78d91d3421..20f76368fa 100644
--- a/lib/lpm/meson.build
+++ b/lib/lpm/meson.build
@@ -13,6 +13,7 @@ headers = files('rte_lpm.h', 'rte_lpm6.h')
 # without worrying about which architecture we actually need
 indirect_headers += files(
         'rte_lpm_altivec.h',
+        'rte_lpm_s390x.h',
         'rte_lpm_neon.h',
         'rte_lpm_sse.h',
         'rte_lpm_sve.h',
diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h
index eb91960e81..b9ee616c1d 100644
--- a/lib/lpm/rte_lpm.h
+++ b/lib/lpm/rte_lpm.h
@@ -405,6 +405,8 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 #endif
 #elif defined(RTE_ARCH_PPC_64)
 #include "rte_lpm_altivec.h"
+#elif defined(RTE_ARCH_S390X)
+#include "rte_lpm_s390x.h"
 #else
 #include "rte_lpm_sse.h"
 #endif
diff --git a/lib/lpm/rte_lpm6.c b/lib/lpm/rte_lpm6.c
index 8d21aeddb8..4a0f5740a2 100644
--- a/lib/lpm/rte_lpm6.c
+++ b/lib/lpm/rte_lpm6.c
@@ -18,6 +18,7 @@
 #include <assert.h>
 #include <rte_jhash.h>
 #include <rte_tailq.h>
+#include <rte_byteorder.h>
 
 #include "rte_lpm6.h"
 
@@ -52,6 +53,8 @@ static struct rte_tailq_elem rte_lpm6_tailq = {
 };
 EAL_REGISTER_TAILQ(rte_lpm6_tailq)
 
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
 /** Tbl entry structure. It is the same for both tbl24 and tbl8 */
 struct rte_lpm6_tbl_entry {
 	uint32_t next_hop:	21;  /**< Next hop / next table to be checked. */
@@ -63,6 +66,21 @@ struct rte_lpm6_tbl_entry {
 	uint32_t ext_entry :1;   /**< External entry. */
 };
 
+#else
+
+struct rte_lpm6_tbl_entry {
+
+	/* Flags. */
+	uint32_t ext_entry :1;   /**< External entry. */
+	uint32_t valid_group :1; /**< Group validation flag. */
+	uint32_t valid     :1;   /**< Validation flag. */
+
+	uint32_t depth	:8;      /**< Rule depth. */
+	uint32_t next_hop:	21;  /**< Next hop / next table to be checked. */
+};
+
+#endif
+
 /** Rules tbl entry structure. */
 struct rte_lpm6_rule {
 	uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE]; /**< Rule IP address. */
diff --git a/lib/lpm/rte_lpm_s390x.h b/lib/lpm/rte_lpm_s390x.h
new file mode 100644
index 0000000000..eb1fdd4509
--- /dev/null
+++ b/lib/lpm/rte_lpm_s390x.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2016, 2018
+ */
+
+#ifndef _RTE_LPM_S390X_H_
+#define _RTE_LPM_S390X_H_
+
+#include <rte_branch_prediction.h>
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_vect.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Look up four IPv4 addresses at once, one per 32-bit lane of ip.
+ *
+ * hop[i] receives the low 24 bits of the entry found for lane i, or defv
+ * when that entry does not have RTE_LPM_LOOKUP_SUCCESS set.
+ */
+static inline void
+rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+	uint32_t defv)
+{
+	rte_xmm_t i8;
+	uint32_t tbl[4];
+	uint64_t pt, pt2;
+	const uint32_t *ptbl;
+
+	const uint32_t mask = UINT8_MAX;
+	const xmm_t mask8 = (xmm_t){mask, mask, mask, mask};
+
+	/*
+	 * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 2 LPM entries
+	 * as one 64-bit value (0x0300000003000000).
+	 */
+	const uint64_t mask_xv =
+		((uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK |
+		(uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 32);
+
+	/*
+	 * RTE_LPM_LOOKUP_SUCCESS for 2 LPM entries
+	 * as one 64-bit value (0x0100000001000000).
+	 */
+	const uint64_t mask_v =
+		((uint64_t)RTE_LPM_LOOKUP_SUCCESS |
+		(uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32);
+
+	/*
+	 * Fetch the tbl24[] entries.  The index is the upper 24 bits of
+	 * the address, so it is always below 1 << 24.
+	 */
+	ptbl = (const uint32_t *)&lpm->tbl24[(uint32_t)ip[0] >> 8];
+	tbl[0] = *ptbl;
+	ptbl = (const uint32_t *)&lpm->tbl24[(uint32_t)ip[1] >> 8];
+	tbl[1] = *ptbl;
+	ptbl = (const uint32_t *)&lpm->tbl24[(uint32_t)ip[2] >> 8];
+	tbl[2] = *ptbl;
+	ptbl = (const uint32_t *)&lpm->tbl24[(uint32_t)ip[3] >> 8];
+	tbl[3] = *ptbl;
+
+	/* get 4 indexes for tbl8[]: the low byte of each address. */
+	i8.x = vec_and(ip, mask8);
+
+	/* pack the entries pairwise so two are tested per comparison. */
+	pt = (uint64_t)tbl[0] |
+		(uint64_t)tbl[1] << 32;
+	pt2 = (uint64_t)tbl[2] |
+		(uint64_t)tbl[3] << 32;
+
+	/* search successfully finished for all 4 IP addresses. */
+	if (likely((pt & mask_xv) == mask_v) &&
+			likely((pt2 & mask_xv) == mask_v)) {
+		/*
+		 * Store lane by lane: on big-endian s390x a 64-bit store
+		 * of pt through hop would put tbl[1] into hop[0].
+		 */
+		hop[0] = tbl[0] & 0x00FFFFFF;
+		hop[1] = tbl[1] & 0x00FFFFFF;
+		hop[2] = tbl[2] & 0x00FFFFFF;
+		hop[3] = tbl[3] & 0x00FFFFFF;
+		return;
+	}
+
+	/*
+	 * Extended entries: the low 24 bits are the tbl8 group index and
+	 * must not be truncated to 8 bits.
+	 */
+	if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[0] = i8.u32[0] +
+			(tbl[0] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[0]];
+		tbl[0] = *ptbl;
+	}
+	if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[1] = i8.u32[1] +
+			(tbl[1] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[1]];
+		tbl[1] = *ptbl;
+	}
+	if (unlikely((pt2 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[2] = i8.u32[2] +
+			(tbl[2] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[2]];
+		tbl[2] = *ptbl;
+	}
+	if (unlikely((pt2 >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[3] = i8.u32[3] +
+			(tbl[3] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[3]];
+		tbl[3] = *ptbl;
+	}
+
+	hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[0] & 0x00FFFFFF : defv;
+	hop[1] = (tbl[1] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[1] & 0x00FFFFFF : defv;
+	hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[2] & 0x00FFFFFF : defv;
+	hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[3] & 0x00FFFFFF : defv;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LPM_S390X_H_ */
diff --git a/meson.build b/meson.build
index 937f6110c0..8c8d673609 100644
--- a/meson.build
+++ b/meson.build
@@ -50,6 +50,8 @@ elif host_machine.cpu_family().startswith('arm') or host_machine.cpu_family().st
     arch_subdir = 'arm'
 elif host_machine.cpu_family().startswith('ppc')
     arch_subdir = 'ppc'
+elif host_machine.cpu_family().startswith('s390x')
+	arch_subdir = 's390x'
 endif
 
 # configure the build, and make sure configs here and in config folder are
-- 
2.37.2


             reply	other threads:[~2022-10-28 21:52 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-28 21:52 David Miller [this message]
2022-10-28 22:45 ` Stephen Hemminger
2022-10-28 22:54   ` David Miller
2022-10-28 22:51 ` Stephen Hemminger
2023-07-06 22:47 ` Stephen Hemminger
2023-07-06 22:49 ` Stephen Hemminger
2023-07-07  0:04   ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221028215240.103365-1-dmiller423@gmail.com \
    --to=dmiller423@gmail.com \
    --cc=aman.deep.singh@intel.com \
    --cc=beilei.xing@intel.com \
    --cc=bruce.richardson@intel.com \
    --cc=dev@dpdk.org \
    --cc=dmitry.kozliuk@gmail.com \
    --cc=haiyue.wang@intel.com \
    --cc=hkalra@marvell.com \
    --cc=jgrajcia@cisco.com \
    --cc=konstantin.ananyev@intel.com \
    --cc=matan@nvidia.com \
    --cc=nhorman@tuxdriver.com \
    --cc=olivier.matz@6wind.com \
    --cc=sameh.gobriel@intel.com \
    --cc=tardis@us.ibm.com \
    --cc=viacheslavo@nvidia.com \
    --cc=vladimir.medvedkin@intel.com \
    --cc=xiaoyun.li@intel.com \
    --cc=yipeng1.wang@intel.com \
    --cc=yuying.zhang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).