From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <dev-bounces@dpdk.org>
Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124])
	by inbox.dpdk.org (Postfix) with ESMTP id EB31F43B0C;
	Tue, 13 Feb 2024 17:58:47 +0100 (CET)
Received: from mails.dpdk.org (localhost [127.0.0.1])
	by mails.dpdk.org (Postfix) with ESMTP id D74DA42E11;
	Tue, 13 Feb 2024 17:58:47 +0100 (CET)
Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.17])
 by mails.dpdk.org (Postfix) with ESMTP id C6E3C42E0C
 for <dev@dpdk.org>; Tue, 13 Feb 2024 17:58:45 +0100 (CET)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple;
 d=intel.com; i=@intel.com; q=dns/txt; s=Intel;
 t=1707843526; x=1739379526;
 h=from:to:subject:date:message-id:in-reply-to:references:
 mime-version:content-transfer-encoding;
 bh=3VLZxU+xRSE8/94oG9jWQN8sqjtoBvB+9fxYbrzzHjc=;
 b=Up1E3F+Dkr8a7n7v0as7u/W2y/eCIDo8CcqbCgqY8OgGA90OthxybqZm
 kwZobtqBXymQrT6YoQ5X9gEtA6NDrYPRC+eXQ4xB4IL4s8wQoVy5PYJhc
 mOHEuXdV8LCY8dVs7e8hciuQtB56IJE2fHm+FgIn93i+V9oyHuUWSK+FA
 G7IdWLo9sOXIz44RIehtwuE4+mn+V00JZS3Epzjo4CQj+Hg6liw3DIadL
 McRVNNpTWGvZR1ji1O9VMe4Tgdn8izvm+SAyIVDoB+Us1IIPWu4CkkH5u
 a/r/PXcTpjFJZiJL8Tbl4aPbBi0w0tvRv/u1dVTVXeIALzfxoBcamtizM A==;
X-IronPort-AV: E=McAfee;i="6600,9927,10982"; a="2000499"
X-IronPort-AV: E=Sophos;i="6.06,157,1705392000"; 
   d="scan'208";a="2000499"
Received: from fmviesa007.fm.intel.com ([10.60.135.147])
 by orvoesa109.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;
 13 Feb 2024 08:57:41 -0800
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="6.06,157,1705392000"; 
   d="scan'208";a="2855754"
Received: from silpixa00400573.ir.intel.com (HELO
 silpixa00400573.ger.corp.intel.com) ([10.237.223.184])
 by fmviesa007.fm.intel.com with ESMTP; 13 Feb 2024 08:57:40 -0800
From: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
To: dev@dpdk.org
Subject: [PATCH 2/3] pipeline: optimize conversion between IPv4 and IPv6
 addresses
Date: Tue, 13 Feb 2024 16:57:36 +0000
Message-Id: <20240213165737.1534180-3-cristian.dumitrescu@intel.com>
X-Mailer: git-send-email 2.34.1
In-Reply-To: <20240213165737.1534180-1-cristian.dumitrescu@intel.com>
References: <20240213165737.1534180-1-cristian.dumitrescu@intel.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <https://mails.dpdk.org/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://mails.dpdk.org/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <https://mails.dpdk.org/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
Errors-To: dev-bounces@dpdk.org

Enhanced the move instruction to detect and optimize the conversion
between 128-bit numbers (IPv6 addresses) and 64-bit numbers (upper or
lower part of IPv6 addresses) or 32-bit numbers (IPv4 addresses).

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 56 ++++++++++++++++++++++++
 lib/pipeline/rte_swx_pipeline_internal.h | 52 ++++++++++++++++++++++
 2 files changed, 108 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 12f335005d..147c1c2ad4 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -3234,8 +3234,16 @@ instr_mov_translate(struct rte_swx_pipeline *p,
 			instr->type = INSTR_MOV_DMA;
 			if (fdst->n_bits == 128 && fsrc->n_bits == 128)
 				instr->type = INSTR_MOV_128;
+
+			if (fdst->n_bits == 128 && fsrc->n_bits == 64)
+				instr->type = INSTR_MOV_128_64;
+			if (fdst->n_bits == 64 && fsrc->n_bits == 128)
+				instr->type = INSTR_MOV_64_128;
+
 			if (fdst->n_bits == 128 && fsrc->n_bits == 32)
 				instr->type = INSTR_MOV_128_32;
+			if (fdst->n_bits == 32 && fsrc->n_bits == 128)
+				instr->type = INSTR_MOV_32_128;
 		}
 
 		instr->mov.dst.struct_id = (uint8_t)dst_struct_id;
@@ -3335,6 +3343,30 @@ instr_mov_128_exec(struct rte_swx_pipeline *p)
 	thread_ip_inc(p);
 }
 
+static inline void
+instr_mov_128_64_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_mov_128_64_exec(p, t, ip);
+
+	/* Thread. */
+	thread_ip_inc(p);
+}
+
+static inline void
+instr_mov_64_128_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_mov_64_128_exec(p, t, ip);
+
+	/* Thread. */
+	thread_ip_inc(p);
+}
+
 static inline void
 instr_mov_128_32_exec(struct rte_swx_pipeline *p)
 {
@@ -3347,6 +3379,18 @@ instr_mov_128_32_exec(struct rte_swx_pipeline *p)
 	thread_ip_inc(p);
 }
 
+static inline void
+instr_mov_32_128_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_mov_32_128_exec(p, t, ip);
+
+	/* Thread. */
+	thread_ip_inc(p);
+}
+
 static inline void
 instr_mov_i_exec(struct rte_swx_pipeline *p)
 {
@@ -7523,7 +7567,10 @@ static instr_exec_t instruction_table[] = {
 	[INSTR_MOV_HH] = instr_mov_hh_exec,
 	[INSTR_MOV_DMA] = instr_mov_dma_exec,
 	[INSTR_MOV_128] = instr_mov_128_exec,
+	[INSTR_MOV_128_64] = instr_mov_128_64_exec,
+	[INSTR_MOV_64_128] = instr_mov_64_128_exec,
 	[INSTR_MOV_128_32] = instr_mov_128_32_exec,
+	[INSTR_MOV_32_128] = instr_mov_32_128_exec,
 	[INSTR_MOV_I] = instr_mov_i_exec,
 
 	[INSTR_MOVH] = instr_movh_exec,
@@ -11850,7 +11897,10 @@ instr_type_to_name(struct instruction *instr)
 	case INSTR_MOV_HH: return "INSTR_MOV_HH";
 	case INSTR_MOV_DMA: return "INSTR_MOV_DMA";
 	case INSTR_MOV_128: return "INSTR_MOV_128";
+	case INSTR_MOV_128_64: return "INSTR_MOV_128_64";
+	case INSTR_MOV_64_128: return "INSTR_MOV_64_128";
 	case INSTR_MOV_128_32: return "INSTR_MOV_128_32";
+	case INSTR_MOV_32_128: return "INSTR_MOV_32_128";
 	case INSTR_MOV_I: return "INSTR_MOV_I";
 
 	case INSTR_MOVH: return "INSTR_MOVH";
@@ -12921,7 +12971,10 @@ static instruction_export_t export_table[] = {
 	[INSTR_MOV_HH] = instr_mov_export,
 	[INSTR_MOV_DMA] = instr_mov_export,
 	[INSTR_MOV_128] = instr_mov_export,
+	[INSTR_MOV_128_64] = instr_mov_export,
+	[INSTR_MOV_64_128] = instr_mov_export,
 	[INSTR_MOV_128_32] = instr_mov_export,
+	[INSTR_MOV_32_128] = instr_mov_export,
 	[INSTR_MOV_I] = instr_mov_export,
 
 	[INSTR_MOVH] = instr_movh_export,
@@ -13152,7 +13205,10 @@ instr_type_to_func(struct instruction *instr)
 	case INSTR_MOV_HH: return "__instr_mov_hh_exec";
 	case INSTR_MOV_DMA: return "__instr_mov_dma_exec";
 	case INSTR_MOV_128: return "__instr_mov_128_exec";
+	case INSTR_MOV_128_64: return "__instr_mov_128_64_exec";
+	case INSTR_MOV_64_128: return "__instr_mov_64_128_exec";
 	case INSTR_MOV_128_32: return "__instr_mov_128_32_exec";
+	case INSTR_MOV_32_128: return "__instr_mov_32_128_exec";
 	case INSTR_MOV_I: return "__instr_mov_i_exec";
 
 	case INSTR_MOVH: return "__instr_movh_exec";
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 7ae7622329..f5a64e0fe1 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -344,7 +344,10 @@ enum instruction_type {
 	INSTR_MOV_HH,  /* dst = H, src = H; size(dst) <= 64 bits, size(src) <= 64 bits. */
 	INSTR_MOV_DMA, /* dst and src in NBO format. */
 	INSTR_MOV_128, /* dst and src in NBO format, size(dst) = size(src) = 128 bits. */
+	INSTR_MOV_128_64, /* dst and src in NBO format, size(dst) = 128 bits, size(src) = 64 b. */
+	INSTR_MOV_64_128, /* dst and src in NBO format, size(dst) = 64 bits, size(src) = 128 b. */
 	INSTR_MOV_128_32, /* dst and src in NBO format, size(dst) = 128 bits, size(src) = 32 b. */
+	INSTR_MOV_32_128, /* dst and src in NBO format, size(dst) = 32 bits, size(src) = 128 b. */
 	INSTR_MOV_I,   /* dst = HMEF, src = I; size(dst) <= 64 bits. */
 
 	/* movh dst src
@@ -2682,6 +2685,39 @@ __instr_mov_128_exec(struct rte_swx_pipeline *p __rte_unused,
 	dst64_ptr[1] = src64_ptr[1];
 }
 
+static inline void
+__instr_mov_128_64_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	uint8_t *dst = t->structs[ip->mov.dst.struct_id] + ip->mov.dst.offset;
+	uint8_t *src = t->structs[ip->mov.src.struct_id] + ip->mov.src.offset;
+
+	uint64_t *dst64 = (uint64_t *)dst;
+	uint64_t *src64 = (uint64_t *)src;
+
+	TRACE("[Thread %2u] mov (128 <- 64)\n", p->thread_id);
+
+	dst64[0] = 0;
+	dst64[1] = src64[0];
+}
+
+static inline void
+__instr_mov_64_128_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	uint8_t *dst = t->structs[ip->mov.dst.struct_id] + ip->mov.dst.offset;
+	uint8_t *src = t->structs[ip->mov.src.struct_id] + ip->mov.src.offset;
+
+	uint64_t *dst64 = (uint64_t *)dst;
+	uint64_t *src64 = (uint64_t *)src;
+
+	TRACE("[Thread %2u] mov (64 <- 128)\n", p->thread_id);
+
+	dst64[0] = src64[1];
+}
+
 static inline void
 __instr_mov_128_32_exec(struct rte_swx_pipeline *p __rte_unused,
 			struct thread *t,
@@ -2701,6 +2737,22 @@ __instr_mov_128_32_exec(struct rte_swx_pipeline *p __rte_unused,
 	dst32[3] = src32[0];
 }
 
+static inline void
+__instr_mov_32_128_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	uint8_t *dst = t->structs[ip->mov.dst.struct_id] + ip->mov.dst.offset;
+	uint8_t *src = t->structs[ip->mov.src.struct_id] + ip->mov.src.offset;
+
+	uint32_t *dst32 = (uint32_t *)dst;
+	uint32_t *src32 = (uint32_t *)src;
+
+	TRACE("[Thread %2u] mov (32 <- 128)\n", p->thread_id);
+
+	dst32[0] = src32[3];
+}
+
 static inline void
 __instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
 		   struct thread *t,
-- 
2.34.1