From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <dev-bounces@dpdk.org>
Received: from dpdk.org (dpdk.org [92.243.14.124])
	by inbox.dpdk.org (Postfix) with ESMTP id 80F39A04B1;
	Wed, 30 Sep 2020 08:44:22 +0200 (CEST)
Received: from [92.243.14.124] (localhost [127.0.0.1])
	by dpdk.org (Postfix) with ESMTP id 774481DB66;
	Wed, 30 Sep 2020 08:35:54 +0200 (CEST)
Received: from mga09.intel.com (mga09.intel.com [134.134.136.24])
 by dpdk.org (Postfix) with ESMTP id 9AB161DAEB
 for <dev@dpdk.org>; Wed, 30 Sep 2020 08:34:56 +0200 (CEST)
IronPort-SDR: tbbS7hUWLbzqXJ7C88Y/+CwshuOX2ZOa+AEut+h/mP97OTZB/mdVJvEoxnLSvex4mJOPbCnH/o
 MgyLQaAzwUIQ==
X-IronPort-AV: E=McAfee;i="6000,8403,9759"; a="163238771"
X-IronPort-AV: E=Sophos;i="5.77,321,1596524400"; d="scan'208";a="163238771"
X-Amp-Result: SKIPPED(no attachment in message)
X-Amp-File-Uploaded: False
Received: from orsmga005.jf.intel.com ([10.7.209.41])
 by orsmga102.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;
 29 Sep 2020 23:34:55 -0700
IronPort-SDR: Y3N3FI6buV2eJPmmxfzhx3rfaGJbz3xlqkvo3AfdT2RiF8DRWn9DYqccgmbyGRxNuwErxse6Uj
 CEaWGZr96JOw==
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.77,321,1596524400"; d="scan'208";a="495883422"
Received: from silpixa00400573.ir.intel.com (HELO
 silpixa00400573.ger.corp.intel.com) ([10.237.223.107])
 by orsmga005.jf.intel.com with ESMTP; 29 Sep 2020 23:34:55 -0700
From: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
To: dev@dpdk.org
Cc: thomas@monjalon.net,
	david.marchand@redhat.com
Date: Wed, 30 Sep 2020 07:34:02 +0100
Message-Id: <20200930063416.68428-29-cristian.dumitrescu@intel.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20200930063416.68428-1-cristian.dumitrescu@intel.com>
References: <20200923180645.55852-2-cristian.dumitrescu@intel.com>
 <20200930063416.68428-1-cristian.dumitrescu@intel.com>
Subject: [dpdk-dev] [PATCH v6 28/42] pipeline: add SWX instruction optimizer
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <https://mails.dpdk.org/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://mails.dpdk.org/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <https://mails.dpdk.org/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
Errors-To: dev-bounces@dpdk.org
Sender: "dev" <dev-bounces@dpdk.org>

Instruction optimizer. Detects frequent patterns and replaces them
with some more powerful vector-like pipeline instructions without any
user effort. Executes at instruction translation, not at run-time.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/librte_pipeline/rte_swx_pipeline.c | 226 +++++++++++++++++++++++++
 1 file changed, 226 insertions(+)

diff --git a/lib/librte_pipeline/rte_swx_pipeline.c b/lib/librte_pipeline/rte_swx_pipeline.c
index d51fec821..77eae1927 100644
--- a/lib/librte_pipeline/rte_swx_pipeline.c
+++ b/lib/librte_pipeline/rte_swx_pipeline.c
@@ -5700,6 +5700,230 @@ instr_verify(struct rte_swx_pipeline *p __rte_unused,
 	return 0;
 }
 
+/* Detect a run of 2 to RTE_DIM(io.hdr.header_id) consecutive header extract
+ * instructions starting at instr[0]. Any instruction but the first must have
+ * no users. Returns 1 and sets *n_pattern_instr to the run length, else 0.
+ */
+static int
+instr_pattern_extract_many_detect(struct instruction *instr,
+				  struct instruction_data *data,
+				  uint32_t n_instr,
+				  uint32_t *n_pattern_instr)
+{
+	uint32_t n_max = RTE_DIM(instr->io.hdr.header_id);
+	uint32_t n = 0;
+
+	if (n_max > n_instr)
+		n_max = n_instr;
+
+	while (n < n_max &&
+	       !data[n].invalid &&
+	       instr[n].type == INSTR_HDR_EXTRACT &&
+	       (!n || !data[n].n_users))
+		n++;
+
+	if (n < 2)
+		return 0;
+
+	*n_pattern_instr = n;
+	return 1;
+}
+
+/* Merge extracts 1 .. n_instr-1 into instr[0] and flag them as invalid. */
+static void
+instr_pattern_extract_many_optimize(struct instruction *instr,
+				    struct instruction_data *data,
+				    uint32_t n_instr)
+{
+	uint32_t k;
+
+	for (k = 1; k < n_instr; k++) {
+		instr->type++; /* One type step per merged header. */
+		instr->io.hdr.header_id[k] = instr[k].io.hdr.header_id[0];
+		instr->io.hdr.struct_id[k] = instr[k].io.hdr.struct_id[0];
+		instr->io.hdr.n_bytes[k] = instr[k].io.hdr.n_bytes[0];
+		data[k].invalid = 1;
+	}
+}
+
+/* Detect 1 to RTE_DIM(io.hdr.header_id) consecutive header emit instructions
+ * starting at instr[0] and immediately followed by a TX instruction. Only
+ * instr[0] may have users, as the other instructions of the pattern (the TX
+ * included) get invalidated by instr_pattern_emit_many_tx_optimize().
+ * Returns 1 and sets *n_pattern_instr (emits + TX) on a match, else 0.
+ */
+static int
+instr_pattern_emit_many_tx_detect(struct instruction *instr,
+				  struct instruction_data *data,
+				  uint32_t n_instr,
+				  uint32_t *n_pattern_instr)
+{
+	uint32_t i;
+
+	for (i = 0; i < n_instr; i++) {
+		if (data[i].invalid ||
+		    instr[i].type != INSTR_HDR_EMIT ||
+		    i == RTE_DIM(instr->io.hdr.header_id) ||
+		    (i && data[i].n_users))
+			break;
+	}
+
+	/* Need one emit or more, plus one more instruction within bounds. */
+	if (!i || i == n_instr)
+		return 0;
+
+	/* That instruction must be a valid TX with no users of its own. */
+	if (data[i].invalid || instr[i].type != INSTR_TX || data[i].n_users)
+		return 0;
+
+	*n_pattern_instr = i + 1;
+	return 1;
+}
+
+/* Merge the n_instr instructions "emit, ..., emit, tx" into instr[0]. */
+static void
+instr_pattern_emit_many_tx_optimize(struct instruction *instr,
+				    struct instruction_data *data,
+				    uint32_t n_instr)
+{
+	uint32_t pos = 1;
+
+	/* Emits 1 .. n_instr - 2: hand each header over to instr[0]. */
+	for ( ; pos < n_instr - 1; pos++) {
+		instr->type++;
+		instr->io.hdr.header_id[pos] = instr[pos].io.hdr.header_id[0];
+		instr->io.hdr.struct_id[pos] = instr[pos].io.hdr.struct_id[0];
+		instr->io.hdr.n_bytes[pos] = instr[pos].io.hdr.n_bytes[0];
+		data[pos].invalid = 1;
+	}
+
+	/* pos now indexes the trailing TX: take over its I/O fields too. */
+	instr->type++;
+	instr->io.io.offset = instr[pos].io.io.offset;
+	instr->io.io.n_bits = instr[pos].io.io.n_bits;
+	data[pos].invalid = 1;
+}
+
+/* Detect a run of 2 to RTE_DIM(dma.dst.header_id) consecutive INSTR_DMA_HT
+ * instructions starting at instr[0]. Any instruction but the first must have
+ * no users. Returns 1 and sets *n_pattern_instr to the run length, else 0.
+ */
+static int
+instr_pattern_dma_many_detect(struct instruction *instr,
+			      struct instruction_data *data,
+			      uint32_t n_instr,
+			      uint32_t *n_pattern_instr)
+{
+	const uint32_t n_slots = RTE_DIM(instr->dma.dst.header_id);
+	uint32_t n;
+
+	for (n = 0; n < n_instr && n < n_slots; n++) {
+		int mergeable = !data[n].invalid &&
+			instr[n].type == INSTR_DMA_HT &&
+			(!n || !data[n].n_users);
+
+		if (!mergeable)
+			break;
+	}
+
+	if (n < 2)
+		return 0;
+
+	*n_pattern_instr = n;
+	return 1;
+}
+
+/* Merge DMA instructions 1 .. n_instr-1 into instr[0]; flag them invalid. */
+static void
+instr_pattern_dma_many_optimize(struct instruction *instr,
+				struct instruction_data *data,
+				uint32_t n_instr)
+{
+	uint32_t k;
+
+	for (k = 1; k < n_instr; k++) {
+		instr->type++; /* One type step per merged instruction. */
+		instr->dma.dst.header_id[k] = instr[k].dma.dst.header_id[0];
+		instr->dma.dst.struct_id[k] = instr[k].dma.dst.struct_id[0];
+		instr->dma.src.offset[k] = instr[k].dma.src.offset[0];
+		instr->dma.n_bytes[k] = instr[k].dma.n_bytes[0];
+		data[k].invalid = 1;
+	}
+}
+
+/* Optimize the instruction array in place, in two passes:
+ *  1. Look for the "extract many", "emit many + TX" and "DMA many" patterns;
+ *     each match is folded into its first instruction, while the other
+ *     instructions of the match are flagged as invalid.
+ *  2. Compact both arrays by dropping the instructions flagged as invalid.
+ * Returns the number of instructions left after the compaction.
+ */
+static uint32_t
+instr_optimize(struct instruction *instructions,
+	       struct instruction_data *instruction_data,
+	       uint32_t n_instructions)
+{
+	uint32_t i = 0, pos = 0;
+
+	/* Pass 1: try each pattern in turn at the current instruction. */
+	while (i < n_instructions) {
+		struct instruction *instr = &instructions[i];
+		struct instruction_data *data = &instruction_data[i];
+		uint32_t n_left = n_instructions - i;
+		uint32_t n_instr = 0;
+
+		if (instr_pattern_extract_many_detect(instr,
+						      data,
+						      n_left,
+						      &n_instr)) {
+			/* Extract many. */
+			instr_pattern_extract_many_optimize(instr,
+							    data,
+							    n_instr);
+		} else if (instr_pattern_emit_many_tx_detect(instr,
+							     data,
+							     n_left,
+							     &n_instr)) {
+			/* Emit many + TX. */
+			instr_pattern_emit_many_tx_optimize(instr,
+							    data,
+							    n_instr);
+		} else if (instr_pattern_dma_many_detect(instr,
+							 data,
+							 n_left,
+							 &n_instr)) {
+			/* DMA many. */
+			instr_pattern_dma_many_optimize(instr, data, n_instr);
+		} else {
+			/* No pattern starting at the current instruction. */
+			n_instr = 1;
+		}
+
+		/* Resume right after the instructions consumed above. */
+		i += n_instr;
+	}
+
+	/* Pass 2: eliminate the instructions that have been optimized out. */
+	for (i = 0; i < n_instructions; i++) {
+		if (instruction_data[i].invalid)
+			continue;
+
+		/* Move the entry down only if a gap has opened before it. */
+		if (pos != i) {
+			memcpy(&instructions[pos],
+			       &instructions[i],
+			       sizeof(instructions[i]));
+			memcpy(&instruction_data[pos],
+			       &instruction_data[i],
+			       sizeof(instruction_data[i]));
+		}
+
+		pos++;
+	}
+
+	return pos;
+}
+
 static int
 instruction_config(struct rte_swx_pipeline *p,
 		   struct action *a,
@@ -5752,6 +5976,8 @@ instruction_config(struct rte_swx_pipeline *p,
 	if (err)
 		goto error;
 
+	n_instructions = instr_optimize(instr, data, n_instructions);
+
 	err = instr_jmp_resolve(instr, data, n_instructions);
 	if (err)
 		goto error;
-- 
2.17.1