From mboxrd@z Thu Jan 1 00:00:00 1970
To: Bruce Richardson, dev@dpdk.org
Cc: cheng1.jiang@intel.com, patrick.fu@intel.com, ping.yu@intel.com
References: <20200721095140.719297-1-bruce.richardson@intel.com>
 <20200821162944.29840-1-bruce.richardson@intel.com>
 <20200821162944.29840-14-bruce.richardson@intel.com>
From: "Laatz, Kevin"
Date: Tue, 25 Aug 2020 16:27:56 +0100
In-Reply-To: <20200821162944.29840-14-bruce.richardson@intel.com>
Subject: Re: [dpdk-dev] [PATCH v2 13/18] raw/ioat: add data path for idxd devices
List-Id: DPDK patches and discussions

On 21/08/2020 17:29, Bruce Richardson wrote:
> Add support for doing copies using DSA hardware. This is implemented by
> just switching on the device type field at the start of the inline
> functions. Since there is no hardware which will have both device types
> present this branch will always be predictable after the first call,
> meaning it has little to no perf penalty.
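
For anyone skimming the thread, the switching described above boils down
to a pattern like the below (simplified sketch based on the wrappers at
the end of this patch, assuming the device type enum is the first field
of the private data for both device types):

    static inline int
    rte_ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst,
                    unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl,
                    int fence)
    {
            /* dev_private is assumed to start with the device type field */
            enum rte_ioat_dev_type *type = rte_rawdevs[dev_id].dev_private;

            /* a given system only has one device type, so after the first
             * call this branch is always predicted correctly */
            if (*type == RTE_IDXD_DEV)
                    return __idxd_enqueue_copy(dev_id, src, dst, length,
                                    src_hdl, dst_hdl, fence);
            else
                    return __ioat_enqueue_copy(dev_id, src, dst, length,
                                    src_hdl, dst_hdl, fence);
    }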
>
> Signed-off-by: Bruce Richardson
> ---
>  drivers/raw/ioat/ioat_common.c         |   1 +
>  drivers/raw/ioat/ioat_rawdev.c         |   1 +
>  drivers/raw/ioat/rte_ioat_rawdev_fns.h | 164 +++++++++++++++++++++++--
>  3 files changed, 157 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/raw/ioat/rte_ioat_rawdev_fns.h b/drivers/raw/ioat/rte_ioat_rawdev_fns.h
> index 1939437d50..19aaaa50c8 100644
> --- a/drivers/raw/ioat/rte_ioat_rawdev_fns.h
> +++ b/drivers/raw/ioat/rte_ioat_rawdev_fns.h
> @@ -194,8 +194,8 @@ struct rte_idxd_rawdev {
>  /**
>   * Enqueue a copy operation onto the ioat device
>   */
> -static inline int
> -rte_ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst,
> +static __rte_always_inline int
> +__ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst,
>  		unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl,
>  		int fence)
>  {
> @@ -233,8 +233,8 @@ rte_ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst,
>  /**
>   * Trigger hardware to begin performing enqueued copy operations
>   */
> -static inline void
> -rte_ioat_do_copies(int dev_id)
> +static __rte_always_inline void
> +__ioat_perform_ops(int dev_id)
>  {
>  	struct rte_ioat_rawdev *ioat = rte_rawdevs[dev_id].dev_private;
>  	ioat->desc_ring[(ioat->next_write - 1) & (ioat->ring_size - 1)].u
> @@ -248,8 +248,8 @@ rte_ioat_do_copies(int dev_id)
>   * @internal
>   * Returns the index of the last completed operation.
>   */
> -static inline int
> -rte_ioat_get_last_completed(struct rte_ioat_rawdev *ioat, int *error)
> +static __rte_always_inline int
> +__ioat_get_last_completed(struct rte_ioat_rawdev *ioat, int *error)
>  {
>  	uint64_t status = ioat->status;
>
> @@ -263,8 +263,8 @@ rte_ioat_get_last_completed(struct rte_ioat_rawdev *ioat, int *error)
>  /**
>   * Returns details of copy operations that have been completed
>   */
> -static inline int
> -rte_ioat_completed_copies(int dev_id, uint8_t max_copies,
> +static __rte_always_inline int
> +__ioat_completed_ops(int dev_id, uint8_t max_copies,
>  		uintptr_t *src_hdls, uintptr_t *dst_hdls)
>  {
>  	struct rte_ioat_rawdev *ioat = rte_rawdevs[dev_id].dev_private;
> @@ -274,7 +274,7 @@ rte_ioat_completed_copies(int dev_id, uint8_t max_copies,
>  	int error;
>  	int i = 0;
>
> -	end_read = (rte_ioat_get_last_completed(ioat, &error) + 1) & mask;
> +	end_read = (__ioat_get_last_completed(ioat, &error) + 1) & mask;
>  	count = (end_read - (read & mask)) & mask;
>
>  	if (error) {
> @@ -311,4 +311,150 @@ rte_ioat_completed_copies(int dev_id, uint8_t max_copies,
>  	return count;
>  }
>
> +static __rte_always_inline int
> +__idxd_enqueue_copy(int dev_id, rte_iova_t src, rte_iova_t dst,
> +		unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl,
> +		int fence __rte_unused)
> +{
> +	struct rte_idxd_rawdev *idxd = rte_rawdevs[dev_id].dev_private;

For C++ compatibility, "dev_private" needs to be type cast to
"struct rte_idxd_rawdev *" here.
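
Something along these lines should keep a C++ build of the header happy
(untested, just to illustrate the cast):

    struct rte_idxd_rawdev *idxd =
            (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;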
> +	struct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_batch];
> +	uint32_t op_flags = (idxd_op_memmove << IDXD_CMD_OP_SHIFT) |
> +			IDXD_FLAG_CACHE_CONTROL;
> +
> +	/* check for room in the handle ring */
> +	if (((idxd->next_free_hdl + 1) & (idxd->hdl_ring_sz - 1)) == idxd->next_ret_hdl) {
> +		rte_errno = ENOSPC;
> +		return 0;
> +	}
> +	if (b->op_count >= BATCH_SIZE) {
> +		rte_errno = ENOSPC;
> +		return 0;
> +	}
> +	/* check that we can actually use the current batch */
> +	if (b->submitted) {
> +		rte_errno = ENOSPC;
> +		return 0;
> +	}
> +
> +	/* write the descriptor */
> +	b->ops[b->op_count++] = (struct rte_idxd_hw_desc){
> +		.op_flags = op_flags,
> +		.src = src,
> +		.dst = dst,
> +		.size = length
> +	};
> +
> +	/* store the completion details */
> +	if (!idxd->hdls_disable)
> +		idxd->hdl_ring[idxd->next_free_hdl] = (struct rte_idxd_user_hdl) {
> +			.src = src_hdl,
> +			.dst = dst_hdl
> +		};
> +	if (++idxd->next_free_hdl == idxd->hdl_ring_sz)
> +		idxd->next_free_hdl = 0;
> +
> +	return 1;
> +}
> +
> +static __rte_always_inline void
> +__idxd_movdir64b(volatile void *dst, const void *src)
> +{
> +	asm volatile (".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
> +			:
> +			: "a" (dst), "d" (src));
> +}
> +
> +static __rte_always_inline void
> +__idxd_perform_ops(int dev_id)
> +{
> +	struct rte_idxd_rawdev *idxd = rte_rawdevs[dev_id].dev_private;

Type cast needed here and more below.

Thanks,
Kevin

> +	struct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_batch];
> +
> +	if (b->submitted || b->op_count == 0)
> +		return;
> +	b->hdl_end = idxd->next_free_hdl;
> +	b->comp.status = 0;
> +	b->submitted = 1;
> +	b->batch_desc.size = b->op_count + 1;
> +	__idxd_movdir64b(idxd->portal, &b->batch_desc);
> +
> +	if (++idxd->next_batch == idxd->batch_ring_sz)
> +		idxd->next_batch = 0;
> +}
> +
> +static __rte_always_inline int
> +__idxd_completed_ops(int dev_id, uint8_t max_ops,
> +		uintptr_t *src_hdls, uintptr_t *dst_hdls)
> +{
> +	struct rte_idxd_rawdev *idxd = rte_rawdevs[dev_id].dev_private;
> +	struct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_completed];
> +	uint16_t h_idx = idxd->next_ret_hdl;
> +	int n = 0;
> +
> +	while (b->submitted && b->comp.status != 0) {
> +		idxd->last_completed_hdl = b->hdl_end;
> +		b->submitted = 0;
> +		b->op_count = 0;
> +		if (++idxd->next_completed == idxd->batch_ring_sz)
> +			idxd->next_completed = 0;
> +		b = &idxd->batch_ring[idxd->next_completed];
> +	}
> +
> +	if (!idxd->hdls_disable)
> +		for (n = 0; n < max_ops && h_idx != idxd->last_completed_hdl; n++) {
> +			src_hdls[n] = idxd->hdl_ring[h_idx].src;
> +			dst_hdls[n] = idxd->hdl_ring[h_idx].dst;
> +			if (++h_idx == idxd->hdl_ring_sz)
> +				h_idx = 0;
> +		}
> +	else
> +		while (h_idx != idxd->last_completed_hdl) {
> +			n++;
> +			if (++h_idx == idxd->hdl_ring_sz)
> +				h_idx = 0;
> +		}
> +
> +	idxd->next_ret_hdl = h_idx;
> +
> +	return n;
> +}
> +
> +static inline int
> +rte_ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst,
> +		unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl,
> +		int fence)
> +{
> +	enum rte_ioat_dev_type *type = rte_rawdevs[dev_id].dev_private;
> +	if (*type == RTE_IDXD_DEV)
> +		return __idxd_enqueue_copy(dev_id, src, dst, length,
> +				src_hdl, dst_hdl, fence);
> +	else
> +		return __ioat_enqueue_copy(dev_id, src, dst, length,
> +				src_hdl, dst_hdl, fence);
> +}
> +
> +static inline void
> +rte_ioat_do_copies(int dev_id)
> +{
> +	enum rte_ioat_dev_type *type = rte_rawdevs[dev_id].dev_private;
> +	if (*type == RTE_IDXD_DEV)
> +		return __idxd_perform_ops(dev_id);
> +	else
> +		return __ioat_perform_ops(dev_id);
> +}
> +
> +static inline int
> +rte_ioat_completed_copies(int dev_id, uint8_t max_copies,
> +		uintptr_t *src_hdls, uintptr_t *dst_hdls)
> +{
> +	enum rte_ioat_dev_type *type = rte_rawdevs[dev_id].dev_private;
> +	if (*type == RTE_IDXD_DEV)
> +		return __idxd_completed_ops(dev_id, max_copies,
> +				src_hdls, dst_hdls);
> +	else
> +		return __ioat_completed_ops(dev_id, max_copies,
> +				src_hdls, dst_hdls);
> +}
> +
> +
>  #endif /* _RTE_IOAT_RAWDEV_FNS_H_ */
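
To be specific about the "more below" in the comment above: the same cast
is needed anywhere dev_private is assigned to a typed pointer without one,
e.g. in __idxd_perform_ops() and __idxd_completed_ops(), and for the
device-type check in the three public wrappers. An untested sketch for the
latter:

    enum rte_ioat_dev_type *type =
            (enum rte_ioat_dev_type *)rte_rawdevs[dev_id].dev_private;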