From: longli@linuxonhyperv.com
To: Ferruh Yigit
Cc: dev@dpdk.org, Ajay Sharma, Stephen Hemminger, Long Li
Subject: [Patch v10 11/18] net/mana: implement the hardware layer operations
Date: Wed, 5 Oct 2022 16:22:01 -0700
Message-Id: <1665012128-20520-12-git-send-email-longli@linuxonhyperv.com>
In-Reply-To: <1665012128-20520-1-git-send-email-longli@linuxonhyperv.com>
References: <1663987546-15982-1-git-send-email-longli@linuxonhyperv.com>
 <1665012128-20520-1-git-send-email-longli@linuxonhyperv.com>
Reply-To: longli@microsoft.com

From: Long Li

The hardware layer of MANA understands the device queue and doorbell
formats. These functions are implemented for use by the packet RX/TX code.

Signed-off-by: Long Li
---
Change log:
v2:
Remove unused header files.
Rename a camel case.
v5:
Use RTE_BIT32() instead of defining a new BIT()
v6:
add rte_rmb() after reading owner bits
v8:
fix coding style of function definitions.
use capital letters for all enum names
v9:
Add back RTE_BIT32() in v5 (rebase accident)
Move data definitions from earlier patch.
v10:
Use enum for DOORBELL_OFFSET_XXX

 drivers/net/mana/gdma.c      | 303 +++++++++++++++++++++++++++++++++++
 drivers/net/mana/mana.h      | 191 ++++++++++++++++++++++
 drivers/net/mana/meson.build |   1 +
 3 files changed, 495 insertions(+)
 create mode 100644 drivers/net/mana/gdma.c

diff --git a/drivers/net/mana/gdma.c b/drivers/net/mana/gdma.c
new file mode 100644
index 0000000000..370324208a
--- /dev/null
+++ b/drivers/net/mana/gdma.c
@@ -0,0 +1,303 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Microsoft Corporation
+ */
+
+#include
+#include
+
+#include "mana.h"
+
+uint8_t *
+gdma_get_wqe_pointer(struct mana_gdma_queue *queue)
+{
+	uint32_t offset_in_bytes =
+		(queue->head * GDMA_WQE_ALIGNMENT_UNIT_SIZE) &
+		(queue->size - 1);
+
+	DRV_LOG(DEBUG, "txq sq_head %u sq_size %u offset_in_bytes %u",
+		queue->head, queue->size, offset_in_bytes);
+
+	if (offset_in_bytes + GDMA_WQE_ALIGNMENT_UNIT_SIZE > queue->size)
+		DRV_LOG(ERR, "fatal error: offset_in_bytes %u too big",
+			offset_in_bytes);
+
+	return ((uint8_t *)queue->buffer) + offset_in_bytes;
+}
+
+static uint32_t
+write_dma_client_oob(uint8_t *work_queue_buffer_pointer,
+		     const struct gdma_work_request *work_request,
+		     uint32_t client_oob_size)
+{
+	uint8_t *p = work_queue_buffer_pointer;
+
+	struct gdma_wqe_dma_oob *header = (struct gdma_wqe_dma_oob *)p;
+
+	memset(header, 0, sizeof(struct gdma_wqe_dma_oob));
+	header->num_sgl_entries = work_request->num_sgl_elements;
+	header->inline_client_oob_size_in_dwords =
+		client_oob_size / sizeof(uint32_t);
+	header->client_data_unit = work_request->client_data_unit;
+
+	DRV_LOG(DEBUG, "queue buf %p sgl %u oob_h %u du %u oob_buf %p oob_b %u",
+		work_queue_buffer_pointer, header->num_sgl_entries,
+		header->inline_client_oob_size_in_dwords,
+		header->client_data_unit, work_request->inline_oob_data,
+		work_request->inline_oob_size_in_bytes);
+
+	p += sizeof(struct gdma_wqe_dma_oob);
+	if (work_request->inline_oob_data &&
+	    work_request->inline_oob_size_in_bytes > 0) {
+		memcpy(p, work_request->inline_oob_data,
+		       work_request->inline_oob_size_in_bytes);
+		if (client_oob_size > work_request->inline_oob_size_in_bytes)
+			memset(p + work_request->inline_oob_size_in_bytes, 0,
+			       client_oob_size -
+			       work_request->inline_oob_size_in_bytes);
+	}
+
+	return sizeof(struct gdma_wqe_dma_oob) + client_oob_size;
+}
+
+static uint32_t
+write_scatter_gather_list(uint8_t *work_queue_head_pointer,
+			  uint8_t *work_queue_end_pointer,
+			  uint8_t *work_queue_cur_pointer,
+			  struct gdma_work_request *work_request)
+{
+	struct gdma_sgl_element *sge_list;
+	struct gdma_sgl_element dummy_sgl[1];
+	uint8_t *address;
+	uint32_t size;
+	uint32_t num_sge;
+	uint32_t size_to_queue_end;
+	uint32_t sge_list_size;
+
+	DRV_LOG(DEBUG, "work_queue_cur_pointer %p work_request->flags %x",
+		work_queue_cur_pointer, work_request->flags);
+
+	num_sge = work_request->num_sgl_elements;
+	sge_list = work_request->sgl;
+	size_to_queue_end = (uint32_t)(work_queue_end_pointer -
+				       work_queue_cur_pointer);
+
+	if (num_sge == 0) {
+		/* Per spec, the case of an empty SGL should be handled as
+		 * follows to avoid corrupted WQE errors:
+		 * Write one dummy SGL entry
+		 * Set the address to 1, leave the rest as 0
+		 */
+		dummy_sgl[num_sge].address = 1;
+		dummy_sgl[num_sge].size = 0;
+		dummy_sgl[num_sge].memory_key = 0;
+		num_sge++;
+		sge_list = dummy_sgl;
+	}
+
+	sge_list_size = 0;
+	{
+		address = (uint8_t *)sge_list;
+		size = sizeof(struct gdma_sgl_element) * num_sge;
+		if (size_to_queue_end < size) {
+			memcpy(work_queue_cur_pointer, address,
+			       size_to_queue_end);
+			work_queue_cur_pointer = work_queue_head_pointer;
+			address += size_to_queue_end;
+			size -= size_to_queue_end;
+		}
+
+		memcpy(work_queue_cur_pointer, address, size);
+		sge_list_size = size;
+	}
+
+	DRV_LOG(DEBUG, "sge %u address 0x%" PRIx64 " size %u key %u list_s %u",
+		num_sge, sge_list->address, sge_list->size,
+		sge_list->memory_key, sge_list_size);
+
+	return sge_list_size;
+}
+
+/*
+ * Post a work request to queue.
+ */
+int
+gdma_post_work_request(struct mana_gdma_queue *queue,
+		       struct gdma_work_request *work_req,
+		       struct gdma_posted_wqe_info *wqe_info)
+{
+	uint32_t client_oob_size =
+		work_req->inline_oob_size_in_bytes >
+			INLINE_OOB_SMALL_SIZE_IN_BYTES ?
+		INLINE_OOB_LARGE_SIZE_IN_BYTES :
+		INLINE_OOB_SMALL_SIZE_IN_BYTES;
+
+	uint32_t sgl_data_size = sizeof(struct gdma_sgl_element) *
+		RTE_MAX((uint32_t)1, work_req->num_sgl_elements);
+	uint32_t wqe_size =
+		RTE_ALIGN(sizeof(struct gdma_wqe_dma_oob) +
+			  client_oob_size + sgl_data_size,
+			  GDMA_WQE_ALIGNMENT_UNIT_SIZE);
+	uint8_t *wq_buffer_pointer;
+	uint32_t queue_free_units = queue->count - (queue->head - queue->tail);
+
+	if (wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE > queue_free_units) {
+		DRV_LOG(DEBUG, "WQE size %u queue count %u head %u tail %u",
+			wqe_size, queue->count, queue->head, queue->tail);
+		return -EBUSY;
+	}
+
+	DRV_LOG(DEBUG, "client_oob_size %u sgl_data_size %u wqe_size %u",
+		client_oob_size, sgl_data_size, wqe_size);
+
+	if (wqe_info) {
+		wqe_info->wqe_index =
+			((queue->head * GDMA_WQE_ALIGNMENT_UNIT_SIZE) &
+			 (queue->size - 1)) / GDMA_WQE_ALIGNMENT_UNIT_SIZE;
+		wqe_info->unmasked_queue_offset = queue->head;
+		wqe_info->wqe_size_in_bu =
+			wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE;
+	}
+
+	wq_buffer_pointer = gdma_get_wqe_pointer(queue);
+	wq_buffer_pointer += write_dma_client_oob(wq_buffer_pointer, work_req,
+						  client_oob_size);
+	if (wq_buffer_pointer >= ((uint8_t *)queue->buffer) + queue->size)
+		wq_buffer_pointer -= queue->size;
+
+	write_scatter_gather_list((uint8_t *)queue->buffer,
+				  (uint8_t *)queue->buffer + queue->size,
+				  wq_buffer_pointer, work_req);
+
+	queue->head += wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE;
+
+	return 0;
+}
+
+union gdma_doorbell_entry {
+	uint64_t as_uint64;
+
+	struct {
+		uint64_t id		: 24;
+		uint64_t reserved	: 8;
+		uint64_t tail_ptr	: 31;
+		uint64_t arm		: 1;
+	} cq;
+
+	struct {
+		uint64_t id		: 24;
+		uint64_t wqe_cnt	: 8;
+		uint64_t tail_ptr	: 32;
+	} rq;
+
+	struct {
+		uint64_t id		: 24;
+		uint64_t reserved	: 8;
+		uint64_t tail_ptr	: 32;
+	} sq;
+
+	struct {
+		uint64_t id		: 16;
+		uint64_t reserved	: 16;
+		uint64_t tail_ptr	: 31;
+		uint64_t arm		: 1;
+	} eq;
+}; /* HW DATA */
+
+enum {
+	DOORBELL_OFFSET_SQ = 0x0,
+	DOORBELL_OFFSET_RQ = 0x400,
+	DOORBELL_OFFSET_CQ = 0x800,
+	DOORBELL_OFFSET_EQ = 0xFF8,
+};
+
+/*
+ * Write to hardware doorbell to notify new activity.
+ */
+int
+mana_ring_doorbell(void *db_page, enum gdma_queue_types queue_type,
+		   uint32_t queue_id, uint32_t tail)
+{
+	uint8_t *addr = db_page;
+	union gdma_doorbell_entry e = {};
+
+	switch (queue_type) {
+	case GDMA_QUEUE_SEND:
+		e.sq.id = queue_id;
+		e.sq.tail_ptr = tail;
+		addr += DOORBELL_OFFSET_SQ;
+		break;
+
+	case GDMA_QUEUE_RECEIVE:
+		e.rq.id = queue_id;
+		e.rq.tail_ptr = tail;
+		e.rq.wqe_cnt = 1;
+		addr += DOORBELL_OFFSET_RQ;
+		break;
+
+	case GDMA_QUEUE_COMPLETION:
+		e.cq.id = queue_id;
+		e.cq.tail_ptr = tail;
+		e.cq.arm = 1;
+		addr += DOORBELL_OFFSET_CQ;
+		break;
+
+	default:
+		DRV_LOG(ERR, "Unsupported queue type %d", queue_type);
+		return -1;
+	}
+
+	/* Ensure all writes are done before ringing doorbell */
+	rte_wmb();
+
+	DRV_LOG(DEBUG, "db_page %p addr %p queue_id %u type %u tail %u",
+		db_page, addr, queue_id, queue_type, tail);
+
+	rte_write64(e.as_uint64, addr);
+	return 0;
+}
+
+/*
+ * Poll completion queue for completions.
+ */
+int
+gdma_poll_completion_queue(struct mana_gdma_queue *cq, struct gdma_comp *comp)
+{
+	struct gdma_hardware_completion_entry *cqe;
+	uint32_t head = cq->head % cq->count;
+	uint32_t new_owner_bits, old_owner_bits;
+	uint32_t cqe_owner_bits;
+	struct gdma_hardware_completion_entry *buffer = cq->buffer;
+
+	cqe = &buffer[head];
+	new_owner_bits = (cq->head / cq->count) & COMPLETION_QUEUE_OWNER_MASK;
+	old_owner_bits = (cq->head / cq->count - 1) &
+			 COMPLETION_QUEUE_OWNER_MASK;
+	cqe_owner_bits = cqe->owner_bits;
+
+	DRV_LOG(DEBUG, "comp cqe bits 0x%x owner bits 0x%x",
+		cqe_owner_bits, old_owner_bits);
+
+	if (cqe_owner_bits == old_owner_bits)
+		return 0; /* No new entry */
+
+	if (cqe_owner_bits != new_owner_bits) {
+		DRV_LOG(ERR, "CQ overflowed, ID %u cqe 0x%x new 0x%x",
+			cq->id, cqe_owner_bits, new_owner_bits);
+		return -1;
+	}
+
+	/* Ensure checking owner bits happens before reading from CQE */
+	rte_rmb();
+
+	comp->work_queue_number = cqe->wq_num;
+	comp->send_work_queue = cqe->is_sq;
+
+	memcpy(comp->completion_data, cqe->dma_client_data, GDMA_COMP_DATA_SIZE);
+
+	cq->head++;
+
+	DRV_LOG(DEBUG, "comp new 0x%x old 0x%x cqe 0x%x wq %u sq %u head %u",
+		new_owner_bits, old_owner_bits, cqe_owner_bits,
+		comp->work_queue_number, comp->send_work_queue, cq->head);
+	return 1;
+}
diff --git a/drivers/net/mana/mana.h b/drivers/net/mana/mana.h
index 1ef9897d12..09e2fc3e61 100644
--- a/drivers/net/mana/mana.h
+++ b/drivers/net/mana/mana.h
@@ -44,6 +44,177 @@ struct mana_shared_data {
 #define MAX_RECEIVE_BUFFERS_PER_QUEUE	256
 #define MAX_SEND_BUFFERS_PER_QUEUE	256
 
+#define GDMA_WQE_ALIGNMENT_UNIT_SIZE 32
+
+#define COMP_ENTRY_SIZE 64
+#define MAX_TX_WQE_SIZE 512
+#define MAX_RX_WQE_SIZE 256
+
+/* Values from the GDMA specification document, WQE format description */
+#define INLINE_OOB_SMALL_SIZE_IN_BYTES 8
+#define INLINE_OOB_LARGE_SIZE_IN_BYTES 24
+
+#define NOT_USING_CLIENT_DATA_UNIT 0
+
+enum gdma_queue_types {
+	GDMA_QUEUE_TYPE_INVALID = 0,
+	GDMA_QUEUE_SEND,
+	GDMA_QUEUE_RECEIVE,
+	GDMA_QUEUE_COMPLETION,
+	GDMA_QUEUE_EVENT,
+	GDMA_QUEUE_TYPE_MAX = 16,
+	/*Room for expansion */
+
+	/* This enum can be expanded to add more queue types but
+	 * it's expected to be done in a contiguous manner.
+	 * Failing that will result in unexpected behavior.
+	 */
+};
+
+#define WORK_QUEUE_NUMBER_BASE_BITS 10
+
+struct gdma_header {
+	/* size of the entire gdma structure, including the entire length of
+	 * the struct that is formed by extending other gdma struct. i.e.
+	 * GDMA_BASE_SPEC extends gdma_header, GDMA_EVENT_QUEUE_SPEC extends
+	 * GDMA_BASE_SPEC, StructSize for GDMA_EVENT_QUEUE_SPEC will be size of
+	 * GDMA_EVENT_QUEUE_SPEC which includes size of GDMA_BASE_SPEC and size
+	 * of gdma_header.
+	 * Above example is for illustration purpose and is not in code
+	 */
+	size_t struct_size;
+};
+
+/* The following macros are from GDMA SPEC 3.6, "Table 2: CQE data structure"
+ * and "Table 4: Event Queue Entry (EQE) data format"
+ */
+#define GDMA_COMP_DATA_SIZE 0x3C /* Must be a multiple of 4 */
+#define GDMA_COMP_DATA_SIZE_IN_UINT32 (GDMA_COMP_DATA_SIZE / 4)
+
+#define COMPLETION_QUEUE_ENTRY_WORK_QUEUE_INDEX	0
+#define COMPLETION_QUEUE_ENTRY_WORK_QUEUE_SIZE	24
+#define COMPLETION_QUEUE_ENTRY_SEND_WORK_QUEUE_INDEX	24
+#define COMPLETION_QUEUE_ENTRY_SEND_WORK_QUEUE_SIZE	1
+#define COMPLETION_QUEUE_ENTRY_OWNER_BITS_INDEX	29
+#define COMPLETION_QUEUE_ENTRY_OWNER_BITS_SIZE	3
+
+#define COMPLETION_QUEUE_OWNER_MASK \
+	((1 << (COMPLETION_QUEUE_ENTRY_OWNER_BITS_SIZE)) - 1)
+
+struct gdma_comp {
+	struct gdma_header gdma_header;
+
+	/* Filled by GDMA core */
+	uint32_t completion_data[GDMA_COMP_DATA_SIZE_IN_UINT32];
+
+	/* Filled by GDMA core */
+	uint32_t work_queue_number;
+
+	/* Filled by GDMA core */
+	bool send_work_queue;
+};
+
+struct gdma_hardware_completion_entry {
+	char dma_client_data[GDMA_COMP_DATA_SIZE];
+	union {
+		uint32_t work_queue_owner_bits;
+		struct {
+			uint32_t wq_num		: 24;
+			uint32_t is_sq		: 1;
+			uint32_t reserved	: 4;
+			uint32_t owner_bits	: 3;
+		};
+	};
+}; /* HW DATA */
+
+struct gdma_posted_wqe_info {
+	struct gdma_header gdma_header;
+
+	/* size of the written wqe in basic units (32B), filled by GDMA core.
+	 * Use this value to progress the work queue after the wqe is processed
+	 * by hardware.
+	 */
+	uint32_t wqe_size_in_bu;
+
+	/* At the time of writing the wqe to the work queue, the offset in the
+	 * work queue buffer where by the wqe will be written. Each unit
+	 * represents 32B of buffer space.
+	 */
+	uint32_t wqe_index;
+
+	/* Unmasked offset in the queue to which the WQE was written.
+	 * In 32 byte units.
+	 */
+	uint32_t unmasked_queue_offset;
+};
+
+struct gdma_sgl_element {
+	uint64_t address;
+	uint32_t memory_key;
+	uint32_t size;
+};
+
+#define MAX_SGL_ENTRIES_FOR_TRANSMIT 30
+
+struct one_sgl {
+	struct gdma_sgl_element gdma_sgl[MAX_SGL_ENTRIES_FOR_TRANSMIT];
+};
+
+struct gdma_work_request {
+	struct gdma_header gdma_header;
+	struct gdma_sgl_element *sgl;
+	uint32_t num_sgl_elements;
+	uint32_t inline_oob_size_in_bytes;
+	void *inline_oob_data;
+	uint32_t flags; /* From _gdma_work_request_FLAGS */
+	uint32_t client_data_unit; /* For LSO, this is the MTU of the data */
+};
+
+enum mana_cqe_type {
+	CQE_INVALID = 0,
+};
+
+struct mana_cqe_header {
+	uint32_t cqe_type	: 6;
+	uint32_t client_type	: 2;
+	uint32_t vendor_err	: 24;
+}; /* HW DATA */
+
+/* NDIS HASH Types */
+#define NDIS_HASH_IPV4		RTE_BIT32(0)
+#define NDIS_HASH_TCP_IPV4	RTE_BIT32(1)
+#define NDIS_HASH_UDP_IPV4	RTE_BIT32(2)
+#define NDIS_HASH_IPV6		RTE_BIT32(3)
+#define NDIS_HASH_TCP_IPV6	RTE_BIT32(4)
+#define NDIS_HASH_UDP_IPV6	RTE_BIT32(5)
+#define NDIS_HASH_IPV6_EX	RTE_BIT32(6)
+#define NDIS_HASH_TCP_IPV6_EX	RTE_BIT32(7)
+#define NDIS_HASH_UDP_IPV6_EX	RTE_BIT32(8)
+
+#define MANA_HASH_L3 (NDIS_HASH_IPV4 | NDIS_HASH_IPV6 | NDIS_HASH_IPV6_EX)
+#define MANA_HASH_L4 \
+	(NDIS_HASH_TCP_IPV4 | NDIS_HASH_UDP_IPV4 | NDIS_HASH_TCP_IPV6 | \
+	 NDIS_HASH_UDP_IPV6 | NDIS_HASH_TCP_IPV6_EX | NDIS_HASH_UDP_IPV6_EX)
+
+struct gdma_wqe_dma_oob {
+	uint32_t reserved:24;
+	uint32_t last_v_bytes:8;
+	union {
+		uint32_t flags;
+		struct {
+			uint32_t num_sgl_entries:8;
+			uint32_t inline_client_oob_size_in_dwords:3;
+			uint32_t client_oob_in_sgl:1;
+			uint32_t consume_credit:1;
+			uint32_t fence:1;
+			uint32_t reserved1:2;
+			uint32_t client_data_unit:14;
+			uint32_t check_sn:1;
+			uint32_t sgl_direct:1;
+		};
+	};
+};
+
 struct mana_mr_cache {
 	uint32_t lkey;
 	uintptr_t addr;
@@ -103,6 +274,15 @@ struct mana_rxq_desc {
 	uint32_t wqe_size_in_bu;
 };
 
+struct mana_gdma_queue {
+	void *buffer;
+	uint32_t count;	/* in entries */
+	uint32_t size;	/* in bytes */
+	uint32_t id;
+	uint32_t head;
+	uint32_t tail;
+};
+
 #define MANA_MR_BTREE_PER_QUEUE_N	64
 
 struct mana_txq {
@@ -152,12 +332,23 @@ extern int mana_logtype_init;
 
 #define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
 
+int mana_ring_doorbell(void *db_page, enum gdma_queue_types queue_type,
+		       uint32_t queue_id, uint32_t tail);
+
+int gdma_post_work_request(struct mana_gdma_queue *queue,
+			   struct gdma_work_request *work_req,
+			   struct gdma_posted_wqe_info *wqe_info);
+uint8_t *gdma_get_wqe_pointer(struct mana_gdma_queue *queue);
+
 uint16_t mana_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
 			       uint16_t pkts_n);
 
 uint16_t mana_tx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
 			       uint16_t pkts_n);
 
+int gdma_poll_completion_queue(struct mana_gdma_queue *cq,
+			       struct gdma_comp *comp);
+
 struct mana_mr_cache *mana_find_pmd_mr(struct mana_mr_btree *local_tree,
 				       struct mana_priv *priv,
 				       struct rte_mbuf *mbuf);
diff --git a/drivers/net/mana/meson.build b/drivers/net/mana/meson.build
index c4a19ad745..dea8b97afb 100644
--- a/drivers/net/mana/meson.build
+++ b/drivers/net/mana/meson.build
@@ -10,6 +10,7 @@ endif
 deps += ['pci', 'bus_pci', 'net', 'eal', 'kvargs']
 
 sources += files(
+	'gdma.c',
 	'mana.c',
 	'mp.c',
 	'mr.c',
-- 
2.17.1
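
For readers following the series, the sketch below shows how a datapath caller might combine the helpers added here: describe the packet buffers in a gdma_work_request, post it with gdma_post_work_request(), ring the send-queue doorbell with mana_ring_doorbell(), and drain completions with gdma_poll_completion_queue(). This is a minimal sketch only, not code from this patch or from the later RX/TX patches: the function name mana_post_and_poll_example(), the caller-provided queues, doorbell page and SGL, and the unit of the doorbell tail value are all assumptions made for illustration.

/* Minimal sketch, not part of the patch. Assumes the caller already owns
 * initialized send/completion queues (gdma_sq, gdma_cq), the mapped doorbell
 * page (db_page) and a populated SGL; all names here are illustrative only.
 */
static int
mana_post_and_poll_example(struct mana_gdma_queue *gdma_sq,
			   struct mana_gdma_queue *gdma_cq,
			   void *db_page,
			   struct gdma_sgl_element *sgl, uint32_t num_sge)
{
	struct gdma_work_request work_req = {};
	struct gdma_posted_wqe_info wqe_info = {};
	struct gdma_comp comp;
	int ret;

	/* Describe the buffers for one WQE */
	work_req.sgl = sgl;
	work_req.num_sgl_elements = num_sge;
	work_req.inline_oob_data = NULL;	/* no inline OOB in this sketch */
	work_req.inline_oob_size_in_bytes = 0;
	work_req.client_data_unit = NOT_USING_CLIENT_DATA_UNIT;

	/* Write the WQE into the send work queue; -EBUSY means the queue
	 * has no free units and the caller should retry later.
	 */
	ret = gdma_post_work_request(gdma_sq, &work_req, &wqe_info);
	if (ret)
		return ret;

	/* Notify hardware of the new send-queue tail. The exact unit of the
	 * tail value is defined by the datapath patches; the queue head is
	 * used here only symbolically.
	 */
	ret = mana_ring_doorbell(db_page, GDMA_QUEUE_SEND,
				 gdma_sq->id, gdma_sq->head);
	if (ret)
		return ret;

	/* Completion path: 1 = one completion copied into comp, 0 = CQ empty,
	 * negative = overflow detected by the owner-bit check.
	 */
	while (gdma_poll_completion_queue(gdma_cq, &comp) == 1) {
		/* comp.work_queue_number and comp.completion_data identify
		 * the completed WQE; free its buffers and advance the send
		 * queue tail by the size recorded at post time.
		 */
		gdma_sq->tail += wqe_info.wqe_size_in_bu;
	}

	/* Re-arm the completion queue after polling */
	return mana_ring_doorbell(db_page, GDMA_QUEUE_COMPLETION,
				  gdma_cq->id, gdma_cq->head);
}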