From: Dekel Peled <dekelp@mellanox.com>
To: dev@dpdk.org, shahafs@mellanox.com, yskoh@mellanox.com
Cc: orika@mellanox.com
Date: Thu, 27 Sep 2018 17:18:55 +0300
Message-Id: <1538057935-34663-1-git-send-email-dekelp@mellanox.com>
X-Mailer: git-send-email 1.7.1
In-Reply-To: <1537105328-9367-1-git-send-email-dekelp@mellanox.com>
References: <1537105328-9367-1-git-send-email-dekelp@mellanox.com>
Subject: [dpdk-dev] [PATCH v2] net/mlx5: support metadata as flow rule criteria

The series starting at [1] adds the option to set a metadata value as a
match pattern when creating a new flow rule.

This patch adds metadata support to the mlx5 driver, in two parts:
- Set the metadata value in the matcher when creating a new flow rule.
- Copy the metadata value from the mbuf to the WQE, when indicated by
  ol_flags, in the various Tx burst functions.

[1] "ethdev: support metadata as flow rule criteria"
    http://mails.dpdk.org/archives/dev/2018-September/113270.html

Signed-off-by: Dekel Peled <dekelp@mellanox.com>
---
V2: Split the support of egress rules into a separate patch.
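
For reviewers, here is a minimal usage sketch of the feature, not part of
the patch. It assumes the ethdev series in [1] is applied, which provides
PKT_TX_METADATA, DEV_TX_OFFLOAD_MATCH_METADATA and the META pattern item;
the helper names are illustrative and error handling is omitted. As
implemented in this patch, the Tx path reads the value from hash.fdir.hi,
and the rule must be an egress rule (ingress is rejected):

#include <stdint.h>
#include <rte_byteorder.h>
#include <rte_flow.h>
#include <rte_mbuf.h>

/* Tag an outgoing packet; the PMD copies the value into the WQE when
 * the PKT_TX_METADATA flag is set.
 */
static void
tag_packet(struct rte_mbuf *m, uint32_t tag)
{
	m->ol_flags |= PKT_TX_METADATA;
	m->hash.fdir.hi = tag; /* field read by the Tx burst functions */
}

/* Create an egress rule matching packets carrying the same tag. */
static struct rte_flow *
create_meta_rule(uint16_t port_id, uint32_t tag, struct rte_flow_error *err)
{
	const struct rte_flow_attr attr = { .egress = 1 };
	const struct rte_flow_item_meta spec = {
		.data = rte_cpu_to_be_32(tag),
	};
	const struct rte_flow_item_meta mask = {
		.data = RTE_BE32(UINT32_MAX),
	};
	const struct rte_flow_item pattern[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_META,
			.spec = &spec,
			.mask = &mask,
		},
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_DROP },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	return rte_flow_create(port_id, &attr, pattern, actions, err);
}

The drop action is only a placeholder; any action the PMD accepts on the
egress direction can be combined with the META match in the same way.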
---
 drivers/net/mlx5/mlx5_flow.c          |  2 +-
 drivers/net/mlx5/mlx5_flow.h          |  8 +++
 drivers/net/mlx5/mlx5_flow_dv.c       | 94 ++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_prm.h           |  2 +-
 drivers/net/mlx5/mlx5_rxtx.c          | 35 +++++++++++--
 drivers/net/mlx5/mlx5_rxtx_vec.c      | 31 ++++++++---
 drivers/net/mlx5/mlx5_rxtx_vec.h      |  1 +
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h |  4 +-
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h  |  4 +-
 drivers/net/mlx5/mlx5_txq.c           |  6 +++
 10 files changed, 172 insertions(+), 15 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 8007bf1..9581691 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -417,7 +417,7 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
+int
 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
			  const uint8_t *mask,
			  const uint8_t *nic_mask,
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 10d700a..d91ae17 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -42,6 +42,9 @@
 #define MLX5_FLOW_LAYER_GRE (1u << 14)
 #define MLX5_FLOW_LAYER_MPLS (1u << 15)
 
+/* General pattern items bits. */
+#define MLX5_FLOW_ITEM_METADATA (1u << 16)
+
 /* Outer Masks. */
 #define MLX5_FLOW_LAYER_OUTER_L3 \
	(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
@@ -299,6 +302,11 @@ int mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
 int mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
				  const struct rte_flow_attr *attributes,
				  struct rte_flow_error *error);
+int mlx5_flow_item_acceptable(const struct rte_flow_item *item,
+			      const uint8_t *mask,
+			      const uint8_t *nic_mask,
+			      unsigned int size,
+			      struct rte_flow_error *error);
 int mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
				uint64_t item_flags,
				struct rte_flow_error *error);
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index cf663cd..2439f5e 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -36,6 +36,55 @@
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 
 /**
+ * Validate META item.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] attr
+ *   Attributes of flow that includes this item.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_validate_item_meta(const struct rte_flow_item *item,
+			     const struct rte_flow_attr *attr,
+			     struct rte_flow_error *error)
+{
+	const struct rte_flow_item_meta *spec = item->spec;
+	const struct rte_flow_item_meta *mask = item->mask;
+
+	const struct rte_flow_item_meta nic_mask = {
+		.data = RTE_BE32(UINT32_MAX)
+	};
+
+	int ret;
+
+	if (!spec)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+					  item->spec,
+					  "data cannot be empty");
+	if (!mask)
+		mask = &rte_flow_item_meta_mask;
+	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+					(const uint8_t *)&nic_mask,
+					sizeof(struct rte_flow_item_meta),
+					error);
+	if (ret < 0)
+		return ret;
+
+	if (attr->ingress)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+					  NULL,
+					  "pattern not supported for ingress");
+	return 0;
+}
+
+/**
  * Verify the @p attributes will be correctly understood by the NIC and store
  * them in the @p flow if everything is correct.
  *
@@ -216,6 +265,12 @@ return ret;
			item_flags |= MLX5_FLOW_LAYER_MPLS;
			break;
+		case RTE_FLOW_ITEM_TYPE_META:
+			ret = mlx5_flow_validate_item_meta(items, attr, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_ITEM_METADATA;
+			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
@@ -853,6 +908,41 @@
 }
 
 /**
+ * Add META item to matcher.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ */
+static void
+flow_dv_translate_item_meta(void *matcher, void *key,
+			    const struct rte_flow_item *item)
+{
+	const struct rte_flow_item_meta *metam;
+	const struct rte_flow_item_meta *metav;
+
+	void *misc2_m =
+		MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters_2);
+	void *misc2_v =
+		MLX5_ADDR_OF(fte_match_param, key, misc_parameters_2);
+
+	metam = (const void *)item->mask;
+	if (!metam)
+		metam = &rte_flow_item_meta_mask;
+
+	metav = (const void *)item->spec;
+	if (metav) {
+		MLX5_SET(fte_match_set_misc2, misc2_m, metadata_reg_a,
+			 RTE_BE32(metam->data));
+		MLX5_SET(fte_match_set_misc2, misc2_v, metadata_reg_a,
+			 RTE_BE32(metav->data));
+	}
+}
+
+/**
  * Update the matcher and the value based the selected item.
  *
  * @param[in, out] matcher
@@ -938,6 +1028,10 @@
		flow_dv_translate_item_vxlan(tmatcher->mask.buf, key,
					     item, inner);
		break;
+	case RTE_FLOW_ITEM_TYPE_META:
+		flow_dv_translate_item_meta(tmatcher->mask.buf, key, item);
+		break;
+
	default:
		break;
	}
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 4e2f9f4..a905397 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -159,7 +159,7 @@ struct mlx5_wqe_eth_seg_small {
	uint8_t cs_flags;
	uint8_t rsvd1;
	uint16_t mss;
-	uint32_t rsvd2;
+	uint32_t flow_table_metadata;
	uint16_t inline_hdr_sz;
	uint8_t inline_hdr[2];
 } __rte_aligned(MLX5_WQE_DWORD_SIZE);
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 558e6b6..d596e4e 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -523,6 +523,7 @@
	uint8_t tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG);
	uint32_t swp_offsets = 0;
	uint8_t swp_types = 0;
+	uint32_t metadata = 0;
	uint16_t tso_segsz = 0;
 #ifdef MLX5_PMD_SOFT_COUNTERS
	uint32_t total_length = 0;
@@ -566,6 +567,9 @@
		cs_flags = txq_ol_cksum_to_cs(buf);
		txq_mbuf_to_swp(txq, buf, (uint8_t *)&swp_offsets, &swp_types);
		raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
+		/* Copy metadata from mbuf if valid. */
+		if (buf->ol_flags & PKT_TX_METADATA)
+			metadata = buf->hash.fdir.hi;
		/* Replace the Ethernet type by the VLAN if necessary. */
		if (buf->ol_flags & PKT_TX_VLAN_PKT) {
			uint32_t vlan = rte_cpu_to_be_32(0x81000000 |
@@ -781,7 +785,7 @@
				swp_offsets,
				cs_flags | (swp_types << 8) |
				(rte_cpu_to_be_16(tso_segsz) << 16),
-				0,
+				rte_cpu_to_be_32(metadata),
				(ehdr << 16) | rte_cpu_to_be_16(tso_header_sz),
			};
		} else {
@@ -795,7 +799,7 @@
			wqe->eseg = (rte_v128u32_t){
				swp_offsets,
				cs_flags | (swp_types << 8),
-				0,
+				rte_cpu_to_be_32(metadata),
				(ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz),
			};
		}
@@ -861,7 +865,7 @@
	mpw->wqe->eseg.inline_hdr_sz = 0;
	mpw->wqe->eseg.rsvd0 = 0;
	mpw->wqe->eseg.rsvd1 = 0;
-	mpw->wqe->eseg.rsvd2 = 0;
+	mpw->wqe->eseg.flow_table_metadata = 0;
	mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
					     (txq->wqe_ci << 8) |
					     MLX5_OPCODE_TSO);
@@ -971,6 +975,8 @@
		if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
		    ((mpw.len != length) ||
		     (segs_n != 1) ||
+		     (mpw.wqe->eseg.flow_table_metadata !=
+		      rte_cpu_to_be_32(buf->hash.fdir.hi)) ||
		     (mpw.wqe->eseg.cs_flags != cs_flags)))
			mlx5_mpw_close(txq, &mpw);
		if (mpw.state == MLX5_MPW_STATE_CLOSED) {
@@ -984,6 +990,8 @@
			max_wqe -= 2;
			mlx5_mpw_new(txq, &mpw, length);
			mpw.wqe->eseg.cs_flags = cs_flags;
+			mpw.wqe->eseg.flow_table_metadata =
+				rte_cpu_to_be_32(buf->hash.fdir.hi);
		}
		/* Multi-segment packets must be alone in their MPW. */
		assert((segs_n == 1) || (mpw.pkts_n == 0));
@@ -1082,7 +1090,7 @@
	mpw->wqe->eseg.cs_flags = 0;
	mpw->wqe->eseg.rsvd0 = 0;
	mpw->wqe->eseg.rsvd1 = 0;
-	mpw->wqe->eseg.rsvd2 = 0;
+	mpw->wqe->eseg.flow_table_metadata = 0;
	inl = (struct mlx5_wqe_inl_small *)
		(((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
	mpw->data.raw = (uint8_t *)&inl->raw;
@@ -1199,12 +1207,16 @@
		if (mpw.state == MLX5_MPW_STATE_OPENED) {
			if ((mpw.len != length) ||
			    (segs_n != 1) ||
+			    (mpw.wqe->eseg.flow_table_metadata !=
+			     rte_cpu_to_be_32(buf->hash.fdir.hi)) ||
			    (mpw.wqe->eseg.cs_flags != cs_flags))
				mlx5_mpw_close(txq, &mpw);
		} else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
			if ((mpw.len != length) ||
			    (segs_n != 1) ||
			    (length > inline_room) ||
+			    (mpw.wqe->eseg.flow_table_metadata !=
+			     rte_cpu_to_be_32(buf->hash.fdir.hi)) ||
			    (mpw.wqe->eseg.cs_flags != cs_flags)) {
				mlx5_mpw_inline_close(txq, &mpw);
				inline_room =
@@ -1224,12 +1236,21 @@
				max_wqe -= 2;
				mlx5_mpw_new(txq, &mpw, length);
				mpw.wqe->eseg.cs_flags = cs_flags;
+				/* Copy metadata from mbuf if valid. */
+				if (buf->ol_flags & PKT_TX_METADATA)
+					mpw.wqe->eseg.flow_table_metadata =
+					rte_cpu_to_be_32(buf->hash.fdir.hi);
+
			} else {
				if (unlikely(max_wqe < wqe_inl_n))
					break;
				max_wqe -= wqe_inl_n;
				mlx5_mpw_inline_new(txq, &mpw, length);
				mpw.wqe->eseg.cs_flags = cs_flags;
+				/* Copy metadata from mbuf if valid. */
+				if (buf->ol_flags & PKT_TX_METADATA)
+					mpw.wqe->eseg.flow_table_metadata =
+					rte_cpu_to_be_32(buf->hash.fdir.hi);
			}
		}
		/* Multi-segment packets must be alone in their MPW. */
@@ -1482,6 +1503,8 @@
			    (length <= txq->inline_max_packet_sz &&
			     inl_pad + sizeof(inl_hdr) + length >
			     mpw_room) ||
+			    (mpw.wqe->eseg.flow_table_metadata !=
+			     rte_cpu_to_be_32(buf->hash.fdir.hi)) ||
			    (mpw.wqe->eseg.cs_flags != cs_flags))
				max_wqe -= mlx5_empw_close(txq, &mpw);
		}
@@ -1505,6 +1528,10 @@
				sizeof(inl_hdr) + length <= mpw_room &&
				    !txq->mpw_hdr_dseg;
			mpw.wqe->eseg.cs_flags = cs_flags;
+			/* Copy metadata from mbuf if valid. */
+			if (buf->ol_flags & PKT_TX_METADATA)
+				mpw.wqe->eseg.flow_table_metadata =
+					rte_cpu_to_be_32(buf->hash.fdir.hi);
		} else {
			/* Evaluate whether the next packet can be inlined.
			 * Inlininig is possible when:
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index 0a4aed8..132943a 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -41,6 +41,8 @@
 
 /**
  * Count the number of packets having same ol_flags and calculate cs_flags.
+ * If PKT_TX_METADATA is set in ol_flags, packets must have the same metadata
+ * as well.
  *
  * @param pkts
  *   Pointer to array of packets.
@@ -48,25 +50,38 @@
  *   Number of packets.
  * @param cs_flags
  *   Pointer of flags to be returned.
+ * @param txq_offloads
+ *   Offloads enabled on the Tx queue.
  *
  * @return
- *   Number of packets having same ol_flags.
+ *   Number of packets having same ol_flags and metadata, if relevant.
  */
 static inline unsigned int
-txq_calc_offload(struct rte_mbuf **pkts, uint16_t pkts_n, uint8_t *cs_flags)
+txq_calc_offload(struct rte_mbuf **pkts, uint16_t pkts_n, uint8_t *cs_flags,
+		 const uint64_t txq_offloads)
 {
	unsigned int pos;
	const uint64_t ol_mask =
		PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
		PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
-		PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;
+		PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM | PKT_TX_METADATA;
 
	if (!pkts_n)
		return 0;
	/* Count the number of packets having same ol_flags. */
-	for (pos = 1; pos < pkts_n; ++pos)
-		if ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask)
+	for (pos = 1; pos < pkts_n; ++pos) {
+		if ((txq_offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP) &&
+		    ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask))
			break;
+		/* If the metadata ol_flag is set,
+		 * metadata must be the same in all packets.
+		 */
+		if ((txq_offloads & DEV_TX_OFFLOAD_MATCH_METADATA) &&
+		    (pkts[pos]->ol_flags & PKT_TX_METADATA) &&
+		    pkts[0]->hash.fdir.hi != pkts[pos]->hash.fdir.hi)
+			break;
+	}
+
	*cs_flags = txq_ol_cksum_to_cs(pkts[0]);
	return pos;
 }
@@ -137,8 +152,10 @@
		n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
		if (txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
			n = txq_count_contig_single_seg(&pkts[nb_tx], n);
-		if (txq->offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP)
-			n = txq_calc_offload(&pkts[nb_tx], n, &cs_flags);
+		if (txq->offloads & (MLX5_VEC_TX_CKSUM_OFFLOAD_CAP |
+				     DEV_TX_OFFLOAD_MATCH_METADATA))
+			n = txq_calc_offload(&pkts[nb_tx], n,
+					     &cs_flags, txq->offloads);
		ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags);
		nb_tx += ret;
		if (!ret)
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.h b/drivers/net/mlx5/mlx5_rxtx_vec.h
index fb884f9..fda7004 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.h
@@ -22,6 +22,7 @@
 /* HW offload capabilities of vectorized Tx. */
 #define MLX5_VEC_TX_OFFLOAD_CAP \
	(MLX5_VEC_TX_CKSUM_OFFLOAD_CAP | \
+	 DEV_TX_OFFLOAD_MATCH_METADATA | \
	 DEV_TX_OFFLOAD_MULTI_SEGS)
 
 /*
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index b37b738..20c9427 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -237,6 +237,7 @@
	uint8x16_t *t_wqe;
	uint8_t *dseg;
	uint8x16_t ctrl;
+	uint32_t md; /* metadata */
 
	/* Make sure all packets can fit into a single WQE. */
	assert(elts_n > pkts_n);
@@ -293,10 +294,11 @@
	ctrl = vqtbl1q_u8(ctrl, ctrl_shuf_m);
	vst1q_u8((void *)t_wqe, ctrl);
	/* Fill ESEG in the header. */
+	md = pkts[0]->hash.fdir.hi;
	vst1q_u8((void *)(t_wqe + 1),
		 ((uint8x16_t) { 0, 0, 0, 0,
				 cs_flags, 0, 0, 0,
-				 0, 0, 0, 0,
+				 md >> 24, md >> 16, md >> 8, md,
				 0, 0, 0, 0 }));
 #ifdef MLX5_PMD_SOFT_COUNTERS
	txq->stats.opackets += pkts_n;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index 54b3783..7c8535c 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -236,6 +236,7 @@
			     0, 1, 2, 3 /* bswap32 */);
	__m128i *t_wqe, *dseg;
	__m128i ctrl;
+	uint32_t md; /* metadata */
 
	/* Make sure all packets can fit into a single WQE. */
	assert(elts_n > pkts_n);
@@ -292,9 +293,10 @@
	ctrl = _mm_shuffle_epi8(ctrl, shuf_mask_ctrl);
	_mm_store_si128(t_wqe, ctrl);
	/* Fill ESEG in the header. */
+	md = pkts[0]->hash.fdir.hi;
	_mm_store_si128(t_wqe + 1,
			_mm_set_epi8(0, 0, 0, 0,
-				     0, 0, 0, 0,
+				     md, md >> 8, md >> 16, md >> 24,
				     0, 0, 0, cs_flags,
				     0, 0, 0, 0));
 #ifdef MLX5_PMD_SOFT_COUNTERS
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index f9bc473..7263fb1 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -128,6 +128,12 @@
		if (config->tso)
			offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
				     DEV_TX_OFFLOAD_GRE_TNL_TSO);
	}
+
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+	if (config->dv_flow_en)
+		offloads |= DEV_TX_OFFLOAD_MATCH_METADATA;
+#endif
+
	return offloads;
 }
-- 
1.8.3.1