From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id BBCBDA04B7; Tue, 13 Oct 2020 16:18:50 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 1A3E31DCC1; Tue, 13 Oct 2020 16:17:14 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by dpdk.org (Postfix) with ESMTP id A02EF1C131 for ; Tue, 13 Oct 2020 16:17:06 +0200 (CEST) Received: from Internal Mail-Server by MTLPINE1 (envelope-from dekelp@nvidia.com) with SMTP; 13 Oct 2020 17:17:02 +0300 Received: from mtl-vdi-280.wap.labs.mlnx. (mtl-vdi-280.wap.labs.mlnx [10.228.134.250]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 09DEH1O6024493; Tue, 13 Oct 2020 17:17:02 +0300 From: Dekel Peled To: orika@nvidia.com, thomas@monjalon.net, ferruh.yigit@intel.com, arybchenko@solarflare.com, konstantin.ananyev@intel.com, olivier.matz@6wind.com, wenzhuo.lu@intel.com, beilei.xing@intel.com, bernard.iremonger@intel.com, matan@nvidia.com, shahafs@nvidia.com, viacheslavo@nvidia.com Cc: dev@dpdk.org Date: Tue, 13 Oct 2020 17:16:12 +0300 Message-Id: <43e94a6e60e869aa6c99e32cf75a4998b4593d45.1602598055.git.dekelp@nvidia.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: References: Subject: [dpdk-dev] [PATCH 2/5] net/mlx5: support match on IPv4 fragment packets X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This patch adds support to the MLX5 PMD for matching on fragmented and non-fragmented IPv4 packets, using the IPv4 header fragment_offset field.
Signed-off-by: Dekel Peled Acked-by: Ori Kam --- doc/guides/rel_notes/release_20_11.rst | 5 ++ drivers/net/mlx5/mlx5_flow.c | 48 ++++++---- drivers/net/mlx5/mlx5_flow.h | 10 +++ drivers/net/mlx5/mlx5_flow_dv.c | 156 ++++++++++++++++++++++++++++----- drivers/net/mlx5/mlx5_flow_verbs.c | 9 +- 5 files changed, 183 insertions(+), 45 deletions(-) diff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_20_11.rst index a01552c..792d547 100644 --- a/doc/guides/rel_notes/release_20_11.rst +++ b/doc/guides/rel_notes/release_20_11.rst @@ -148,6 +148,11 @@ New Features * Extern objects and functions can be plugged into the pipeline. * Transaction-oriented table updates. +* **Updated Mellanox mlx5 driver.** + + Updated Mellanox mlx5 driver with new features and improvements, including: + + * Added support for matching on fragmented/non-fragmented IPv4 packets. Removed Items ------------- diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index 0a54818..38cfd0f 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -800,6 +800,8 @@ struct mlx5_flow_tunnel_info { * Bit-masks covering supported fields by the NIC to compare with user mask. * @param[in] size * Bit-masks size in bytes. + * @param[in] range_accepted + * True if range of values is accepted for specific fields, false otherwise. * @param[out] error * Pointer to error structure. 
* @@ -811,6 +813,7 @@ struct mlx5_flow_tunnel_info { const uint8_t *mask, const uint8_t *nic_mask, unsigned int size, + bool range_accepted, struct rte_flow_error *error) { unsigned int i; @@ -828,7 +831,7 @@ struct mlx5_flow_tunnel_info { RTE_FLOW_ERROR_TYPE_ITEM, item, "mask/last without a spec is not" " supported"); - if (item->spec && item->last) { + if (item->spec && item->last && !range_accepted) { uint8_t spec[size]; uint8_t last[size]; unsigned int i; @@ -1603,7 +1606,8 @@ struct mlx5_flow_tunnel_info { ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)&rte_flow_item_icmp6_mask, - sizeof(struct rte_flow_item_icmp6), error); + sizeof(struct rte_flow_item_icmp6), + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) return ret; return 0; @@ -1661,7 +1665,8 @@ struct mlx5_flow_tunnel_info { ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)&nic_mask, - sizeof(struct rte_flow_item_icmp), error); + sizeof(struct rte_flow_item_icmp), + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) return ret; return 0; @@ -1716,7 +1721,7 @@ struct mlx5_flow_tunnel_info { ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, (const uint8_t *)&nic_mask, sizeof(struct rte_flow_item_eth), - error); + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); return ret; } @@ -1770,7 +1775,7 @@ struct mlx5_flow_tunnel_info { ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, (const uint8_t *)&nic_mask, sizeof(struct rte_flow_item_vlan), - error); + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret) return ret; if (!tunnel && mask->tci != RTE_BE16(0x0fff)) { @@ -1822,6 +1827,8 @@ struct mlx5_flow_tunnel_info { * @param[in] acc_mask * Acceptable mask, if NULL default internal default mask * will be used to check whether item fields are supported. + * @param[in] range_accepted + * True if range of values is accepted for specific fields, false otherwise. * @param[out] error * Pointer to error structure. 
* @@ -1834,6 +1841,7 @@ struct mlx5_flow_tunnel_info { uint64_t last_item, uint16_t ether_type, const struct rte_flow_item_ipv4 *acc_mask, + bool range_accepted, struct rte_flow_error *error) { const struct rte_flow_item_ipv4 *mask = item->mask; @@ -1904,7 +1912,7 @@ struct mlx5_flow_tunnel_info { acc_mask ? (const uint8_t *)acc_mask : (const uint8_t *)&nic_mask, sizeof(struct rte_flow_item_ipv4), - error); + range_accepted, error); if (ret < 0) return ret; return 0; @@ -2003,7 +2011,7 @@ struct mlx5_flow_tunnel_info { acc_mask ? (const uint8_t *)acc_mask : (const uint8_t *)&nic_mask, sizeof(struct rte_flow_item_ipv6), - error); + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) return ret; return 0; @@ -2058,7 +2066,8 @@ struct mlx5_flow_tunnel_info { ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)&rte_flow_item_udp_mask, - sizeof(struct rte_flow_item_udp), error); + sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED, + error); if (ret < 0) return ret; return 0; @@ -2113,7 +2122,8 @@ struct mlx5_flow_tunnel_info { ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)flow_mask, - sizeof(struct rte_flow_item_tcp), error); + sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED, + error); if (ret < 0) return ret; return 0; @@ -2167,7 +2177,7 @@ struct mlx5_flow_tunnel_info { (item, (const uint8_t *)mask, (const uint8_t *)&rte_flow_item_vxlan_mask, sizeof(struct rte_flow_item_vxlan), - error); + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) return ret; if (spec) { @@ -2238,7 +2248,7 @@ struct mlx5_flow_tunnel_info { (item, (const uint8_t *)mask, (const uint8_t *)&rte_flow_item_vxlan_gpe_mask, sizeof(struct rte_flow_item_vxlan_gpe), - error); + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) return ret; if (spec) { @@ -2312,7 +2322,7 @@ struct mlx5_flow_tunnel_info { ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)&gre_key_default_mask, - 
sizeof(rte_be32_t), error); + sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error); return ret; } @@ -2364,7 +2374,8 @@ struct mlx5_flow_tunnel_info { ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)&nic_mask, - sizeof(struct rte_flow_item_gre), error); + sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED, + error); if (ret < 0) return ret; #ifndef HAVE_MLX5DV_DR @@ -2439,7 +2450,8 @@ struct mlx5_flow_tunnel_info { ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)&nic_mask, - sizeof(struct rte_flow_item_geneve), error); + sizeof(struct rte_flow_item_geneve), + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret) return ret; if (spec) { @@ -2522,7 +2534,8 @@ struct mlx5_flow_tunnel_info { ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)&rte_flow_item_mpls_mask, - sizeof(struct rte_flow_item_mpls), error); + sizeof(struct rte_flow_item_mpls), + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) return ret; return 0; @@ -2577,7 +2590,8 @@ struct mlx5_flow_tunnel_info { ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)&rte_flow_item_nvgre_mask, - sizeof(struct rte_flow_item_nvgre), error); + sizeof(struct rte_flow_item_nvgre), + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) return ret; return 0; @@ -2671,7 +2685,7 @@ struct mlx5_flow_tunnel_info { acc_mask ? (const uint8_t *)acc_mask : (const uint8_t *)&nic_mask, sizeof(struct rte_flow_item_ecpri), - error); + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); } /* Allocate unique ID for the split Q/RSS subflows. 
*/ diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index 279daf2..1e30c93 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -330,6 +330,14 @@ enum mlx5_feature_name { #define MLX5_ENCAPSULATION_DECISION_SIZE (sizeof(struct rte_flow_item_eth) + \ sizeof(struct rte_flow_item_ipv4)) +/* IPv4 fragment_offset field contains relevant data in bits 2 to 15. */ +#define MLX5_IPV4_FRAG_OFFSET_MASK \ + (RTE_IPV4_HDR_OFFSET_MASK | RTE_IPV4_HDR_MF_FLAG) + +/* Specific item's fields can accept a range of values (using spec and last). */ +#define MLX5_ITEM_RANGE_NOT_ACCEPTED false +#define MLX5_ITEM_RANGE_ACCEPTED true + /* Software header modify action numbers of a flow. */ #define MLX5_ACT_NUM_MDF_IPV4 1 #define MLX5_ACT_NUM_MDF_IPV6 4 @@ -985,6 +993,7 @@ int mlx5_flow_item_acceptable(const struct rte_flow_item *item, const uint8_t *mask, const uint8_t *nic_mask, unsigned int size, + bool range_accepted, struct rte_flow_error *error); int mlx5_flow_validate_item_eth(const struct rte_flow_item *item, uint64_t item_flags, @@ -1002,6 +1011,7 @@ int mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, uint64_t last_item, uint16_t ether_type, const struct rte_flow_item_ipv4 *acc_mask, + bool range_accepted, struct rte_flow_error *error); int mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, uint64_t item_flags, diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index c0fb311..08e6f74 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -1418,7 +1418,7 @@ struct field_modify_info modify_tcp[] = { ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, (const uint8_t *)&nic_mask, sizeof(struct rte_flow_item_mark), - error); + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) return ret; return 0; @@ -1494,7 +1494,7 @@ struct field_modify_info modify_tcp[] = { ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, (const uint8_t 
*)&nic_mask, sizeof(struct rte_flow_item_meta), - error); + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); return ret; } @@ -1547,7 +1547,7 @@ struct field_modify_info modify_tcp[] = { ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, (const uint8_t *)&nic_mask, sizeof(struct rte_flow_item_tag), - error); + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) return ret; if (mask->index != 0xff) @@ -1618,7 +1618,7 @@ struct field_modify_info modify_tcp[] = { (item, (const uint8_t *)mask, (const uint8_t *)&rte_flow_item_port_id_mask, sizeof(struct rte_flow_item_port_id), - error); + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret) return ret; if (!spec) @@ -1691,7 +1691,7 @@ struct field_modify_info modify_tcp[] = { ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, (const uint8_t *)&nic_mask, sizeof(struct rte_flow_item_vlan), - error); + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret) return ret; if (!tunnel && mask->tci != RTE_BE16(0x0fff)) { @@ -1778,11 +1778,126 @@ struct field_modify_info modify_tcp[] = { RTE_FLOW_ERROR_TYPE_ITEM, item, "Match is supported for GTP" " flags only"); - return mlx5_flow_item_acceptable - (item, (const uint8_t *)mask, - (const uint8_t *)&nic_mask, - sizeof(struct rte_flow_item_gtp), - error); + return mlx5_flow_item_acceptable(item, (const uint8_t *)mask, + (const uint8_t *)&nic_mask, + sizeof(struct rte_flow_item_gtp), + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); +} + +/** + * Validate IPV4 item. + * Use existing validation function mlx5_flow_validate_item_ipv4(), and + * add specific validation of fragment_offset field, + * + * @param[in] item + * Item specification. + * @param[in] item_flags + * Bit-fields that holds the items detected until now. + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ +static int +flow_dv_validate_item_ipv4(const struct rte_flow_item *item, + uint64_t item_flags, + uint64_t last_item, + uint16_t ether_type, + struct rte_flow_error *error) +{ + int ret; + const struct rte_flow_item_ipv4 *spec = item->spec; + const struct rte_flow_item_ipv4 *last = item->last; + const struct rte_flow_item_ipv4 *mask = item->mask; + rte_be16_t fragment_offset_spec = 0; + rte_be16_t fragment_offset_last = 0; + const struct rte_flow_item_ipv4 nic_ipv4_mask = { + .hdr = { + .src_addr = RTE_BE32(0xffffffff), + .dst_addr = RTE_BE32(0xffffffff), + .type_of_service = 0xff, + .fragment_offset = RTE_BE16(0xffff), + .next_proto_id = 0xff, + .time_to_live = 0xff, + }, + }; + + ret = mlx5_flow_validate_item_ipv4(item, item_flags, last_item, + ether_type, &nic_ipv4_mask, + MLX5_ITEM_RANGE_ACCEPTED, error); + if (ret < 0) + return ret; + if (spec && mask) + fragment_offset_spec = spec->hdr.fragment_offset & + mask->hdr.fragment_offset; + if (!fragment_offset_spec) + return 0; + /* + * spec and mask are valid, enforce using full mask to make sure the + * complete value is used correctly. + */ + if ((mask->hdr.fragment_offset & RTE_BE16(MLX5_IPV4_FRAG_OFFSET_MASK)) + != RTE_BE16(MLX5_IPV4_FRAG_OFFSET_MASK)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, + item, "must use full mask for" + " fragment_offset"); + /* + * Match on fragment_offset 0x2000 means MF is 1 and frag-offset is 0, + * indicating this is 1st fragment of fragmented packet. + * This is not yet supported in MLX5, return appropriate error message. + */ + if (fragment_offset_spec == RTE_BE16(RTE_IPV4_HDR_MF_FLAG)) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "match on first fragment not " + "supported"); + if (fragment_offset_spec && !last) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "specified value not supported"); + /* spec and last are valid, validate the specified range. 
*/ + fragment_offset_last = last->hdr.fragment_offset & + mask->hdr.fragment_offset; + /* + * Match on fragment_offset spec 0x2001 and last 0x3fff + * means MF is 1 and frag-offset is > 0. + * This packet is fragment 2nd and onward, excluding last. + * This is not yet supported in MLX5, return appropriate + * error message. + */ + if (fragment_offset_spec == RTE_BE16(RTE_IPV4_HDR_MF_FLAG + 1) && + fragment_offset_last == RTE_BE16(MLX5_IPV4_FRAG_OFFSET_MASK)) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_LAST, + last, "match on following " + "fragments not supported"); + /* + * Match on fragment_offset spec 0x0001 and last 0x1fff + * means MF is 0 and frag-offset is > 0. + * This packet is last fragment of fragmented packet. + * This is not yet supported in MLX5, return appropriate + * error message. + */ + if (fragment_offset_spec == RTE_BE16(1) && + fragment_offset_last == RTE_BE16(RTE_IPV4_HDR_OFFSET_MASK)) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_LAST, + last, "match on last " + "fragment not supported"); + /* + * Match on fragment_offset spec 0x0001 and last 0x3fff + * means MF and/or frag-offset is not 0. + * This is a fragmented packet. + * Other range values are invalid and rejected. 
+ */ + if (!(fragment_offset_spec == RTE_BE16(1) && + fragment_offset_last == RTE_BE16(MLX5_IPV4_FRAG_OFFSET_MASK))) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_LAST, last, + "specified range not supported"); + return 0; } /** @@ -5084,15 +5199,6 @@ struct field_modify_info modify_tcp[] = { .dst_port = RTE_BE16(UINT16_MAX), } }; - const struct rte_flow_item_ipv4 nic_ipv4_mask = { - .hdr = { - .src_addr = RTE_BE32(0xffffffff), - .dst_addr = RTE_BE32(0xffffffff), - .type_of_service = 0xff, - .next_proto_id = 0xff, - .time_to_live = 0xff, - }, - }; const struct rte_flow_item_ipv6 nic_ipv6_mask = { .hdr = { .src_addr = @@ -5192,11 +5298,9 @@ struct field_modify_info modify_tcp[] = { case RTE_FLOW_ITEM_TYPE_IPV4: mlx5_flow_tunnel_ip_check(items, next_protocol, &item_flags, &tunnel); - ret = mlx5_flow_validate_item_ipv4(items, item_flags, - last_item, - ether_type, - &nic_ipv4_mask, - error); + ret = flow_dv_validate_item_ipv4(items, item_flags, + last_item, ether_type, + error); if (ret < 0) return ret; last_item = tunnel ? 
MLX5_FLOW_LAYER_INNER_L3_IPV4 : @@ -6296,6 +6400,10 @@ struct field_modify_info modify_tcp[] = { ipv4_m->hdr.time_to_live); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ttl_hoplimit, ipv4_v->hdr.time_to_live & ipv4_m->hdr.time_to_live); + MLX5_SET(fte_match_set_lyr_2_4, headers_m, frag, + !!(ipv4_m->hdr.fragment_offset)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, + !!(ipv4_v->hdr.fragment_offset & ipv4_m->hdr.fragment_offset)); } /** diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c index 62c18b8..276bcb5 100644 --- a/drivers/net/mlx5/mlx5_flow_verbs.c +++ b/drivers/net/mlx5/mlx5_flow_verbs.c @@ -1312,10 +1312,11 @@ } break; case RTE_FLOW_ITEM_TYPE_IPV4: - ret = mlx5_flow_validate_item_ipv4(items, item_flags, - last_item, - ether_type, NULL, - error); + ret = mlx5_flow_validate_item_ipv4 + (items, item_flags, + last_item, ether_type, NULL, + MLX5_ITEM_RANGE_NOT_ACCEPTED, + error); if (ret < 0) return ret; last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : -- 1.8.3.1