patches for DPDK stable branches
 help / color / mirror / Atom feed
From: Ori Kam <orika@nvidia.com>
To: Michael Baum <michaelba@nvidia.com>, "dev@dpdk.org" <dev@dpdk.org>
Cc: Matan Azrad <matan@nvidia.com>,
	NBU-Contact-Thomas Monjalon <thomas@monjalon.net>,
	"stable@dpdk.org" <stable@dpdk.org>
Subject: Re: [dpdk-stable] [PATCH_v4 1/3] regex/mlx5: fix memory region unregistration
Date: Wed, 21 Jul 2021 06:25:54 +0000	[thread overview]
Message-ID: <DM8PR12MB5400CE842F20B41DD3B188F7D6E39@DM8PR12MB5400.namprd12.prod.outlook.com> (raw)
In-Reply-To: <20210712070644.2848418-2-michaelba@nvidia.com>

Hi Michael,

> -----Original Message-----
> From: Michael Baum <michaelba@nvidia.com>
> Sent: Monday, July 12, 2021 10:07 AM
> 
> The issue can cause illegal physical address access when huge-page A is
> released and huge-page B is allocated at the same virtual address.
> The old MR can be matched using the virtual address of huge-page B, but the
> HW will access the physical address of huge-page A, which is no longer part
> of the DPDK process.
> 
> Register a driver callback for memory events in order to free all the MRs of
> memory that is about to be freed from the DPDK process.
> 
> Fixes: cda883bbb655 ("regex/mlx5: add dynamic memory registration to
> datapath")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Michael Baum <michaelba@nvidia.com>
> ---
>  drivers/regex/mlx5/mlx5_regex.c          | 55 ++++++++++++++++++++++++
>  drivers/regex/mlx5/mlx5_regex.h          |  2 +
>  drivers/regex/mlx5/mlx5_regex_control.c  |  2 +
> drivers/regex/mlx5/mlx5_regex_fastpath.c | 50 +++++++++++++++------
>  4 files changed, 97 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/regex/mlx5/mlx5_regex.c
> b/drivers/regex/mlx5/mlx5_regex.c index dcb2ced88e..0f12d94d7e 100644
> --- a/drivers/regex/mlx5/mlx5_regex.c
> +++ b/drivers/regex/mlx5/mlx5_regex.c
> @@ -11,6 +11,7 @@
>  #include <rte_regexdev_driver.h>
> 
>  #include <mlx5_common_pci.h>
> +#include <mlx5_common_mr.h>
>  #include <mlx5_common.h>
>  #include <mlx5_glue.h>
>  #include <mlx5_devx_cmds.h>
> @@ -24,6 +25,10 @@
> 
>  int mlx5_regex_logtype;
> 
> +TAILQ_HEAD(regex_mem_event, mlx5_regex_priv) mlx5_mem_event_list
> =
> +
> 	TAILQ_HEAD_INITIALIZER(mlx5_mem_event_list);
> +static pthread_mutex_t mem_event_list_lock =
> PTHREAD_MUTEX_INITIALIZER;
> +
>  const struct rte_regexdev_ops mlx5_regexdev_ops = {
>  	.dev_info_get = mlx5_regex_info_get,
>  	.dev_configure = mlx5_regex_configure, @@ -82,6 +87,40 @@
> mlx5_regex_get_name(char *name, struct rte_pci_device *pci_dev
> __rte_unused)
>  		pci_dev->addr.devid, pci_dev->addr.function);  }
> 
> +/**
> + * Callback for memory event.
> + *
> + * @param event_type
> + *   Memory event type.
> + * @param addr
> + *   Address of memory.
> + * @param len
> + *   Size of memory.
> + */
> +static void
> +mlx5_regex_mr_mem_event_cb(enum rte_mem_event event_type,
> const void *addr,
> +			   size_t len, void *arg __rte_unused) {
> +	struct mlx5_regex_priv *priv;
> +
> +	/* Must be called from the primary process. */
> +	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
> +	switch (event_type) {
> +	case RTE_MEM_EVENT_FREE:
> +		pthread_mutex_lock(&mem_event_list_lock);
> +		/* Iterate all the existing mlx5 devices. */
> +		TAILQ_FOREACH(priv, &mlx5_mem_event_list,
> mem_event_cb)
> +			mlx5_free_mr_by_addr(&priv->mr_scache,
> +					     priv->ctx->device->name,
> +					     addr, len);
> +		pthread_mutex_unlock(&mem_event_list_lock);
> +		break;
> +	case RTE_MEM_EVENT_ALLOC:
> +	default:
> +		break;
> +	}
> +}
> +
>  static int
>  mlx5_regex_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
>  		     struct rte_pci_device *pci_dev)
> @@ -193,6 +232,15 @@ mlx5_regex_pci_probe(struct rte_pci_driver
> *pci_drv __rte_unused,
>  	    rte_errno = ENOMEM;
>  		goto error;
>  	}
> +	/* Register callback function for global shared MR cache
> management. */
> +	if (TAILQ_EMPTY(&mlx5_mem_event_list))
> +
> 	rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
> +
> 	mlx5_regex_mr_mem_event_cb,
> +						NULL);
> +	/* Add device to memory callback list. */
> +	pthread_mutex_lock(&mem_event_list_lock);
> +	TAILQ_INSERT_TAIL(&mlx5_mem_event_list, priv, mem_event_cb);
> +	pthread_mutex_unlock(&mem_event_list_lock);
>  	DRV_LOG(INFO, "RegEx GGA is %s.",
>  		priv->has_umr ? "supported" : "unsupported");
>  	return 0;
> @@ -225,6 +273,13 @@ mlx5_regex_pci_remove(struct rte_pci_device
> *pci_dev)
>  		return 0;
>  	priv = dev->data->dev_private;
>  	if (priv) {
> +		/* Remove from memory callback device list. */
> +		pthread_mutex_lock(&mem_event_list_lock);
> +		TAILQ_REMOVE(&mlx5_mem_event_list, priv,
> mem_event_cb);
> +		pthread_mutex_unlock(&mem_event_list_lock);
> +		if (TAILQ_EMPTY(&mlx5_mem_event_list))
> +
> 	rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB",
> +							  NULL);
>  		if (priv->pd)
>  			mlx5_glue->dealloc_pd(priv->pd);
>  		if (priv->uar)
> diff --git a/drivers/regex/mlx5/mlx5_regex.h
> b/drivers/regex/mlx5/mlx5_regex.h index 51a2101e53..61f59ba873 100644
> --- a/drivers/regex/mlx5/mlx5_regex.h
> +++ b/drivers/regex/mlx5/mlx5_regex.h
> @@ -70,6 +70,8 @@ struct mlx5_regex_priv {
>  	uint32_t nb_engines; /* Number of RegEx engines. */
>  	struct mlx5dv_devx_uar *uar; /* UAR object. */
>  	struct ibv_pd *pd;
> +	TAILQ_ENTRY(mlx5_regex_priv) mem_event_cb;
> +	/**< Called by memory event callback. */
>  	struct mlx5_mr_share_cache mr_scache; /* Global shared MR cache.
> */
>  	uint8_t is_bf2; /* The device is BF2 device. */
>  	uint8_t sq_ts_format; /* Whether SQ supports timestamp formats.
> */ diff --git a/drivers/regex/mlx5/mlx5_regex_control.c
> b/drivers/regex/mlx5/mlx5_regex_control.c
> index eef0fe579d..8ce2dabb55 100644
> --- a/drivers/regex/mlx5/mlx5_regex_control.c
> +++ b/drivers/regex/mlx5/mlx5_regex_control.c
> @@ -246,6 +246,8 @@ mlx5_regex_qp_setup(struct rte_regexdev *dev,
> uint16_t qp_ind,
>  		nb_sq_config++;
>  	}
> 
> +	/* Save pointer of global generation number to check memory
> event. */
> +	qp->mr_ctrl.dev_gen_ptr = &priv->mr_scache.dev_gen;
>  	ret = mlx5_mr_btree_init(&qp->mr_ctrl.cache_bh,
> MLX5_MR_BTREE_CACHE_N,
>  				 rte_socket_id());
>  	if (ret) {
> diff --git a/drivers/regex/mlx5/mlx5_regex_fastpath.c
> b/drivers/regex/mlx5/mlx5_regex_fastpath.c
> index b57e7d7794..6d5096701f 100644
> --- a/drivers/regex/mlx5/mlx5_regex_fastpath.c
> +++ b/drivers/regex/mlx5/mlx5_regex_fastpath.c
> @@ -109,6 +109,40 @@ set_wqe_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg,
> uint16_t pi, uint8_t opcode,
>  	seg->imm = imm;
>  }
> 
> +/**
> + * Query LKey from a packet buffer for QP. If not found, add the mempool.
> + *
> + * @param priv
> + *   Pointer to the priv object.
> + * @param mr_ctrl
> + *   Pointer to per-queue MR control structure.
> + * @param mbuf
> + *   Pointer to source mbuf, to search in.
> + *
> + * @return
> + *   Searched LKey on success, UINT32_MAX on no match.
> + */
> +static inline uint32_t
> +mlx5_regex_addr2mr(struct mlx5_regex_priv *priv, struct mlx5_mr_ctrl
> *mr_ctrl,
> +		   struct rte_mbuf *mbuf)
> +{
> +	uintptr_t addr = rte_pktmbuf_mtod(mbuf, uintptr_t);
> +	uint32_t lkey;
> +
> +	/* Check generation bit to see if there's any change on existing MRs.
> */
> +	if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
> +		mlx5_mr_flush_local_cache(mr_ctrl);
> +	/* Linear search on MR cache array. */
> +	lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru,
> +				   MLX5_MR_CACHE_N, addr);
> +	if (likely(lkey != UINT32_MAX))
> +		return lkey;
> +	/* Take slower bottom-half on miss. */
> +	return mlx5_mr_addr2mr_bh(priv->pd, 0, &priv->mr_scache,
> mr_ctrl, addr,
> +				  !!(mbuf->ol_flags &
> EXT_ATTACHED_MBUF)); }
> +
> +
>  static inline void
>  __prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_sq *sq,
>  	   struct rte_regex_ops *op, struct mlx5_regex_job *job, @@ -160,10
> +194,7 @@ prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp
> *qp,
>  	struct mlx5_klm klm;
> 
>  	klm.byte_count = rte_pktmbuf_data_len(op->mbuf);
> -	klm.mkey = mlx5_mr_addr2mr_bh(priv->pd, 0,
> -				  &priv->mr_scache, &qp->mr_ctrl,
> -				  rte_pktmbuf_mtod(op->mbuf, uintptr_t),
> -				  !!(op->mbuf->ol_flags &
> EXT_ATTACHED_MBUF));
> +	klm.mkey = mlx5_regex_addr2mr(priv, &qp->mr_ctrl, op->mbuf);
>  	klm.address = rte_pktmbuf_mtod(op->mbuf, uintptr_t);
>  	__prep_one(priv, sq, op, job, sq->pi, &klm);
>  	sq->db_pi = sq->pi;
> @@ -329,10 +360,8 @@ prep_regex_umr_wqe_set(struct mlx5_regex_priv
> *priv, struct mlx5_regex_qp *qp,
>  					(qp->jobs[mkey_job_id].imkey->id);
>  			while (mbuf) {
>  				/* Build indirect mkey seg's KLM. */
> -				mkey_klm->mkey =
> mlx5_mr_addr2mr_bh(priv->pd,
> -					NULL, &priv->mr_scache, &qp-
> >mr_ctrl,
> -					rte_pktmbuf_mtod(mbuf, uintptr_t),
> -					!!(mbuf->ol_flags &
> EXT_ATTACHED_MBUF));
> +				mkey_klm->mkey = mlx5_regex_addr2mr
> +						(priv, &qp->mr_ctrl, mbuf);
>  				mkey_klm->address = rte_cpu_to_be_64
>  					(rte_pktmbuf_mtod(mbuf,
> uintptr_t));
>  				mkey_klm->byte_count = rte_cpu_to_be_32
> @@ -350,10 +379,7 @@ prep_regex_umr_wqe_set(struct mlx5_regex_priv
> *priv, struct mlx5_regex_qp *qp,
>  			klm.byte_count = scatter_size;
>  		} else {
>  			/* The single mubf case. Build the KLM directly. */
> -			klm.mkey = mlx5_mr_addr2mr_bh(priv->pd, NULL,
> -					&priv->mr_scache, &qp->mr_ctrl,
> -					rte_pktmbuf_mtod(mbuf, uintptr_t),
> -					!!(mbuf->ol_flags &
> EXT_ATTACHED_MBUF));
> +			klm.mkey = mlx5_regex_addr2mr(priv, &qp-
> >mr_ctrl, mbuf);
>  			klm.address = rte_pktmbuf_mtod(mbuf, uintptr_t);
>  			klm.byte_count = rte_pktmbuf_data_len(mbuf);
>  		}
> --
> 2.25.1

Acked-by: Ori Kam <orika@nvidia.com>
Thanks,
Ori


  reply	other threads:[~2021-07-21  6:25 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-28 19:23 [dpdk-stable] [PATCH " Michael Baum
2021-06-28 19:23 ` [dpdk-stable] [PATCH 2/3] regex/mlx5: fix leak in PCI remove function Michael Baum
2021-06-28 19:23 ` [dpdk-stable] [PATCH 3/3] regex/mlx5: fix redundancy " Michael Baum
2021-06-30  5:52 ` [dpdk-stable] [PATCH 1/3] regex/mlx5: fix memory region unregistration Matan Azrad
2021-07-05  5:27 ` [dpdk-stable] [PATCH_v2 " Michael Baum
2021-07-05  5:27   ` [dpdk-stable] [PATCH_v2 2/3] regex/mlx5: fix leak in PCI remove function Michael Baum
2021-07-05  5:27   ` [dpdk-stable] [PATCH_v2 3/3] regex/mlx5: fix redundancy " Michael Baum
     [not found]   ` <20210707120303.2490006-1-michaelba@nvidia.com>
2021-07-07 12:03     ` [dpdk-stable] [PATCH_v3 1/3] regex/mlx5: fix memory region unregistration Michael Baum
     [not found]       ` <20210712070644.2848418-1-michaelba@nvidia.com>
2021-07-12  7:06         ` [dpdk-stable] [PATCH_v4 " Michael Baum
2021-07-21  6:25           ` Ori Kam [this message]
2021-07-12  7:06         ` [dpdk-stable] [PATCH_v4 2/3] regex/mlx5: fix leak in PCI remove function Michael Baum
2021-07-21  6:21           ` [dpdk-stable] [dpdk-dev] " Ori Kam
2021-07-12  7:06         ` [dpdk-stable] [PATCH_v4 3/3] regex/mlx5: fix redundancy " Michael Baum
2021-07-21  6:23           ` Ori Kam
2021-07-07 12:03     ` [dpdk-stable] [PATCH_v3 2/3] regex/mlx5: fix leak " Michael Baum
2021-07-07 12:03     ` [dpdk-stable] [PATCH_v3 3/3] regex/mlx5: fix redundancy " Michael Baum

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DM8PR12MB5400CE842F20B41DD3B188F7D6E39@DM8PR12MB5400.namprd12.prod.outlook.com \
    --to=orika@nvidia.com \
    --cc=dev@dpdk.org \
    --cc=matan@nvidia.com \
    --cc=michaelba@nvidia.com \
    --cc=stable@dpdk.org \
    --cc=thomas@monjalon.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).