* [dpdk-dev] [PATCH] common/mlx5: fix CPU detection for PCI relaxed ordering
@ 2020-07-19 10:07 Thomas Monjalon
  2020-07-19 10:11 ` Slava Ovsiienko
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Thomas Monjalon @ 2020-07-19 10:07 UTC (permalink / raw)
  To: dev
  Cc: rasland, shirik, stable, Matan Azrad, Shahaf Shuler,
	Viacheslav Ovsiienko, Ray Kinsella, Neil Horman
The detection of the CPU was done in a constructor and shared
in a global variable.
This variable may not be visible in the net PMD
because it was not exported as part of the .map file.
It is fixed by exporting a function, which is cleaner than a variable.
By checking the CPU only at the first call of the function,
doing the check in a constructor becomes useless.
Note: the priority of the constructor was probably irrelevant.
At the same time, the comments are reworded or dropped if useless.
Fixes: 4c204fe5e5d2 ("common/mlx5: disable relaxed ordering in unsuitable CPUs")
Cc: shirik@mellanox.com
Cc: stable@dpdk.org
Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
---
 drivers/common/mlx5/linux/mlx5_common_verbs.c |  2 +-
 drivers/common/mlx5/mlx5_common.c             | 53 ++++++++-----------
 drivers/common/mlx5/mlx5_common.h             |  4 +-
 .../common/mlx5/rte_common_mlx5_version.map   |  2 +
 drivers/net/mlx5/mlx5_flow_dv.c               |  2 +-
 5 files changed, 28 insertions(+), 35 deletions(-)
diff --git a/drivers/common/mlx5/linux/mlx5_common_verbs.c b/drivers/common/mlx5/linux/mlx5_common_verbs.c
index a2fc7a36bd..31ac20fe09 100644
--- a/drivers/common/mlx5/linux/mlx5_common_verbs.c
+++ b/drivers/common/mlx5/linux/mlx5_common_verbs.c
@@ -55,7 +55,7 @@ mlx5_common_verbs_reg_mr(void *pd, void *addr, size_t length,
 	memset(pmd_mr, 0, sizeof(*pmd_mr));
 	ibv_mr = mlx5_glue->reg_mr(pd, addr, length,
 				   IBV_ACCESS_LOCAL_WRITE |
-				   (haswell_broadwell_cpu ? 0 :
+				   (mlx5_cpu_is_haswell_broadwell() ? 0 :
 				   IBV_ACCESS_RELAXED_ORDERING));
 	if (!ibv_mr)
 		return -1;
diff --git a/drivers/common/mlx5/mlx5_common.c b/drivers/common/mlx5/mlx5_common.c
index 693e2c68c8..7232d5131d 100644
--- a/drivers/common/mlx5/mlx5_common.c
+++ b/drivers/common/mlx5/mlx5_common.c
@@ -20,8 +20,6 @@ int mlx5_common_logtype;
 const struct mlx5_glue *mlx5_glue;
 #endif
 
-uint8_t haswell_broadwell_cpu;
-
 static int
 mlx5_class_check_handler(__rte_unused const char *key, const char *value,
 			 void *opaque)
@@ -59,19 +57,8 @@ mlx5_class_get(struct rte_devargs *devargs)
 }
 
 
-/* In case this is an x86_64 intel processor to check if
- * we should use relaxed ordering.
- */
 #ifdef RTE_ARCH_X86_64
-/**
- * This function returns processor identification and feature information
- * into the registers.
- *
- * @param eax, ebx, ecx, edx
- *		Pointers to the registers that will hold cpu information.
- * @param level
- *		The main category of information returned.
- */
+/* Processor identification and feature information filled in registers. */
 static inline void mlx5_cpu_id(unsigned int level,
 				unsigned int *eax, unsigned int *ebx,
 				unsigned int *ecx, unsigned int *edx)
@@ -97,17 +84,7 @@ RTE_INIT_PRIO(mlx5_glue_init, CLASS)
 	mlx5_glue_constructor();
 }
 
-/**
- * This function is responsible of initializing the variable
- *  haswell_broadwell_cpu by checking if the cpu is intel
- *  and reading the data returned from mlx5_cpu_id().
- *  since haswell and broadwell cpus don't have improved performance
- *  when using relaxed ordering we want to check the cpu type before
- *  before deciding whether to enable RO or not.
- *  if the cpu is haswell or broadwell the variable will be set to 1
- *  otherwise it will be 0.
- */
-RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
+static bool mlx5_x86_is_haswell_broadwell(void)
 {
 #ifdef RTE_ARCH_X86_64
 	unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56};
@@ -125,8 +102,7 @@ RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
 	vendor = ebx;
 	max_level = eax;
 	if (max_level < 1) {
-		haswell_broadwell_cpu = 0;
-		return;
+		return false;
 	}
 	mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx);
 	model = (eax >> 4) & 0x0f;
@@ -140,18 +116,31 @@ RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
 		if (brand_id == 0 && family == 0x6) {
 			for (i = 0; i < RTE_DIM(broadwell_models); i++)
 				if (model == broadwell_models[i]) {
-					haswell_broadwell_cpu = 1;
-					return;
+					return true;
 				}
 			for (i = 0; i < RTE_DIM(haswell_models); i++)
 				if (model == haswell_models[i]) {
-					haswell_broadwell_cpu = 1;
-					return;
+					return true;
 				}
 		}
 	}
 #endif
-	haswell_broadwell_cpu = 0;
+	return false;
+}
+
+/*
+ * Check if the CPU is Intel Haswell or Broadwell,
+ * because PCI relaxed ordering has no performance benefit with these CPUs.
+ */
+bool mlx5_cpu_is_haswell_broadwell(void)
+{
+	static bool haswell_broadwell_cpu;
+	static bool once = false;
+
+	if (once)
+		return haswell_broadwell_cpu;
+	once = true;
+	return haswell_broadwell_cpu = mlx5_x86_is_haswell_broadwell();
 }
 
 /**
diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h
index 2851507058..d453e0b3d8 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -243,6 +243,9 @@ struct mlx5_klm {
 
 LIST_HEAD(mlx5_dbr_page_list, mlx5_devx_dbr_page);
 
+__rte_internal
+bool mlx5_cpu_is_haswell_broadwell(void);
+
 __rte_internal
 enum mlx5_class mlx5_class_get(struct rte_devargs *devargs);
 __rte_internal
@@ -255,6 +258,5 @@ int64_t mlx5_get_dbr(void *ctx,  struct mlx5_dbr_page_list *head,
 __rte_internal
 int32_t mlx5_release_dbr(struct mlx5_dbr_page_list *head, uint32_t umem_id,
 			 uint64_t offset);
-extern uint8_t haswell_broadwell_cpu;
 
 #endif /* RTE_PMD_MLX5_COMMON_H_ */
diff --git a/drivers/common/mlx5/rte_common_mlx5_version.map b/drivers/common/mlx5/rte_common_mlx5_version.map
index ae57ebdba5..501b9fff3b 100644
--- a/drivers/common/mlx5/rte_common_mlx5_version.map
+++ b/drivers/common/mlx5/rte_common_mlx5_version.map
@@ -6,6 +6,8 @@ INTERNAL {
 	mlx5_common_verbs_reg_mr;
 	mlx5_common_verbs_dereg_mr;
 
+	mlx5_cpu_is_haswell_broadwell;
+
 	mlx5_create_mr_ext;
 
 	mlx5_dev_to_pci_addr;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 8b5b6838fa..f1109ae095 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -4201,7 +4201,7 @@ flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
 	mkey_attr.klm_num = 0;
 	if (priv->config.hca_attr.relaxed_ordering_write &&
 		priv->config.hca_attr.relaxed_ordering_read  &&
-		!haswell_broadwell_cpu)
+		!mlx5_cpu_is_haswell_broadwell())
 		mkey_attr.relaxed_ordering = 1;
 	mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
 	if (!mem_mng->dm) {
-- 
2.27.0
^ permalink raw reply	[flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH] common/mlx5: fix CPU detection for PCI relaxed ordering
  2020-07-19 10:07 [dpdk-dev] [PATCH] common/mlx5: fix CPU detection for PCI relaxed ordering Thomas Monjalon
@ 2020-07-19 10:11 ` Slava Ovsiienko
  2020-07-19 10:56 ` Matan Azrad
  2020-07-24 14:53 ` David Marchand
  2 siblings, 0 replies; 10+ messages in thread
From: Slava Ovsiienko @ 2020-07-19 10:11 UTC (permalink / raw)
  To: Thomas Monjalon, dev
  Cc: Raslan Darawsheh, Shiri Kuzin, stable, Matan Azrad,
	Shahaf Shuler, Ray Kinsella, Neil Horman
> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Sunday, July 19, 2020 13:08
> To: dev@dpdk.org
> Cc: Raslan Darawsheh <rasland@mellanox.com>; Shiri Kuzin
> <shirik@mellanox.com>; stable@dpdk.org; Matan Azrad
> <matan@mellanox.com>; Shahaf Shuler <shahafs@mellanox.com>; Slava
> Ovsiienko <viacheslavo@mellanox.com>; Ray Kinsella <mdr@ashroe.eu>;
> Neil Horman <nhorman@tuxdriver.com>
> Subject: [PATCH] common/mlx5: fix CPU detection for PCI relaxed ordering
> 
> The detection of the CPU was done in a constructor and shared in a global
> variable.
> 
> This variable may not be visible in the net PMD because it was not exported
> as part of the .map file.
> It is fixed by exporting a function, which is cleaner than a variable.
> 
> By checking the CPU only at the first call of the function, doing the check in a
> constructor becomes useless.
> Note: the priority of the constructor was probably irrelevant.
> 
> At the same time, the comments are reworded or dropped if useless.
> 
> Fixes: 4c204fe5e5d2 ("common/mlx5: disable relaxed ordering in unsuitable
> CPUs")
> Cc: shirik@mellanox.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
^ permalink raw reply	[flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH] common/mlx5: fix CPU detection for PCI relaxed ordering
  2020-07-19 10:07 [dpdk-dev] [PATCH] common/mlx5: fix CPU detection for PCI relaxed ordering Thomas Monjalon
  2020-07-19 10:11 ` Slava Ovsiienko
@ 2020-07-19 10:56 ` Matan Azrad
  2020-07-19 11:13   ` Thomas Monjalon
  2020-07-24 14:53 ` David Marchand
  2 siblings, 1 reply; 10+ messages in thread
From: Matan Azrad @ 2020-07-19 10:56 UTC (permalink / raw)
  To: Thomas Monjalon, dev
  Cc: Raslan Darawsheh, Shiri Kuzin, stable, Shahaf Shuler,
	Slava Ovsiienko, Ray Kinsella, Neil Horman
From: Thomas Monjalon
> The detection of the CPU was done in a constructor and shared in a global
> variable.
> 
> This variable may not be visible in the net PMD because it was not exported
> as part of the .map file.
Can you explain exactly when it is not visible?
> It is fixed by exporting a function, which is cleaner than a variable.
Can you explain why?
We have classic example - rte_eth_devices.
> By checking the CPU only at the first call of the function, doing the check in a
> constructor becomes useless.
Yes, but why not to do it in constructor? this variable is initialized only once and doesn't depend in any parameter.
> Note: the priority of the constructor was probably irrelevant.
> 
> At the same time, the comments are reworded or dropped if useless.
> 
> Fixes: 4c204fe5e5d2 ("common/mlx5: disable relaxed ordering in unsuitable
> CPUs")
> Cc: shirik@mellanox.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> ---
>  drivers/common/mlx5/linux/mlx5_common_verbs.c |  2 +-
>  drivers/common/mlx5/mlx5_common.c             | 53 ++++++++-----------
>  drivers/common/mlx5/mlx5_common.h             |  4 +-
>  .../common/mlx5/rte_common_mlx5_version.map   |  2 +
>  drivers/net/mlx5/mlx5_flow_dv.c               |  2 +-
>  5 files changed, 28 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/common/mlx5/linux/mlx5_common_verbs.c
> b/drivers/common/mlx5/linux/mlx5_common_verbs.c
> index a2fc7a36bd..31ac20fe09 100644
> --- a/drivers/common/mlx5/linux/mlx5_common_verbs.c
> +++ b/drivers/common/mlx5/linux/mlx5_common_verbs.c
> @@ -55,7 +55,7 @@ mlx5_common_verbs_reg_mr(void *pd, void *addr,
> size_t length,
>  	memset(pmd_mr, 0, sizeof(*pmd_mr));
>  	ibv_mr = mlx5_glue->reg_mr(pd, addr, length,
>  				   IBV_ACCESS_LOCAL_WRITE |
> -				   (haswell_broadwell_cpu ? 0 :
> +				   (mlx5_cpu_is_haswell_broadwell() ? 0 :
>  				   IBV_ACCESS_RELAXED_ORDERING));
>  	if (!ibv_mr)
>  		return -1;
> diff --git a/drivers/common/mlx5/mlx5_common.c
> b/drivers/common/mlx5/mlx5_common.c
> index 693e2c68c8..7232d5131d 100644
> --- a/drivers/common/mlx5/mlx5_common.c
> +++ b/drivers/common/mlx5/mlx5_common.c
> @@ -20,8 +20,6 @@ int mlx5_common_logtype;  const struct mlx5_glue
> *mlx5_glue;  #endif
> 
> -uint8_t haswell_broadwell_cpu;
> -
>  static int
>  mlx5_class_check_handler(__rte_unused const char *key, const char
> *value,
>  			 void *opaque)
> @@ -59,19 +57,8 @@ mlx5_class_get(struct rte_devargs *devargs)  }
> 
> 
> -/* In case this is an x86_64 intel processor to check if
> - * we should use relaxed ordering.
> - */
>  #ifdef RTE_ARCH_X86_64
> -/**
> - * This function returns processor identification and feature information
> - * into the registers.
> - *
> - * @param eax, ebx, ecx, edx
> - *		Pointers to the registers that will hold cpu information.
> - * @param level
> - *		The main category of information returned.
> - */
> +/* Processor identification and feature information filled in
> +registers. */
>  static inline void mlx5_cpu_id(unsigned int level,
>  				unsigned int *eax, unsigned int *ebx,
>  				unsigned int *ecx, unsigned int *edx) @@ -
> 97,17 +84,7 @@ RTE_INIT_PRIO(mlx5_glue_init, CLASS)
>  	mlx5_glue_constructor();
>  }
> 
> -/**
> - * This function is responsible of initializing the variable
> - *  haswell_broadwell_cpu by checking if the cpu is intel
> - *  and reading the data returned from mlx5_cpu_id().
> - *  since haswell and broadwell cpus don't have improved performance
> - *  when using relaxed ordering we want to check the cpu type before
> - *  before deciding whether to enable RO or not.
> - *  if the cpu is haswell or broadwell the variable will be set to 1
> - *  otherwise it will be 0.
> - */
> -RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
> +static bool mlx5_x86_is_haswell_broadwell(void)
>  {
>  #ifdef RTE_ARCH_X86_64
>  	unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56}; @@ -
> 125,8 +102,7 @@ RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
>  	vendor = ebx;
>  	max_level = eax;
>  	if (max_level < 1) {
> -		haswell_broadwell_cpu = 0;
> -		return;
> +		return false;
>  	}
>  	mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx);
>  	model = (eax >> 4) & 0x0f;
> @@ -140,18 +116,31 @@ RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu,
> LOG)
>  		if (brand_id == 0 && family == 0x6) {
>  			for (i = 0; i < RTE_DIM(broadwell_models); i++)
>  				if (model == broadwell_models[i]) {
> -					haswell_broadwell_cpu = 1;
> -					return;
> +					return true;
>  				}
>  			for (i = 0; i < RTE_DIM(haswell_models); i++)
>  				if (model == haswell_models[i]) {
> -					haswell_broadwell_cpu = 1;
> -					return;
> +					return true;
>  				}
>  		}
>  	}
>  #endif
> -	haswell_broadwell_cpu = 0;
> +	return false;
> +}
> +
> +/*
> + * Check if the CPU is Intel Haswell or Broadwell,
> + * because PCI relaxed ordering has no performance benefit with these
> CPUs.
> + */
> +bool mlx5_cpu_is_haswell_broadwell(void)
> +{
> +	static bool haswell_broadwell_cpu;
> +	static bool once = false;
> +
> +	if (once)
> +		return haswell_broadwell_cpu;
> +	once = true;
> +	return haswell_broadwell_cpu = mlx5_x86_is_haswell_broadwell();
>  }
> 
>  /**
> diff --git a/drivers/common/mlx5/mlx5_common.h
> b/drivers/common/mlx5/mlx5_common.h
> index 2851507058..d453e0b3d8 100644
> --- a/drivers/common/mlx5/mlx5_common.h
> +++ b/drivers/common/mlx5/mlx5_common.h
> @@ -243,6 +243,9 @@ struct mlx5_klm {
> 
>  LIST_HEAD(mlx5_dbr_page_list, mlx5_devx_dbr_page);
> 
> +__rte_internal
> +bool mlx5_cpu_is_haswell_broadwell(void);
> +
>  __rte_internal
>  enum mlx5_class mlx5_class_get(struct rte_devargs *devargs);
> __rte_internal @@ -255,6 +258,5 @@ int64_t mlx5_get_dbr(void *ctx,  struct
> mlx5_dbr_page_list *head,  __rte_internal  int32_t mlx5_release_dbr(struct
> mlx5_dbr_page_list *head, uint32_t umem_id,
>  			 uint64_t offset);
> -extern uint8_t haswell_broadwell_cpu;
> 
>  #endif /* RTE_PMD_MLX5_COMMON_H_ */
> diff --git a/drivers/common/mlx5/rte_common_mlx5_version.map
> b/drivers/common/mlx5/rte_common_mlx5_version.map
> index ae57ebdba5..501b9fff3b 100644
> --- a/drivers/common/mlx5/rte_common_mlx5_version.map
> +++ b/drivers/common/mlx5/rte_common_mlx5_version.map
> @@ -6,6 +6,8 @@ INTERNAL {
>  	mlx5_common_verbs_reg_mr;
>  	mlx5_common_verbs_dereg_mr;
> 
> +	mlx5_cpu_is_haswell_broadwell;
> +
>  	mlx5_create_mr_ext;
> 
>  	mlx5_dev_to_pci_addr;
> diff --git a/drivers/net/mlx5/mlx5_flow_dv.c
> b/drivers/net/mlx5/mlx5_flow_dv.c index 8b5b6838fa..f1109ae095 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -4201,7 +4201,7 @@ flow_dv_create_counter_stat_mem_mng(struct
> rte_eth_dev *dev, int raws_n)
>  	mkey_attr.klm_num = 0;
>  	if (priv->config.hca_attr.relaxed_ordering_write &&
>  		priv->config.hca_attr.relaxed_ordering_read  &&
> -		!haswell_broadwell_cpu)
> +		!mlx5_cpu_is_haswell_broadwell())
>  		mkey_attr.relaxed_ordering = 1;
>  	mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx,
> &mkey_attr);
>  	if (!mem_mng->dm) {
> --
> 2.27.0
^ permalink raw reply	[flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH] common/mlx5: fix CPU detection for PCI relaxed ordering
  2020-07-19 10:56 ` Matan Azrad
@ 2020-07-19 11:13   ` Thomas Monjalon
  2020-07-19 11:41     ` Matan Azrad
  0 siblings, 1 reply; 10+ messages in thread
From: Thomas Monjalon @ 2020-07-19 11:13 UTC (permalink / raw)
  To: Matan Azrad
  Cc: dev, Raslan Darawsheh, Shiri Kuzin, stable, Shahaf Shuler,
	Slava Ovsiienko, Ray Kinsella, Neil Horman
19/07/2020 12:56, Matan Azrad:
> 
> From: Thomas Monjalon
> > The detection of the CPU was done in a constructor and shared in a global
> > variable.
> > 
> > This variable may not be visible in the net PMD because it was not exported
> > as part of the .map file.
> 
> Can you explain exactly when it is not visible?
I depends on linker options.
> > It is fixed by exporting a function, which is cleaner than a variable.
> 
> Can you explain why?
> We have classic example - rte_eth_devices.
There is more control and more abstraction in functions,
it can provide futre-proof abstraction.
We should not export variables at all,
it is a basic rule of writing API.
Having a bad example in ethdev doesn't mean we should follow it.
> > By checking the CPU only at the first call of the function, doing the check in a
> > constructor becomes useless.
> 
> Yes, but why not to do it in constructor? this variable is initialized only once and doesn't depend in any parameter.
Constructor must remain minimal.
If constructor can be avoided, it must be.
This is a golden rule.
> > Note: the priority of the constructor was probably irrelevant.
No comment about the constructor priority which was set as LOG
for no good reason, proving that this code was not well reviewed?
> > At the same time, the comments are reworded or dropped if useless.
> > 
> > Fixes: 4c204fe5e5d2 ("common/mlx5: disable relaxed ordering in unsuitable
> > CPUs")
> > Cc: shirik@mellanox.com
> > Cc: stable@dpdk.org
> > 
> > Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
^ permalink raw reply	[flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH] common/mlx5: fix CPU detection for PCI relaxed ordering
  2020-07-19 11:13   ` Thomas Monjalon
@ 2020-07-19 11:41     ` Matan Azrad
  2020-07-19 13:33       ` Thomas Monjalon
  0 siblings, 1 reply; 10+ messages in thread
From: Matan Azrad @ 2020-07-19 11:41 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: dev, Raslan Darawsheh, Shiri Kuzin, stable, Shahaf Shuler,
	Slava Ovsiienko, Ray Kinsella, Neil Horman
From: Thomas Monjalon:
> 19/07/2020 12:56, Matan Azrad:
> >
> > From: Thomas Monjalon
> > > The detection of the CPU was done in a constructor and shared in a
> > > global variable.
> > >
> > > This variable may not be visible in the net PMD because it was not
> > > exported as part of the .map file.
> >
> > Can you explain exactly when it is not visible?
> 
> I depends on linker options.
> 
> > > It is fixed by exporting a function, which is cleaner than a variable.
> >
> > Can you explain why?
> > We have classic example - rte_eth_devices.
> 
> There is more control and more abstraction in functions, it can provide futre-
> proof abstraction.
Also variable have more abstraction - struct.
In future, if it will be needed, we can change it.
> We should not export variables at all,
> it is a basic rule of writing API.
It is variable which is depended only in the running CPU - almost like compile time condition,
so it is not regular case.
I think it makes sense also to use a singleton variable as internal API.
> Having a bad example in ethdev doesn't mean we should follow it.
If ethdev rte_eth_devices is bad API, Are you going to change it?
> > > By checking the CPU only at the first call of the function, doing
> > > the check in a constructor becomes useless.
> >
> > Yes, but why not to do it in constructor? this variable is initialized only once
> and doesn't depend in any parameter.
> 
> Constructor must remain minimal.
> If constructor can be avoided, it must be.
> This is a golden rule.
The cpu detection is a fast code.
Using constructor here makes sense:
1. we need only one initialization for all the program.
2. no need to take care of multithreading on the single initialization (are your code thread safe?).
3. no parameters are required.
> 
> > > Note: the priority of the constructor was probably irrelevant.
> 
> No comment about the constructor priority which was set as LOG for no good
> reason, proving that this code was not well reviewed?
I guess  you mean that comment is missing - you right.
We want to be sure that the variable is ready before any usage of it in the drivers (even in driver contractors).
> 
> > > At the same time, the comments are reworded or dropped if useless.
> > >
> > > Fixes: 4c204fe5e5d2 ("common/mlx5: disable relaxed ordering in
> > > unsuitable
> > > CPUs")
> > > Cc: shirik@mellanox.com
> > > Cc: stable@dpdk.org
> > >
> > > Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> 
> 
^ permalink raw reply	[flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH] common/mlx5: fix CPU detection for PCI relaxed ordering
  2020-07-19 11:41     ` Matan Azrad
@ 2020-07-19 13:33       ` Thomas Monjalon
  2020-07-24 15:43         ` Matan Azrad
  0 siblings, 1 reply; 10+ messages in thread
From: Thomas Monjalon @ 2020-07-19 13:33 UTC (permalink / raw)
  To: Matan Azrad
  Cc: dev, Raslan Darawsheh, Shiri Kuzin, stable, Shahaf Shuler,
	Slava Ovsiienko, Ray Kinsella, Neil Horman
19/07/2020 13:41, Matan Azrad:
> 
> From: Thomas Monjalon:
> > 19/07/2020 12:56, Matan Azrad:
> > >
> > > From: Thomas Monjalon
> > > > The detection of the CPU was done in a constructor and shared in a
> > > > global variable.
> > > >
> > > > This variable may not be visible in the net PMD because it was not
> > > > exported as part of the .map file.
> > >
> > > Can you explain exactly when it is not visible?
> > 
> > I depends on linker options.
> > 
> > > > It is fixed by exporting a function, which is cleaner than a variable.
> > >
> > > Can you explain why?
> > > We have classic example - rte_eth_devices.
> > 
> > There is more control and more abstraction in functions, it can provide futre-
> > proof abstraction.
> 
> Also variable have more abstraction - struct.
> In future, if it will be needed, we can change it.
> 
> > We should not export variables at all,
> > it is a basic rule of writing API.
> 
> It is variable which is depended only in the running CPU - almost like compile time condition,
> so it is not regular case.
> I think it makes sense also to use a singleton variable as internal API.
> 
> > Having a bad example in ethdev doesn't mean we should follow it.
> 
> If ethdev rte_eth_devices is bad API, Are you going to change it?
No, we avoid changing API.
> > > > By checking the CPU only at the first call of the function, doing
> > > > the check in a constructor becomes useless.
> > >
> > > Yes, but why not to do it in constructor? this variable is initialized only once
> > and doesn't depend in any parameter.
> > 
> > Constructor must remain minimal.
> > If constructor can be avoided, it must be.
> > This is a golden rule.
> 
> The cpu detection is a fast code.
> 
> Using constructor here makes sense:
> 1. we need only one initialization for all the program.
> 2. no need to take care of multithreading on the single initialization (are your code thread safe?).
I don't see what could be the issue.
> 3. no parameters are required.
> 
> > > > Note: the priority of the constructor was probably irrelevant.
> > 
> > No comment about the constructor priority which was set as LOG for no good
> > reason, proving that this code was not well reviewed?
> 
> I guess  you mean that comment is missing - you right.
No I mean this constructor is declared with LOG priority,
but it is not doing any log registration.
> We want to be sure that the variable is ready before any usage of it in the drivers (even in driver contractors).
It is not used by other constructors.
And avoiding constructor dependencies is exactly why we avoid using constructors at all.
> > > > At the same time, the comments are reworded or dropped if useless.
> > > >
> > > > Fixes: 4c204fe5e5d2 ("common/mlx5: disable relaxed ordering in
> > > > unsuitable
> > > > CPUs")
> > > > Cc: shirik@mellanox.com
> > > > Cc: stable@dpdk.org
> > > >
> > > > Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
^ permalink raw reply	[flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH] common/mlx5: fix CPU detection for PCI relaxed ordering
  2020-07-19 10:07 [dpdk-dev] [PATCH] common/mlx5: fix CPU detection for PCI relaxed ordering Thomas Monjalon
  2020-07-19 10:11 ` Slava Ovsiienko
  2020-07-19 10:56 ` Matan Azrad
@ 2020-07-24 14:53 ` David Marchand
  2 siblings, 0 replies; 10+ messages in thread
From: David Marchand @ 2020-07-24 14:53 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: dev, Raslan, shirik, dpdk stable, Matan Azrad, Shahaf Shuler,
	Viacheslav Ovsiienko, Ray Kinsella, Neil Horman
On Sun, Jul 19, 2020 at 12:09 PM Thomas Monjalon <thomas@monjalon.net> wrote:
>
> The detection of the CPU was done in a constructor and shared
> in a global variable.
>
> This variable may not be visible in the net PMD
> because it was not exported as part of the .map file.
> It is fixed by exporting a function, which is cleaner than a variable.
>
> By checking the CPU only at the first call of the function,
> doing the check in a constructor becomes useless.
> Note: the priority of the constructor was probably irrelevant.
>
> At the same time, the comments are reworded or dropped if useless.
>
> Fixes: 4c204fe5e5d2 ("common/mlx5: disable relaxed ordering in unsuitable CPUs")
> Cc: shirik@mellanox.com
> Cc: stable@dpdk.org
>
> Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> ---
>  drivers/common/mlx5/linux/mlx5_common_verbs.c |  2 +-
>  drivers/common/mlx5/mlx5_common.c             | 53 ++++++++-----------
>  drivers/common/mlx5/mlx5_common.h             |  4 +-
>  .../common/mlx5/rte_common_mlx5_version.map   |  2 +
>  drivers/net/mlx5/mlx5_flow_dv.c               |  2 +-
>  5 files changed, 28 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/common/mlx5/linux/mlx5_common_verbs.c b/drivers/common/mlx5/linux/mlx5_common_verbs.c
> index a2fc7a36bd..31ac20fe09 100644
> --- a/drivers/common/mlx5/linux/mlx5_common_verbs.c
> +++ b/drivers/common/mlx5/linux/mlx5_common_verbs.c
> @@ -55,7 +55,7 @@ mlx5_common_verbs_reg_mr(void *pd, void *addr, size_t length,
>         memset(pmd_mr, 0, sizeof(*pmd_mr));
>         ibv_mr = mlx5_glue->reg_mr(pd, addr, length,
>                                    IBV_ACCESS_LOCAL_WRITE |
> -                                  (haswell_broadwell_cpu ? 0 :
> +                                  (mlx5_cpu_is_haswell_broadwell() ? 0 :
>                                    IBV_ACCESS_RELAXED_ORDERING));
>         if (!ibv_mr)
>                 return -1;
> diff --git a/drivers/common/mlx5/mlx5_common.c b/drivers/common/mlx5/mlx5_common.c
> index 693e2c68c8..7232d5131d 100644
> --- a/drivers/common/mlx5/mlx5_common.c
> +++ b/drivers/common/mlx5/mlx5_common.c
> @@ -20,8 +20,6 @@ int mlx5_common_logtype;
>  const struct mlx5_glue *mlx5_glue;
>  #endif
>
> -uint8_t haswell_broadwell_cpu;
> -
>  static int
>  mlx5_class_check_handler(__rte_unused const char *key, const char *value,
>                          void *opaque)
> @@ -59,19 +57,8 @@ mlx5_class_get(struct rte_devargs *devargs)
>  }
>
>
> -/* In case this is an x86_64 intel processor to check if
> - * we should use relaxed ordering.
> - */
>  #ifdef RTE_ARCH_X86_64
> -/**
> - * This function returns processor identification and feature information
> - * into the registers.
> - *
> - * @param eax, ebx, ecx, edx
> - *             Pointers to the registers that will hold cpu information.
> - * @param level
> - *             The main category of information returned.
> - */
> +/* Processor identification and feature information filled in registers. */
Nit, no need for inline.
>  static inline void mlx5_cpu_id(unsigned int level,
>                                 unsigned int *eax, unsigned int *ebx,
>                                 unsigned int *ecx, unsigned int *edx)
> @@ -97,17 +84,7 @@ RTE_INIT_PRIO(mlx5_glue_init, CLASS)
>         mlx5_glue_constructor();
>  }
>
> -/**
> - * This function is responsible of initializing the variable
> - *  haswell_broadwell_cpu by checking if the cpu is intel
> - *  and reading the data returned from mlx5_cpu_id().
> - *  since haswell and broadwell cpus don't have improved performance
> - *  when using relaxed ordering we want to check the cpu type before
> - *  before deciding whether to enable RO or not.
> - *  if the cpu is haswell or broadwell the variable will be set to 1
> - *  otherwise it will be 0.
> - */
> -RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
> +static bool mlx5_x86_is_haswell_broadwell(void)
>  {
>  #ifdef RTE_ARCH_X86_64
>         unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56};
> @@ -125,8 +102,7 @@ RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
>         vendor = ebx;
>         max_level = eax;
>         if (max_level < 1) {
> -               haswell_broadwell_cpu = 0;
> -               return;
> +               return false;
>         }
>         mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx);
>         model = (eax >> 4) & 0x0f;
> @@ -140,18 +116,31 @@ RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
>                 if (brand_id == 0 && family == 0x6) {
>                         for (i = 0; i < RTE_DIM(broadwell_models); i++)
>                                 if (model == broadwell_models[i]) {
> -                                       haswell_broadwell_cpu = 1;
> -                                       return;
> +                                       return true;
>                                 }
>                         for (i = 0; i < RTE_DIM(haswell_models); i++)
>                                 if (model == haswell_models[i]) {
> -                                       haswell_broadwell_cpu = 1;
> -                                       return;
> +                                       return true;
>                                 }
>                 }
>         }
>  #endif
> -       haswell_broadwell_cpu = 0;
> +       return false;
> +}
> +
> +/*
> + * Check if the CPU is Intel Haswell or Broadwell,
> + * because PCI relaxed ordering has no performance benefit with these CPUs.
> + */
> +bool mlx5_cpu_is_haswell_broadwell(void)
> +{
> +       static bool haswell_broadwell_cpu;
> +       static bool once = false;
> +
> +       if (once)
> +               return haswell_broadwell_cpu;
> +       once = true;
> +       return haswell_broadwell_cpu = mlx5_x86_is_haswell_broadwell();
>  }
>
>  /**
> diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h
> index 2851507058..d453e0b3d8 100644
> --- a/drivers/common/mlx5/mlx5_common.h
> +++ b/drivers/common/mlx5/mlx5_common.h
> @@ -243,6 +243,9 @@ struct mlx5_klm {
>
>  LIST_HEAD(mlx5_dbr_page_list, mlx5_devx_dbr_page);
>
> +__rte_internal
> +bool mlx5_cpu_is_haswell_broadwell(void);
> +
>  __rte_internal
>  enum mlx5_class mlx5_class_get(struct rte_devargs *devargs);
>  __rte_internal
> @@ -255,6 +258,5 @@ int64_t mlx5_get_dbr(void *ctx,  struct mlx5_dbr_page_list *head,
>  __rte_internal
>  int32_t mlx5_release_dbr(struct mlx5_dbr_page_list *head, uint32_t umem_id,
>                          uint64_t offset);
> -extern uint8_t haswell_broadwell_cpu;
>
>  #endif /* RTE_PMD_MLX5_COMMON_H_ */
> diff --git a/drivers/common/mlx5/rte_common_mlx5_version.map b/drivers/common/mlx5/rte_common_mlx5_version.map
> index ae57ebdba5..501b9fff3b 100644
> --- a/drivers/common/mlx5/rte_common_mlx5_version.map
> +++ b/drivers/common/mlx5/rte_common_mlx5_version.map
> @@ -6,6 +6,8 @@ INTERNAL {
>         mlx5_common_verbs_reg_mr;
>         mlx5_common_verbs_dereg_mr;
>
> +       mlx5_cpu_is_haswell_broadwell;
> +
>         mlx5_create_mr_ext;
>
>         mlx5_dev_to_pci_addr;
> diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
> index 8b5b6838fa..f1109ae095 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -4201,7 +4201,7 @@ flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
>         mkey_attr.klm_num = 0;
>         if (priv->config.hca_attr.relaxed_ordering_write &&
>                 priv->config.hca_attr.relaxed_ordering_read  &&
> -               !haswell_broadwell_cpu)
> +               !mlx5_cpu_is_haswell_broadwell())
>                 mkey_attr.relaxed_ordering = 1;
>         mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
>         if (!mem_mng->dm) {
> --
> 2.27.0
>
I ended up on this constructor hack while looking at the common code.
This patch makes sense.
Reviewed-by: David Marchand <david.marchand@redhat.com>
-- 
David Marchand
^ permalink raw reply	[flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH] common/mlx5: fix CPU detection for PCI relaxed ordering
  2020-07-19 13:33       ` Thomas Monjalon
@ 2020-07-24 15:43         ` Matan Azrad
  2020-07-28 10:21           ` Thomas Monjalon
  0 siblings, 1 reply; 10+ messages in thread
From: Matan Azrad @ 2020-07-24 15:43 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: dev, Raslan Darawsheh, Shiri Kuzin, stable, Shahaf Shuler,
	Slava Ovsiienko, Ray Kinsella, Neil Horman
Hi Thomas
From: Thomas Monjalon:
> 19/07/2020 13:41, Matan Azrad:
> >
> > From: Thomas Monjalon:
> > > 19/07/2020 12:56, Matan Azrad:
> > > >
> > > > From: Thomas Monjalon
> > > > > The detection of the CPU was done in a constructor and shared in
> > > > > a global variable.
> > > > >
> > > > > This variable may not be visible in the net PMD because it was
> > > > > not exported as part of the .map file.
> > > >
> > > > Can you explain exactly when it is not visible?
> > >
> > > I depends on linker options.
It will be good to add here the compiling command which failed for you...
> > > > > It is fixed by exporting a function, which is cleaner than a variable.
> > > >
> > > > Can you explain why?
> > > > We have classic example - rte_eth_devices.
> > >
> > > There is more control and more abstraction in functions, it can
> > > provide futre- proof abstraction.
> >
> > Also variable have more abstraction - struct.
> > In future, if it will be needed, we can change it.
> >
> > > We should not export variables at all, it is a basic rule of writing
> > > API.
> > It is variable which is depended only in the running CPU - almost like
> > compile time condition, so it is not regular case.
> > I think it makes sense also to use a singleton variable as internal API.
> >
> > > Having a bad example in ethdev doesn't mean we should follow it.
> >
> > If ethdev rte_eth_devices is bad API, Are you going to change it?
> 
> No, we avoid changing API.
> 
It is internal API, I don't understand your concern...
> > > > > By checking the CPU only at the first call of the function,
> > > > > doing the check in a constructor becomes useless.
> > > >
> > > > Yes, but why not to do it in constructor? this variable is
> > > > initialized only once
> > > and doesn't depend in any parameter.
> > >
> > > Constructor must remain minimal.
> > > If constructor can be avoided, it must be.
> > > This is a golden rule.
> >
> > The cpu detection is a fast code.
> >
> > Using constructor here makes sense:
> > 1. we need only one initialization for all the program.
> > 2. no need to take care of multithreading on the single initialization (are
> your code thread safe?).
> 
> I don't see what could be the issue.
2 mlx5 devices running configuration from 2 different threads.
The first MR from each one of the devices can be created at the same time.
The ask for the relaxed ordering cpu can be happened at the same time.  
> > 3. no parameters are required.
> >
> > > > > Note: the priority of the constructor was probably irrelevant.
> > >
> > > No comment about the constructor priority which was set as LOG for
> > > no good reason, proving that this code was not well reviewed?
> >
> > I guess  you mean that comment is missing - you right.
> 
> No I mean this constructor is declared with LOG priority, but it is not doing
> any log registration.
>
I hope you understand the motivation for higher priority,
The LOG is just the one above.
 
> > We want to be sure that the variable is ready before any usage of it in the
> drivers (even in driver contractors).
> 
> It is not used by other constructors.
> And avoiding constructor dependencies is exactly why we avoid using
> constructors at all.
Yes, It is for future, because it makes sense the cpu detection query will be done at initialization time.
Now, when I understand the community relevant guys don't like priorities(also I didn't convinced on the reasons), I think you can call it from common init function because it is the first call of mlx5 constructors.
 
> > > > > At the same time, the comments are reworded or dropped if useless.
> > > > >
> > > > > Fixes: 4c204fe5e5d2 ("common/mlx5: disable relaxed ordering in
> > > > > unsuitable
> > > > > CPUs")
> > > > > Cc: shirik@mellanox.com
> > > > > Cc: stable@dpdk.org
> > > > >
> > > > > Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
 
 We need to fix the race issue introduced by this patch.
My favor is to call it from constructor.
In any case we should validate the patch on ARM and Intel cores...
The relaxes ordering original patch had a lot of testing before applying,
Need to be sure we don't break nothing.
Matan
^ permalink raw reply	[flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH] common/mlx5: fix CPU detection for PCI relaxed ordering
  2020-07-24 15:43         ` Matan Azrad
@ 2020-07-28 10:21           ` Thomas Monjalon
  2020-07-28 10:38             ` Matan Azrad
  0 siblings, 1 reply; 10+ messages in thread
From: Thomas Monjalon @ 2020-07-28 10:21 UTC (permalink / raw)
  To: Matan Azrad
  Cc: dev, Raslan Darawsheh, Shiri Kuzin, stable, Shahaf Shuler,
	Slava Ovsiienko, Ray Kinsella, Neil Horman
24/07/2020 17:43, Matan Azrad:
> Hi Thomas
> 
> From: Thomas Monjalon:
> > 19/07/2020 13:41, Matan Azrad:
> > >
> > > From: Thomas Monjalon:
> > > > 19/07/2020 12:56, Matan Azrad:
> > > > >
> > > > > From: Thomas Monjalon
> > > > > > The detection of the CPU was done in a constructor and shared in
> > > > > > a global variable.
> > > > > >
> > > > > > This variable may not be visible in the net PMD because it was
> > > > > > not exported as part of the .map file.
> > > > >
> > > > > Can you explain exactly when it is not visible?
> > > >
> > > > I depends on linker options.
> 
> It will be good to add here the compiling command which failed for you...
> 
> > > > > > It is fixed by exporting a function, which is cleaner than a variable.
> > > > >
> > > > > Can you explain why?
> > > > > We have classic example - rte_eth_devices.
> > > >
> > > > There is more control and more abstraction in functions, it can
> > > > provide futre- proof abstraction.
> > >
> > > Also variable have more abstraction - struct.
> > > In future, if it will be needed, we can change it.
> > >
> > > > We should not export variables at all, it is a basic rule of writing
> > > > API.
> 
> > > It is variable which is depended only in the running CPU - almost like
> > > compile time condition, so it is not regular case.
> > > I think it makes sense also to use a singleton variable as internal API.
> > >
> > > > Having a bad example in ethdev doesn't mean we should follow it.
> > >
> > > If ethdev rte_eth_devices is bad API, Are you going to change it?
> > 
> > No, we avoid changing API.
> > 
> 
> It is internal API, I don't understand your concern...
> 
> > > > > > By checking the CPU only at the first call of the function,
> > > > > > doing the check in a constructor becomes useless.
> > > > >
> > > > > Yes, but why not to do it in constructor? this variable is
> > > > > initialized only once
> > > > and doesn't depend in any parameter.
> > > >
> > > > Constructor must remain minimal.
> > > > If constructor can be avoided, it must be.
> > > > This is a golden rule.
> > >
> > > The cpu detection is a fast code.
> > >
> > > Using constructor here makes sense:
> > > 1. we need only one initialization for all the program.
> > > 2. no need to take care of multithreading on the single initialization (are
> > your code thread safe?).
> > 
> > I don't see what could be the issue.
> 
> 2 mlx5 devices running configuration from 2 different threads.
> The first MR from each one of the devices can be created at the same time.
> The ask for the relaxed ordering cpu can be happened at the same time.  
DPDK configuration is not thread safe.
Do you know any DPDK application configuring devices
in 2 different threads?
> > > 3. no parameters are required.
> > >
> > > > > > Note: the priority of the constructor was probably irrelevant.
> > > >
> > > > No comment about the constructor priority which was set as LOG for
> > > > no good reason, proving that this code was not well reviewed?
> > >
> > > I guess  you mean that comment is missing - you right.
> > 
> > No I mean this constructor is declared with LOG priority, but it is not doing
> > any log registration.
> >
> I hope you understand the motivation for higher priority,
> The LOG is just the one above.
>  
> > > We want to be sure that the variable is ready before any usage of it in the
> > drivers (even in driver contractors).
> > 
> > It is not used by other constructors.
> > And avoiding constructor dependencies is exactly why we avoid using
> > constructors at all.
> 
> Yes, It is for future, because it makes sense the cpu detection query will be done at initialization time.
> 
> Now, when I understand the community relevant guys don't like priorities(also I didn't convinced on the reasons), I think you can call it from common init function because it is the first call of mlx5 constructors.
We want to avoid priorities, and more importantly,
we want to avoid having too much code in constructors.
>  We need to fix the race issue introduced by this patch.
> My favor is to call it from constructor.
Initialization and configuration is supposed to be done by a single thread.
There should not have any race condition.
^ permalink raw reply	[flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH] common/mlx5: fix CPU detection for PCI relaxed ordering
  2020-07-28 10:21           ` Thomas Monjalon
@ 2020-07-28 10:38             ` Matan Azrad
  0 siblings, 0 replies; 10+ messages in thread
From: Matan Azrad @ 2020-07-28 10:38 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: dev, Raslan Darawsheh, Shiri Kuzin, stable, Shahaf Shuler,
	Slava Ovsiienko, Ray Kinsella, Neil Horman
Hi Thomas
From: Thomas Monjalon:
> 24/07/2020 17:43, Matan Azrad:
> > Hi Thomas
> >
> > From: Thomas Monjalon:
> > > 19/07/2020 13:41, Matan Azrad:
> > > >
> > > > From: Thomas Monjalon:
> > > > > 19/07/2020 12:56, Matan Azrad:
> > > > > >
> > > > > > From: Thomas Monjalon
> > > > > > > The detection of the CPU was done in a constructor and
> > > > > > > shared in a global variable.
> > > > > > >
> > > > > > > This variable may not be visible in the net PMD because it
> > > > > > > was not exported as part of the .map file.
> > > > > >
> > > > > > Can you explain exactly when it is not visible?
> > > > >
> > > > > I depends on linker options.
> >
> > It will be good to add here the compiling command which failed for you...
?
> > > > > > > It is fixed by exporting a function, which is cleaner than a variable.
> > > > > >
> > > > > > Can you explain why?
> > > > > > We have classic example - rte_eth_devices.
> > > > >
> > > > > There is more control and more abstraction in functions, it can
> > > > > provide futre- proof abstraction.
> > > >
> > > > Also variable have more abstraction - struct.
> > > > In future, if it will be needed, we can change it.
> > > >
> > > > > We should not export variables at all, it is a basic rule of
> > > > > writing API.
> >
> > > > It is variable which is depended only in the running CPU - almost
> > > > like compile time condition, so it is not regular case.
> > > > I think it makes sense also to use a singleton variable as internal API.
> > > >
> > > > > Having a bad example in ethdev doesn't mean we should follow it.
> > > >
> > > > If ethdev rte_eth_devices is bad API, Are you going to change it?
> > >
> > > No, we avoid changing API.
> > >
> >
> > It is internal API, I don't understand your concern...
> >
> > > > > > > By checking the CPU only at the first call of the function,
> > > > > > > doing the check in a constructor becomes useless.
> > > > > >
> > > > > > Yes, but why not to do it in constructor? this variable is
> > > > > > initialized only once
> > > > > and doesn't depend in any parameter.
> > > > >
> > > > > Constructor must remain minimal.
> > > > > If constructor can be avoided, it must be.
> > > > > This is a golden rule.
> > > >
> > > > The cpu detection is a fast code.
> > > >
> > > > Using constructor here makes sense:
> > > > 1. we need only one initialization for all the program.
> > > > 2. no need to take care of multithreading on the single
> > > > initialization (are
> > > your code thread safe?).
> > >
> > > I don't see what could be the issue.
> >
> > 2 mlx5 devices running configuration from 2 different threads.
> > The first MR from each one of the devices can be created at the same time.
> > The ask for the relaxed ordering cpu can be happened at the same time.
> 
> DPDK configuration is not thread safe.
> Do you know any DPDK application configuring devices in 2 different threads?
A lot of effort were done in our driver and in other areas in dpdk in order to protect this scenario, we need to follow.
One case is failsafe, configuration can be done from either host thread or the caller thread.
Also, we may configure MR from data-path cores.
 
> > > > 3. no parameters are required.
> > > >
> > > > > > > Note: the priority of the constructor was probably irrelevant.
> > > > >
> > > > > No comment about the constructor priority which was set as LOG
> > > > > for no good reason, proving that this code was not well reviewed?
> > > >
> > > > I guess  you mean that comment is missing - you right.
> > >
> > > No I mean this constructor is declared with LOG priority, but it is
> > > not doing any log registration.
> > >
> > I hope you understand the motivation for higher priority, The LOG is
> > just the one above.
> >
> > > > We want to be sure that the variable is ready before any usage of
> > > > it in the
> > > drivers (even in driver contractors).
> > >
> > > It is not used by other constructors.
> > > And avoiding constructor dependencies is exactly why we avoid using
> > > constructors at all.
> >
> > Yes, It is for future, because it makes sense the cpu detection query will be
> done at initialization time.
> >
> > Now, when I understand the community relevant guys don't like
> priorities(also I didn't convinced on the reasons), I think you can call it from
> common init function because it is the first call of mlx5 constructors.
> 
> We want to avoid priorities, and more importantly, we want to avoid having
> too much code in constructors.
The code here is not too much and as I said it makes sense to do this prior code in constructor.
 
> >  We need to fix the race issue introduced by this patch.
> > My favor is to call it from constructor.
> 
> Initialization and configuration is supposed to be done by a single thread.
> There should not have any race condition.
So, please explain all the dpdk effort to protect it....
^ permalink raw reply	[flat|nested] 10+ messages in thread
end of thread, other threads:[~2020-07-28 10:38 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-19 10:07 [dpdk-dev] [PATCH] common/mlx5: fix CPU detection for PCI relaxed ordering Thomas Monjalon
2020-07-19 10:11 ` Slava Ovsiienko
2020-07-19 10:56 ` Matan Azrad
2020-07-19 11:13   ` Thomas Monjalon
2020-07-19 11:41     ` Matan Azrad
2020-07-19 13:33       ` Thomas Monjalon
2020-07-24 15:43         ` Matan Azrad
2020-07-28 10:21           ` Thomas Monjalon
2020-07-28 10:38             ` Matan Azrad
2020-07-24 14:53 ` David Marchand
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).