* [PATCH 2/2] common/mlx5: update log format after devx_obj_create error
2022-06-08 11:58 [PATCH 1/2] common/mlx5: update log format after devx_general_cmd error Gregory Etelson
@ 2022-06-08 11:58 ` Gregory Etelson
2022-06-14 15:02 ` [PATCH 1/2] common/mlx5: update log format after devx_general_cmd error Raslan Darawsheh
2022-11-08 12:46 ` David Marchand
2 siblings, 0 replies; 5+ messages in thread
From: Gregory Etelson @ 2022-06-08 11:58 UTC (permalink / raw)
To: dev, getelson; +Cc: rasland, Matan Azrad, Viacheslav Ovsiienko
Application can fetch syndrome value after FW operation failure
starting from Mellanox OFED-5.6.
The patch updates log data after devx_obj_create error.
Signed-off-by: Gregory Etelson <getelson@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
drivers/common/mlx5/mlx5_devx_cmds.c | 77 ++++++++++------------------
1 file changed, 26 insertions(+), 51 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index bc06aeccc7..d4220a863b 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -199,8 +199,7 @@ mlx5_devx_cmd_flow_counter_alloc(void *ctx, uint32_t bulk_n_128)
dcs->obj = mlx5_glue->devx_obj_create(ctx, in,
sizeof(in), out, sizeof(out));
if (!dcs->obj) {
- DRV_LOG(ERR, "Can't allocate counters - error %d", errno);
- rte_errno = errno;
+ mlx5_devx_err_log(out, "allocate counters", NULL, 0);
mlx5_free(dcs);
return NULL;
}
@@ -378,9 +377,9 @@ mlx5_devx_cmd_mkey_create(void *ctx,
mkey->obj = mlx5_glue->devx_obj_create(ctx, in, in_size_dw * 4, out,
sizeof(out));
if (!mkey->obj) {
- DRV_LOG(ERR, "Can't create %sdirect mkey - error %d",
- klm_num ? "an in" : "a ", errno);
- rte_errno = errno;
+ mlx5_devx_err_log(out,
+ klm_num ? "create indirect mkey" : "create direct key",
+ NULL, 0);
mlx5_free(mkey);
return NULL;
}
@@ -709,9 +708,7 @@ mlx5_devx_cmd_create_flex_parser(void *ctx,
parse_flex_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (!parse_flex_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create FLEX PARSE GRAPH object "
- "by using DevX.");
+ mlx5_devx_err_log(out, "create FLEX PARSE GRAPH", NULL, 0);
mlx5_free(parse_flex_obj);
return NULL;
}
@@ -1283,8 +1280,7 @@ mlx5_devx_cmd_create_rq(void *ctx,
rq->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (!rq->obj) {
- DRV_LOG(ERR, "Failed to create RQ using DevX");
- rte_errno = errno;
+ mlx5_devx_err_log(out, "create RQ", NULL, 0);
mlx5_free(rq);
return NULL;
}
@@ -1383,8 +1379,7 @@ mlx5_devx_cmd_create_rmp(void *ctx,
rmp->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in), out,
sizeof(out));
if (!rmp->obj) {
- DRV_LOG(ERR, "Failed to create RMP using DevX");
- rte_errno = errno;
+ mlx5_devx_err_log(out, "create RMP", NULL, 0);
mlx5_free(rmp);
return NULL;
}
@@ -1452,8 +1447,7 @@ mlx5_devx_cmd_create_tir(void *ctx,
tir->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (!tir->obj) {
- DRV_LOG(ERR, "Failed to create TIR using DevX");
- rte_errno = errno;
+ mlx5_devx_err_log(out, "create TIR", NULL, 0);
mlx5_free(tir);
return NULL;
}
@@ -1591,8 +1585,7 @@ mlx5_devx_cmd_create_rqt(void *ctx,
rqt->obj = mlx5_glue->devx_obj_create(ctx, in, inlen, out, sizeof(out));
mlx5_free(in);
if (!rqt->obj) {
- DRV_LOG(ERR, "Failed to create RQT using DevX");
- rte_errno = errno;
+ mlx5_devx_err_log(out, "create RQT", NULL, 0);
mlx5_free(rqt);
return NULL;
}
@@ -1706,8 +1699,7 @@ mlx5_devx_cmd_create_sq(void *ctx,
sq->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (!sq->obj) {
- DRV_LOG(ERR, "Failed to create SQ using DevX");
- rte_errno = errno;
+ mlx5_devx_err_log(out, "create SQ", NULL, 0);
mlx5_free(sq);
return NULL;
}
@@ -1790,8 +1782,7 @@ mlx5_devx_cmd_create_tis(void *ctx,
tis->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (!tis->obj) {
- DRV_LOG(ERR, "Failed to create TIS using DevX");
- rte_errno = errno;
+ mlx5_devx_err_log(out, "create TIS", NULL, 0);
mlx5_free(tis);
return NULL;
}
@@ -1825,8 +1816,7 @@ mlx5_devx_cmd_create_td(void *ctx)
td->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (!td->obj) {
- DRV_LOG(ERR, "Failed to create TIS using DevX");
- rte_errno = errno;
+ mlx5_devx_err_log(out, "create TIS", NULL, 0);
mlx5_free(td);
return NULL;
}
@@ -1946,8 +1936,7 @@ mlx5_devx_cmd_create_cq(void *ctx, struct mlx5_devx_cq_attr *attr)
cq_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in), out,
sizeof(out));
if (!cq_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create CQ using DevX errno=%d.", errno);
+ mlx5_devx_err_log(out, "create CQ", NULL, 0);
mlx5_free(cq_obj);
return NULL;
}
@@ -2023,8 +2012,7 @@ mlx5_devx_cmd_create_virtq(void *ctx,
virtq_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in), out,
sizeof(out));
if (!virtq_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create VIRTQ Obj using DevX.");
+ mlx5_devx_err_log(out, "create VIRTQ", NULL, 0);
mlx5_free(virtq_obj);
return NULL;
}
@@ -2218,8 +2206,7 @@ mlx5_devx_cmd_create_qp(void *ctx,
qp_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in), out,
sizeof(out));
if (!qp_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create QP Obj using DevX.");
+ mlx5_devx_err_log(out, "create QP", NULL, 0);
mlx5_free(qp_obj);
return NULL;
}
@@ -2333,9 +2320,8 @@ mlx5_devx_cmd_create_virtio_q_counters(void *ctx)
couners_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in), out,
sizeof(out));
if (!couners_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create virtio queue counters Obj using"
- " DevX.");
+ mlx5_devx_err_log(out, "create virtio queue counters Obj",
+ NULL, 0);
mlx5_free(couners_obj);
return NULL;
}
@@ -2417,8 +2403,7 @@ mlx5_devx_cmd_create_flow_hit_aso_obj(void *ctx, uint32_t pd)
flow_hit_aso_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (!flow_hit_aso_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create FLOW_HIT_ASO obj using DevX.");
+ mlx5_devx_err_log(out, "create FLOW_HIT_ASO", NULL, 0);
mlx5_free(flow_hit_aso_obj);
return NULL;
}
@@ -2505,8 +2490,7 @@ mlx5_devx_cmd_create_flow_meter_aso_obj(void *ctx, uint32_t pd,
ctx, in, sizeof(in),
out, sizeof(out));
if (!flow_meter_aso_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create FLOW_METER_ASO obj using DevX.");
+ mlx5_devx_err_log(out, "create FLOW_METTER_ASO", NULL, 0);
mlx5_free(flow_meter_aso_obj);
return NULL;
}
@@ -2556,8 +2540,7 @@ mlx5_devx_cmd_create_conn_track_offload_obj(void *ctx, uint32_t pd,
ct_aso_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (!ct_aso_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create CONN_TRACK_OFFLOAD obj by using DevX.");
+ mlx5_devx_err_log(out, "create CONN_TRACK_OFFLOAD", NULL, 0);
mlx5_free(ct_aso_obj);
return NULL;
}
@@ -2609,9 +2592,7 @@ mlx5_devx_cmd_create_geneve_tlv_option(void *ctx,
geneve_tlv_opt_obj->obj = mlx5_glue->devx_obj_create(ctx, in,
sizeof(in), out, sizeof(out));
if (!geneve_tlv_opt_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create Geneve tlv option "
- "Obj using DevX.");
+ mlx5_devx_err_log(out, "create GENEVE TLV", NULL, 0);
mlx5_free(geneve_tlv_opt_obj);
return NULL;
}
@@ -2673,9 +2654,7 @@ mlx5_devx_cmd_queue_counter_alloc(void *ctx)
dcs->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in), out,
sizeof(out));
if (!dcs->obj) {
- DRV_LOG(DEBUG, "Can't allocate q counter set by DevX - error "
- "%d.", errno);
- rte_errno = errno;
+ mlx5_devx_err_log(out, "create q counter set", NULL, 0);
mlx5_free(dcs);
return NULL;
}
@@ -2762,8 +2741,7 @@ mlx5_devx_cmd_create_dek_obj(void *ctx, struct mlx5_devx_dek_attr *attr)
dek_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (dek_obj->obj == NULL) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create DEK obj using DevX.");
+ mlx5_devx_err_log(out, "create DEK", NULL, 0);
mlx5_free(dek_obj);
return NULL;
}
@@ -2810,8 +2788,7 @@ mlx5_devx_cmd_create_import_kek_obj(void *ctx,
import_kek_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (import_kek_obj->obj == NULL) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create IMPORT_KEK object using DevX.");
+ mlx5_devx_err_log(out, "create IMPORT_KEK", NULL, 0);
mlx5_free(import_kek_obj);
return NULL;
}
@@ -2859,8 +2836,7 @@ mlx5_devx_cmd_create_credential_obj(void *ctx,
credential_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (credential_obj->obj == NULL) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create CREDENTIAL object using DevX.");
+ mlx5_devx_err_log(out, "create CREDENTIAL", NULL, 0);
mlx5_free(credential_obj);
return NULL;
}
@@ -2911,8 +2887,7 @@ mlx5_devx_cmd_create_crypto_login_obj(void *ctx,
crypto_login_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (crypto_login_obj->obj == NULL) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create CRYPTO_LOGIN obj using DevX.");
+ mlx5_devx_err_log(out, "create CRYPTO_LOGIN", NULL, 0);
mlx5_free(crypto_login_obj);
return NULL;
}
--
2.35.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] common/mlx5: update log format after devx_general_cmd error
2022-06-08 11:58 [PATCH 1/2] common/mlx5: update log format after devx_general_cmd error Gregory Etelson
2022-06-08 11:58 ` [PATCH 2/2] common/mlx5: update log format after devx_obj_create error Gregory Etelson
2022-06-14 15:02 ` [PATCH 1/2] common/mlx5: update log format after devx_general_cmd error Raslan Darawsheh
@ 2022-11-08 12:46 ` David Marchand
2022-11-08 16:05 ` Gregory Etelson
2 siblings, 1 reply; 5+ messages in thread
From: David Marchand @ 2022-11-08 12:46 UTC (permalink / raw)
To: Gregory Etelson, rasland
Cc: dev, Matan Azrad, Viacheslav Ovsiienko, Thomas Monjalon
On Wed, Jun 8, 2022 at 1:58 PM Gregory Etelson <getelson@nvidia.com> wrote:
>
> Application can fetch syndrome value after FW operation failure
> starting from Mellanox OFED-5.6.
> The patch updates log data issued after devx_general_cmd error.
>
> Signed-off-by: Gregory Etelson <getelson@nvidia.com>
> Acked-by: Matan Azrad <matan@nvidia.com>
> ---
> drivers/common/mlx5/mlx5_devx_cmds.c | 103 ++++++++++++---------------
> 1 file changed, 44 insertions(+), 59 deletions(-)
>
> diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
> index c6bdbc12bb..bc06aeccc7 100644
> --- a/drivers/common/mlx5/mlx5_devx_cmds.c
> +++ b/drivers/common/mlx5/mlx5_devx_cmds.c
> @@ -13,39 +13,49 @@
> #include "mlx5_common_log.h"
> #include "mlx5_malloc.h"
>
> +/* FW writes status value to the OUT buffer at offset 00H */
> +#define MLX5_FW_STATUS(o) MLX5_GET(general_obj_out_cmd_hdr, (o), status)
> +/* FW writes syndrome value to the OUT buffer at offset 04H */
> +#define MLX5_FW_SYNDROME(o) MLX5_GET(general_obj_out_cmd_hdr, (o), syndrome)
> +
> +#define MLX5_DEVX_ERR_RC(x) ((x) > 0 ? -(x) : ((x) < 0 ? (x) : -1))
> +
> +static void
> +mlx5_devx_err_log(void *out, const char *reason,
> + const char *param, uint32_t value)
> +{
> + rte_errno = errno;
> + if (!param)
> + DRV_LOG(ERR, "DevX %s failed errno=%d status=%#x syndrome=%#x",
> + reason, errno, MLX5_FW_STATUS(out),
> + MLX5_FW_SYNDROME(out));
> + else
> + DRV_LOG(ERR, "DevX %s %s=%#X failed errno=%d status=%#x syndrome=%#x",
> + reason, param, value, errno, MLX5_FW_STATUS(out),
> + MLX5_FW_SYNDROME(out));
> +}
> +
> static void *
> mlx5_devx_get_hca_cap(void *ctx, uint32_t *in, uint32_t *out,
> int *err, uint32_t flags)
> {
> const size_t size_in = MLX5_ST_SZ_DW(query_hca_cap_in) * sizeof(int);
> const size_t size_out = MLX5_ST_SZ_DW(query_hca_cap_out) * sizeof(int);
> - int status, syndrome, rc;
> + int rc;
>
> - if (err)
> - *err = 0;
> memset(in, 0, size_in);
> memset(out, 0, size_out);
> MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
> MLX5_SET(query_hca_cap_in, in, op_mod, flags);
> rc = mlx5_glue->devx_general_cmd(ctx, in, size_in, out, size_out);
> - if (rc) {
> - DRV_LOG(ERR,
> - "Failed to query devx HCA capabilities func %#02x",
> - flags >> 1);
> + if (rc || MLX5_FW_STATUS(out)) {
> + mlx5_devx_err_log(out, "HCA capabilities", "func", flags >> 1);
> if (err)
> - *err = rc > 0 ? -rc : rc;
> - return NULL;
> - }
> - status = MLX5_GET(query_hca_cap_out, out, status);
> - syndrome = MLX5_GET(query_hca_cap_out, out, syndrome);
> - if (status) {
> - DRV_LOG(ERR,
> - "Failed to query devx HCA capabilities func %#02x status %x, syndrome = %x",
> - flags >> 1, status, syndrome);
> - if (err)
> - *err = -1;
> + *err = MLX5_DEVX_ERR_RC(rc);
> return NULL;
> }
> + if (err)
> + *err = 0;
> return MLX5_ADDR_OF(query_hca_cap_out, out, capability);
> }
>
> @@ -74,7 +84,7 @@ mlx5_devx_cmd_register_read(void *ctx, uint16_t reg_id, uint32_t arg,
> uint32_t in[MLX5_ST_SZ_DW(access_register_in)] = {0};
> uint32_t out[MLX5_ST_SZ_DW(access_register_out) +
> MLX5_ACCESS_REGISTER_DATA_DWORD_MAX] = {0};
> - int status, rc;
> + int rc;
>
> MLX5_ASSERT(data && dw_cnt);
> MLX5_ASSERT(dw_cnt <= MLX5_ACCESS_REGISTER_DATA_DWORD_MAX);
> @@ -91,23 +101,13 @@ mlx5_devx_cmd_register_read(void *ctx, uint16_t reg_id, uint32_t arg,
> rc = mlx5_glue->devx_general_cmd(ctx, in, sizeof(in), out,
> MLX5_ST_SZ_BYTES(access_register_out) +
> sizeof(uint32_t) * dw_cnt);
> - if (rc)
> - goto error;
> - status = MLX5_GET(access_register_out, out, status);
> - if (status) {
> - int syndrome = MLX5_GET(access_register_out, out, syndrome);
> -
> - DRV_LOG(DEBUG, "Failed to read access NIC register 0x%X, "
> - "status %x, syndrome = %x",
> - reg_id, status, syndrome);
> - return -1;
> + if (rc || MLX5_FW_STATUS(out)) {
> + mlx5_devx_err_log(out, "read access", "NIC register", reg_id);
> + return MLX5_DEVX_ERR_RC(rc);
I went back in the mailing list archive to find this change.
This patch makes the pmd spat some scary log messages...
I get this on a system with a CX5 nic (OVS dpdk-latest + 22.11-rc2) :
2022-11-08T12:26:09.218Z|00027|dpdk|ERR|mlx5_common: DevX read access
NIC register=0X9055 failed errno=22 status=0 syndrome=0
Should I be scared?
The system runs fine and nothing else changed in my setup (afaics), so
I'd expect it continues working..
This specific change of level on the "read access" check breaks OVS
system dpdk unit tests, as those tests fail on ERR level logs.
If this is harmless, please send a patch to revert to debug level (and
for other log messages that were updated in this patch if it was
unneeded).
Thanks.
--
David Marchand
^ permalink raw reply [flat|nested] 5+ messages in thread