* [PATCH 1/2] common/mlx5: update log format after devx_general_cmd error
@ 2022-06-08 11:58 Gregory Etelson
2022-06-08 11:58 ` [PATCH 2/2] common/mlx5: update log format after devx_obj_create error Gregory Etelson
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Gregory Etelson @ 2022-06-08 11:58 UTC (permalink / raw)
To: dev, getelson; +Cc: rasland, Matan Azrad, Viacheslav Ovsiienko
Application can fetch syndrome value after FW operation failure
starting from Mellanox OFED-5.6.
The patch updates log data issued after devx_general_cmd error.
Signed-off-by: Gregory Etelson <getelson@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
drivers/common/mlx5/mlx5_devx_cmds.c | 103 ++++++++++++---------------
1 file changed, 44 insertions(+), 59 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index c6bdbc12bb..bc06aeccc7 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -13,39 +13,49 @@
#include "mlx5_common_log.h"
#include "mlx5_malloc.h"
+/* FW writes status value to the OUT buffer at offset 00H */
+#define MLX5_FW_STATUS(o) MLX5_GET(general_obj_out_cmd_hdr, (o), status)
+/* FW writes syndrome value to the OUT buffer at offset 04H */
+#define MLX5_FW_SYNDROME(o) MLX5_GET(general_obj_out_cmd_hdr, (o), syndrome)
+
+#define MLX5_DEVX_ERR_RC(x) ((x) > 0 ? -(x) : ((x) < 0 ? (x) : -1))
+
+static void
+mlx5_devx_err_log(void *out, const char *reason,
+ const char *param, uint32_t value)
+{
+ rte_errno = errno;
+ if (!param)
+ DRV_LOG(ERR, "DevX %s failed errno=%d status=%#x syndrome=%#x",
+ reason, errno, MLX5_FW_STATUS(out),
+ MLX5_FW_SYNDROME(out));
+ else
+ DRV_LOG(ERR, "DevX %s %s=%#X failed errno=%d status=%#x syndrome=%#x",
+ reason, param, value, errno, MLX5_FW_STATUS(out),
+ MLX5_FW_SYNDROME(out));
+}
+
static void *
mlx5_devx_get_hca_cap(void *ctx, uint32_t *in, uint32_t *out,
int *err, uint32_t flags)
{
const size_t size_in = MLX5_ST_SZ_DW(query_hca_cap_in) * sizeof(int);
const size_t size_out = MLX5_ST_SZ_DW(query_hca_cap_out) * sizeof(int);
- int status, syndrome, rc;
+ int rc;
- if (err)
- *err = 0;
memset(in, 0, size_in);
memset(out, 0, size_out);
MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
MLX5_SET(query_hca_cap_in, in, op_mod, flags);
rc = mlx5_glue->devx_general_cmd(ctx, in, size_in, out, size_out);
- if (rc) {
- DRV_LOG(ERR,
- "Failed to query devx HCA capabilities func %#02x",
- flags >> 1);
+ if (rc || MLX5_FW_STATUS(out)) {
+ mlx5_devx_err_log(out, "HCA capabilities", "func", flags >> 1);
if (err)
- *err = rc > 0 ? -rc : rc;
- return NULL;
- }
- status = MLX5_GET(query_hca_cap_out, out, status);
- syndrome = MLX5_GET(query_hca_cap_out, out, syndrome);
- if (status) {
- DRV_LOG(ERR,
- "Failed to query devx HCA capabilities func %#02x status %x, syndrome = %x",
- flags >> 1, status, syndrome);
- if (err)
- *err = -1;
+ *err = MLX5_DEVX_ERR_RC(rc);
return NULL;
}
+ if (err)
+ *err = 0;
return MLX5_ADDR_OF(query_hca_cap_out, out, capability);
}
@@ -74,7 +84,7 @@ mlx5_devx_cmd_register_read(void *ctx, uint16_t reg_id, uint32_t arg,
uint32_t in[MLX5_ST_SZ_DW(access_register_in)] = {0};
uint32_t out[MLX5_ST_SZ_DW(access_register_out) +
MLX5_ACCESS_REGISTER_DATA_DWORD_MAX] = {0};
- int status, rc;
+ int rc;
MLX5_ASSERT(data && dw_cnt);
MLX5_ASSERT(dw_cnt <= MLX5_ACCESS_REGISTER_DATA_DWORD_MAX);
@@ -91,23 +101,13 @@ mlx5_devx_cmd_register_read(void *ctx, uint16_t reg_id, uint32_t arg,
rc = mlx5_glue->devx_general_cmd(ctx, in, sizeof(in), out,
MLX5_ST_SZ_BYTES(access_register_out) +
sizeof(uint32_t) * dw_cnt);
- if (rc)
- goto error;
- status = MLX5_GET(access_register_out, out, status);
- if (status) {
- int syndrome = MLX5_GET(access_register_out, out, syndrome);
-
- DRV_LOG(DEBUG, "Failed to read access NIC register 0x%X, "
- "status %x, syndrome = %x",
- reg_id, status, syndrome);
- return -1;
+ if (rc || MLX5_FW_STATUS(out)) {
+ mlx5_devx_err_log(out, "read access", "NIC register", reg_id);
+ return MLX5_DEVX_ERR_RC(rc);
}
memcpy(data, &out[MLX5_ST_SZ_DW(access_register_out)],
dw_cnt * sizeof(uint32_t));
return 0;
-error:
- rc = (rc > 0) ? -rc : rc;
- return rc;
}
/**
@@ -134,7 +134,7 @@ mlx5_devx_cmd_register_write(void *ctx, uint16_t reg_id, uint32_t arg,
uint32_t in[MLX5_ST_SZ_DW(access_register_in) +
MLX5_ACCESS_REGISTER_DATA_DWORD_MAX] = {0};
uint32_t out[MLX5_ST_SZ_DW(access_register_out)] = {0};
- int status, rc;
+ int rc;
void *ptr;
MLX5_ASSERT(data && dw_cnt);
@@ -152,26 +152,19 @@ mlx5_devx_cmd_register_write(void *ctx, uint16_t reg_id, uint32_t arg,
ptr = MLX5_ADDR_OF(access_register_in, in, register_data);
memcpy(ptr, data, dw_cnt * sizeof(uint32_t));
rc = mlx5_glue->devx_general_cmd(ctx, in, sizeof(in), out, sizeof(out));
-
+ if (rc || MLX5_FW_STATUS(out)) {
+ mlx5_devx_err_log(out, "write access", "NIC register", reg_id);
+ return MLX5_DEVX_ERR_RC(rc);
+ }
rc = mlx5_glue->devx_general_cmd(ctx, in,
MLX5_ST_SZ_BYTES(access_register_in) +
dw_cnt * sizeof(uint32_t),
out, sizeof(out));
- if (rc)
- goto error;
- status = MLX5_GET(access_register_out, out, status);
- if (status) {
- int syndrome = MLX5_GET(access_register_out, out, syndrome);
-
- DRV_LOG(DEBUG, "Failed to write access NIC register 0x%X, "
- "status %x, syndrome = %x",
- reg_id, status, syndrome);
- return -1;
+ if (rc || MLX5_FW_STATUS(out)) {
+ mlx5_devx_err_log(out, "write access", "NIC register", reg_id);
+ return MLX5_DEVX_ERR_RC(rc);
}
return 0;
-error:
- rc = (rc > 0) ? -rc : rc;
- return rc;
}
/**
@@ -466,7 +459,7 @@ mlx5_devx_cmd_query_nic_vport_context(void *ctx,
uint32_t in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0};
uint32_t out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {0};
void *vctx;
- int status, syndrome, rc;
+ int rc;
/* Query NIC vport context to determine inline mode. */
MLX5_SET(query_nic_vport_context_in, in, opcode,
@@ -477,23 +470,15 @@ mlx5_devx_cmd_query_nic_vport_context(void *ctx,
rc = mlx5_glue->devx_general_cmd(ctx,
in, sizeof(in),
out, sizeof(out));
- if (rc)
- goto error;
- status = MLX5_GET(query_nic_vport_context_out, out, status);
- syndrome = MLX5_GET(query_nic_vport_context_out, out, syndrome);
- if (status) {
- DRV_LOG(DEBUG, "Failed to query NIC vport context, "
- "status %x, syndrome = %x", status, syndrome);
- return -1;
+ if (rc || MLX5_FW_STATUS(out)) {
+ mlx5_devx_err_log(out, "query NIC vport context", NULL, 0);
+ return MLX5_DEVX_ERR_RC(rc);
}
vctx = MLX5_ADDR_OF(query_nic_vport_context_out, out,
nic_vport_context);
attr->vport_inline_mode = MLX5_GET(nic_vport_context, vctx,
min_wqe_inline_mode);
return 0;
-error:
- rc = (rc > 0) ? -rc : rc;
- return rc;
}
/**
--
2.35.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 2/2] common/mlx5: update log format after devx_obj_create error
2022-06-08 11:58 [PATCH 1/2] common/mlx5: update log format after devx_general_cmd error Gregory Etelson
@ 2022-06-08 11:58 ` Gregory Etelson
2022-06-14 15:02 ` [PATCH 1/2] common/mlx5: update log format after devx_general_cmd error Raslan Darawsheh
2022-11-08 12:46 ` David Marchand
2 siblings, 0 replies; 5+ messages in thread
From: Gregory Etelson @ 2022-06-08 11:58 UTC (permalink / raw)
To: dev, getelson; +Cc: rasland, Matan Azrad, Viacheslav Ovsiienko
Application can fetch syndrome value after FW operation failure
starting from Mellanox OFED-5.6.
The patch updates log data after devx_obj_create error.
Signed-off-by: Gregory Etelson <getelson@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
drivers/common/mlx5/mlx5_devx_cmds.c | 77 ++++++++++------------------
1 file changed, 26 insertions(+), 51 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index bc06aeccc7..d4220a863b 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -199,8 +199,7 @@ mlx5_devx_cmd_flow_counter_alloc(void *ctx, uint32_t bulk_n_128)
dcs->obj = mlx5_glue->devx_obj_create(ctx, in,
sizeof(in), out, sizeof(out));
if (!dcs->obj) {
- DRV_LOG(ERR, "Can't allocate counters - error %d", errno);
- rte_errno = errno;
+ mlx5_devx_err_log(out, "allocate counters", NULL, 0);
mlx5_free(dcs);
return NULL;
}
@@ -378,9 +377,9 @@ mlx5_devx_cmd_mkey_create(void *ctx,
mkey->obj = mlx5_glue->devx_obj_create(ctx, in, in_size_dw * 4, out,
sizeof(out));
if (!mkey->obj) {
- DRV_LOG(ERR, "Can't create %sdirect mkey - error %d",
- klm_num ? "an in" : "a ", errno);
- rte_errno = errno;
+ mlx5_devx_err_log(out,
+ klm_num ? "create indirect mkey" : "create direct key",
+ NULL, 0);
mlx5_free(mkey);
return NULL;
}
@@ -709,9 +708,7 @@ mlx5_devx_cmd_create_flex_parser(void *ctx,
parse_flex_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (!parse_flex_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create FLEX PARSE GRAPH object "
- "by using DevX.");
+ mlx5_devx_err_log(out, "create FLEX PARSE GRAPH", NULL, 0);
mlx5_free(parse_flex_obj);
return NULL;
}
@@ -1283,8 +1280,7 @@ mlx5_devx_cmd_create_rq(void *ctx,
rq->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (!rq->obj) {
- DRV_LOG(ERR, "Failed to create RQ using DevX");
- rte_errno = errno;
+ mlx5_devx_err_log(out, "create RQ", NULL, 0);
mlx5_free(rq);
return NULL;
}
@@ -1383,8 +1379,7 @@ mlx5_devx_cmd_create_rmp(void *ctx,
rmp->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in), out,
sizeof(out));
if (!rmp->obj) {
- DRV_LOG(ERR, "Failed to create RMP using DevX");
- rte_errno = errno;
+ mlx5_devx_err_log(out, "create RMP", NULL, 0);
mlx5_free(rmp);
return NULL;
}
@@ -1452,8 +1447,7 @@ mlx5_devx_cmd_create_tir(void *ctx,
tir->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (!tir->obj) {
- DRV_LOG(ERR, "Failed to create TIR using DevX");
- rte_errno = errno;
+ mlx5_devx_err_log(out, "create TIR", NULL, 0);
mlx5_free(tir);
return NULL;
}
@@ -1591,8 +1585,7 @@ mlx5_devx_cmd_create_rqt(void *ctx,
rqt->obj = mlx5_glue->devx_obj_create(ctx, in, inlen, out, sizeof(out));
mlx5_free(in);
if (!rqt->obj) {
- DRV_LOG(ERR, "Failed to create RQT using DevX");
- rte_errno = errno;
+ mlx5_devx_err_log(out, "create RQT", NULL, 0);
mlx5_free(rqt);
return NULL;
}
@@ -1706,8 +1699,7 @@ mlx5_devx_cmd_create_sq(void *ctx,
sq->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (!sq->obj) {
- DRV_LOG(ERR, "Failed to create SQ using DevX");
- rte_errno = errno;
+ mlx5_devx_err_log(out, "create SQ", NULL, 0);
mlx5_free(sq);
return NULL;
}
@@ -1790,8 +1782,7 @@ mlx5_devx_cmd_create_tis(void *ctx,
tis->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (!tis->obj) {
- DRV_LOG(ERR, "Failed to create TIS using DevX");
- rte_errno = errno;
+ mlx5_devx_err_log(out, "create TIS", NULL, 0);
mlx5_free(tis);
return NULL;
}
@@ -1825,8 +1816,7 @@ mlx5_devx_cmd_create_td(void *ctx)
td->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (!td->obj) {
- DRV_LOG(ERR, "Failed to create TIS using DevX");
- rte_errno = errno;
+ mlx5_devx_err_log(out, "create TIS", NULL, 0);
mlx5_free(td);
return NULL;
}
@@ -1946,8 +1936,7 @@ mlx5_devx_cmd_create_cq(void *ctx, struct mlx5_devx_cq_attr *attr)
cq_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in), out,
sizeof(out));
if (!cq_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create CQ using DevX errno=%d.", errno);
+ mlx5_devx_err_log(out, "create CQ", NULL, 0);
mlx5_free(cq_obj);
return NULL;
}
@@ -2023,8 +2012,7 @@ mlx5_devx_cmd_create_virtq(void *ctx,
virtq_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in), out,
sizeof(out));
if (!virtq_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create VIRTQ Obj using DevX.");
+ mlx5_devx_err_log(out, "create VIRTQ", NULL, 0);
mlx5_free(virtq_obj);
return NULL;
}
@@ -2218,8 +2206,7 @@ mlx5_devx_cmd_create_qp(void *ctx,
qp_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in), out,
sizeof(out));
if (!qp_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create QP Obj using DevX.");
+ mlx5_devx_err_log(out, "create QP", NULL, 0);
mlx5_free(qp_obj);
return NULL;
}
@@ -2333,9 +2320,8 @@ mlx5_devx_cmd_create_virtio_q_counters(void *ctx)
couners_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in), out,
sizeof(out));
if (!couners_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create virtio queue counters Obj using"
- " DevX.");
+ mlx5_devx_err_log(out, "create virtio queue counters Obj",
+ NULL, 0);
mlx5_free(couners_obj);
return NULL;
}
@@ -2417,8 +2403,7 @@ mlx5_devx_cmd_create_flow_hit_aso_obj(void *ctx, uint32_t pd)
flow_hit_aso_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (!flow_hit_aso_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create FLOW_HIT_ASO obj using DevX.");
+ mlx5_devx_err_log(out, "create FLOW_HIT_ASO", NULL, 0);
mlx5_free(flow_hit_aso_obj);
return NULL;
}
@@ -2505,8 +2490,7 @@ mlx5_devx_cmd_create_flow_meter_aso_obj(void *ctx, uint32_t pd,
ctx, in, sizeof(in),
out, sizeof(out));
if (!flow_meter_aso_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create FLOW_METER_ASO obj using DevX.");
+ mlx5_devx_err_log(out, "create FLOW_METTER_ASO", NULL, 0);
mlx5_free(flow_meter_aso_obj);
return NULL;
}
@@ -2556,8 +2540,7 @@ mlx5_devx_cmd_create_conn_track_offload_obj(void *ctx, uint32_t pd,
ct_aso_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (!ct_aso_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create CONN_TRACK_OFFLOAD obj by using DevX.");
+ mlx5_devx_err_log(out, "create CONN_TRACK_OFFLOAD", NULL, 0);
mlx5_free(ct_aso_obj);
return NULL;
}
@@ -2609,9 +2592,7 @@ mlx5_devx_cmd_create_geneve_tlv_option(void *ctx,
geneve_tlv_opt_obj->obj = mlx5_glue->devx_obj_create(ctx, in,
sizeof(in), out, sizeof(out));
if (!geneve_tlv_opt_obj->obj) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create Geneve tlv option "
- "Obj using DevX.");
+ mlx5_devx_err_log(out, "create GENEVE TLV", NULL, 0);
mlx5_free(geneve_tlv_opt_obj);
return NULL;
}
@@ -2673,9 +2654,7 @@ mlx5_devx_cmd_queue_counter_alloc(void *ctx)
dcs->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in), out,
sizeof(out));
if (!dcs->obj) {
- DRV_LOG(DEBUG, "Can't allocate q counter set by DevX - error "
- "%d.", errno);
- rte_errno = errno;
+ mlx5_devx_err_log(out, "create q counter set", NULL, 0);
mlx5_free(dcs);
return NULL;
}
@@ -2762,8 +2741,7 @@ mlx5_devx_cmd_create_dek_obj(void *ctx, struct mlx5_devx_dek_attr *attr)
dek_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (dek_obj->obj == NULL) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create DEK obj using DevX.");
+ mlx5_devx_err_log(out, "create DEK", NULL, 0);
mlx5_free(dek_obj);
return NULL;
}
@@ -2810,8 +2788,7 @@ mlx5_devx_cmd_create_import_kek_obj(void *ctx,
import_kek_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (import_kek_obj->obj == NULL) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create IMPORT_KEK object using DevX.");
+ mlx5_devx_err_log(out, "create IMPORT_KEK", NULL, 0);
mlx5_free(import_kek_obj);
return NULL;
}
@@ -2859,8 +2836,7 @@ mlx5_devx_cmd_create_credential_obj(void *ctx,
credential_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (credential_obj->obj == NULL) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create CREDENTIAL object using DevX.");
+ mlx5_devx_err_log(out, "create CREDENTIAL", NULL, 0);
mlx5_free(credential_obj);
return NULL;
}
@@ -2911,8 +2887,7 @@ mlx5_devx_cmd_create_crypto_login_obj(void *ctx,
crypto_login_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
out, sizeof(out));
if (crypto_login_obj->obj == NULL) {
- rte_errno = errno;
- DRV_LOG(ERR, "Failed to create CRYPTO_LOGIN obj using DevX.");
+ mlx5_devx_err_log(out, "create CRYPTO_LOGIN", NULL, 0);
mlx5_free(crypto_login_obj);
return NULL;
}
--
2.35.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH 1/2] common/mlx5: update log format after devx_general_cmd error
2022-06-08 11:58 [PATCH 1/2] common/mlx5: update log format after devx_general_cmd error Gregory Etelson
2022-06-08 11:58 ` [PATCH 2/2] common/mlx5: update log format after devx_obj_create error Gregory Etelson
@ 2022-06-14 15:02 ` Raslan Darawsheh
2022-11-08 12:46 ` David Marchand
2 siblings, 0 replies; 5+ messages in thread
From: Raslan Darawsheh @ 2022-06-14 15:02 UTC (permalink / raw)
To: Gregory Etelson, dev; +Cc: Matan Azrad, Slava Ovsiienko
Hi,
> -----Original Message-----
> From: Gregory Etelson <getelson@nvidia.com>
> Sent: Wednesday, June 8, 2022 2:58 PM
> To: dev@dpdk.org; Gregory Etelson <getelson@nvidia.com>
> Cc: Raslan Darawsheh <rasland@nvidia.com>; Matan Azrad
> <matan@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>
> Subject: [PATCH 1/2] common/mlx5: update log format after
> devx_general_cmd error
>
> Application can fetch syndrome value after FW operation failure
> starting from Mellanox OFED-5.6.
> The patch updates log data issued after devx_general_cmd error.
>
> Signed-off-by: Gregory Etelson <getelson@nvidia.com>
> Acked-by: Matan Azrad <matan@nvidia.com>
Series applied to next-net-mlx,
Kindest regards,
Raslan Darawsheh
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] common/mlx5: update log format after devx_general_cmd error
2022-06-08 11:58 [PATCH 1/2] common/mlx5: update log format after devx_general_cmd error Gregory Etelson
2022-06-08 11:58 ` [PATCH 2/2] common/mlx5: update log format after devx_obj_create error Gregory Etelson
2022-06-14 15:02 ` [PATCH 1/2] common/mlx5: update log format after devx_general_cmd error Raslan Darawsheh
@ 2022-11-08 12:46 ` David Marchand
2022-11-08 16:05 ` Gregory Etelson
2 siblings, 1 reply; 5+ messages in thread
From: David Marchand @ 2022-11-08 12:46 UTC (permalink / raw)
To: Gregory Etelson, rasland
Cc: dev, Matan Azrad, Viacheslav Ovsiienko, Thomas Monjalon
On Wed, Jun 8, 2022 at 1:58 PM Gregory Etelson <getelson@nvidia.com> wrote:
>
> Application can fetch syndrome value after FW operation failure
> starting from Mellanox OFED-5.6.
> The patch updates log data issued after devx_general_cmd error.
>
> Signed-off-by: Gregory Etelson <getelson@nvidia.com>
> Acked-by: Matan Azrad <matan@nvidia.com>
> ---
> drivers/common/mlx5/mlx5_devx_cmds.c | 103 ++++++++++++---------------
> 1 file changed, 44 insertions(+), 59 deletions(-)
>
> diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
> index c6bdbc12bb..bc06aeccc7 100644
> --- a/drivers/common/mlx5/mlx5_devx_cmds.c
> +++ b/drivers/common/mlx5/mlx5_devx_cmds.c
> @@ -13,39 +13,49 @@
> #include "mlx5_common_log.h"
> #include "mlx5_malloc.h"
>
> +/* FW writes status value to the OUT buffer at offset 00H */
> +#define MLX5_FW_STATUS(o) MLX5_GET(general_obj_out_cmd_hdr, (o), status)
> +/* FW writes syndrome value to the OUT buffer at offset 04H */
> +#define MLX5_FW_SYNDROME(o) MLX5_GET(general_obj_out_cmd_hdr, (o), syndrome)
> +
> +#define MLX5_DEVX_ERR_RC(x) ((x) > 0 ? -(x) : ((x) < 0 ? (x) : -1))
> +
> +static void
> +mlx5_devx_err_log(void *out, const char *reason,
> + const char *param, uint32_t value)
> +{
> + rte_errno = errno;
> + if (!param)
> + DRV_LOG(ERR, "DevX %s failed errno=%d status=%#x syndrome=%#x",
> + reason, errno, MLX5_FW_STATUS(out),
> + MLX5_FW_SYNDROME(out));
> + else
> + DRV_LOG(ERR, "DevX %s %s=%#X failed errno=%d status=%#x syndrome=%#x",
> + reason, param, value, errno, MLX5_FW_STATUS(out),
> + MLX5_FW_SYNDROME(out));
> +}
> +
> static void *
> mlx5_devx_get_hca_cap(void *ctx, uint32_t *in, uint32_t *out,
> int *err, uint32_t flags)
> {
> const size_t size_in = MLX5_ST_SZ_DW(query_hca_cap_in) * sizeof(int);
> const size_t size_out = MLX5_ST_SZ_DW(query_hca_cap_out) * sizeof(int);
> - int status, syndrome, rc;
> + int rc;
>
> - if (err)
> - *err = 0;
> memset(in, 0, size_in);
> memset(out, 0, size_out);
> MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
> MLX5_SET(query_hca_cap_in, in, op_mod, flags);
> rc = mlx5_glue->devx_general_cmd(ctx, in, size_in, out, size_out);
> - if (rc) {
> - DRV_LOG(ERR,
> - "Failed to query devx HCA capabilities func %#02x",
> - flags >> 1);
> + if (rc || MLX5_FW_STATUS(out)) {
> + mlx5_devx_err_log(out, "HCA capabilities", "func", flags >> 1);
> if (err)
> - *err = rc > 0 ? -rc : rc;
> - return NULL;
> - }
> - status = MLX5_GET(query_hca_cap_out, out, status);
> - syndrome = MLX5_GET(query_hca_cap_out, out, syndrome);
> - if (status) {
> - DRV_LOG(ERR,
> - "Failed to query devx HCA capabilities func %#02x status %x, syndrome = %x",
> - flags >> 1, status, syndrome);
> - if (err)
> - *err = -1;
> + *err = MLX5_DEVX_ERR_RC(rc);
> return NULL;
> }
> + if (err)
> + *err = 0;
> return MLX5_ADDR_OF(query_hca_cap_out, out, capability);
> }
>
> @@ -74,7 +84,7 @@ mlx5_devx_cmd_register_read(void *ctx, uint16_t reg_id, uint32_t arg,
> uint32_t in[MLX5_ST_SZ_DW(access_register_in)] = {0};
> uint32_t out[MLX5_ST_SZ_DW(access_register_out) +
> MLX5_ACCESS_REGISTER_DATA_DWORD_MAX] = {0};
> - int status, rc;
> + int rc;
>
> MLX5_ASSERT(data && dw_cnt);
> MLX5_ASSERT(dw_cnt <= MLX5_ACCESS_REGISTER_DATA_DWORD_MAX);
> @@ -91,23 +101,13 @@ mlx5_devx_cmd_register_read(void *ctx, uint16_t reg_id, uint32_t arg,
> rc = mlx5_glue->devx_general_cmd(ctx, in, sizeof(in), out,
> MLX5_ST_SZ_BYTES(access_register_out) +
> sizeof(uint32_t) * dw_cnt);
> - if (rc)
> - goto error;
> - status = MLX5_GET(access_register_out, out, status);
> - if (status) {
> - int syndrome = MLX5_GET(access_register_out, out, syndrome);
> -
> - DRV_LOG(DEBUG, "Failed to read access NIC register 0x%X, "
> - "status %x, syndrome = %x",
> - reg_id, status, syndrome);
> - return -1;
> + if (rc || MLX5_FW_STATUS(out)) {
> + mlx5_devx_err_log(out, "read access", "NIC register", reg_id);
> + return MLX5_DEVX_ERR_RC(rc);
I went back in the mailing list archive to find this change.
This patch makes the pmd spat some scary log messages...
I get this on a system with a CX5 nic (OVS dpdk-latest + 22.11-rc2) :
2022-11-08T12:26:09.218Z|00027|dpdk|ERR|mlx5_common: DevX read access
NIC register=0X9055 failed errno=22 status=0 syndrome=0
Should I be scared?
The system runs fine and nothing else changed in my setup (afaics), so
I'd expect it continues working..
This specific change of level on the "read access" check breaks OVS
system dpdk unit tests, as those tests fail on ERR level logs.
If this is harmless, please send a patch to revert to debug level (and
for other log messages that were updated in this patch if it was
unneeded).
Thanks.
--
David Marchand
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH 1/2] common/mlx5: update log format after devx_general_cmd error
2022-11-08 12:46 ` David Marchand
@ 2022-11-08 16:05 ` Gregory Etelson
0 siblings, 0 replies; 5+ messages in thread
From: Gregory Etelson @ 2022-11-08 16:05 UTC (permalink / raw)
To: David Marchand, Raslan Darawsheh
Cc: dev, Matan Azrad, Slava Ovsiienko,
NBU-Contact-Thomas Monjalon (EXTERNAL)
Hello David,
> > + if (rc || MLX5_FW_STATUS(out)) {
> > + mlx5_devx_err_log(out, "read access", "NIC register", reg_id);
> > + return MLX5_DEVX_ERR_RC(rc);
>
> I went back in the mailing list archive to find this change.
> This patch makes the pmd spat some scary log messages...
>
> I get this on a system with a CX5 nic (OVS dpdk-latest + 22.11-rc2) :
> 2022-11-08T12:26:09.218Z|00027|dpdk|ERR|mlx5_common: DevX read
> access
> NIC register=0X9055 failed errno=22 status=0 syndrome=0
>
> Should I be scared?
> The system runs fine and nothing else changed in my setup (afaics), so
> I'd expect it continues working..
That failure is expected in MLX HBA before ConnectX-6
That it not a functional error in ConnectX-5 and can be ignored.
>
> This specific change of level on the "read access" check breaks OVS
> system dpdk unit tests, as those tests fail on ERR level logs.
> If this is harmless, please send a patch to revert to debug level (and
> for other log messages that were updated in this patch if it was
> unneeded).
>
I'll send a patch that removes that error message.
>
> Thanks.
>
> --
> David Marchand
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2022-11-08 16:05 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-08 11:58 [PATCH 1/2] common/mlx5: update log format after devx_general_cmd error Gregory Etelson
2022-06-08 11:58 ` [PATCH 2/2] common/mlx5: update log format after devx_obj_create error Gregory Etelson
2022-06-14 15:02 ` [PATCH 1/2] common/mlx5: update log format after devx_general_cmd error Raslan Darawsheh
2022-11-08 12:46 ` David Marchand
2022-11-08 16:05 ` Gregory Etelson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).