Hi,

 

Has anyone noticed that the crypto_aesni_mb virtual crypto device crashes because of memory corruption caused by the scan and probe on the secondary process? Can anyone shed some light on it?

What I encountered is:

On the primary process, the crypto_aesni_mb device is probed and created successfully, and mb_mgr is set in the device private data. But during packet processing, the application crashes when accessing mb_mgr. Debugging shows that the mb_mgr address has been changed to an invalid (non-NULL) address. Further digging shows that this memory corruption occurs after vdev_action replies to the scan request.

In the code below, the crash goes away if I either disable sending the message on VDEV_SCAN_REQ or skip processing VDEV_SCAN_ONE. It seems that insert_vdev() on the secondary process triggers another probe and breaks the existing device data?

I also noticed that a similar issue was addressed by this patch, https://review.spdk.io/gerrit/c/spdk/dpdk/+/1056, but the patch was cancelled. That patch reported a similar memory issue found during scanning and probing on the secondary process.

 

static int

vdev_action(const struct rte_mp_msg *mp_msg, const void *peer)

{

     struct rte_vdev_device *dev;

     struct rte_mp_msg mp_resp;

     struct vdev_param *ou = (struct vdev_param *)&mp_resp.param;

     const struct vdev_param *in = (const struct vdev_param *)mp_msg->param;

     const char *devname;

     int num;

     int ret;

 

     strlcpy(mp_resp.name, VDEV_MP_KEY, sizeof(mp_resp.name));

     mp_resp.len_param = sizeof(*ou);

     mp_resp.num_fds = 0;

 

     switch (in->type) {

     case VDEV_SCAN_REQ:

          VDEV_LOG(INFO, "changczh skip vdev, %s", devname);

          ou->type = VDEV_SCAN_ONE;

          ou->num = 1;

          num = 0;

 

          rte_spinlock_recursive_lock(&vdev_device_list_lock);

          TAILQ_FOREACH(dev, &vdev_device_list, next) {

              devname = rte_vdev_device_name(dev);

              if (strlen(devname) == 0) {

                   VDEV_LOG(INFO, "vdev with no name is not sent");

                   continue;

              }

              VDEV_LOG(INFO, "send vdev, %s", devname);

              strlcpy(ou->name, devname, RTE_DEV_NAME_MAX_LEN);

              if (rte_mp_sendmsg(&mp_resp) < 0)

                   VDEV_LOG(ERR, "send vdev, %s, failed, %s",

                        devname, strerror(rte_errno));

              num++;

          }

          rte_spinlock_recursive_unlock(&vdev_device_list_lock);

          ou->type = VDEV_SCAN_REP;

          ou->num = num;

          if (rte_mp_reply(&mp_resp, peer) < 0)

              VDEV_LOG(ERR, "Failed to reply a scan request");

          break;

     case VDEV_SCAN_ONE:

          VDEV_LOG(INFO, "receive vdev, %s", in->name);

          ret = insert_vdev(in->name, NULL, NULL, false);                                      

          if (ret == -EEXIST)

              VDEV_LOG(DEBUG, "device already exist, %s", in->name);

          else if (ret < 0)

              VDEV_LOG(ERR, "failed to add vdev, %s", in->name);

          break;

     default:

          VDEV_LOG(ERR, "vdev cannot recognize this message");

     }

 

     return 0;

}

 

 

Thanks,

Alex