On Mon, Jan 24, 2022 at 6:05 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
On 1/24/2022 12:12 PM, Kumara Parameshwaran wrote:
> From: Kumara Parameshwaran <kparameshwar@vmware.com>
>
> When a tap device is hotplugged to primary process which in turn
> adds the device to all secondary process, the secondary process
> does a tap_mp_attach_queues, but the fds are not populated in
> the primary during the probe they are populated during the queue_setup,
> added a fix to sync the queues during rte_eth_dev_start
>
> Fixes: 4852aa8f6e21 ("drivers/net: enable hotplug on secondary process")
> Cc: stable@dpdk.org
>
> Signed-off-by: Kumara Parameshwaran <kparameshwar@vmware.com>
> ---
>
> v2:
> * Addressed review comments to move the function declaration and version
>    map
>

Thanks for adding patch version.

>   drivers/net/tap/rte_eth_tap.c | 196 +++++++++++++---------------------
>   lib/ethdev/ethdev_driver.h    |  17 +++
>   lib/ethdev/rte_ethdev.c       |  11 ++
>   lib/ethdev/version.map        |   2 +
>   4 files changed, 102 insertions(+), 124 deletions(-)
>

Can you please separate etdev (API) changes to another patch, so this will be a patchset
with two patches,
first patch adds ethdev API
second patch is tap patch, that uses the API in the first patch 
    Sure, I will do it. 

> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index f1b48cae82..f6c25d7e21 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -66,7 +66,7 @@
>       (TAP_GSO_MBUFS_PER_CORE * TAP_GSO_MBUF_CACHE_SIZE)
>   
>   /* IPC key for queue fds sync */
> -#define TAP_MP_KEY "tap_mp_sync_queues"
> +#define TAP_MP_REQ_START_RXTX "tap_mp_req_start_rxtx"
>

We said we can drop "tap_mp_sync_queues", but thinking twice,
will current implementation cover following usecase:

- Primary applicaiton started with tap interface, all config, setup,
   start done
- Secondary app started without any parameter

Since primary already started, I think secondary fds will be wrong,
what do you think?
That is true, the fds would be unitialised, we should retain for the secondary attach case.
>   #define TAP_IOV_DEFAULT_MAX 1024
>   
> @@ -880,11 +880,48 @@ tap_link_set_up(struct rte_eth_dev *dev)
>       return tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
>   }
>   
> +static int tap_mp_req_on_rxtx(struct rte_eth_dev *dev)

Can you please follow the coding convention:
sure, will do it.
static int
tap_mp_req_on_rxtx(struct rte_eth_dev *dev)
{

> +{
> +     struct rte_mp_msg msg;
> +     struct ipc_queues *request_param = (struct ipc_queues *)msg.param;
> +     int err;
> +     int fd_iterator = 0;
> +     struct pmd_process_private *process_private = dev->process_private;
> +     int i;
> +
> +     memset(&msg, 0, sizeof(msg));
> +     strlcpy(msg.name, TAP_MP_REQ_START_RXTX, sizeof(msg.name));
> +     strlcpy(request_param->port_name, dev->data->name, sizeof(request_param->port_name));
> +     msg.len_param = sizeof(*request_param);
> +     for (i = 0; i < dev->data->nb_tx_queues; i++) {
> +             msg.fds[fd_iterator++] = process_private->txq_fds[i];
> +             msg.num_fds++;
> +             request_param->txq_count++;
> +     }
> +     for (i = 0; i < dev->data->nb_rx_queues; i++) {
> +             msg.fds[fd_iterator++] = process_private->rxq_fds[i];
> +             msg.num_fds++;
> +             request_param->rxq_count++;
> +     }
> +
> +     err = rte_mp_sendmsg(&msg);
> +     if (err < 0) {
> +             TAP_LOG(ERR, "Failed to send start req to secondary %d",
> +                     rte_errno);
> +             return -1;
> +     }
> +
> +     return 0;
> +}
> +
>   static int
>   tap_dev_start(struct rte_eth_dev *dev)
>   {
>       int err, i;
>   
> +     if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> +             tap_mp_req_on_rxtx(dev);
> +
>       err = tap_intr_handle_set(dev, 1);
>       if (err)
>               return err;
> @@ -901,6 +938,34 @@ tap_dev_start(struct rte_eth_dev *dev)
>       return err;
>   }
>   
> +static int
> +tap_mp_req_start_rxtx(const struct rte_mp_msg *request, __rte_unused const void *peer)
> +{

I asked last time but I don't remember the response,
what should be in the 'peer' variable?

> +     struct rte_eth_dev *dev;
> +     const struct ipc_queues *request_param =
> +             (const struct ipc_queues *)request->param;
> +     int fd_iterator;
> +     int queue;
> +     struct pmd_process_private *process_private;
> +
> +     dev = rte_get_eth_dev_by_name(request_param->port_name);
> +     if (!dev) {
> +             TAP_LOG(ERR, "Failed to get dev for %s",
> +                     request_param->port_name);
> +             return -1;
> +     }
> +     process_private = dev->process_private;
> +     fd_iterator = 0;
> +     TAP_LOG(DEBUG, "tap_attach rx_q:%d tx_q:%d\n", request_param->rxq_count,
> +             request_param->txq_count);
> +     for (queue = 0; queue < request_param->txq_count; queue++)
> +             process_private->txq_fds[queue] = request->fds[fd_iterator++];
> +     for (queue = 0; queue < request_param->rxq_count; queue++)
> +             process_private->rxq_fds[queue] = request->fds[fd_iterator++];
> +
> +     return 0;
> +}
> +
>   /* This function gets called when the current port gets stopped.
>    */
>   static int
> @@ -1084,6 +1149,7 @@ tap_dev_close(struct rte_eth_dev *dev)
>   
>       if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
>               rte_free(dev->process_private);
> +             rte_mp_action_unregister(TAP_MP_REQ_START_RXTX);
>               return 0;
>       }
>   
> @@ -1140,8 +1206,6 @@ tap_dev_close(struct rte_eth_dev *dev)
>               internals->ioctl_sock = -1;
>       }
>       rte_free(dev->process_private);
> -     if (tap_devices_count == 1)
> -             rte_mp_action_unregister(TAP_MP_KEY);
>       tap_devices_count--;
>       /*
>        * Since TUN device has no more opened file descriptors
> @@ -2292,113 +2356,6 @@ rte_pmd_tun_probe(struct rte_vdev_device *dev)
>       return ret;
>   }
>   
> -/* Request queue file descriptors from secondary to primary. */
> -static int
> -tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
> -{
> -     int ret;
> -     struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
> -     struct rte_mp_msg request, *reply;
> -     struct rte_mp_reply replies;
> -     struct ipc_queues *request_param = (struct ipc_queues *)request.param;
> -     struct ipc_queues *reply_param;
> -     struct pmd_process_private *process_private = dev->process_private;
> -     int queue, fd_iterator;
> -
> -     /* Prepare the request */
> -     memset(&request, 0, sizeof(request));
> -     strlcpy(request.name, TAP_MP_KEY, sizeof(request.name));
> -     strlcpy(request_param->port_name, port_name,
> -             sizeof(request_param->port_name));
> -     request.len_param = sizeof(*request_param);
> -     /* Send request and receive reply */
> -     ret = rte_mp_request_sync(&request, &replies, &timeout);
> -     if (ret < 0 || replies.nb_received != 1) {
> -             TAP_LOG(ERR, "Failed to request queues from primary: %d",
> -                     rte_errno);
> -             return -1;
> -     }
> -     reply = &replies.msgs[0];
> -     reply_param = (struct ipc_queues *)reply->param;
> -     TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);
> -
> -     /* Attach the queues from received file descriptors */
> -     if (reply_param->rxq_count + reply_param->txq_count != reply->num_fds) {
> -             TAP_LOG(ERR, "Unexpected number of fds received");
> -             return -1;
> -     }
> -
> -     dev->data->nb_rx_queues = reply_param->rxq_count;
> -     dev->data->nb_tx_queues = reply_param->txq_count;
> -     fd_iterator = 0;
> -     for (queue = 0; queue < reply_param->rxq_count; queue++)
> -             process_private->rxq_fds[queue] = reply->fds[fd_iterator++];
> -     for (queue = 0; queue < reply_param->txq_count; queue++)
> -             process_private->txq_fds[queue] = reply->fds[fd_iterator++];
> -     free(reply);
> -     return 0;
> -}
> -
> -/* Send the queue file descriptors from the primary process to secondary. */
> -static int
> -tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
> -{
> -     struct rte_eth_dev *dev;
> -     struct pmd_process_private *process_private;
> -     struct rte_mp_msg reply;
> -     const struct ipc_queues *request_param =
> -             (const struct ipc_queues *)request->param;
> -     struct ipc_queues *reply_param =
> -             (struct ipc_queues *)reply.param;
> -     uint16_t port_id;
> -     int queue;
> -     int ret;
> -
> -     /* Get requested port */
> -     TAP_LOG(DEBUG, "Received IPC request for %s", request_param->port_name);
> -     ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
> -     if (ret) {
> -             TAP_LOG(ERR, "Failed to get port id for %s",
> -                     request_param->port_name);
> -             return -1;
> -     }
> -     dev = &rte_eth_devices[port_id];
> -     process_private = dev->process_private;
> -
> -     /* Fill file descriptors for all queues */
> -     reply.num_fds = 0;
> -     reply_param->rxq_count = 0;
> -     if (dev->data->nb_rx_queues + dev->data->nb_tx_queues >
> -                     RTE_MP_MAX_FD_NUM){
> -             TAP_LOG(ERR, "Number of rx/tx queues exceeds max number of fds");
> -             return -1;
> -     }
> -
> -     for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
> -             reply.fds[reply.num_fds++] = process_private->rxq_fds[queue];
> -             reply_param->rxq_count++;
> -     }
> -     RTE_ASSERT(reply_param->rxq_count == dev->data->nb_rx_queues);
> -
> -     reply_param->txq_count = 0;
> -     for (queue = 0; queue < dev->data->nb_tx_queues; queue++) {
> -             reply.fds[reply.num_fds++] = process_private->txq_fds[queue];
> -             reply_param->txq_count++;
> -     }
> -     RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);
> -
> -     /* Send reply */
> -     strlcpy(reply.name, request->name, sizeof(reply.name));
> -     strlcpy(reply_param->port_name, request_param->port_name,
> -             sizeof(reply_param->port_name));
> -     reply.len_param = sizeof(*reply_param);
> -     if (rte_mp_reply(&reply, peer) < 0) {
> -             TAP_LOG(ERR, "Failed to reply an IPC request to sync queues");
> -             return -1;
> -     }
> -     return 0;
> -}
> -
>   /* Open a TAP interface device.
>    */
>   static int
> @@ -2442,9 +2399,11 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
>                       return -1;
>               }
>   
> -             ret = tap_mp_attach_queues(name, eth_dev);
> -             if (ret != 0)
> -                     return -1;
> +             ret = rte_mp_action_register(TAP_MP_REQ_START_RXTX, tap_mp_req_start_rxtx);
> +             if (ret < 0 && rte_errno != ENOTSUP)
> +                     TAP_LOG(ERR, "tap: Failed to register IPC callback: %s",
> +                             strerror(rte_errno));
> +
>               rte_eth_dev_probing_finish(eth_dev);
>               return 0;
>       }
> @@ -2492,15 +2451,6 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
>   
>       TAP_LOG(DEBUG, "Initializing pmd_tap for %s", name);
>   
> -     /* Register IPC feed callback */
> -     if (!tap_devices_count) {
> -             ret = rte_mp_action_register(TAP_MP_KEY, tap_mp_sync_queues);
> -             if (ret < 0 && rte_errno != ENOTSUP) {
> -                     TAP_LOG(ERR, "tap: Failed to register IPC callback: %s",
> -                             strerror(rte_errno));
> -                     goto leave;
> -             }
> -     }
>       tap_devices_count++;
>       tap_devices_count_increased = 1;
>       ret = eth_dev_tap_create(dev, tap_name, remote_iface, &user_mac,
> @@ -2511,8 +2461,6 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
>               TAP_LOG(ERR, "Failed to create pmd for %s as %s",
>                       name, tap_name);
>               if (tap_devices_count_increased == 1) {
> -                     if (tap_devices_count == 1)
> -                             rte_mp_action_unregister(TAP_MP_KEY);
>                       tap_devices_count--;
>               }
>       }
> diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
> index d95605a355..a08991bcdf 100644
> --- a/lib/ethdev/ethdev_driver.h
> +++ b/lib/ethdev/ethdev_driver.h
> @@ -1629,6 +1629,23 @@ rte_eth_hairpin_queue_peer_bind(uint16_t cur_port, uint16_t cur_queue,
>                               struct rte_hairpin_peer_info *peer_info,
>                               uint32_t direction);
>   
> +/**

Please add '@internal' tag into doxygen comment.

> +* Get rte_eth_dev from device name. The device name should be specified
> +* as below:
> +* - PCIe address (Domain:Bus:Device.Function), for example- 0000:2:00.0
> +* - SoC device name, for example- fsl-gmac0
> +* - vdev dpdk name, for example- net_[pcap0|null0|tap0]
> +*
> +* @param name
> +*  pci address or name of the device
> +* @return
> +*   - rte_eth_dev if successful
> +*   - NULL on failure
> +*/
> +__rte_internal
> +struct rte_eth_dev*
> +rte_get_eth_dev_by_name(const char *name);

As the API name, better to start with 'rte_eth_' prefix to be consistent with
rest of the APIs.
I suggest 'rte_eth_dev_get_by_name' but feel free to chose better one.

> +
>   /**
>    * @internal
>    * Reset the current queue state and configuration to disconnect (unbind) it
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index a1d475a292..9192b0d664 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -894,6 +894,17 @@ rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id)
>       return -ENODEV;
>   }
>   
> +struct rte_eth_dev *
> +rte_get_eth_dev_by_name(const char *name)
> +{
> +     uint16_t pid;
> +
> +     if (rte_eth_dev_get_port_by_name(name, &pid))
> +             return NULL;
> +
> +     return &rte_eth_devices[pid];
> +}
> +
>   static int
>   eth_err(uint16_t port_id, int ret)
>   {
> diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
> index c2fb0669a4..7e3797189b 100644
> --- a/lib/ethdev/version.map
> +++ b/lib/ethdev/version.map
> @@ -256,6 +256,7 @@ EXPERIMENTAL {
>       rte_flow_flex_item_create;
>       rte_flow_flex_item_release;
>       rte_flow_pick_transfer_proxy;
> +

This is unintendent change.

>   };
>   
>   INTERNAL {
> @@ -282,4 +283,5 @@ INTERNAL {
>       rte_eth_representor_id_get;
>       rte_eth_switch_domain_alloc;
>       rte_eth_switch_domain_free;
> +     rte_get_eth_dev_by_name;

Please add in a sorted way.

>   };