Hi Ferruh,


Please find our explanation for your comment according to device initialization
by function nthw_pci_dev_init.

 

Our NIC handles multiple ports within the same PCI Bus Device Function (BDF) - hence, we need to create several ETH devices during the probe init.
Allocation of multiple eth devices is handled inside the nthw_pci_dev_init() function.
It appears to be similar to what the Octeontx driver does.

 

We believe the current implementation fits better for our PMD, so we would like to keep it as is, unless it is a blocking requirement for upstreaming.

Please let us know.

 

BR,

Serhii

 

From: Ferruh Yigit <ferruh.yigit@amd.com>
Date: Friday, 5 July 2024 at 01:44
To: Serhii Iliushyk <sil-plv@napatech.com>, dev@dpdk.org <dev@dpdk.org>
Cc: Mykola Kostenok <mko-plv@napatech.com>, Christian Koue Muf <ckm@napatech.com>, andrew.rybchenko@oktetlabs.ru <andrew.rybchenko@oktetlabs.ru>
Subject: Re: [PATCH v5 03/23] net/ntnic: add minimal initialization for PCI device

On 6/27/2024 8:38 AM, Serhii Iliushyk wrote:
> add implementation for probe/init and remove/deinit of the PCI device
>
> Signed-off-by: Serhii Iliushyk <sil-plv@napatech.com>
> ---
>  drivers/net/ntnic/ntnic_ethdev.c | 104 ++++++++++++++++++++++++++++++-
>  1 file changed, 103 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/ntnic/ntnic_ethdev.c b/drivers/net/ntnic/ntnic_ethdev.c
> index 3079bd98e4..e9a584877f 100644
> --- a/drivers/net/ntnic/ntnic_ethdev.c
> +++ b/drivers/net/ntnic/ntnic_ethdev.c
> @@ -17,14 +17,63 @@
>  /* Global static variables: */

>  static int
> -nthw_pci_dev_init(struct rte_pci_device *pci_dev __rte_unused)
> +nthw_pci_dev_init(struct rte_pci_device *pci_dev)
>  {
> +     uint32_t n_port_mask = -1;      /* All ports enabled by default */
> +     int n_phy_ports;
> +     NT_LOG_DBGX(DEBUG, NTNIC, "Dev %s PF #%i Init : %02x:%02x:%i\n", pci_dev->name,
> +             pci_dev->addr.function, pci_dev->addr.bus, pci_dev->addr.devid,
> +             pci_dev->addr.function);
> +
> +     n_phy_ports = 0;
> +
> +     for (int n_intf_no = 0; n_intf_no < n_phy_ports; n_intf_no++) {
> +             struct rte_eth_dev *eth_dev = NULL;
> +             char name[32];
> +
> +             if ((1 << n_intf_no) & ~n_port_mask)
> +                     continue;
> +
> +             snprintf(name, sizeof(name), "ntnic%d", n_intf_no);
> +
> +             eth_dev = rte_eth_dev_allocate(name);   /* TODO: name */
>

Is this TODO still valid?

> +
> +             if (!eth_dev) {
> +                     NT_LOG_DBGX(ERR, NTNIC, "%s: %s: error=%d\n",
> +                             (pci_dev->name[0] ? pci_dev->name : "NA"), name, -1);
> +                     return -1;
> +             }
> +
> +             NT_LOG_DBGX(DEBUG, NTNIC, "eth_dev %p, port_id %u, if_index %u\n",
> +                                     eth_dev, eth_dev->data->port_id, n_intf_no);
> +
> +
> +             struct rte_eth_link pmd_link;
> +             pmd_link.link_speed = RTE_ETH_SPEED_NUM_NONE;
> +             pmd_link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
> +             pmd_link.link_status = RTE_ETH_LINK_DOWN;
> +             pmd_link.link_autoneg = RTE_ETH_LINK_AUTONEG;
> +
> +             eth_dev->device = &pci_dev->device;
> +             eth_dev->data->dev_link = pmd_link;
> +             eth_dev->data->numa_node = pci_dev->device.numa_node;
>

rte_eth_copy_pci_info() should be setting numa_node, no need to
duplicate here.

Please consider using 'rte_eth_dev_create()' to help these kind of
boilerplate initialization. I did same comment below.

> +             eth_dev->dev_ops = NULL;
> +             eth_dev->state = RTE_ETH_DEV_ATTACHED;
>

Shouldn't need to set state directly, please call
'rte_eth_dev_probing_finish()' as a last thing in probe().
This call will set the state, also will do some other required work.

> +
> +             rte_eth_copy_pci_info(eth_dev, pci_dev);
> +             /* performs rte_eth_copy_pci_info() */
> +             eth_dev_pci_specific_init(eth_dev, pci_dev);
>

As comment says, 'eth_dev_pci_specific_init()' calls the
'rte_eth_copy_pci_info()', so why calling it twice, can clean the init
and remove the comment.

> +
> +             /* increase initialized ethernet devices - PF */
>

Is this comment valid here?

> +     }
> +
>        return 0;
>  }

>  static int
>  nthw_pci_dev_deinit(struct rte_eth_dev *eth_dev __rte_unused)
>  {
> +     NT_LOG_DBGX(DEBUG, NTNIC, "PCI device deinitialization\n");
>        return 0;
>  }

> @@ -33,13 +82,65 @@ nthw_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
>        struct rte_pci_device *pci_dev)
>  {
>        int res;
> +
> +     NT_LOG_DBGX(DEBUG, NTNIC, "pcidev: name: '%s'\n", pci_dev->name);
> +     NT_LOG_DBGX(DEBUG, NTNIC, "devargs: name: '%s'\n", pci_dev->https://linkprotect.cudasvc.com/url?a=https%3a%2f%2fdevice.name&c=E,1,V5OBhPhfiNSro_oj2bitVYkKYAABsPx-MKLgmN0q8g6JbMwgnOO1gKkhj3c3IxCQvEgNbC8ofBuQUVC8VRFgpnK79cZnIRMu2iT6BvGGlWlO-BxzlAK2eTwkKk9z&typo=1);
> +
> +     if (pci_dev->device.devargs) {
> +             NT_LOG_DBGX(DEBUG, NTNIC, "devargs: args: '%s'\n",
> +                     (pci_dev->device.devargs->args ? pci_dev->device.devargs->args : "NULL"));
> +             NT_LOG_DBGX(DEBUG, NTNIC, "devargs: data: '%s'\n",
> +                     (pci_dev->device.devargs->data ? pci_dev->device.devargs->data : "NULL"));
> +     }
> +
> +     const int n_rte_has_pci = rte_eal_has_pci();
> +     NT_LOG(DBG, NTNIC, "has_pci=%d\n", n_rte_has_pci);
> +
> +     if (n_rte_has_pci == 0) {
> +             NT_LOG(ERR, NTNIC, "has_pci=%d: this PMD needs hugepages\n", n_rte_has_pci);
>

It is checking PCI bus, but log is about hugepages.

> +             return -1;
> +     }
>

What is the intention here for the 'n_rte_has_pci' check?
If pci bus is disabled, this probe call should not be called at all, in
that manner this check is useless.

> +
> +     const int n_rte_vfio_no_io_mmu_enabled = rte_vfio_noiommu_is_enabled();
> +     NT_LOG(DBG, NTNIC, "vfio_no_iommu_enabled=%d\n", n_rte_vfio_no_io_mmu_enabled);
> +
> +     if (n_rte_vfio_no_io_mmu_enabled) {
> +             NT_LOG(ERR, NTNIC, "vfio_no_iommu_enabled=%d: this PMD needs VFIO IOMMU\n",
> +                     n_rte_vfio_no_io_mmu_enabled);
> +             return -1;
> +     }
> +
> +     const enum rte_iova_mode n_rte_io_va_mode = rte_eal_iova_mode();
> +     NT_LOG(DBG, NTNIC, "iova mode=%d\n", n_rte_io_va_mode);
> +
> +     if (n_rte_io_va_mode != RTE_IOVA_PA) {
> +             NT_LOG(WRN, NTNIC, "iova mode (%d) should be PA for performance reasons\n",
> +                     n_rte_io_va_mode);
> +     }
>

Is this comment valid?
Won't iommu be used for address translation both IOVA_VA and IOVA_PA
mode? How much performance improvement we are talking about?

> +
> +     NT_LOG(DBG, NTNIC,
> +             "busid=" PCI_PRI_FMT
> +             " pciid=%04x:%04x_%04x:%04x locstr=%s @ numanode=%d: drv=%s drvalias=%s\n",
> +             pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid,
> +             pci_dev->addr.function, pci_dev->id.vendor_id, pci_dev->id.device_id,
> +             pci_dev->id.subsystem_vendor_id, pci_dev->id.subsystem_device_id,
> +             pci_dev->name[0] ? pci_dev->name : "NA",        /* locstr */
> +             pci_dev->device.numa_node,
> +             pci_dev->driver->https://linkprotect.cudasvc.com/url?a=https%3a%2f%2fdriver.name&c=E,1,5_F-hniqJt0w3GPG-nclekQA5nc17FmonNihbX6JRyTd2j6RA7sGlIJ9gBTq_T3p01-6DsO244rVP-3PFWsjaqFJ44V77omLRmCrWio_VmtBudxV30mhcou9&typo=1 ? pci_dev->driver->https://linkprotect.cudasvc.com/url?a=https%3a%2f%2fdriver.name&c=E,1,frkLGUymGHY5ydpNKtk3f74kNLD7KupomSqM7BLD7aOyg83VIXzNCZfCJI9PDNuIjnexaqLnATwKsfczOq1DvmsNBhMbEgTT0QrJ7RKHxIw,&typo=1 : "NA",
> +             pci_dev->driver->driver.alias ? pci_dev->driver->driver.alias : "NA");
> +
> +
>        res = nthw_pci_dev_init(pci_dev);
>

Instead of calling 'nthw_pci_dev_init()' directly, you can use
'rte_eth_dev_create()' and pass 'nthw_pci_dev_init()' as paramter, this
helps on some set of boilerplate code.

> +
> +     NT_LOG_DBGX(DEBUG, NTNIC, "leave: res=%d\n", res);
>        return res;
>

Doesn't really matter but mostly 'ret' is used as short version of
"return value", what 'res' is?


>  }

>  static int
>  nthw_pci_remove(struct rte_pci_device *pci_dev)
>  {
> +     NT_LOG_DBGX(DEBUG, NTNIC);
> +
>        return rte_eth_dev_pci_generic_remove(pci_dev, nthw_pci_dev_deinit);
>  }

> @@ -48,6 +149,7 @@ static struct rte_pci_driver rte_nthw_pmd = {
>                .name = "net_ntnic",
>        },

> +     .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
>        .probe = nthw_pci_probe,
>        .remove = nthw_pci_remove,
>  };