gdb output from secondary process for port1 (non-working port, managed by secondary process)(gdb) p rte_eth_devices[1]->data[0]$3 = {name = "0000:00:08.0", '\000' <repeats 51 times>, rx_queues = 0x600177c28980, tx_queues = 0x600177c26900, nb_rx_queues = 1, nb_tx_queues = 1, sriov = {active = 0 '\000', nb_q_per_pool = 0 '\000', def_vmdq_idx = 0, def_pool_q_idx = 0}, dev_private = 0x600177c6d7c0, dev_link = {link_speed = 40000, link_duplex = 1, link_autoneg = 1, link_status = 1}, dev_conf = {link_speeds = 0, rxmode = {mq_mode = RTE_ETH_MQ_RX_NONE, mtu = 9000, max_lro_pkt_size = 0, offloads = 8193, reserved_64s = {0, 0}, reserved_ptrs = {0x0, 0x0}}, txmode = {mq_mode = RTE_ETH_MQ_TX_NONE,offloads = 32815, pvid = 0, hw_vlan_reject_tagged = 0 '\000', hw_vlan_reject_untagged = 0 '\000', hw_vlan_insert_pvid = 0 '\000', reserved_64s = {0, 0}, reserved_ptrs = {0x0, 0x0}}, lpbk_mode = 0, rx_adv_conf = {rss_conf = {rss_key = 0x0, rss_key_len = 0 '\000', rss_hf = 0}, vmdq_dcb_conf = {nb_queue_pools = (unknown: 0), enable_default_pool = 0 '\000', default_pool = 0 '\000', nb_pool_maps = 0 '\000', pool_map = {{vlan_id = 0, pools = 0} <repeats 64 times>}, dcb_tc = "\000\000\000\000\000\000\000"}, dcb_rx_conf = {nb_tcs = (unknown: 0),dcb_tc = "\000\000\000\000\000\000\000"}, vmdq_rx_conf = {nb_queue_pools = (unknown: 0), enable_default_pool = 0 '\000', default_pool = 0 '\000', enable_loop_back = 0 '\000', nb_pool_maps = 0 '\000', rx_mode = 0, pool_map = {{vlan_id = 0, pools = 0} <repeats 64 times>}}}, tx_adv_conf = {vmdq_dcb_tx_conf = {nb_queue_pools = (unknown: 0), dcb_tc = "\000\000\000\000\000\000\000"}, dcb_tx_conf = {nb_tcs = (unknown: 0), dcb_tc = "\000\000\000\000\000\000\000"}, vmdq_tx_conf = {nb_queue_pools = (unknown: 0)}}, dcb_capability_en = 0, intr_conf = {lsc = 1, rxq = 0, rmv = 0}},mtu = 1500, min_rx_buf_size = 4294967295, rx_mbuf_alloc_failed = 0, mac_addrs = 0x600177c6d7e0, mac_pool_sel = {0 <repeats 128 times>}, hash_mac_addrs = 0x0, port_id = 1, promiscuous = 1 
'\001', scattered_rx = 0 '\000', all_multicast = 1 '\001', dev_started = 1 '\001', lro = 0 '\000',dev_configured = 1 '\001', flow_configured = 0 '\000', rx_queue_state = "\001", '\000' <repeats 1022 times>, tx_queue_state = "\001", '\000' <repeats 1022 times>, dev_flags = 75, numa_node = -1, vlan_filter_conf = {ids = {0 <repeats 64 times>}}, owner = {id = 0, name = '\000' <repeats 63 times>},representor_id = 0, backer_port_id = 32, flow_ops_mutex = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' <repeats 39 times>, __align = 0}}(gdb) p rte_eth_devices[1]$4 = {rx_pkt_burst = 0x5591d17ddfb2 <mlx5_rx_burst>, tx_pkt_burst = 0x5591d197268c <mlx5_tx_burst_full>, tx_pkt_prepare = 0x0, rx_queue_count = 0x0, rx_descriptor_status = 0x5591d17dc903 <mlx5_rx_descriptor_status>, tx_descriptor_status = 0x5591d17f8dd0 <mlx5_tx_descriptor_status>,data = 0x600177cfb500, process_private = 0x600179b1b500, dev_ops = 0x5591d20ac180 <mlx5_dev_sec_ops>, device = 0x5591d5a55be0, intr_handle = 0x5591d5a54dd0, link_intr_cbs = {tqh_first = 0x0, tqh_last = 0x5591d3c575d8 <rte_eth_devices+16600>}, post_rx_burst_cbs = {0x600177c00180,0x0 <repeats 1023 times>}, pre_tx_burst_cbs = {0x600179e52d80, 0x0 <repeats 1023 times>}, state = RTE_ETH_DEV_ATTACHED, security_ctx = 0x0}gdb output from primary process for port1 (non-working port, managed by secondary process)(gdb) p rte_eth_devices[1]$3 = {rx_pkt_burst = 0x5591d17ddfb2 <mlx5_rx_burst>, tx_pkt_burst = 0x5591d197268c <mlx5_tx_burst_full>, tx_pkt_prepare = 0x0, rx_queue_count = 0x5591d17dcd20 <mlx5_rx_queue_count>, rx_descriptor_status = 0x5591d17dc903 <mlx5_rx_descriptor_status>,tx_descriptor_status = 0x5591d17f8dd0 <mlx5_tx_descriptor_status>, data = 0x600177cfb500, process_private = 0x600177c68540, dev_ops = 0x5591d20abde0 <mlx5_dev_ops>, device = 0x5591d6076440, intr_handle = 0x5591d60d0350, link_intr_cbs = {tqh_first = 0x0,tqh_last = 
0x5591d3c575d8 <rte_eth_devices+16600>}, post_rx_burst_cbs = {0x0 <repeats 1024 times>}, pre_tx_burst_cbs = {0x0 <repeats 1024 times>}, state = RTE_ETH_DEV_ATTACHED, security_ctx = 0x0}(gdb) p rte_eth_devices[1]->data[0]$4 = {name = "0000:00:08.0", '\000' <repeats 51 times>, rx_queues = 0x600177c28980, tx_queues = 0x600177c26900, nb_rx_queues = 1, nb_tx_queues = 1, sriov = {active = 0 '\000', nb_q_per_pool = 0 '\000', def_vmdq_idx = 0, def_pool_q_idx = 0}, dev_private = 0x600177c6d7c0, dev_link = {link_speed = 40000, link_duplex = 1, link_autoneg = 1, link_status = 1}, dev_conf = {link_speeds = 0, rxmode = {mq_mode = RTE_ETH_MQ_RX_NONE, mtu = 9000, max_lro_pkt_size = 0, offloads = 8193, reserved_64s = {0, 0}, reserved_ptrs = {0x0, 0x0}}, txmode = {mq_mode = RTE_ETH_MQ_TX_NONE,offloads = 32815, pvid = 0, hw_vlan_reject_tagged = 0 '\000', hw_vlan_reject_untagged = 0 '\000', hw_vlan_insert_pvid = 0 '\000', reserved_64s = {0, 0}, reserved_ptrs = {0x0, 0x0}}, lpbk_mode = 0, rx_adv_conf = {rss_conf = {rss_key = 0x0, rss_key_len = 0 '\000', rss_hf = 0}, vmdq_dcb_conf = {nb_queue_pools = (unknown: 0), enable_default_pool = 0 '\000', default_pool = 0 '\000', nb_pool_maps = 0 '\000', pool_map = {{vlan_id = 0, pools = 0} <repeats 64 times>}, dcb_tc = "\000\000\000\000\000\000\000"}, dcb_rx_conf = {nb_tcs = (unknown: 0),dcb_tc = "\000\000\000\000\000\000\000"}, vmdq_rx_conf = {nb_queue_pools = (unknown: 0), enable_default_pool = 0 '\000', default_pool = 0 '\000', enable_loop_back = 0 '\000', nb_pool_maps = 0 '\000', rx_mode = 0, pool_map = {{vlan_id = 0, pools = 0} <repeats 64 times>}}}, tx_adv_conf = {vmdq_dcb_tx_conf = {nb_queue_pools = (unknown: 0), dcb_tc = "\000\000\000\000\000\000\000"}, dcb_tx_conf = {nb_tcs = (unknown: 0), dcb_tc = "\000\000\000\000\000\000\000"}, vmdq_tx_conf = {nb_queue_pools = (unknown: 0)}}, dcb_capability_en = 0, intr_conf = {lsc = 1, rxq = 0, rmv = 0}},mtu = 1500, min_rx_buf_size = 4294967295, rx_mbuf_alloc_failed = 0, mac_addrs = 0x600177c6d7e0, 
mac_pool_sel = {0 <repeats 128 times>}, hash_mac_addrs = 0x0, port_id = 1, promiscuous = 1 '\001', scattered_rx = 0 '\000', all_multicast = 1 '\001', dev_started = 1 '\001', lro = 0 '\000',dev_configured = 1 '\001', flow_configured = 0 '\000', rx_queue_state = "\001", '\000' <repeats 1022 times>, tx_queue_state = "\001", '\000' <repeats 1022 times>, dev_flags = 75, numa_node = -1, vlan_filter_conf = {ids = {0 <repeats 64 times>}}, owner = {id = 0, name = '\000' <repeats 63 times>},representor_id = 0, backer_port_id = 32, flow_ops_mutex = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' <repeats 39 times>, __align = 0}}(gdb)gdb output from secondary process for port0 (working port, managed by primary process)(gdb) p rte_eth_devices[0]$1 = {rx_pkt_burst = 0x5591d17ddfb2 <mlx5_rx_burst>, tx_pkt_burst = 0x5591d197268c <mlx5_tx_burst_full>, tx_pkt_prepare = 0x0, rx_queue_count = 0x0, rx_descriptor_status = 0x5591d17dc903 <mlx5_rx_descriptor_status>, tx_descriptor_status = 0x5591d17f8dd0 <mlx5_tx_descriptor_status>,data = 0x600177cf9d00, process_private = 0x600179e49440, dev_ops = 0x5591d20ac180 <mlx5_dev_sec_ops>, device = 0x5591d5a55fa0, intr_handle = 0x5591d5a4dd20, link_intr_cbs = {tqh_first = 0x0, tqh_last = 0x5591d3c53558 <rte_eth_devices+88>}, post_rx_burst_cbs = {0x0 <repeats 1024 times>},pre_tx_burst_cbs = {0x0 <repeats 1024 times>}, state = RTE_ETH_DEV_ATTACHED, security_ctx = 0x0}(gdb) p rte_eth_devices[0]->data[0]$2 = {name = "0000:00:07.0", '\000' <repeats 51 times>, rx_queues = 0x600176c9c980, tx_queues = 0x600176c9a900, nb_rx_queues = 1, nb_tx_queues = 1, sriov = {active = 0 '\000', nb_q_per_pool = 0 '\000', def_vmdq_idx = 0, def_pool_q_idx = 0}, dev_private = 0x600177d29d80, dev_link = {link_speed = 40000, link_duplex = 1, link_autoneg = 1, link_status = 1}, dev_conf = {link_speeds = 0, rxmode = {mq_mode = RTE_ETH_MQ_RX_NONE, mtu = 9000, 
max_lro_pkt_size = 0, offloads = 8193, reserved_64s = {0, 0}, reserved_ptrs = {0x0, 0x0}}, txmode = {mq_mode = RTE_ETH_MQ_TX_NONE,offloads = 32815, pvid = 0, hw_vlan_reject_tagged = 0 '\000', hw_vlan_reject_untagged = 0 '\000', hw_vlan_insert_pvid = 0 '\000', reserved_64s = {0, 0}, reserved_ptrs = {0x0, 0x0}}, lpbk_mode = 0, rx_adv_conf = {rss_conf = {rss_key = 0x0, rss_key_len = 0 '\000', rss_hf = 0}, vmdq_dcb_conf = {nb_queue_pools = (unknown: 0), enable_default_pool = 0 '\000', default_pool = 0 '\000', nb_pool_maps = 0 '\000', pool_map = {{vlan_id = 0, pools = 0} <repeats 64 times>}, dcb_tc = "\000\000\000\000\000\000\000"}, dcb_rx_conf = {nb_tcs = (unknown: 0),dcb_tc = "\000\000\000\000\000\000\000"}, vmdq_rx_conf = {nb_queue_pools = (unknown: 0), enable_default_pool = 0 '\000', default_pool = 0 '\000', enable_loop_back = 0 '\000', nb_pool_maps = 0 '\000', rx_mode = 0, pool_map = {{vlan_id = 0, pools = 0} <repeats 64 times>}}}, tx_adv_conf = {vmdq_dcb_tx_conf = {nb_queue_pools = (unknown: 0), dcb_tc = "\000\000\000\000\000\000\000"}, dcb_tx_conf = {nb_tcs = (unknown: 0), dcb_tc = "\000\000\000\000\000\000\000"}, vmdq_tx_conf = {nb_queue_pools = (unknown: 0)}}, dcb_capability_en = 0, intr_conf = {lsc = 1, rxq = 0, rmv = 0}},mtu = 1500, min_rx_buf_size = 4294967295, rx_mbuf_alloc_failed = 0, mac_addrs = 0x600177d29da0, mac_pool_sel = {0 <repeats 128 times>}, hash_mac_addrs = 0x0, port_id = 0, promiscuous = 1 '\001', scattered_rx = 0 '\000', all_multicast = 1 '\001', dev_started = 1 '\001', lro = 0 '\000',dev_configured = 1 '\001', flow_configured = 0 '\000', rx_queue_state = "\001", '\000' <repeats 1022 times>, tx_queue_state = "\001", '\000' <repeats 1022 times>, dev_flags = 75, numa_node = -1, vlan_filter_conf = {ids = {0 <repeats 64 times>}}, owner = {id = 0, name = '\000' <repeats 63 times>},representor_id = 0, backer_port_id = 32, flow_ops_mutex = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __elision = 0, __list = 
{__prev = 0x0, __next = 0x0}}, __size = '\000' <repeats 39 times>, __align = 0}}
>> Only one process in any moment of time manages the queue, no shared (between process) queue data sending is allowed.gdb output from primary process for port0 (working port, managed by primary process)(gdb) p rte_eth_devices[0]$1 = {rx_pkt_burst = 0x5591d17ddfb2 <mlx5_rx_burst>, tx_pkt_burst = 0x5591d197268c <mlx5_tx_burst_full>, tx_pkt_prepare = 0x0, rx_queue_count = 0x5591d17dcd20 <mlx5_rx_queue_count>, rx_descriptor_status = 0x5591d17dc903 <mlx5_rx_descriptor_status>,tx_descriptor_status = 0x5591d17f8dd0 <mlx5_tx_descriptor_status>, data = 0x600177cf9d00, process_private = 0x600176cdc600, dev_ops = 0x5591d20abde0 <mlx5_dev_ops>, device = 0x5591d6076800, intr_handle = 0x5591d606e580, link_intr_cbs = {tqh_first = 0x0,tqh_last = 0x5591d3c53558 <rte_eth_devices+88>}, post_rx_burst_cbs = {0x600177c69100, 0x0 <repeats 1023 times>}, pre_tx_burst_cbs = {0x600179e52cc0, 0x0 <repeats 1023 times>}, state = RTE_ETH_DEV_ATTACHED, security_ctx = 0x0}(gdb) p rte_eth_devices[0]->data[0]$2 = {name = "0000:00:07.0", '\000' <repeats 51 times>, rx_queues = 0x600176c9c980, tx_queues = 0x600176c9a900, nb_rx_queues = 1, nb_tx_queues = 1, sriov = {active = 0 '\000', nb_q_per_pool = 0 '\000', def_vmdq_idx = 0, def_pool_q_idx = 0}, dev_private = 0x600177d29d80, dev_link = {link_speed = 40000, link_duplex = 1, link_autoneg = 1, link_status = 1}, dev_conf = {link_speeds = 0, rxmode = {mq_mode = RTE_ETH_MQ_RX_NONE, mtu = 9000, max_lro_pkt_size = 0, offloads = 8193, reserved_64s = {0, 0}, reserved_ptrs = {0x0, 0x0}}, txmode = {mq_mode = RTE_ETH_MQ_TX_NONE,offloads = 32815, pvid = 0, hw_vlan_reject_tagged = 0 '\000', hw_vlan_reject_untagged = 0 '\000', hw_vlan_insert_pvid = 0 '\000', reserved_64s = {0, 0}, reserved_ptrs = {0x0, 0x0}}, lpbk_mode = 0, rx_adv_conf = {rss_conf = {rss_key = 0x0, rss_key_len = 0 '\000', rss_hf = 0}, vmdq_dcb_conf = {nb_queue_pools = (unknown: 0), enable_default_pool = 0 '\000', default_pool = 0 '\000', nb_pool_maps = 0 '\000', pool_map = {{vlan_id = 0, pools = 0} 
<repeats 64 times>}, dcb_tc = "\000\000\000\000\000\000\000"}, dcb_rx_conf = {nb_tcs = (unknown: 0),dcb_tc = "\000\000\000\000\000\000\000"}, vmdq_rx_conf = {nb_queue_pools = (unknown: 0), enable_default_pool = 0 '\000', default_pool = 0 '\000', enable_loop_back = 0 '\000', nb_pool_maps = 0 '\000', rx_mode = 0, pool_map = {{vlan_id = 0, pools = 0} <repeats 64 times>}}}, tx_adv_conf = {vmdq_dcb_tx_conf = {nb_queue_pools = (unknown: 0), dcb_tc = "\000\000\000\000\000\000\000"}, dcb_tx_conf = {nb_tcs = (unknown: 0), dcb_tc = "\000\000\000\000\000\000\000"}, vmdq_tx_conf = {nb_queue_pools = (unknown: 0)}}, dcb_capability_en = 0, intr_conf = {lsc = 1, rxq = 0, rmv = 0}},mtu = 1500, min_rx_buf_size = 4294967295, rx_mbuf_alloc_failed = 0, mac_addrs = 0x600177d29da0, mac_pool_sel = {0 <repeats 128 times>}, hash_mac_addrs = 0x0, port_id = 0, promiscuous = 1 '\001', scattered_rx = 0 '\000', all_multicast = 1 '\001', dev_started = 1 '\001', lro = 0 '\000',dev_configured = 1 '\001', flow_configured = 0 '\000', rx_queue_state = "\001", '\000' <repeats 1022 times>, tx_queue_state = "\001", '\000' <repeats 1022 times>, dev_flags = 75, numa_node = -1, vlan_filter_conf = {ids = {0 <repeats 64 times>}}, owner = {id = 0, name = '\000' <repeats 63 times>},representor_id = 0, backer_port_id = 32, flow_ops_mutex = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' <repeats 39 times>, __align = 0}}
Below are the logs from mlx5_txq_devx_obj_new, which is called by proc0 (rte_proc_primary) for port 1, and the logs from txq_uar_init_secondary, which is called by proc1 (rte_proc_secondary) for port 1:
Core:0 04/10/24 06:45:47.626580 UTC mlx5_net: Core 0 mlx5_txq_devx_obj_new 1563 ppriv: 0x600177c68540 ,ppriv->uar_table: 0x600177c68548, txq_ctrl->uar_mmap_offset:0, ppriv->uar_table[txq_data->idx]:0x7f0411ae3800, txq_data->idx: 0, txq_data->db_nc:0
Core:1 04/10/24 06:45:47.767512 UTC mlx5_net: priv: 0x600177c6d7c0, priv->sh: 0x600176c25a00, priv->sh->pppriv: 0x600177c68540
Core:1 04/10/24 06:45:47.767528 UTC mlx5_net: Core 1 txq_uar_init_secondary 535 txq_ctrl:0x600177c04980 priv:0x600177c6d7c0
Core:1 04/10/24 06:45:47.767553 UTC mlx5_net: Core 1 txq_uar_init_secondary 562 primary_ppriv->uar_table: 0x600177c68548 ,uar_va:7f0411ae3800 offset:800 addr:0x7f0425b18800
Core:1 04/10/24 06:45:47.767604 UTC mlx5_net: Core 1 txq_uar_init_secondary 566 port 1 of txq 0 ppriv:0x600179b1b500 ppriv->uar_table[txq->idx]:0x7f0425b18800, txq->idx:0, ppriv->uar_table[0]:0x7f0425b18800,ppriv->uar_table[1]:(nil)
@viacheslavo@nvidia.com I noticed that data->dev_private->sh->tx_uar.obj has mmap_offset set to 0 in the VF case, but it is non-zero in the PF case. Is that okay? This object is returned by the glue layer via uar = mlx5_glue->devx_alloc_uar(cdev->ctx, uar_mapping);
VF:
(gdb) p *(struct mlx5dv_devx_uar *) $7
$9 = {reg_addr = 0x7f041b02d800, base_addr = 0x7f041b02d000, page_id = 26, mmap_off = 0, comp_mask = 0}
PF:
Please note that MLX PF RSS works fine for us: there we have 4 queues, where q0 is managed by the primary process (proc0) and q1, q2, q3 are managed by the secondary processes proc1, proc2, proc3 respectively. We are only seeing the issue on the MLX VF.
txq_ctrl->uar_mmap_offset:9441280, ppriv->uar_table[txq_data->idx]:0x7ff88d2be800, txq_data->idx: 0, txq_data->db_nc:0
Also, we are using MLNX_OFED_LINUX-5.8-2.0.3.0-ubuntu20.04-x86_64.
If it works better for you, we can get on a Zoom call to discuss this. Please let us know a convenient time.
Thanks,
Samar