DPDK patches and discussions
* [dpdk-dev] Segfault when eal thread executing mlx5 nic's lsc event
@ 2019-03-06  3:05 Zhaohui (zhaohui, Polestar)
  2019-03-06  6:02 ` Shahaf Shuler
  0 siblings, 1 reply; 6+ messages in thread
From: Zhaohui (zhaohui, Polestar) @ 2019-03-06  3:05 UTC (permalink / raw)
  To: dev, shahafs, yskoh; +Cc: xudingke, chenchanghu, wangyunjian

Hi:
    I think the flow list may be accessed at the same time by two different threads, which may cause errors. Does it need a lock to protect the flow list?

                                                                                                   Thanks
                                                                                                              Yunjian
(gdb) bt
#0 0x00007f54c9641237 in raise () from /usr/lib64/libc.so.6
#1 0x00007f54c9642928 in abort () from /usr/lib64/libc.so.6
#2 0x00000000006a8749 in PAT_abort ()
#3 0x00000000006a588d in patchIllInsHandler ()
#4 <signal handler called>
#5 0x00007f54c6acd2c8 in flow_list_destroy (dev=dev@entry=0xad8940 <rte_eth_devices+16512>, flow=0x1444b1b00, list=0x14455e618) at /usr/src/debug/dpdk-mlx4-pmd-18.11/drivers/net/mlx5/mlx5_flow.c:2150
#6 0x00007f54c6acfe1b in mlx5_flow_list_flush (dev=0xad8940 <rte_eth_devices+16512>, list=0x14455e618) at /usr/src/debug/dpdk-mlx4-pmd-18.11/drivers/net/mlx5/mlx5_flow.c:2170
#7 0x00007f54c6ac5cc4 in mlx5_traffic_disable (dev=<optimized out>) at /usr/src/debug/dpdk-mlx4-pmd-18.11/drivers/net/mlx5/mlx5_trigger.c:384
#8 0x00007f54c6ac637d in mlx5_traffic_restart (dev=0xad8940 <rte_eth_devices+16512>) at /usr/src/debug/dpdk-mlx4-pmd-18.11/drivers/net/mlx5/mlx5_trigger.c:400
#9 0x00007f54d1db3bba in rte_eth_dev_default_mac_addr_set (port_id=<optimized out>, addr=0x140200f40) at /usr/src/debug/dpdk-18.11/lib/librte_ethdev/rte_ethdev.c:3230
#10 0x00007f54cd8dee81 in mac_address_slaves_update (bonded_eth_dev=bonded_eth_dev@entry=0xad48c0 <rte_eth_devices>) at /usr/src/debug/dpdk-18.11/drivers/net/bonding/rte_eth_bond_pmd.c:1842
#11 0x00007f54cd8e0c31 in bond_ethdev_lsc_event_callback (port_id=<optimized out>, type=<optimized out>, param=<optimized out>, ret_param=<optimized out>) at /usr/src/debug/dpdk-18.11/drivers/net/bonding/rte_eth_bond_pmd.c:3070
#12 0x00007f54cd8e117b in bond_ethdev_slave_lsc_delay (cb_arg=0xad48c0 <rte_eth_devices>) at /usr/src/debug/dpdk-18.11/drivers/net/bonding/rte_eth_bond_pmd.c:2298
#13 0x00007f54d25ebe5f in eal_alarm_callback (arg=<optimized out>) at /usr/src/debug/dpdk-18.11/lib/librte_eal/linuxapp/eal/eal_alarm.c:90
#14 0x00007f54d25ea8aa in eal_intr_process_interrupts (nfds=<optimized out>, events=<optimized out>) at /usr/src/debug/dpdk-18.11/lib/librte_eal/linuxapp/eal/eal_interrupts.c:838
#15 eal_intr_handle_interrupts (totalfds=<optimized out>, pfd=21) at /usr/src/debug/dpdk-18.11/lib/librte_eal/linuxapp/eal/eal_interrupts.c:885
#16 eal_intr_thread_main (arg=<optimized out>) at /usr/src/debug/dpdk-18.11/lib/librte_eal/linuxapp/eal/eal_interrupts.c:965
#17 0x00007f54cade6dd5 in start_thread () from /usr/lib64/libpthread.so.0
#18 0x00007f54c970950d in clone () from /usr/lib64/libc.so.6
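
For reference, here is a minimal standalone sketch of the locking pattern the patch below applies: an rte_rwlock serializing a TAILQ-based flow list between writers (create/destroy/flush) and read-only walks. This is not the mlx5 code itself, and the demo_* names are hypothetical.

#include <sys/queue.h>
#include <stdlib.h>
#include <rte_rwlock.h>

struct demo_flow {
	TAILQ_ENTRY(demo_flow) next; /* Linked into demo_priv.flows. */
	int id;
};

struct demo_priv {
	rte_rwlock_t flows_rwlock;   /* Protects the flows list. */
	TAILQ_HEAD(demo_flows, demo_flow) flows;
};

static void
demo_priv_init(struct demo_priv *priv)
{
	rte_rwlock_init(&priv->flows_rwlock);
	TAILQ_INIT(&priv->flows);
}

static int
demo_flow_create(struct demo_priv *priv, int id)
{
	struct demo_flow *flow = calloc(1, sizeof(*flow));

	if (flow == NULL)
		return -1;
	flow->id = id;
	/* Writers take the lock exclusively. */
	rte_rwlock_write_lock(&priv->flows_rwlock);
	TAILQ_INSERT_TAIL(&priv->flows, flow, next);
	rte_rwlock_write_unlock(&priv->flows_rwlock);
	return 0;
}

static void
demo_flow_flush(struct demo_priv *priv)
{
	struct demo_flow *flow;

	rte_rwlock_write_lock(&priv->flows_rwlock);
	while ((flow = TAILQ_FIRST(&priv->flows)) != NULL) {
		TAILQ_REMOVE(&priv->flows, flow, next);
		free(flow);
	}
	rte_rwlock_write_unlock(&priv->flows_rwlock);
}

static int
demo_flow_count(struct demo_priv *priv)
{
	struct demo_flow *flow;
	int n = 0;

	/* Read-only walks may run concurrently with each other. */
	rte_rwlock_read_lock(&priv->flows_rwlock);
	TAILQ_FOREACH(flow, &priv->flows, next)
		++n;
	rte_rwlock_read_unlock(&priv->flows_rwlock);
	return n;
}

With a read/write lock the read-only walks stay concurrent, while any list modification (such as the flush running on the interrupt thread in the backtrace above) is exclusive.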


To solve this problem (core dump), I modified the code as follows (looking forward to your reply):

From: zhaohui8 <zhaohui8@huawei.com>
---
 drivers/net/mlx5/mlx5.c         |  1 +
 drivers/net/mlx5/mlx5.h         |  1 +
 drivers/net/mlx5/mlx5_flow.c    | 33 ++++++++++++++++++++++++++-------
 drivers/net/mlx5/mlx5_trigger.c | 12 +++++++++++-
 4 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 9e5cab1..e8ae816 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1195,6 +1195,7 @@
 			priv->tcf_context = NULL;
 		}
 	}
+	rte_rwlock_init(&priv->flows_rwlock);
 	TAILQ_INIT(&priv->flows);
 	TAILQ_INIT(&priv->ctrl_flows);
 	/* Hint libmlx5 to use PMD allocator for data plane resources */
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index bc500b2..cb8657c 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -202,6 +202,7 @@ struct priv {
 	unsigned int (*reta_idx)[]; /* RETA index table. */
 	unsigned int reta_idx_n; /* RETA index size. */
 	struct mlx5_drop drop_queue; /* Flow drop queues. */
+	rte_rwlock_t flows_rwlock; /* flows Lock. */
 	struct mlx5_flows flows; /* RTE Flow rules. */
 	struct mlx5_flows ctrl_flows; /* Control flow rules. */
 	LIST_HEAD(counters, mlx5_flow_counter) flow_counters;
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 97dc3e1..2c18602 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -2121,9 +2121,13 @@ struct rte_flow *
 		 const struct rte_flow_action actions[],
 		 struct rte_flow_error *error)
 {
-	return flow_list_create(dev,
+	struct rte_flow * flow;
+	rte_rwlock_write_lock(&((struct priv *)dev->data->dev_private)->flows_rwlock);
+	flow = flow_list_create(dev,
 				&((struct priv *)dev->data->dev_private)->flows,
 				attr, items, actions, error);
+	rte_rwlock_write_unlock(&((struct priv *)dev->data->dev_private)->flows_rwlock);
+	return flow;
 }
 
 /**
@@ -2235,12 +2239,13 @@ struct rte_flow *
 	struct priv *priv = dev->data->dev_private;
 	struct rte_flow *flow;
 	int ret = 0;
-
+	rte_rwlock_read_lock(&priv->flows_rwlock);
 	TAILQ_FOREACH(flow, &priv->flows, next) {
 		DRV_LOG(DEBUG, "port %u flow %p still referenced",
 			dev->data->port_id, (void *)flow);
 		++ret;
 	}
+	rte_rwlock_read_unlock(&priv->flows_rwlock);
 	return ret;
 }
 
@@ -2320,10 +2325,14 @@ struct rte_flow *
 	}
 	for (i = 0; i != priv->reta_idx_n; ++i)
 		queue[i] = (*priv->reta_idx)[i];
+	rte_rwlock_write_lock(&priv->flows_rwlock);
 	flow = flow_list_create(dev, &priv->ctrl_flows,
 				&attr, items, actions, &error);
-	if (!flow)
+	if (!flow) {
+		rte_rwlock_write_unlock(&priv->flows_rwlock);
 		return -rte_errno;
+	}
+	rte_rwlock_write_unlock(&priv->flows_rwlock);
 	return 0;
 }
 
@@ -2360,8 +2369,9 @@ struct rte_flow *
 		  struct rte_flow_error *error __rte_unused)
 {
 	struct priv *priv = dev->data->dev_private;
-
+	rte_rwlock_write_lock(&priv->flows_rwlock);
 	flow_list_destroy(dev, &priv->flows, flow);
+	rte_rwlock_write_unlock(&priv->flows_rwlock);
 	return 0;
 }
 
@@ -2376,8 +2386,9 @@ struct rte_flow *
 		struct rte_flow_error *error __rte_unused)
 {
 	struct priv *priv = dev->data->dev_private;
-
+	rte_rwlock_write_lock(&priv->flows_rwlock);
 	mlx5_flow_list_flush(dev, &priv->flows);
+	rte_rwlock_write_unlock(&priv->flows_rwlock);
 	return 0;
 }
 
@@ -2729,17 +2740,22 @@ struct rte_flow *
 	ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow);
 	if (ret)
 		goto error;
+	rte_rwlock_write_lock(&priv->flows_rwlock);
 	flow = flow_fdir_filter_lookup(dev, fdir_flow);
 	if (flow) {
 		rte_errno = EEXIST;
+		rte_rwlock_write_unlock(&priv->flows_rwlock);
 		goto error;
 	}
 	flow = flow_list_create(dev, &priv->flows, &fdir_flow->attr,
 				fdir_flow->items, fdir_flow->actions, NULL);
-	if (!flow)
+	if (!flow) {
+		rte_rwlock_write_unlock(&priv->flows_rwlock);
 		goto error;
+	}
 	assert(!flow->fdir);
 	flow->fdir = fdir_flow;
+	rte_rwlock_write_unlock(&priv->flows_rwlock);
 	DRV_LOG(DEBUG, "port %u created FDIR flow %p",
 		dev->data->port_id, (void *)flow);
 	return 0;
@@ -2773,6 +2789,7 @@ struct rte_flow *
 	ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow);
 	if (ret)
 		return -rte_errno;
+	rte_rwlock_write_lock(&priv->flows_rwlock);
 	flow = flow_fdir_filter_lookup(dev, &fdir_flow);
 	if (!flow) {
 		rte_errno = ENOENT;
@@ -2781,6 +2798,7 @@ struct rte_flow *
 	flow_list_destroy(dev, &priv->flows, flow);
 	DRV_LOG(DEBUG, "port %u deleted FDIR flow %p",
 		dev->data->port_id, (void *)flow);
+	rte_rwlock_write_unlock(&priv->flows_rwlock);
 	return 0;
 }
 
@@ -2817,8 +2835,9 @@ struct rte_flow *
 flow_fdir_filter_flush(struct rte_eth_dev *dev)
 {
 	struct priv *priv = dev->data->dev_private;
-
+	rte_rwlock_write_lock(&priv->flows_rwlock);
 	mlx5_flow_list_flush(dev, &priv->flows);
+	rte_rwlock_write_unlock(&priv->flows_rwlock);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index e2a9bb7..b95c7cf 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -188,12 +188,15 @@
 			dev->data->port_id);
 		goto error;
 	}
+	rte_rwlock_read_lock(&priv->flows_rwlock);
 	ret = mlx5_flow_start(dev, &priv->flows);
 	if (ret) {
 		DRV_LOG(DEBUG, "port %u failed to set flows",
 			dev->data->port_id);
+		rte_rwlock_read_unlock(&priv->flows_rwlock);
 		goto error;
 	}
+	rte_rwlock_read_unlock(&priv->flows_rwlock);
 	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
 	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
 	mlx5_dev_interrupt_handler_install(dev);
@@ -202,7 +205,9 @@
 	ret = rte_errno; /* Save rte_errno before cleanup. */
 	/* Rollback. */
 	dev->data->dev_started = 0;
+	rte_rwlock_write_lock(&priv->flows_rwlock);
 	mlx5_flow_stop(dev, &priv->flows);
+	rte_rwlock_write_unlock(&priv->flows_rwlock);
 	mlx5_traffic_disable(dev);
 	mlx5_txq_stop(dev);
 	mlx5_rxq_stop(dev);
@@ -230,7 +235,9 @@
 	rte_wmb();
 	usleep(1000 * priv->rxqs_n);
 	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
+	rte_rwlock_write_lock(&priv->flows_rwlock);
 	mlx5_flow_stop(dev, &priv->flows);
+	rte_rwlock_write_unlock(&priv->flows_rwlock);
 	mlx5_traffic_disable(dev);
 	mlx5_rx_intr_vec_disable(dev);
 	mlx5_dev_interrupt_handler_uninstall(dev);
@@ -364,7 +371,9 @@
 	return 0;
 error:
 	ret = rte_errno; /* Save rte_errno before cleanup. */
+	rte_rwlock_write_lock(&priv->flows_rwlock);
 	mlx5_flow_list_flush(dev, &priv->ctrl_flows);
+	rte_rwlock_write_unlock(&priv->flows_rwlock);
 	rte_errno = ret; /* Restore rte_errno. */
 	return -rte_errno;
 }
@@ -380,8 +389,9 @@
 mlx5_traffic_disable(struct rte_eth_dev *dev)
 {
 	struct priv *priv = dev->data->dev_private;
-
+	rte_rwlock_write_lock(&priv->flows_rwlock);
 	mlx5_flow_list_flush(dev, &priv->ctrl_flows);
+	rte_rwlock_write_unlock(&priv->flows_rwlock);
 }
 

-----Original Message-----
From: wangyunjian
Sent: February 22, 2019 15:34
To: dev@dpdk.org; shahafs@mellanox.com; yskoh@mellanox.com
Cc: xudingke <xudingke@huawei.com>; Zhaohui (zhaohui, Polestar) <zhaohui8@huawei.com>
Subject: [dpdk-dev] Segfault when eal thread executing mlx5 nic's lsc event


* [dpdk-dev] Segfault when eal thread executing mlx5 nic's lsc event
@ 2020-10-29 16:42 Weifeng LI
  0 siblings, 0 replies; 6+ messages in thread
From: Weifeng LI @ 2020-10-29 16:42 UTC (permalink / raw)
  To: dev, shahafs, matan, viacheslavo, chas3
  Cc: liweifeng2, 863348577, Weifeng Li, zhaohui8

Hi,

     I am using the DPDK bonding PMD over mlx5 ports. There is a segmentation fault while starting the bond port. It happens because the EAL interrupt thread is processing an LSC interrupt while slave_configure is executing rte_eth_dev_rss_reta_update, and rte_eth_dev_rss_reta_update also touches the mlx5 flow list.

     I also found an earlier discussion of this issue:
https://mails.dpdk.org/archives/dev/2019-March/125929.html

     Does it need a lock to protect the mlx5 flow list? (See the sketch after the slave_configure excerpt below.)


int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	...

	/* Start device */
	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return -1;
	}

	/* If RSS is enabled for bonding, synchronize RETA */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		int i;
		struct bond_dev_private *internals;

		internals = bonded_eth_dev->data->dev_private;

		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id ==
					slave_eth_dev->data->port_id) {
				errval = rte_eth_dev_rss_reta_update(
						slave_eth_dev->data->port_id,
						&internals->reta_conf[0],
						internals->slaves[i].reta_size);
				if (errval != 0) {
					RTE_BOND_LOG(WARNING,
						"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
						" RSS Configuration for bonding may be inconsistent.",
						slave_eth_dev->data->port_id, errval);
				}
				break;
			}
		}
	}
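
The backtrace earlier in the thread shows that the bond LSC handling runs on the EAL interrupt thread (bond_ethdev_slave_lsc_delay is invoked from eal_alarm_callback inside eal_intr_thread_main). As a hedged sketch only, and not a fix verified against the bond or mlx5 PMDs, one way to keep the RETA sync from racing with that LSC work would be to defer it onto the same interrupt thread via rte_eal_alarm_set(), since alarm callbacks and interrupt callbacks are processed by that single thread. The reta_sync_* names below are hypothetical; the flow-list lock discussed above would still be the proper driver-level fix.

#include <rte_alarm.h>
#include <rte_ethdev.h>
#include <rte_log.h>

struct reta_sync_ctx {
	/* Must stay valid until the alarm callback has run. */
	uint16_t port_id;
	struct rte_eth_rss_reta_entry64 *reta_conf;
	uint16_t reta_size;
};

/* Runs on the EAL interrupt thread, serialized with LSC/alarm work. */
static void
reta_sync_cb(void *arg)
{
	struct reta_sync_ctx *ctx = arg;
	int ret;

	ret = rte_eth_dev_rss_reta_update(ctx->port_id, ctx->reta_conf,
					  ctx->reta_size);
	if (ret != 0)
		RTE_LOG(WARNING, EAL,
			"RETA update on port %u failed (err %d)\n",
			ctx->port_id, ret);
}

/* Schedule the RETA sync instead of calling it directly in slave_configure. */
static int
reta_sync_schedule(struct reta_sync_ctx *ctx)
{
	return rte_eal_alarm_set(1000 /* us */, reta_sync_cb, ctx);
}

This only illustrates the serialization point implied by the backtrace; it makes the RETA update asynchronous and its error handling log-only, so it is not a drop-in replacement for the existing code.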



* [dpdk-dev] Segfault when eal thread executing mlx5 nic's lsc event
@ 2019-02-22  7:33 wangyunjian
  0 siblings, 0 replies; 6+ messages in thread
From: wangyunjian @ 2019-02-22  7:33 UTC (permalink / raw)
  To: dev, shahafs, yskoh; +Cc: xudingke, Zhaohui (zhaohui, Polestar)

I think the flow list may be accessed at the same time by two
different threads, which may cause errors. Does it need a lock
to protect the flow list?

Thanks
          Yunjian

(gdb) bt
#0 0x00007f54c9641237 in raise () from /usr/lib64/libc.so.6
#1 0x00007f54c9642928 in abort () from /usr/lib64/libc.so.6
#2 0x00000000006a8749 in PAT_abort ()
#3 0x00000000006a588d in patchIllInsHandler ()
#4 <signal handler called>
#5 0x00007f54c6acd2c8 in flow_list_destroy (dev=dev@entry=0xad8940 <rte_eth_devices+16512>, flow=0x1444b1b00, list=0x14455e618)
at /usr/src/debug/dpdk-mlx4-pmd-18.11/drivers/net/mlx5/mlx5_flow.c:2150
#6 0x00007f54c6acfe1b in mlx5_flow_list_flush (dev=0xad8940 <rte_eth_devices+16512>, list=0x14455e618) at /usr/src/debug/dpdk-mlx4-pmd-18.11/drivers/net/mlx5/mlx5_flow.c:2170
#7 0x00007f54c6ac5cc4 in mlx5_traffic_disable (dev=<optimized out>) at /usr/src/debug/dpdk-mlx4-pmd-18.11/drivers/net/mlx5/mlx5_trigger.c:384
#8 0x00007f54c6ac637d in mlx5_traffic_restart (dev=0xad8940 <rte_eth_devices+16512>) at /usr/src/debug/dpdk-mlx4-pmd-18.11/drivers/net/mlx5/mlx5_trigger.c:400
#9 0x00007f54d1db3bba in rte_eth_dev_default_mac_addr_set (port_id=<optimized out>, addr=0x140200f40) at /usr/src/debug/dpdk-18.11/lib/librte_ethdev/rte_ethdev.c:3230
#10 0x00007f54cd8dee81 in mac_address_slaves_update (bonded_eth_dev=bonded_eth_dev@entry=0xad48c0 <rte_eth_devices>) at /usr/src/debug/dpdk-18.11/drivers/net/bonding/rte_eth_bond_pmd.c:1842
#11 0x00007f54cd8e0c31 in bond_ethdev_lsc_event_callback (port_id=<optimized out>, type=<optimized out>, param=<optimized out>, ret_param=<optimized out>)
at /usr/src/debug/dpdk-18.11/drivers/net/bonding/rte_eth_bond_pmd.c:3070
#12 0x00007f54cd8e117b in bond_ethdev_slave_lsc_delay (cb_arg=0xad48c0 <rte_eth_devices>) at /usr/src/debug/dpdk-18.11/drivers/net/bonding/rte_eth_bond_pmd.c:2298
#13 0x00007f54d25ebe5f in eal_alarm_callback (arg=<optimized out>) at /usr/src/debug/dpdk-18.11/lib/librte_eal/linuxapp/eal/eal_alarm.c:90
#14 0x00007f54d25ea8aa in eal_intr_process_interrupts (nfds=<optimized out>, events=<optimized out>) at /usr/src/debug/dpdk-18.11/lib/librte_eal/linuxapp/eal/eal_interrupts.c:838
#15 eal_intr_handle_interrupts (totalfds=<optimized out>, pfd=21) at /usr/src/debug/dpdk-18.11/lib/librte_eal/linuxapp/eal/eal_interrupts.c:885
#16 eal_intr_thread_main (arg=<optimized out>) at /usr/src/debug/dpdk-18.11/lib/librte_eal/linuxapp/eal/eal_interrupts.c:965
#17 0x00007f54cade6dd5 in start_thread () from /usr/lib64/libpthread.so.0
#18 0x00007f54c970950d in clone () from /usr/lib64/libc.so.6


Thread overview: 6+ messages
2019-03-06  3:05 [dpdk-dev] Segfault when eal thread executing mlx5 nic's lsc event Zhaohui (zhaohui, Polestar)
2019-03-06  6:02 ` Shahaf Shuler
2019-03-06  9:42   ` [dpdk-dev] Re: " Zhaohui (zhaohui, Polestar)
2019-03-06 18:59     ` Chas Williams
  -- strict thread matches above, loose matches on Subject: below --
2020-10-29 16:42 [dpdk-dev] Segfault when eal thread executing mlx5 nic's " Weifeng LI
2019-02-22  7:33 [dpdk-dev] Segfault when eal thread executing mlx5 nic's " wangyunjian
